;;;;====================================================================
;;;; ID3 Algorithm
;;;; Author: Tam T. Lien
;;;; Date:   13-Jul-1995
;;;;====================================================================
;;;; CHANGE HISTORY
;;;;
;;;; 23-8-95 Kevin Korb: bug fixes & cosmetic changes
;;;;
;;;;====================================================================


;;;*********************************************************************
;;; Declarations
;;;*********************************************************************

(defstruct property name values)

(defstruct classifier name values)

;;; A decision tree is either a symbol representing a leaf or a structure
;;; where decision-tree-feature is the number (starting from 1) of the
;;; feature being tested and decision-tree-subtrees is an assoc list of the
;;; form ((value1 subtree1) (value2 subtree2) ... ) representing the
;;; branches and subtrees

(defstruct decision-tree feature subtrees)


;;;*********************************************************************
;;; Public functions
;;;*********************************************************************


(defun ID3 (examples properties classifier)
  "This funciton implements the ID3 algorithm.
	examples - the list of examples, each example is represented as a
	           list of property name-value pairs. e.g,

		   (((Risk . high) (Credit-History . bad)
		     (Debt . high) (Collateral . none) (Income . 0-15K))
		    ((Risk . high) (Credit-History . unknown)
		     (Debt . high) (Collateral . none) (Income . 15-35K)))

	properties - a list of 'property' structures.  e.g,
			(list
			 (make-property :name 'Credit-History
			                :values '(good bad unknown))
			 (make-property :name 'Debt
				        :values '(high low)))

	classifier - a structure of type 'classifier' that is used to
	             classify the examples.  e.g,
			(make-classifier :name 'Risk
					 :values '(high moderate low))

   This function constructs a decision tree in a top-down fashion.  It
   selects a property to test at the current node of the tree and uses
   this test to partition the set of examples; the algorithm then
   recursively constructs a subtree for each partition.  This continues
   until all members of the partition are in the same class; that class
   becomes a leaf node of the tree."

  (let ((info (information-content examples classifier)))
    (cond
     ((null examples)
      "unable to classify: no examples")
     ((null properties)
      (assoc (classifier-name classifier)(car examples)))
     ((zerop info)
      (assoc (classifier-name classifier) (car examples)))
     (t
      (let ((property (select-property examples properties classifier)))
	(make-decision-tree
	 :feature 
	   (property-name property)
	 :subtrees 
	   (mapcar #'(lambda (v)
		       (cons v (ID3
				(select-partition
				 examples
				 (cons (property-name property) v))
				(remove property properties)
				classifier)))
		   (property-values property))))))))

;;;*********************************************************************
;;; Private functions
;;;*********************************************************************

(defun information-content (examples classifier)

  "Calculates the information content of 'examples' given 'classifier'."

  ;; count # of examples which falls into each of the class of 'classifier'
  
  (let (
	;; initialize class-counts to be a list of cons, the 'car' of each
	;; cons is a classifier value and the 'cdr' is 0.  e.g, if the
	;; values of 'classifier' is: 
	;;	(high moderate low)
	;; then 'class-count' is initialized to be
	;;	((high . 0) (moderate . 0) (low . 0))

	(class-counts
	 (mapcar #'(lambda (x)
		     (cons x 0))
		 (classifier-values classifier)))
	;; initialize 'size' to be the length of 'examples'
	(size (length examples)))

    ;; count # of instances in each class
    (dolist (instance examples)
	    (let ((pv (cdr (assoc (classifier-name classifier)
					      instance))))
	      (incf (cdr (assoc pv class-counts)))))

    ;; compute info content of examples
    (sum #'(lambda (x)
	     (if (= (cdr x) 0)
		 0
	       (* -1
		  (/ (cdr x) size)
		  (log (/ (cdr x) size) 2))))
	 class-counts)))

;;;---------------------------------------------------------------------

(defun sum (f list-of-numbers)

  "Returns the sum of (f 'list-of-numbers').  e.g, we apply the function
   'f' to each element of 'list-of-numbers', and then returns the sum of
   the results."

  (apply '+ (mapcar f list-of-numbers)))

;;;---------------------------------------------------------------------

(defun select-property (examples properties classifier) 

   "Returns the property which maximizes information gain."

   (let ((E-list (mapcar 
		  #'(lambda (property)
		      (cons (expected-info examples property classifier)
			    property))
		  properties)))
     ;; returns the property which maximizes information gain (minimizes E)
     (cdr (assoc (apply 'min (mapcar #'car E-list)) E-list))))

;;;---------------------------------------------------------------------

(defun expected-info (examples property classifier)

  "Returns the expected information needed to complete the tree after
   making 'property' the root."

  (let ((parts (partition examples property))
	(examples-length (length examples)))
    (sum #'(lambda (part-i)
	     (* 
	      (/ (length part-i) examples-length)
	      (information-content part-i classifier)))
	 parts)))

;;;---------------------------------------------------------------------

(defun partition (examples property)
  "Partition the 'examples' using 'property'."
  
  (let ((p-name (property-name property)))
    (mapcar #'(lambda (p-val)
		(select-partition examples (cons p-name p-val)))
	    (property-values property))))

;;;---------------------------------------------------------------------

(defun select-partition (examples p &aux result)
  "Returns a list of examples having property 'p'."
  
  (dolist (ex examples)
	  (if (member p ex :test 'equal)
	      (push ex result)))
  result)
    


Disclaimer