;;;;====================================================================
;;;; ID3 Algorithm
;;;; Author: Tam T. Lien
;;;; Date: 13-Jul-1995
;;;;====================================================================
;;;; CHANGE HISTORY
;;;;
;;;; 23-8-95 Kevin Korb: bug fixes & cosmetic changes
;;;;
;;;;====================================================================
;;;*********************************************************************
;;; Declarations
;;;*********************************************************************
(defstruct property name values)
(defstruct classifier name values)
;;; A decision tree is either a symbol representing a leaf or a structure
;;; where decision-tree-feature is the number (starting from 1) of the
;;; feature being tested and decision-tree-subtrees is an assoc list of the
;;; form ((value1 subtree1) (value2 subtree2) ... ) representing the
;;; branches and subtrees
(defstruct decision-tree feature subtrees)
;;;*********************************************************************
;;; Public functions
;;;*********************************************************************
(defun ID3 (examples properties classifier)
"This funciton implements the ID3 algorithm.
examples - the list of examples, each example is represented as a
list of property name-value pairs. e.g,
(((Risk . high) (Credit-History . bad)
(Debt . high) (Collateral . none) (Income . 0-15K))
((Risk . high) (Credit-History . unknown)
(Debt . high) (Collateral . none) (Income . 15-35K)))
properties - a list of 'property' structures. e.g,
(list
(make-property :name 'Credit-History
:values '(good bad unknown))
(make-property :name 'Debt
:values '(high low)))
classifier - a structure of type 'classifier' that is used to
classify the examples. e.g,
(make-classifier :name 'Risk
:values '(high moderate low))
This function constructs a decision tree in a top-down fashion. It
selects a property to test at the current node of the tree and uses
this test to partition the set of examples; the algorithm then
recursively constructs a subtree for each partition. This continues
until all members of the partition are in the same class; that class
becomes a leaf node of the tree."
(let ((info (information-content examples classifier)))
(cond
((null examples)
"unable to classify: no examples")
((null properties)
(assoc (classifier-name classifier)(car examples)))
((zerop info)
(assoc (classifier-name classifier) (car examples)))
(t
(let ((property (select-property examples properties classifier)))
(make-decision-tree
:feature
(property-name property)
:subtrees
(mapcar #'(lambda (v)
(cons v (ID3
(select-partition
examples
(cons (property-name property) v))
(remove property properties)
classifier)))
(property-values property))))))))
;;;*********************************************************************
;;; Private functions
;;;*********************************************************************
(defun information-content (examples classifier)
"Calculates the information content of 'examples' given 'classifier'."
;; count # of examples which falls into each of the class of 'classifier'
(let (
;; initialize class-counts to be a list of cons, the 'car' of each
;; cons is a classifier value and the 'cdr' is 0. e.g, if the
;; values of 'classifier' is:
;; (high moderate low)
;; then 'class-count' is initialized to be
;; ((high . 0) (moderate . 0) (low . 0))
(class-counts
(mapcar #'(lambda (x)
(cons x 0))
(classifier-values classifier)))
;; initialize 'size' to be the length of 'examples'
(size (length examples)))
;; count # of instances in each class
(dolist (instance examples)
(let ((pv (cdr (assoc (classifier-name classifier)
instance))))
(incf (cdr (assoc pv class-counts)))))
;; compute info content of examples
(sum #'(lambda (x)
(if (= (cdr x) 0)
0
(* -1
(/ (cdr x) size)
(log (/ (cdr x) size) 2))))
class-counts)))
;;;---------------------------------------------------------------------
(defun sum (f list-of-numbers)
"Returns the sum of (f 'list-of-numbers'). e.g, we apply the function
'f' to each element of 'list-of-numbers', and then returns the sum of
the results."
(apply '+ (mapcar f list-of-numbers)))
;;;---------------------------------------------------------------------
(defun select-property (examples properties classifier)
"Returns the property which maximizes information gain."
(let ((E-list (mapcar
#'(lambda (property)
(cons (expected-info examples property classifier)
property))
properties)))
;; returns the property which maximizes information gain (minimizes E)
(cdr (assoc (apply 'min (mapcar #'car E-list)) E-list))))
;;;---------------------------------------------------------------------
(defun expected-info (examples property classifier)
"Returns the expected information needed to complete the tree after
making 'property' the root."
(let ((parts (partition examples property))
(examples-length (length examples)))
(sum #'(lambda (part-i)
(*
(/ (length part-i) examples-length)
(information-content part-i classifier)))
parts)))
;;;---------------------------------------------------------------------
(defun partition (examples property)
"Partition the 'examples' using 'property'."
(let ((p-name (property-name property)))
(mapcar #'(lambda (p-val)
(select-partition examples (cons p-name p-val)))
(property-values property))))
;;;---------------------------------------------------------------------
(defun select-partition (examples p &aux result)
"Returns a list of examples having property 'p'."
(dolist (ex examples)
(if (member p ex :test 'equal)
(push ex result)))
result)
Disclaimer