;;;; Rebecca Orton ;;;; COSC 387: Artificial Intelligence ;;;; Project 4 ;;;; Fall 2000 ;;;; Due: Nov 22 @ 5 P.M. ;;;; 13 points ;;;; Several researchers have used machine learning algorithms to learn ;;;; behavioral profiles of people, for marketing or for customizing ;;;; software. Another important use of these profiles is for computer ;;;; intrusion detection. The basic idea is to learn a profile of each ;;;; individual user based on their past computer use and then use the ;;;; profile to verify his or her identify. ;;;; The file elmer_session.htm is an example of a UNIX accounting file for ;;;; a user named `elmer', and you can get more information about the ;;;; fields from the man page for the acctcom command. (Type `man ;;;; acctcom' at the UNIX prompt.) I'll give the details of how I went ;;;; from the audit trail to the data set in lecture, but you can also ;;;; find this information in a technical report. The section of ;;;; interest is probably 4.1 on page 15. ;;;; Tasks: ;;;; Number 1. Implement the naive Bayes and k-nearest neighbor ;;;; learning algorithms. ;;;; attrfile.txt contains additional info ;;;; sec_data_lisp.htm is the training file ;;;; sec_unknown_lisp.htm is the test file ;(setq time-series '((TWEETY 0.0 0.02 63914.67 0.0 0.02 63914.67 0.0 ;0.0 32.28 0.0 0.0 0.85 0.0 9.0 416448.0 0.0 0.0 0.0 0.0 0.0 1.0 ))) ;(setq class (first (car time-series))) ;(setq avgreal (second (car time-series))) ;(setq minreal (third (car time-series))) ;(setq maxreal (fourth (car time-series))) ;(setq avgsys (nth 4 (car time-series))) ;(setq minsys (nth 5 (car time-series))) ;(setq maxsys (nth 6 (car time-series))) ;(setq avguser (nth 7 (car time-series))) ;(setq minuser (nth 8 (car time-series))) ;(setq maxuser (nth 9 (car time-series))) ;(setq avgchar (nth 10 (car time-series))) ;(setq minchar (nth 11 (car time-series))) ;(setq maxchar (nth 12 (car time-series))) ;(setq avgblks (nth 13 (car time-series))) ;(setq minblks (nth 14 (car time-series))) ;(setq maxblks (nth 15 (car time-series))) ;(setq avgcpu (nth 16 (car time-series))) ;(setq mincpu (nth 17 (car time-series))) ;(setq maxcpu (nth 18 (car time-series))) ;(setq avghog (nth 19 (car time-series))) ;(setq minhog (nth 20 (car time-series))) ;(setq maxhog (nth 21 (car time-series))) (defun sqr (x) (cond ((null x) nil) ((not (realp x)) nil) (t(* x x)))) (defun distance (x1 y1 x2 y2) (cond ((null x1) nil) ((null y1) nil) ((null x2) nil) ((null y2) nil) (t(sqrt (+ (sqr (- x1 x2)) (sqr (- y1 y2))))))) ;(distance 63914.67 1.0 63911.67 1.0) (defun dis (real-num1 real-num2) (cond ((not (realp real-num1)) nil) ((not (realp real-num2)) nil) (t(distance real-num1 1.0 real-num2 1.0)))) ;(dis 63914.67 63911.67) ;(setq time-series '((TWEETY 0.0 0.02 63914.67 0.0 0.02 63914.67 0.0 ;0.0 32.28 0.0 0.0 0.85 0.0 9.0 416448.0 0.0 0.0 0.0 0.0 0.0 1.0 ) ;(PORKY 15.4483 0.02 435.6 0.0683 0.0 0.22 0.0383 0.0 0.33 7466.6665 ;0.0 32648.0 7.6667 0.0 0.0 0.3233 0.0 1.0 0.2183 0.0 1.0))) ;(mapcar #'dis (first time-series) (second time-series)) (defun add-dis (reallist) (cond ((null reallist) 0.0) ((not (null (car reallist))) (+ (car reallist) (add-dis (cdr reallist)))))) ;(add-dis '(1.0 1.0 1.0)) ;(setq a-serial '(tweety 1.0 1.0 1.0)) ;(setq unknown-serial '(porky 2.0 2.0 2.0)) (defun comp-dis (a-serial unknown-serial) (progn (setq reallist1 (cdr a-serial)) (setq reallist2 (cdr unknown-serial)) (setq dis-list (mapcar #'dis reallist1 reallist2)) (add-dis dis-list) )) ;(setq serial-metric (comp-dis a-serial unknown-serial)) ;(setq series-dis (list a-serial serial-metric)) (defun eol-t (a-list) (if (null (cdr (cdr a-list))) 't)) (defun insert-node-in-order (node-item node-metric sort-list) (cond ((null node-item) sort-list) ((null sort-list) (list node-item node-metric)) ((member node-item sort-list :test #'equal) sort-list) ((< node-metric (second sort-list)) (append (list node-item node-metric) sort-list) ) ((null (fourth sort-list)) (append sort-list (list node-item node-metric)) ) ((< node-metric (fourth sort-list)) (append (list (first sort-list) (second sort-list)) (list node-item node-metric) (cdr (cdr sort-list)) ) ) (t (append (list (first sort-list) (second sort-list)) (insert-node-in-order node-item node-metric (cdr (cdr sort-list)) ) ) ) )) ;(setq sort-set (insert-node-in-order a-serial serial-metric nil)) ;(setq 2nd-serial '(porky 1.0 2.0 1.0)) ;(setq 2nd-serial-metric (comp-dis 2nd-serial unknown-serial)) ;(setq sort-set (insert-node-in-order 2nd-serial 2nd-serial-metric ;sort-set)) (defun sort-series (time-series sort-set unknown-serial) (cond ((null time-series) sort-set) (t(progn (setq sort-set (insert-node-in-order (car time-series) (comp-dis (car time-series) unknown-serial) sort-set ) ) (sort-series (cdr time-series) sort-set unknown-serial) ) ) )) ;(setq unknown-serial '(petepuma 78.7893 0.0200 193399.4700 0.2040 ;0.0000 2.1200 0.7027 0.0000 9.9012 11042.3333 0.0000 38520.0000 9.4547 ;0.0000 0.0000 0.3680 0.0000 1.0000 0.0920 0.0000 1.0000)) ;(setq sort-set (sort-series *examples* nil unknown-serial)) (defun count-set (node-item set-list) (cond ((null node-item) nil) ((null set-list) 0) ((not (null (car (car set-list)))) (if (eq node-item (car (car set-list))) (+ 1 (count-set node-item (cdr (cdr set-list)))) (+ 0 (count-set node-item (cdr (cdr set-list))))) ))) (setq time-series '((reba 0.0 2.0 1.0)(PORKY 1.0 2.0 1.0)(TWEETY 1.0 1.0 1.0)(reba 3.0 2.0 1.0))) (setq unknown-serial '(porky 2.0 2.0 2.0)) (setq sort-set (sort-series time-series nil unknown-serial)) (count-set 'reba sort-set) (count-set 'nul sort-set) (count-set 'tweety sort-set) ;(defun trim-sets (node-item set-list) ; (cond ((null node-item) nil) ; ((null set-list) nil) ; ((eq node-item (car (car set-list))) ; (trim-sets node-item ; (cdr (cdr set-list)))) ; (t(append (list (first set-list) ; (second set-list)) ; (trim-sets node-item ; (cdr (cdr set-list)) ; ) ; ) ; ))) ;(count-set 'reba sort-set) ;(setq sort-set (trim-sets 'reba sort-set)) ;(count-set 'tweety sort-set) ;(setq sort-set (trim-sets 'tweety sort-set)) ;(count-set 'porky sort-set) ;(setq sort-set (trim-sets 'porky sort-set)) (defun sort-counts (a-set pass-set sort-list) (cond ((null a-set) sort-list) (t(progn (setq sort-list (insert-node-in-order (first a-set) (* -1 (count-set (car(car a-set)) pass-set)) sort-list ) ) (sort-counts (cdr (cdr a-set)) pass-set sort-list) ) ) )) ;(setq time-series '((reba 0.0 2.0 1.0)(PORKY 1.0 2.0 1.0)(TWEETY 1.0 ;1.0 1.0)(reba 3.0 2.0 1.0))) ;(setq unknown-serial '(porky 2.0 2.0 2.0)) ;(setq sort-set (sort-series time-series nil unknown-serial)) ; (sort-counts sort-set sort-set nil) (defun get-k-entries (k sort-set) (progn (setq counter 1) (setq ctr-even 0) (setq ctr-odd 1) (setq maxtimes k) (setq exit-loop-t 'f) (setq new-set nil) (loop (setq new-set (append new-set (list (nth ctr-even sort-set) (nth ctr-odd sort-set)))) (setq ctr-odd (+ 2 ctr-odd)) (setq ctr-even (+ 2 ctr-even)) (if (equal counter maxtimes) (setq exit-loop-t 't) (setq counter (+ 1 counter))) (if (null (nth ctr-even sort-set)) (setq exit-loop-t 't)) (when (equal 't exit-loop-t) (return new-set)) ) ) ) ;(setq results-1 (get-k-entries 3 sort-set)) ;(setq results-2 (get-k-entries 5 sort-set)) ;(sort-counts results-1 results-1 nil) ;(sort-counts results-2 results-2 nil) ;(setq label (car (second (reverse (sort-counts results-2 results-2 ;nil))))) (defun k-nearest-neighbor (k unknown-serial time-series) (progn (setq sort-set (sort-series time-series nil unknown-serial)) (setq k-entries (get-k-entries k sort-set)) (setq results (sort-counts k-entries k-entries nil)) (setq label (car (car results))) )) ;(setq time-series '((reba 1.0 1.0 1.0)(PORKY 2.0 2.0 2.0)(TWEETY 3.0 ;3.0 3.0)(reba 1.0 1.0 2.0))) ;(setq unknown-serial '(porky 2.0 2.0 3.0)) ;(setq k 3) ;(k-nearest-neighbor k unknown-serial time-series) ;(setq time-series '((reba 0.0 2.0 1.0)(PORKY 1.0 2.0 1.0)(TWEETY 1.0 ;1.0 1.0)(reba 3.0 2.0 1.0))) ;(setq unknown-serial '(porky 2.0 2.0 2.0)) ;(k-nearest-neighbor k unknown-serial time-series)