;;;; Rebecca Orton
;;;; COSC 387: Artificial Intelligence
;;;; Project 4
;;;; Fall 2000
;;;; Due: Nov 22 @ 5 P.M.
;;;; 13 points
;;;; Several researchers have used machine learning algorithms to learn
;;;; behavioral profiles of people, for marketing or for customizing
;;;; software. Another important use of these profiles is for computer
;;;; intrusion detection. The basic idea is to learn a profile of each
;;;; individual user based on their past computer use and then use the
;;;; profile to verify his or her identity.
;;;; The file elmer_session.htm is an example of a UNIX accounting file for
;;;; a user named `elmer', and you can get more information about the
;;;; fields from the man page for the acctcom command. (Type `man
;;;; acctcom' at the UNIX prompt.) I'll give the details of how I went
;;;; from the audit trail to the data set in lecture, but you can also
;;;; find this information in a technical report. The section of
;;;; interest is probably 4.1 on page 15.
;;;; Tasks:
;;;; Number 1. Implement the naive Bayes and k-nearest neighbor
;;;; learning algorithms.
;;;; attrfile.txt contains additional info
;;;; sec_data_lisp.htm is the training file
;;;; sec_unknown_lisp.htm is the test file
;(setq time-series '((TWEETY 0.0 0.02 63914.67 0.0 0.02 63914.67 0.0 0.0 32.28 0.0 0.0 0.85 0.0 9.0 416448.0 0.0 0.0 0.0 0.0 0.0 1.0 )))
;(setq class (first (car time-series)))
;(setq avgreal (second (car time-series)))
;(setq minreal (third (car time-series)))
;(setq maxreal (fourth (car time-series)))
;(setq avgsys (nth 4 (car time-series)))
;(setq minsys (nth 5 (car time-series)))
;(setq maxsys (nth 6 (car time-series)))
;(setq avguser (nth 7 (car time-series)))
;(setq minuser (nth 8 (car time-series)))
;(setq maxuser (nth 9 (car time-series)))
;(setq avgchar (nth 10 (car time-series)))
;(setq minchar (nth 11 (car time-series)))
;(setq maxchar (nth 12 (car time-series)))
;(setq avgblks (nth 13 (car time-series)))
;(setq minblks (nth 14 (car time-series)))
;(setq maxblks (nth 15 (car time-series)))
;(setq avgcpu (nth 16 (car time-series)))
;(setq mincpu (nth 17 (car time-series)))
;(setq maxcpu (nth 18 (car time-series)))
;(setq avghog (nth 19 (car time-series)))
;(setq minhog (nth 20 (car time-series)))
;(setq maxhog (nth 21 (car time-series)))
(defun sqr (x)
  "Return X multiplied by itself, or NIL when X is not a real number.
NIL itself is not a real number, so it also yields NIL."
  ;; REALP rejects NIL as well, so a single test covers both guards.
  (when (realp x)
    (* x x)))
(defun distance (x1 y1 x2 y2)
  "Return the Euclidean distance between the points (X1, Y1) and (X2, Y2).
Returns NIL when any of the four coordinates is NIL."
  (when (and x1 y1 x2 y2)
    ;; Square each coordinate difference with SQR, then take the root of the sum.
    (let ((dx-squared (sqr (- x1 x2)))
          (dy-squared (sqr (- y1 y2))))
      (sqrt (+ dx-squared dy-squared)))))
;(distance 63914.67 1.0 63911.67 1.0)
(defun dis (real-num1 real-num2)
  "Return the distance between two real numbers, or NIL if either is not real.
Both values are placed on the line y = 1.0 and handed to DISTANCE, so only
the difference between REAL-NUM1 and REAL-NUM2 contributes to the result."
  (if (and (realp real-num1) (realp real-num2))
      (distance real-num1 1.0 real-num2 1.0)
      nil))
;(dis 63914.67 63911.67)
;(setq time-series '((TWEETY 0.0 0.02 63914.67 0.0 0.02 63914.67 0.0 0.0 32.28 0.0 0.0 0.85 0.0 9.0 416448.0 0.0 0.0 0.0 0.0 0.0 1.0 )
;(PORKY 15.4483 0.02 435.6 0.0683 0.0 0.22 0.0383 0.0 0.33 7466.6665 0.0 32648.0 7.6667 0.0 0.0 0.3233 0.0 1.0 0.2183 0.0 1.0)))
;(mapcar #'dis (first time-series) (second time-series))
(defun add-dis (reallist)
  "Return the sum of the numbers in REALLIST as a float (0.0 for an empty list).
Returns NIL when the first element of REALLIST is NIL."
  (cond ((null reallist) 0.0)
        ;; A leading NIL short-circuits the whole sum to NIL.
        ((null (first reallist)) nil)
        ;; Fold from the right, seeded with 0.0: a1 + (a2 + (... + 0.0)).
        (t (reduce #'+ reallist :from-end t :initial-value 0.0))))
;(add-dis '(1.0 1.0 1.0))
;(setq a-serial '(tweety 1.0 1.0 1.0))
;(setq unknown-serial '(porky 2.0 2.0 2.0))
(defun comp-dis (a-serial unknown-serial)
  "Return the total distance between two examples.
A-SERIAL and UNKNOWN-SERIAL are lists whose first element is a label and
whose remaining elements are attribute values.  The labels are skipped, the
attribute values are compared pairwise with DIS, and the per-attribute
distances are summed with ADD-DIS.  If the examples differ in length, the
extra attributes of the longer one are ignored (MAPCAR stops at the shorter
list)."
  ;; Lexical bindings: the intermediate lists are private to this call and no
  ;; longer assigned to undeclared global variables.
  (let ((known-values (cdr a-serial))
        (unknown-values (cdr unknown-serial)))
    (add-dis (mapcar #'dis known-values unknown-values))))
;(setq serial-metric (comp-dis a-serial unknown-serial))
;(setq series-dis (list a-serial serial-metric))
(defun eol-t (a-list)
  "Return T when A-LIST has at most two elements, otherwise NIL."
  ;; Nothing left after dropping the first two elements means end of list.
  (null (cddr a-list)))
(defun insert-node-in-order (node-item node-metric sort-list)
  "Insert NODE-ITEM with NODE-METRIC into SORT-LIST, keeping ascending metric order.
SORT-LIST is a flat list of alternating entries: (item1 metric1 item2 metric2 ...).
Returns the resulting list; SORT-LIST itself is not modified.

SORT-LIST is returned unchanged when NODE-ITEM is NIL or when an EQUAL item
is already present, so a repeated item is stored only once.  When metrics
tie, the new entry is placed after the existing entries with that metric."
  (cond ((null node-item) sort-list)
        ;; Duplicate check is done once, up front, over the whole list.
        ((member node-item sort-list :test #'equal) sort-list)
        (t
         ;; Walk the list two cells at a time, collecting the entries that
         ;; stay in front of the new one (in reverse order) until the
         ;; insertion point is reached.
         (let ((front '()))
           (loop
             (when (or (null sort-list)
                       (< node-metric (second sort-list)))
               ;; FRONT and the new pair are fresh conses, so NCONC is safe;
               ;; the remaining tail of SORT-LIST is shared, not copied.
               (return (nconc (nreverse front)
                              (list node-item node-metric)
                              sort-list)))
             (push (first sort-list) front)
             (push (second sort-list) front)
             (setq sort-list (cddr sort-list)))))))
;(setq sort-set (insert-node-in-order a-serial serial-metric nil))
;(setq 2nd-serial '(porky 1.0 2.0 1.0))
;(setq 2nd-serial-metric (comp-dis 2nd-serial unknown-serial))
;(setq sort-set (insert-node-in-order 2nd-serial 2nd-serial-metric sort-set))
(defun sort-series (time-series sort-set unknown-serial)
  "Rank every example in TIME-SERIES by its distance to UNKNOWN-SERIAL.
Each example is inserted into SORT-SET (a flat list of alternating example
and distance, normally NIL to start) via INSERT-NODE-IN-ORDER, using the
distance computed by COMP-DIS.  Returns the final SORT-SET."
  (dolist (example time-series sort-set)
    (setq sort-set
          (insert-node-in-order example
                                (comp-dis example unknown-serial)
                                sort-set))))
;(setq unknown-serial '(petepuma 78.7893 0.0200 193399.4700 0.2040 0.0000 2.1200 0.7027 0.0000 9.9012 11042.3333 0.0000 38520.0000 9.4547 0.0000 0.0000 0.3680 0.0000 1.0000 0.0920 0.0000 1.0000))
;(setq sort-set (sort-series *examples* nil unknown-serial))
(defun count-set (node-item set-list)
  "Count the entries of SET-LIST whose label is NODE-ITEM.
SET-LIST is a flat list of alternating example and metric; an example's
label is its first element and is compared to NODE-ITEM with EQ.
Returns NIL when NODE-ITEM is NIL, and 0 for an empty SET-LIST.  An example
whose label is NIL makes the count for the list starting at that example NIL."
  (when node-item
    (if (null set-list)
        0
        (let ((label (caar set-list)))
          (when label
            ;; This entry contributes 1 or 0; the rest of the list is counted
            ;; after skipping this entry's metric.
            (+ (if (eq node-item label) 1 0)
               (count-set node-item (cddr set-list))))))))
;; Smoke-test data, evaluated when the file is loaded: a four-example
;; training set, one unknown example, and the training set ranked by
;; distance to that unknown.
(setq time-series '((reba 0.0 2.0 1.0)
                    (PORKY 1.0 2.0 1.0)
                    (TWEETY 1.0 1.0 1.0)
                    (reba 3.0 2.0 1.0))
      unknown-serial '(porky 2.0 2.0 2.0)
      sort-set (sort-series time-series nil unknown-serial))
;; Label counts over the ranked set: REBA appears twice, NUL never,
;; TWEETY once.  The values are only visible when evaluated interactively.
(count-set 'reba sort-set)
(count-set 'nul sort-set)
(count-set 'tweety sort-set)
;(defun trim-sets (node-item set-list)
; (cond ((null node-item) nil)
; ((null set-list) nil)
; ((eq node-item (car (car set-list)))
; (trim-sets node-item
; (cdr (cdr set-list))))
; (t(append (list (first set-list)
; (second set-list))
; (trim-sets node-item
; (cdr (cdr set-list))
; )
; )
; )))
;(count-set 'reba sort-set)
;(setq sort-set (trim-sets 'reba sort-set))
;(count-set 'tweety sort-set)
;(setq sort-set (trim-sets 'tweety sort-set))
;(count-set 'porky sort-set)
;(setq sort-set (trim-sets 'porky sort-set))
(defun sort-counts (a-set pass-set sort-list)
  "Order the examples of A-SET by how often their label occurs in PASS-SET.
A-SET and PASS-SET are flat lists of alternating example and metric.  Each
example of A-SET is inserted into SORT-LIST with the negated COUNT-SET of
its label as the metric, so the most frequent label ends up first.
Returns the resulting list."
  (do ((remaining a-set (cddr remaining)))
      ((null remaining) sort-list)
    (setq sort-list
          (insert-node-in-order
           (first remaining)
           ;; Negate the count so that ascending order means most votes first.
           (- (count-set (caar remaining) pass-set))
           sort-list))))
;(setq time-series '((reba 0.0 2.0 1.0)(PORKY 1.0 2.0 1.0)(TWEETY 1.0 1.0 1.0)(reba 3.0 2.0 1.0)))
;(setq unknown-serial '(porky 2.0 2.0 2.0))
;(setq sort-set (sort-series time-series nil unknown-serial))
; (sort-counts sort-set sort-set nil)
(defun get-k-entries (k sort-set)
  "Return the first K entries of SORT-SET as a new flat list.
SORT-SET is a flat list of alternating item and metric, so the result holds
up to 2*K elements: (item1 metric1 ... itemK metricK).  When SORT-SET has
fewer than K entries, all of them are returned.  Returns NIL when SORT-SET
is empty or K is zero or negative.  K is expected to be an integer."
  ;; Step through the list one (item metric) pair at a time.  The FOR clause
  ;; stops at the end of the list and REPEAT stops after K pairs, whichever
  ;; comes first.  All state is local to the loop.
  (loop for (item metric) on sort-set by #'cddr
        repeat k
        collect item
        collect metric))
;(setq results-1 (get-k-entries 3 sort-set))
;(setq results-2 (get-k-entries 5 sort-set))
;(sort-counts results-1 results-1 nil)
;(sort-counts results-2 results-2 nil)
;(setq label (car (second (reverse (sort-counts results-2 results-2 nil)))))
(defun k-nearest-neighbor (k unknown-serial time-series)
  "Classify UNKNOWN-SERIAL by a vote among its K nearest training examples.
TIME-SERIES is the list of training examples; each example, like
UNKNOWN-SERIAL, is a list of a label followed by attribute values.
The training examples are ranked by distance with SORT-SERIES, the K
closest are taken with GET-K-ENTRIES, and SORT-COUNTS orders them by how
often each label occurs among those K.  Returns the label of the first
example in that ordering, or NIL when TIME-SERIES is empty."
  ;; LET* keeps every intermediate result local; nothing is written to
  ;; global variables, so a top-level SORT-SET is left untouched.
  (let* ((ranked (sort-series time-series nil unknown-serial))
         ;; Skip neighbor selection entirely when there is nothing to rank.
         (nearest (and ranked (get-k-entries k ranked)))
         (votes (sort-counts nearest nearest nil)))
    (car (car votes))))
;(setq time-series '((reba 1.0 1.0 1.0)(PORKY 2.0 2.0 2.0)(TWEETY 3.0 3.0 3.0)(reba 1.0 1.0 2.0)))
;(setq unknown-serial '(porky 2.0 2.0 3.0))
;(setq k 3)
;(k-nearest-neighbor k unknown-serial time-series)
;(setq time-series '((reba 0.0 2.0 1.0)(PORKY 1.0 2.0 1.0)(TWEETY 1.0 1.0 1.0)(reba 3.0 2.0 1.0)))
;(setq unknown-serial '(porky 2.0 2.0 2.0))
;(k-nearest-neighbor k unknown-serial time-series)