SAS codes for most of the examples of book by A.J. Dobson; publisher:
Chapman & Hall.on Generalized Linear and Non-Linear Modelling.


 Example : 1.6.5 

 Goal : I want to plot Likelihood and see if the MLE is close 
 to the global minimum or not.
 
 Following is the SAS code :

 data cyclones;
 input sn number @@;
 datalines;
 1 6 7 12  2 5 8 7 
 3 4 9 4  4 6 10 2 
 5 6 11 6 6 3 12 7  13 4
 ;

/* SAS reads the data row-wise..*/

proc iml;
 use cyclones var{sn number};
 read all;
  m=13;n=1;
  theta=J(m,n,0);
  lik=J(m,n,0);
  a=3;b=8;
  do i = 1 to m;
     theta[i]= a+(b-a)*i/m;
     lik[i]=sum(number*log(theta[i]) - theta[i]-log(gamma(number+1))); 
  end;
  print sn number theta lik;
  create cyclone var{sn number theta lik};  
  append;
  close cyclone;
run;

 proc plot data=cyclone;
  plot lik*theta=sn;
 run; 
  
 Following is the R code.
 
 #y := number of cyclones
 y=c( 6,5,4,6,6,3,12,7,4,2,6,7,4);
 sn=1:13;
 m=20; a=3;b=8;

 #defining the variables....
 
 lik=array(0,dim=c(m,1))
 theta=array(0,dim=c(m,1))
 
 for(i in 1:m)
 {
   theta[i]=a+(i/m)*(b-a);
   # write the log-likelihood function ...
   lik[i]=sum(y*log(theta[i]) - theta[i] - log(gamma(y+1)));
 }
 plot(theta,lik,type="b",xlab="Theta",ylab="log-likelihood")

 Exercise : 1.6  
 
    data progeny;
    input   pgroup fem  tot @@; 
    datalines;
      1  18 29   2  31 53     3  34 61
      4  33 62   5  27 51     6  33 62
      7  28 53   8  23 49     9  33 71
     10  12 26  11  19 42    12  25 56
     13  14 34  14   4 10    15  22 56
     16   7 19
     ;
     
  proc iml;
   use progeny var _all_;
   read all;

   prop=fem/tot; /* this gives me output for part (a) */
   mlee=sum(fem)/sum(tot); /* gives output for part (b) */   
  /* print prop mlee fem tot;
    part (c), trying to use Newton Raphson algo */

   start F_fn(theta) global(fem,tot);
     f1=0;
     do i = 1 to 16;
       f1=f1+log(pdf('binomial',fem[i],theta,tot[i]));
     end;
     return(f1);
   finish F_fn;  

  theta=0.5;
  call nlpnra(rc,xr,"F_fn",theta);
  run;


 
 Example : 2.2.1  

/* number of chronic medical conditions in Town women vs Country women 
   data set */
   
  data medic;
  input group numb @@; 
  datalines;
  
   1 0 	1 2 	1 1 	2 1	 2 1
   1 1 	1 0 	1 3 	2 1	 2 1
   1 1 	1 1 	1 2 	2 0	 2 1
   1 0 	1 3 	1 0 	2 0	 2 0
   1 2 	1 0 	2 2 	2 2	 2 2
   1 3 	1 1 	2 0 	2 2
   1 0 	1 2 	2 3 	2 0
   1 1 	1 1 	2 0 	2 1
   1 1 	1 3 	2 0 	2 2
   1 1 	1 3 	2 1 	2 0
   1 1 	1 4 	2 1 	2 0
   ;
 
 /* assuming that the corner parametrization..*/

 proc genmod data=medic;
  class group;
  model numb=group/ dist=poi link=log lrci ;
         /*following line is for testing no group effect*/
  estimate 'testing th(1)=th(2)' group 1 -1; 
  
  output out=ress 
  	reschi=Reschi;
 run; 
 
 /*  The likelihood calculated by SAS in this case is not exactly
 correct. it gives : log-L(i)= y(i)*log(lambda(i)) - lambda(i) 
 log(y(i)!) is missing.. but for the calculation of deviance and all
 those statistics this is not necessary. */
 
  proc iml ;
   use ress var _all_;
   read all;
   tot= sum(Reschi##2);
   print tot;
 run;    
   
 Example : 2.2.2  
/* Birthweight and gestational age. */

 data gest;
 input gender age bweight @@; /* male-1, female-2 */
 datalines;
 
 1 40 2968 1 38 2795 1 40 3163 1 35 2925
 1 36 2625 1 37 2847 1 41 3292 1 40 3473
 1 37 2628 1 38 3176 1 40 3421 1 38 2975
 2 40 3317 2 36 2729 2 40 2935 2 38 2754
 2 42 3210 2 39 2817 2 40 3126 2 37 2539
 2 36 2412 2 38 2991 2 39 2875 2 40 3231
 ;

 proc genmod data=gest;
  class gender;
  model bweight=gender age;
  output out=outHo pred=Predicted stdreschi=Residuals; 
  proc print data=outHo;
 run;
 
 proc genmod data=gest;
  class gender;
  model bweight=gender gender*age;
  estimate 'Test :b(1)=b(2)' gender*age -1 1;
  output out=outH1 pred=Predicted stdreschi=Residuals; 
  proc print data=outH1;
 run; 
 
 Exercise : 2.1  
/* weight of plants */

 data plant;
 input grp wt @@;
 datalines;
 
 1 4.81 1 4.17 1 4.41 1 3.59 1 5.87 1 3.83 1 6.03 1 4.98 1 4.90 1 5.75
 1 5.36 1 3.48 1 4.69 1 4.44 1 4.89 1 4.71 1 5.48 1 4.32 1 5.15 1 6.34
 2 4.17 2 3.05 2 5.18 2 4.01 2 6.11 2 4.10 2 5.17 2 3.57 2 5.33 2 5.59
 2 4.66 2 5.58 2 3.66 2 4.50 2 3.90 2 4.61 2 5.62 2 4.53 2 6.05 2 5.14
 ;
 
/* target of the study is to test means of grp(1)=grp(2) */ 

/* (a) says, to do all the box plots and stem-leaf plot etc..
the following SAS code gives everything... with lots of summary statistics 

proc univariate data=plant plots;
  by grp;
  var wt;
run;  
*/

/* (b) testing for the equality of two means..use the following code
which gives the details of this test and infer that the two means are
almost equal...

 proc glm data=plant;
  class grp;
  model wt=grp;
  estimate 'Test: b(1)=b(2)' grp 1 -1;
 run;
*/
 
/* (c) now I have to find the estimates of these parameters under two
 models Ho: mu(1)=mu(2) vs H1:~Ho*/  
 
/* The following is the code for calculating the MLEs and LS estimates
 for mu(1) and mu(2)....*/
 proc genmod data=plant;
  class grp;
  model wt=grp;
  lsmeans grp;
 run; 
 
/*how to find the parameter estimates under Ho 
  ie. how to fit a model called E(y)=constant */
  
 proc genmod data=plant;
  model wt= ;
 run; 

/* (d) now if I want to do the pairwise test ie, calculate 
	 d(i)=y(1,i)-y(2,i) for each i=1..10 and then 
	 check fit the model E(d) = mu + error
	 test Ho : mu=0;  */

/*  For doing this paired tests.... I will have to modify my data 
first ie make a new variable d(i)=y(1,i)-y(2,i). and then only I 
will be able to do any test. */

 In that case its better not to enter the data as I have entered above.
 it is better to enter the data in the following manner..
 
 
data original;
input id y1 y2; 
datalines;
 1  23 54
 2  43 23
 3  45 12
 ....
;

 and then modify the data as 

data new1;     
  set original;
    diff=y1-y2;
  output;
run;

 /* and then test for the intercept = 0 ...*/
 
/* and now for further analysis.. you can modify the data in the
 following manner */
 
 data new2(keep=y pair group);
  set original;
     y=y1;
     group=1;
     output;
     y=y2;
     group=2;
     output;
 run;

 Example: 4.2  
data failure;
 input lifetimes @@;
 like=0;
 datalines;
 
  1051 4921 7886 10861 13520 1337 5445 8108 11026 13670
  1389 5620 8546 11214 14110 1921 5817 8666 11362 14496
  1942 5905 8831 11604 15395 2322 5956 9106 11608 16179
  3629 6068 9711 11745 17092 4006 6121 9806 11762 17568
  4012 6473 10205 11895 17568 4063 7501 10396 12044
  ;
/*  
  proc univariate data=failure plots;
    var lifetimes;
    probplot /weibull(c=2 theta=0 sigma=9890);
  run; 
*/  

  proc nlin data=failure method=gauss;
    parms theta=100;
    eq1=pdf('weibull',lifetimes,2,theta);
    model.like=(-2*log(eq1));
  run;  
  
 Exercise: 4.1  
 data aids;
 input id yr num @@;
 datalines;

 1 1984 01 09 1986 43 17 1988 110
 2 1984 06 10 1986 51 18 1988 113
 3 1984 16 11 1986 63 19 1988 149
 4 1984 23 12 1986 70 20 1988 159
 5 1985 27 13 1987 88
 6 1985 39 14 1987 97
 7 1985 31 15 1987 91
 8 1985 30 16 1987 104
 ;
 
 data new1;
  set aids;
  logid=log(id);
  logy=log(num);
  output;
 run;
 
 proc plot data=new1;
  plot num*id;
  plot logy*logid;
 run; 
 
 proc genmod data=new1;
  model num=logid / dist=poisson link=log lrci;
 run;
 
   

 Exercise : 6.4  
/* data extracted from table-6.17 */
 data conc;
 input chol age bmi @@;
 datalines;
 5.94 52 20.7 4.71 46 21.3 5.86 51 25.4
 6.52 44 22.7 6.80 70 23.9 5.23 33 24.3
 4.97 21 22.2 8.78 63 26.2 5.13 56 23.3
 6.74 54 29.2 5.95 44 22.7 5.83 71 21.9
 5.74 39 22.4 4.92 58 20.2 6.69 58 24.4
 ;
 
 proc glm data=conc;
  model chol=age bmi;
 run;


/* note for checking the effect of factor : bmi look at the 
 type-3 SS ... this will give you the following output
 
	  [ SSE(2)-SSE(1) ]/1
	  -------------------  ~ F(1,df)
	      [SSE(1)]/df

here Model(1) : chol = age bmi
and  Model(2) : chol = age 

so we can see that SSE(2) > SSE(1) always.. */

 Example: 7.3.1  
/* Beetle mortality data */

 data beetle;
 input dose totnum numkill;
 datalines;
 
 1.6907 59 6
 1.7242 60 13
 1.7552 62 18
 1.7842 56 28
 1.8113 63 52
 1.8369 59 53
 1.8610 62 61
 1.8839 60 60
 ;
 
 data new1;
  set beetle;
  prop=numkill/totnum;
  output;
 run;

/* 
 proc plot data=new1;
  plot prop*dose;
 run; 
 
  The above plot is curve "S" in shape and therefore it is very 
  likely to go for logistic regression,you can check that after the 
  logit transformation the data gets straightend a bit */
  
/* looking at the result I am going to fit a logistic regression
   ie. logit(prop) = b(1) + b(2)*dose */  
   
   
 proc genmod data=new1;
  model numkill/totnum=dose /dist=binomial link=logit lrci;
 run;
   
 Exercise: 7.3  

 data graduation;
 input year gender $ faculty $ surv tot @@;
 datalines;
 
 1938 m med 18 22 1938 m art 16 30 1938 m sci 09 14 1938 m eng 10 16
 1939 m med 16 23 1939 m art 13 22 1939 m sci 09 12 1939 m eng 07 11
 1940 m med 07 17 1940 m art 11 25 1940 m sci 12 19 1940 m eng 12 15
 1941 m med 12 25 1941 m art 12 14 1941 m sci 12 15 1941 m eng 08 09
 1942 m med 24 50 1942 m art 08 12 1942 m sci 20 28 1942 m eng 05 07
 1943 m med 16 21 1943 m art 11 20 1943 m sci 16 21 1943 m eng 01 02
 1944 m med 22 32 1944 m art 04 10 1944 m sci 25 31 1944 m eng 16 22
 1945 m med 12 14 1945 m art 04 12 1945 m sci 32 38 1945 m eng 19 25
 1946 m med 22 34 1946 m art . . 1946 m sci 04 05 1946 m eng . .
 1947 m med 28 37 1947 m art 13 23 1947 m sci 25 31 1947 m eng 25 35
 1938 f art 14 19 1938 f sci 01 01 1939 f art 11 16 1939 f sci 04 04
 1940 f art 15 18 1940 f sci 06 07 1941 f art 15 21 1941 f sci 03 03
 1942 f art 08 09 1942 f sci 04 04 1943 f art 13 13 1943 f sci 08 09
 1944 f art 18 22 1944 f sci 05 05 1945 f art 18 22 1945 f sci 16 17
 1946 f art 01 01 1946 f sci 01 01 1947 f art 13 16 1947 f sci 10 10
 
 ;

/* look at the data entered */
/*
 proc print data=graduation;
 run;
*/ 

/* part(a) 
proc genmod data=graduation;
 class year;
 model surv/tot = year / type1 type3 dist=binomial link=logit lrci;
run;
*/


/* part(b) & (c) 
proc sort data=graduation out=sortgradbygen;
 by gender;
run; 

proc genmod data=sortgradbygen;
 by gender;
 class faculty;
 model surv/tot = faculty / type1 type3 dist=binomial link=logit lrci;
run; 
*/

/* part (d) for this part look at the subset of the male dataset for only those
two faculties.. and then look at the interaction of gender and faculty.*/


 Example: 8.3.1   
/*car preference data..*/

 data car;
 input sex $ age respcat count @@;
 datalines;
 f 1 1 26 m 1 1 40
 f 1 2 12 m 1 2 17
 f 1 3 07 m 1 3 08
 f 2 1 09 m 2 1 17
 f 2 2 21 m 2 2 15
 f 2 3 15 m 2 3 12
 f 3 1 05 m 3 1 08
 f 3 2 14 m 3 2 15
 f 3 3 41 m 3 3 18
 ;
 proc sort data=car out=new1;
  by sex;
 run; 
 
/*following is the way to do the profile plot...this will give me some idea
about the interaction between the variables.. age & respcat
 
 symbol1 interpol=join value=dot;
 symbol2 interpol=join value=square;
 symbol3 interpol=join value=star;	 

 proc gplot data=new1;
  by sex;
  plot count*respcat=age;
 run; 
*/
 
/* now I will have to do some analysis.. fit nominal logistic model ,good.. so
what I have seen today is how to do nominal logistic regression .. for this I
need following stuffs
(1) funda of "glogit" 
(2) look at the "order" statement in the "proc logistic" expression.
(3) you need to be careful with the concept called "ref" ie reference 
    variables, so look at the line "class" very carefully........

 proc logistic data = new1 order = internal;
  class sex(ref='f') age(ref='1')/param=ref;
  freq count;
  model respcat (ref='1')= sex age/link = glogit alpha=0.01;
  output out=outdat reschi=Resid pred=Predicted;
run;

proc print data=outdat;
run;	*/


/* if I would like to fit an ordinal logistic regression , then use the following
 code ... */

 proc logistic data = new1;
  class sex(ref='f') age(ref='3')/param=ref;
  freq count;
  model respcat = sex age/link = clogit alpha=0.05;
run;

 Example: 9.2.1  
/* British doctors' smoking and coronary deaths*/

 data coronary;
 input smoke age deaths person_yr @@;
 datalines;
 1 1 32 52407 0 1 02 18790
 1 2 104 43248 0 2 12 10673
 1 3 206 28612 0 3 28 5710
 1 4 186 12663 0 4 28 2585
 1 5 102 5317 0 5 31 1462
 ;
 
 data new1;
  set coronary;
  drate=100000*deaths/person_yr;
  lpers=log(person_yr);
  output;
 run;

/*
 symbol1 i=spline c=red v=1;
 symbol2 i=spline c=blue v=2;

 proc gplot data=new1;
  plot drate*age=smoke;
 run;
*/

/* 
proc genmod data=new1;
 model deaths/person_yr=smoke|age age*age / dist=poisson link=log;
run; 

 or I can do the following.. both gives the same output.. */

proc genmod data=new1;
 model deaths =smoke|age age*age / offset=lpers dist=poisson link=log ;
 output out=outdata pred=Predicted reschi=Pearson_residual resdev=residual_deviance;
run; 

proc print data=outdata;
run;

 How to calculate MLE ? 
/* for a given likelihood, how to find the maximum likelihood..??? */
/* example shown below .... you have to use PROC NLIN to get the MLE*/

 data aids;
 input id yr num @@;
 like=0;
 datalines;

 1 1984 01 09 1986 43 17 1988 110
 2 1984 06 10 1986 51 18 1988 113
 3 1984 16 11 1986 63 19 1988 149
 4 1984 23 12 1986 70 20 1988 159
 5 1985 27 13 1987 88
 6 1985 39 14 1987 97
 7 1985 31 15 1987 91
 8 1985 30 16 1987 104
 ;
 run;
 
 data new1;
  set aids;
  logid=log(id);
  logy=log(num);
  output;
 run;

 proc genmod data=new1;
  model num=logid / dist=poisson link=log lrci;
 run;
 

 proc nlin data=new1 ;
    /* specified that these are the parameters */
    parms a=1 b=1;
    
    /* eta OR the linear part */
    eq1=exp(a+b*logid);
    
    p=pdf('poisson',num,eq1);
    model.like=(-2*log(p));
 run;

 How to do Nominal Logistic Regression ? 

data school;
input school program  style $ count;
datalines;
1 1 self  10
1 1 team  17
1 1 class 26
1 2 self   5
1 2 team  12
1 2 class 50
2 1 self  21
2 1 team  17
2 1 class 26
2 2 self  16
2 2 team  12
2 2 class 36
3 1 self  15
3 1 team  15
3 1 class 16
3 2 self  12
3 2 team  12
3 2 class 20
;
/*
proc freq data = school;
  weight count;
  tables style /list chisq relrisk;
  ods output OneWayFreqs = test;
run;
proc print data=test;
run;

data test1;
   set test;
   godds = frequency/85;
run;
proc print data = test1;
run;

*/


proc logistic data = school order = internal;
  freq count;
  model style = /link = glogit ;
  ods output parameterestimates= odds;
run;
data odds1;
  set odds;
  odds = exp(estimate);
run;

proc print data = odds1;
  var response estimate odds; 
run;



Hosted by www.Geocities.ws

1