/*
 * Late blight example: grouped-data multiple logistic regression.
 *
 * This is a real-life data set. The researcher was interested in the effect
 * of time of exposure (time), temperature (temper), relative humidity (RH),
 * light intensity (light), and whether the specimen was in sun or shade
 * (sun_shade). The unit was a leaf of a potato plant infected with late
 * blight (Phytophthora infestans); the response is the number of
 * 'successes' out of the number of 'tries'.
 */
data blight;
    input success tries time temper RH light sun_shade;
    /* observed proportion, used only for the exploratory plots */
    prop = success / tries;
    cards;
0 4 0 9.6 18.9 1.5 0
0 4 0 12.6 17.2 1.5 0
3 4 0 11.0 18.3 1.5 0
1 4 0 21.0 15.4 1.5 0
1 4 0 22.0 13.8 1.5 0
4 4 0 20.0 38.0 1.5 0
2 4 0 13.0 45.0 1.5 0
4 4 0 16.0 58.0 1.5 0
4 4 0 23.0 27.0 1.5 0
3 4 0 25.0 29.0 1.5 0
3 12 0 28.0 14.0 1.5 0
4 4 0 26.0 7.0 1.5 0
8 8 0 12.5 60.0 1.5 0
1 4 0 16.1 15.3 1.5 0
0 4 1 31.6 17.3 948.9 1
0 12 1 28.1 8.0 786.7 1
0 4 1 25.5 8.1 645.0 1
1 8 1 15.0 48.5 141.2 1
0 4 1 17.6 14.8 291.5 1
0 4 2 11.1 17.4 786.2 1
0 4 2 15.1 15.9 716.2 1
0 4 2 19.0 26.2 715.6 1
0 4 2 25.7 12.6 788.6 1
0 4 2 28.4 18.5 735.3 1
0 4 2 19.5 14.4 764.6 1
1 4 2 14.3 16.4 712.7 1
0 4 2 17.1 31.1 645.1 1
0 4 2 26.4 23.2 920.5 1
0 4 2 31.6 17.3 948.9 1
0 12 2 28.1 8.0 786.7 1
0 4 2 25.5 8.1 645.0 1
0 4 2 17.6 14.8 291.5 1
0 4 3 26.4 23.2 920.5 1
0 4 3 31.6 17.3 948.9 1
0 12 3 28.1 8.0 786.7 1
0 4 3 25.5 8.1 645.0 1
0 8 3 15.0 48.5 141.2 1
1 4 4 11.1 17.4 786.2 1
0 4 4 15.1 15.9 716.2 1
0 4 4 19.0 26.2 715.6 1
0 4 4 25.7 12.6 788.6 1
0 4 4 28.4 18.5 735.3 1
0 4 4 19.5 14.4 764.6 1
0 4 4 14.3 16.4 712.7 1
0 4 4 17.1 31.1 645.1 1
0 4 4 26.4 23.2 920.5 1
0 4 4 31.6 17.3 948.9 1
0 12 4 28.1 8.0 786.7 1
0 4 4 25.5 8.1 645.0 1
0 4 4 17.6 14.8 291.5 1
0 4 5 26.4 23.2 920.5 1
0 4 5 31.6 17.3 948.9 1
0 8 5 15.0 48.5 141.2 1
2 4 6 11.1 17.4 786.2 1
0 4 6 15.1 15.9 716.2 1
0 4 6 19.0 26.2 715.6 1
0 3 6 25.7 12.6 788.6 1
0 4 6 28.4 18.5 735.3 1
0 4 6 19.5 14.4 764.6 1
0 4 6 14.3 16.4 712.7 1
0 4 6 17.1 31.1 645.1 1
0 4 6 26.4 23.2 920.5 1
0 3 7 25.9 12.7 116.6 1
0 4 8 11.1 17.4 786.2 1
0 4 8 15.1 15.9 716.2 1
0 4 8 19.0 26.2 715.6 1
0 4 8 25.7 12.6 788.6 1
0 4 8 28.4 18.5 735.3 1
0 4 8 19.5 14.4 764.6 1
0 4 8 14.3 16.4 712.7 1
0 4 8 17.1 31.1 645.1 1
0 1 24 11.1 17.4 786.2 1
1 4 24 15.1 15.9 716.2 1
0 4 24 19.0 26.2 715.6 1
0 4 24 25.7 12.6 788.6 1
0 4 24 28.4 18.5 735.3 1
0 4 24 19.5 14.4 764.6 1
0 4 24 14.3 16.4 712.7 1
0 4 24 17.6 14.8 291.5 1
0 4 36 17.1 31.1 645.1 1
;

ods rtf file='c:\blight1.rtf';
ods graphics on;

/* Exploratory plots of the observed proportion against each covariate. */
proc gplot data=blight;
    plot prop*(time temper RH light sun_shade);
run;

/* LOESS smooth of prop against RH to see the shape of the relationship. */
proc loess data=blight;
    model prop=RH / smooth=0.5 residual clm alpha=0.01;
    ods output OutputStatistics=Results1;
run;

proc gplot data=Results1;
    plot (DepVar pred LowerCL UpperCL)*RH / overlay;
run;

/* Stepwise selection over all candidate covariates to get a final model. */
proc logistic data=blight;
    model success/tries = time RH temper light sun_shade / selection=stepwise;
run;

/*
 * Refit the final model (RH and sun_shade) with options that produce
 * residuals, predicted values and the lack-of-fit test for diagnostics.
 * The diagnostics are written to data set NEW.
 */
proc logistic data=blight;
    model success/tries = RH sun_shade / Rsq lackfit influence;
    output out=new reschi=rp predicted=p resdev=rd dfbetas=_all_
           difchisq=dc h=hat;
run;

/* LOESS smooth of the Pearson residuals (rp) against RH to rule out trends. */
proc loess data=new;
    model rp=RH / smooth=0.5 residual clm alpha=0.01;
    ods output OutputStatistics=Results2;
run;

proc gplot data=Results2;
    plot (DepVar pred LowerCL UpperCL)*RH / overlay;
run;

/* These options are better suited to a simple (single-covariate) logit model. */
proc logistic data=blight;
    model success/tries = RH / scale=none clparm=wald clodds=pl rsquare;
run;

/* PROC PROBIT with the logistic distribution (i.e. a logit fit). */
proc probit data=blight;
    model success/tries = RH / dist=logistic lackfit itprint;
run;

/* PROC PROBIT with the normal distribution (a true probit fit). */
proc probit data=blight;
    model success/tries = RH / dist=normal lackfit itprint;
run;

ods graphics off;
ods rtf close;

/*
 * An example of what happens with separated data in SAS.
 * Change y=1 for x=5.3 to 0 for the real run of the data.
 */
data test;
    input y x;
    cards;
1 3.2
0 1.2
0 2.1
1 5.3
1 4.5
1 5.8
1 6.3
;

proc logistic data=test;
    model y=x / covb ctable;
    output out=a p=pred;
run;

proc print data=a;
    var y pred;
run;

/* Poisson Model */
/*
 * Anthropology data set from Madagascar: how many names of medicinal plants
 * an individual can name (count), based on age, nearness to forest (near)
 * and gender.
 *
 * NOTE(review): in the copy this script was restored from, the rows for
 * subjects 52-108 and 109-166 were stored column-by-column (all subject ids,
 * then all counts, ages, near codes, gender codes). They have been rebuilt
 * row-wise by position; each column held exactly 57 values in both groups.
 * Subject id 122 does not appear in that copy. Verify against the original
 * data sheet if one is available.
 */
data plant2;
    input subject count age near $ gender;
    cards;
1 10 17 f 0
2 13 20 n 0
3 10 20 f 0
4 9 20 n 0
5 10 20 f 0
6 9 20 n 0
7 14 20 f 0
8 14 20 n 0
9 5 21 f 0
10 11 22 n 0
11 12 23 f 0
12 11 23 n 0
13 11 23 n 0
14 9 24 f 0
15 12 24 n 0
16 8 25 n 0
17 13 25 n 0
18 13 26 f 0
19 12 26 f 0
20 13 26 n 0
21 14 27 f 0
22 14 27 f 0
23 11 28 f 0
24 11 28 f 0
25 7 28 f 0
26 12 28 n 0
27 13 29 n 0
28 14 30 n 0
29 13 30 f 0
30 10 30 f 0
31 12 30 f 0
32 14 30 f 0
33 14 31 f 0
34 8 31 n 0
35 14 31 n 0
36 14 32 f 0
37 14 32 n 0
38 13 32 f 0
39 14 35 f 0
40 7 35 f 0
41 13 35 f 0
42 13 36 n 0
43 13 36 f 0
44 13 36 n 0
45 14 36 n 0
46 14 37 n 0
47 11 38 f 0
48 13 38 n 0
49 13 38 f 0
50 14 38 n 0
51 11 39 n 0
52 14 40 n 0
53 14 40 n 0
54 14 40 n 0
55 11 40 n 0
56 13 40 n 0
57 14 40 n 0
58 12 40 f 0
59 14 40 f 0
60 8 40 n 0
61 12 42 n 0
62 11 42 f 0
63 10 44 f 0
64 11 45 n 0
65 13 45 f 0
66 9 45 n 0
67 13 45 f 0
68 11 46 n 0
69 12 46 f 0
70 12 47 f 0
71 12 50 n 0
72 11 50 n 0
73 12 50 n 0
74 11 50 f 0
75 12 52 f 0
76 10 52 n 0
77 13 53 n 0
78 12 53 f 0
79 10 54 f 0
80 10 54 f 0
81 10 55 f 0
82 10 55 n 0
83 12 55 f 0
84 11 56 n 0
85 13 56 n 0
86 13 56 f 0
87 12 60 f 0
88 10 60 n 0
89 12 60 n 0
90 10 60 f 0
91 10 60 f 0
92 13 62 f 0
93 10 65 f 0
94 11 66 n 0
95 10 70 f 0
96 11 70 f 0
97 11 70 n 0
98 11 78 n 0
99 11 15 f 1
100 14 15 f 1
101 3 17 n 1
102 12 17 n 1
103 10 18 f 1
104 10 18 n 1
105 9 20 f 1
106 12 20 f 1
107 13 20 f 1
108 13 20 f 1
109 12 20 f 1
110 11 20 n 1
111 14 20 n 1
112 9 20 f 1
113 14 21 n 1
114 11 22 f 1
115 13 22 n 1
116 12 22 n 1
117 13 22 f 1
118 12 22 f 1
119 13 22 n 1
120 14 23 n 1
121 11 23 n 1
123 11 23 f 1
124 10 24 f 1
125 14 25 f 1
126 13 25 n 1
127 13 25 f 1
128 11 25 n 1
129 9 25 f 1
130 13 25 f 1
131 13 26 f 1
132 14 26 n 1
133 14 26 n 1
134 14 27 n 1
135 13 27 n 1
136 13 27 n 1
137 14 28 f 1
138 13 28 n 1
139 14 28 n 1
140 14 28 n 1
141 13 30 n 1
142 14 30 f 1
143 13 30 f 1
144 14 30 f 1
145 14 30 n 1
146 10 30 n 1
147 10 30 n 1
148 13 31 n 1
149 13 31 n 1
150 14 32 f 1
151 14 32 n 1
152 14 32 f 1
153 14 33 n 1
154 14 34 f 1
155 13 34 n 1
156 14 35 n 1
157 13 35 f 1
158 12 35 f 1
159 12 35 f 1
160 14 39 n 1
161 14 40 f 1
162 14 40 f 1
163 13 40 f 1
164 14 40 n 1
165 14 40 f 1
166 14 40 n 1
167 14 40 n 1
168 13 40 f 1
169 14 40 f 1
170 14 40 n 1
171 14 40 n 1
172 14 42 f 1
173 14 42 n 1
174 14 44 n 1
175 14 44 n 1
176 13 45 n 1
177 14 45 f 1
178 10 45 f 1
179 14 45 f 1
180 13 46 f 1
181 14 46 n 1
182 14 47 n 1
183 13 47 f 1
184 14 48 n 1
185 13 50 f 1
186 12 50 f 1
187 10 50 f 1
188 14 50 f 1
189 14 50 n 1
190 14 55 f 1
191 13 58 n 1
192 14 60 f 1
193 14 60 f 1
194 14 60 f 1
195 14 60 n 1
196 10 60 n 1
197 14 60 n 1
198 14 62 n 1
199 12 62 n 1
200 14 63 n 1
201 14 70 f 1
202 13 70 n 1
203 11 80 f 1
;

/* Standardize age (mean 0, std 1) before forming polynomial terms. */
proc standard data=plant2 mean=0 std=1 out=plant1;
    var age;
run;

/* Quadratic and cubic terms of the standardized age. */
data plants;
    set plant1;
    agesq = age*age;
    agec = agesq*age;
run;

ods rtf file='c:\plants.rtf';
ods graphics on;

/* Poisson regression of count on age, age squared, nearness and gender. */
proc genmod data=plants plot=all;
    class near;
    model count = age agesq near gender / dist = poisson link = log;
run;

ods graphics off;
ods rtf close;
run;