/* Source document: lab_11_logistic_2016.doc */

/*This is a real-life data set. The researcher was interested in
seeing the effect of time of exposure (time), temperature (temper),
relative humidity (RH), light intensity (light), and whether the specimen
was in sun or shade (sun_shade). The unit was a leaf of a potato plant infected
with late blight (Phytophthora infestans), and we were measuring the number
of 'successes' out of the number of 'tries'. We are using GROUPED data
in a multiple logistic regression framework.
*/
/* Read the grouped late-blight data with list input: seven numeric fields
   per record. prop is the observed proportion success/tries, used only
   for the exploratory plots and smoothing. */
data blight;
   input success tries time temper RH light sun_shade;
   prop = success / tries;
   datalines;
0
4
0
9.6
18.9
1.5
0
0
4
0
12.6
17.2
1.5
0
3
4
0
11.0
18.3
1.5
0
1
4
0
21.0
15.4
1.5
0
1
4
0
22.0
13.8
1.5
0
4
4
0
20.0
38.0
1.5
0
2
4
0
13.0
45.0
1.5
0
4
4
0
16.0
58.0
1.5
0
4
4
0
23.0
27.0
1.5
0
3
4
0
25.0
29.0
1.5
0
3
12
0
28.0
14.0
1.5
0
4
4
0
26.0
7.0
1.5
0
8
8
0
12.5
60.0
1.5
0
1
4
0
16.1
15.3
1.5
0
0
4
1
31.6
17.3
948.9
1
0
12
1
28.1
8.0
786.7
1
0
4
1
25.5
8.1
645.0
1
1
8
1
15.0
48.5
141.2
1
0
4
1
17.6
14.8
291.5
1
0
4
2
11.1
17.4
786.2
1
0
4
2
15.1
15.9
716.2
1
0
4
2
19.0
26.2
715.6
1
0
4
2
25.7
12.6
788.6
1
0
4
2
28.4
18.5
735.3
1
0
4
2
19.5
14.4
764.6
1
1
4
2
14.3
16.4
712.7
1
0
4
2
17.1
31.1
645.1
1
0
4
2
26.4
23.2
920.5
1
0
4
2
31.6
17.3
948.9
1
0
12
2
28.1
8.0
786.7
1
0
4
2
25.5
8.1
645.0
1
0
4
2
17.6
14.8
291.5
1
0
4
3
26.4
23.2
920.5
1
0
4
3
31.6
17.3
948.9
1
0
12
3
28.1
8.0
786.7
1
0
4
3
25.5
8.1
645.0
1
0
8
3
15.0
48.5
141.2
1
1
4
4
11.1
17.4
786.2
1
0
4
4
15.1
15.9
716.2
1
0
4
4
19.0
26.2
715.6
1
0
4
4
25.7
12.6
788.6
1
0
4
4
28.4
18.5
735.3
1
0
4
4
19.5
14.4
764.6
1
0
4
4
14.3
16.4
712.7
1
0
4
4
17.1
31.1
645.1
1
0
4
4
26.4
23.2
920.5
1
0
4
4
31.6
17.3
948.9
1
0
12
4
28.1
8.0
786.7
1
0
4
4
25.5
8.1
645.0
1
0
4
4
17.6
14.8
291.5
1
0
4
5
26.4
23.2
920.5
1
0
4
5
31.6
17.3
948.9
1
0
8
5
15.0
48.5
141.2
1
2
4
6
11.1
17.4
786.2
1
0
4
6
15.1
15.9
716.2
1
0
4
6
19.0
26.2
715.6
1
0
3
6
25.7
12.6
788.6
1
0
4
6
28.4
18.5
735.3
1
0
4
6
19.5
14.4
764.6
1
0
4
6
14.3
16.4
712.7
1
0
4
6
17.1
31.1
645.1
1
0
4
6
26.4
23.2
920.5
1
0
3
7
25.9
12.7
116.6
1
0
4
8
11.1
17.4
786.2
1
0
4
8
15.1
15.9
716.2
1
0
4
8
19.0
26.2
715.6
1
0
4
8
25.7
12.6
788.6
1
0
4
8
28.4
18.5
735.3
1
0
4
8
19.5
14.4
764.6
1
0
4
8
14.3
16.4
712.7
1
0
4
8
17.1
31.1
645.1
1
0
1
24
11.1
17.4
786.2
1
1
4
24
15.1
15.9
716.2
1
0
4
24
19.0
26.2
715.6
1
0
4
24
25.7
12.6
788.6
1
0
4
24
28.4
18.5
735.3
1
0
4
24
19.5
14.4
764.6
1
0
4
24
14.3
16.4
712.7
1
0
4
24
17.6
14.8
291.5
1
0
4
36
17.1
31.1
645.1
1
;
/* Send the output that follows to an RTF file and turn ODS graphics on. */
ods rtf file='c:\blight1.rtf';
ods graphics on;

/* Exploratory scatter plots: observed proportion against each predictor. */
proc gplot data=blight;
   plot prop*time
        prop*temper
        prop*RH
        prop*light
        prop*sun_shade;
run;
/* Loess smooth of the observed proportion on RH (smoothing parameter 0.5),
   requesting residuals and confidence limits for the mean at alpha=0.01.
   The OutputStatistics table is captured in Results1 for plotting. */
proc loess data=blight;
   ods output OutputStatistics=Results1;
   model prop = RH / smooth=0.5 alpha=0.01 clm residual;
run;

/* Overlay the observed values, the loess fit and its confidence limits against RH. */
proc gplot data=Results1;
   plot DepVar*RH
        pred*RH
        LowerCL*RH
        UpperCL*RH / overlay;
run;
/* Grouped (events/trials) logistic regression with stepwise selection
   over all five candidate predictors, used to arrive at a final model. */
proc logistic data=blight;
   model success/tries = time RH temper light sun_shade
         / selection=stepwise;
run;
/* Fit the final model (RH and sun_shade) and request R-square, the
   lack-of-fit test and influence diagnostics. Residuals, predicted
   probabilities and case diagnostics are written to data set NEW. */
proc logistic data=blight;
   model success/tries = RH sun_shade / rsquare lackfit influence;
   output out=new
          predicted=p
          reschi=rp
          resdev=rd
          h=hat
          difchisq=dc
          dfbetas=_all_;
run;
/* Loess smooth of the Pearson residuals (rp) against RH to check for
   leftover trend; the smoother's OutputStatistics table goes to Results2. */
proc loess data=new;
   ods output OutputStatistics=Results2;
   model rp = RH / smooth=0.5 alpha=0.01 clm residual;
run;

/* Overlay the residuals, the loess fit and its confidence limits against RH. */
proc gplot data=Results2;
   plot DepVar*RH
        pred*RH
        LowerCL*RH
        UpperCL*RH / overlay;
run;
/* Single-predictor logistic model on RH, with the options suggested for a
   simple logit fit: goodness-of-fit statistics without rescaling
   (scale=none), Wald limits for the parameters, profile-likelihood limits
   for the odds ratios, and R-square. */
proc logistic data=blight;
   model success/tries = RH
         / scale=none clparm=wald clodds=pl rsquare;
run;
/* PROC PROBIT with the logistic distribution (i.e. a logit fit), printing
   the iteration history and the lack-of-fit tests. */
proc probit data=blight;
   model success/tries = RH
         / dist=logistic lackfit itprint;
run;
/* PROC PROBIT with the normal distribution (a true probit fit), printing
   the iteration history and the lack-of-fit tests. */
proc probit data=blight;
   model success/tries = RH
         / dist=normal lackfit itprint;
run;

/* Finished with the blight analyses: stop ODS graphics and close the RTF file. */
ods graphics off;
ods rtf close;
/* Demonstration of what SAS does with separated data: every y=0 has a
   smaller x than every y=1. For the real run of the data, change y to 0
   on the x=5.3 record. */
data test;
   input y x;
   datalines;
1 3.2
0 1.2
0 2.1
1 5.3
1 4.5
1 5.8
1 6.3
;

/* Logistic fit with the parameter covariance matrix and classification
   table; predicted probabilities are saved to data set A. */
proc logistic data=test;
   model y = x / covb ctable;
   output out=a predicted=pred;
run;

/* List the observed response beside its predicted probability. */
proc print data=a;
   var y pred;
run;
/* Poisson Model */
/* Anthropology data set about how many names of medicinal plants an
   individual can name based on age, nearness to forest and gender; done in
   Madagascar. */
/* The section heading is a closed block comment: a statement-style comment
   starting with an asterisk runs to the next semicolon and would absorb the
   DATA statement. */
/* List input: subject id, count of plants named, age, near (character),
   gender (numeric 0/1 in the data). */
data plant2;
   input subject count age near $ gender;
   cards;
1
10
17
f
0
2
13
20
n
0
3
10
20
f
0
4
9
20
n
0
5
10
20
f
0
6
9
20
n
0
7
14
20
f
0
8
14
20
n
0
9
5
21
f
0
10
11
22
n
0
11
12
23
f
0
12
11
23
n
0
13
11
23
n
0
14
9
24
f
0
15
12
24
n
0
16
8
25
n
0
17
13
25
n
0
18
13
26
f
0
19
12
26
f
0
20
13
26
n
0
21
14
27
f
0
22
14
27
f
0
23
11
28
f
0
24
11
28
f
0
25
7
28
f
0
26
12
28
n
0
27
13
29
n
0
28
14
30
n
0
29
13
30
f
0
30
10
30
f
0
31
12
30
f
0
32
14
30
f
0
33
14
31
f
0
34
8
31
n
0
35
14
31
n
0
36
14
32
f
0
37
14
32
n
0
38
13
32
f
0
39
14
35
f
0
40
7
35
f
0
41
13
35
f
0
42
13
36
n
0
43
13
36
f
0
44
13
36
n
0
45
14
36
n
0
46
14
37
n
0
47
11
38
f
0
48
13
38
n
0
49
13
38
f
0
50
14
38
n
0
51
11
39
n
0
52 14 40 n 0
53 14 40 n 0
54 14 40 n 0
55 11 40 n 0
56 13 40 n 0
57 14 40 n 0
58 12 40 f 0
59 14 40 f 0
60 8 40 n 0
61 12 42 n 0
62 11 42 f 0
63 10 44 f 0
64 11 45 n 0
65 13 45 f 0
66 9 45 n 0
67 13 45 f 0
68 11 46 n 0
69 12 46 f 0
70 12 47 f 0
71 12 50 n 0
72 11 50 n 0
73 12 50 n 0
74 11 50 f 0
75 12 52 f 0
76 10 52 n 0
77 13 53 n 0
78 12 53 f 0
79 10 54 f 0
80 10 54 f 0
81 10 55 f 0
82 10 55 n 0
83 12 55 f 0
84 11 56 n 0
85 13 56 n 0
86 13 56 f 0
87 12 60 f 0
88 10 60 n 0
89 12 60 n 0
90 10 60 f 0
91 10 60 f 0
92 13 62 f 0
93 10 65 f 0
94 11 66 n 0
95 10 70 f 0
96 11 70 f 0
97 11 70 n 0
98 11 78 n 0
99 11 15 f 1
100 14 15 f 1
101 3 17 n 1
102 12 17 n 1
103 10 18 f 1
104 10 18 n 1
105 9 20 f 1
106 12 20 f 1
107 13 20 f 1
108 13 20 f 1
109 12 20 f 1
110 11 20 n 1
111 14 20 n 1
112 9 20 f 1
113 14 21 n 1
114 11 22 f 1
115 13 22 n 1
116 12 22 n 1
117 13 22 f 1
118 12 22 f 1
119 13 22 n 1
120 14 23 n 1
121 11 23 n 1
123 11 23 f 1
124 10 24 f 1
125 14 25 f 1
126 13 25 n 1
127 13 25 f 1
128 11 25 n 1
129 9 25 f 1
130 13 25 f 1
131 13 26 f 1
132 14 26 n 1
133 14 26 n 1
134 14 27 n 1
135 13 27 n 1
136 13 27 n 1
137 14 28 f 1
138 13 28 n 1
139 14 28 n 1
140 14 28 n 1
141 13 30 n 1
142 14 30 f 1
143 13 30 f 1
144 14 30 f 1
145 14 30 n 1
146 10 30 n 1
147 10 30 n 1
148 13 31 n 1
149 13 31 n 1
150 14 32 f 1
151 14 32 n 1
152 14 32 f 1
153 14 33 n 1
154 14 34 f 1
155 13 34 n 1
156 14 35 n 1
157 13 35 f 1
158 12 35 f 1
159 12 35 f 1
160 14 39 n 1
161 14 40 f 1
162 14 40 f 1
163 13 40 f 1
164 14 40 n 1
165 14 40 f 1
166 14 40 n 1
167
14
40
n
1
168
13
40
f
1
169
14
40
f
1
170
14
40
n
1
171
14
40
n
1
172
14
42
f
1
173
14
42
n
1
174
14
44
n
1
175
14
44
n
1
176
13
45
n
1
177
14
45
f
1
178
10
45
f
1
179
14
45
f
1
180
13
46
f
1
181
14
46
n
1
182
14
47
n
1
183
13
47
f
1
184
14
48
n
1
185
13
50
f
1
186
12
50
f
1
187
10
50
f
1
188
14
50
f
1
189
14
50
n
1
190
14
55
f
1
191
13
58
n
1
192
14
60
f
1
193
14
60
f
1
194
14
60
f
1
195
14
60
n
1
196
10
60
n
1
197
14
60
n
1
198
14
62
n
1
199
12
62
n
1
200
14
63
n
1
201
14
70
f
1
202
13
70
n
1
203
11
80
f
1
;
/* Standardize age to mean 0 and standard deviation 1; the result is
   written to PLANT1 so PLANT2 is left untouched. */
proc standard data=plant2 out=plant1 mean=0 std=1;
   var age;
run;

/* Add the square and cube of the standardized age as polynomial terms. */
data plants;
   set plant1;
   agesq = age * age;
   agec  = age * agesq;
run;
/* Send the Poisson regression output to an RTF file. The path is quoted
   with plain ASCII single quotes; typographic (curly) quotes are not
   string delimiters in SAS. */
ods rtf file='c:\plants.rtf';
ods graphics on;

/* Poisson regression with a log link: count of plants named on age, age
   squared, nearness to forest (CLASS variable) and gender. PLOTS=ALL
   requests every available diagnostic plot. */
proc genmod data=plants plots=all;
   class near;
   model count = age agesq near gender / dist=poisson link=log;
run;

ods graphics off;
ods rtf close;
/* Download (link text left over from the source web page; not part of the program) */