/* SAS Procs */

/* advertisement (web-page residue, not part of the program) */
/* Multiple Regression Useful SAS Procedures: 九個程式 */
/* 加州落雨量之計量分析*/
/* 變數: number city precip altitude(高度) latitude(緯度) distance(與海距離) */
/* "city" is a qualitative (non-numeric) variable. */
/* Build the CALIRAIN data set: one record per California weather station.   */
/* Fields, in order:                                                         */
/*   number   - station id (1-30)                                            */
/*   city     - station name (character, no embedded blanks)                 */
/*   precip   - precipitation (the response variable)                        */
/*   altitude - altitude (feet)                                              */
/*   latitude - latitude (degrees)                                           */
/*   distance - distance from the coast (miles)                              */
/* The records below were regrouped from a listing whose values had been     */
/* scattered across lines; each station now sits on a single line so that    */
/* list input reads it correctly.                                            */
/* NOTE(review): confirm the regrouped values against the original handout.  */
DATA calirain;
    /* :$12. keeps the full name; a bare $ would truncate names such as      */
    /* SanFrancisco or CrescentCity to the default 8 characters.             */
    INPUT number city :$12. precip altitude latitude distance;
    DATALINES;
1 Eureka 39.57 43 40.8 1
2 RedBluff 23.27 341 40.2 97
3 Thermal 18.20 4152 33.8 70
4 FortBragg 37.48 74 39.4 1
5 SodaSprings 49.26 6752 39.3 150
6 SanFrancisco 21.82 52 37.8 5
7 Sacramento 18.07 25 38.5 80
8 SanJose 14.17 95 37.4 28
9 GiantForest 42.63 6360 36.6 145
10 Salinas 13.85 74 36.7 12
11 Fresno 9.44 331 36.7 114
12 PtPiedras 19.33 57 35.7 1
13 PasaRobles 15.67 740 35.7 31
14 Bakersfield 6.00 489 35.4 75
15 Bishop 5.73 4108 37.3 198
16 Mineral 47.82 4850 40.4 142
17 SantaBarbara 17.95 120 34.4 1
18 Susanville 18.20 4152 40.3 198
19 TuleLake 10.03 4036 41.9 140
20 Needles 4.63 913 34.8 192
21 Burbank 14.74 699 34.2 47
22 LosAngeles 15.02 312 34.1 16
23 LongBeach 12.36 50 33.8 12
24 LosBanos 8.26 125 37.8 74
25 Blythe 4.05 268 33.6 155
26 SanDiego 9.94 19 32.7 5
27 Daggett 4.25 2105 34.1 85
28 DeathValley 1.66 -178 36.5 194
29 CrescentCity 74.87 35 41.7 1
30 Colusa 15.95 60 39.2 91
;
RUN;
/* 1. PROC REG - basic regression.                                        */
/* Regress precip on the three predictors; the ALL option asks PROC REG   */
/* to print its full set of optional MODEL-statement output.              */
proc reg data=calirain;
    model precip = altitude latitude distance
          / all;
run;
/* 2. PROC GLM - the same model, fitted with GLM for further inference. */
proc glm data=calirain;
    model precip = altitude latitude distance;
run;
/* Exercise: interpret what the Type I SS and Type III SS sections of the */
/* output are saying.                                                     */
/* 3. Hypothesis testing.                                                  */
/* The TEST statement jointly tests H0: beta1 = beta3 = 0 (the altitude    */
/* and distance coefficients), given that X2 (latitude) is in the model.   */
proc reg data=calirain;
    model precip = altitude latitude distance;
    test altitude=0, distance=0;
run;
/* 4 INFERENCES ABOUT THE RESPONSE VARIABLE                                */
/* (1) estimate the mean precipitation for cities of altitude 100 feet,    */
/*     latitude 40 degrees, and 70 miles from the coast.                   */
/* (2) predict the precipitation of a new city of altitude 100 feet,       */
/*     latitude 40 degrees, and 70 miles from the coast.                   */
/* One extra observation carrying only predictor values (altitude 100,     */
/* latitude 40, distance 70); number, city and precip are read as missing. */
data Xvalues;
    input number city $ precip altitude latitude distance;
    datalines;
. . . 100 40 70
;
run;

/* Append that observation to CALIRAIN so later procedures see it as an    */
/* additional row whose response (precip) is missing.                      */
data calirain;
    set calirain
        Xvalues;
run;
/* The options CLM and CLI give confidence intervals for the mean of Y and */
/* prediction intervals for Y, for the values of X1, X2, X3 in the data    */
/* set. ALPHA=.10 is stated once and applies to both sets of limits        */
/* (i.e. 90% intervals).                                                   */
PROC REG DATA=calirain;
    MODEL precip = altitude latitude distance / CLM CLI ALPHA=.10;
RUN;
/* 5. Residual plots for this multiple regression.                         */
/* (The standard SAS line-printer plots are somewhat crude.)               */

/* Fit the model, print predicted values (P) and residual analysis (R),    */
/* and save predictions and residuals to data set NEW as PRED and RES.     */
proc reg data=calirain;
    model precip = altitude latitude distance / P R;
    output out=NEW P=PRED R=RES;
run;

/* Residuals against fitted values, with a reference line at zero. */
proc plot data=NEW;
    plot RES*PRED='+'/ VREF=0;
run;

/* Distribution plots and normality tests for the residuals. NEW is the    */
/* most recently created data set, which is what the procedure uses when   */
/* DATA= is omitted; it is named explicitly here.                          */
proc univariate data=NEW plot normal;
    var RES;
run;
/* 6. The INSIGHT procedure produces somewhat nicer-looking plots.         */
/* For the Q-Q plot, choose "Residual Normal Q-Q" under the Graphs menu.   */
PROC INSIGHT;
    OPEN calirain;
    FIT precip = altitude latitude distance;
RUN;
/* 7 Getting Variance Inflation Factors and Influence Statistics: */
/* Variance inflation factors and influence statistics need no separate    */
/* procedure: just add the VIF and INFLUENCE options to the MODEL          */
/* statement.                                                              */
proc reg data=calirain;
    model precip = altitude latitude distance
          / VIF influence;
run;
/* 8. Automatic variable-selection guides.                                 */
/* Using the C(p) and adjusted R^2 criteria to find the best model(s):     */
/* NOTE(review): PROC RSQUARE and PROC STEPWISE are legacy procedure       */
/* names; if this SAS release does not provide them, the apparent          */
/* replacement is PROC REG with SELECTION= on the MODEL statement, as used */
/* in section 9 of this file - confirm before switching.                   */
PROC RSQUARE DATA=calirain;
    MODEL precip = altitude latitude distance / cp adjrsq;
RUN;
/* Sequential selection; f, b and stepwise presumably request forward,     */
/* backward and stepwise selection respectively - TODO confirm.            */
PROC STEPWISE DATA=calirain;
    MODEL precip = altitude latitude distance / f b stepwise;
RUN;
/* 9. Model selection.                                                     */
/* NOTE(review): neither PROC REG below names a DATA= data set, and the    */
/* variables (sqrttl, lnarea, lnelev, lndistn, lndistsc, lnarean) are not  */
/* created anywhere in the visible part of this file - presumably they     */
/* come from a different data set; confirm which one.                      */

/* All possible models, reported with several criteria:                    */
/* R-square, adjusted R-square, Cp and AIC.                                */
PROC REG;
    MODEL sqrttl = lnarea lnelev lndistn lndistsc lnarean
          / SELECTION=rsquare adjrsq cp aic;
RUN;

/* Mechanical sequential selection: backward, forward (default entry       */
/* level), forward with SLENTRY=0.4, and stepwise.                         */
PROC REG;
    MODEL sqrttl = lnarea lnelev lndistn lndistsc lnarean / SELECTION=backward;
    MODEL sqrttl = lnarea lnelev lndistn lndistsc lnarean / SELECTION=forward;
    MODEL sqrttl = lnarea lnelev lndistn lndistsc lnarean / SELECTION=forward SLENTRY=0.4;
    MODEL sqrttl = lnarea lnelev lndistn lndistsc lnarean / SELECTION=stepwise;
RUN;
/* Default "F_IN" values, set by the slentry option:
     - forward selection:    P-value of the partial F < 0.50
     - stepwise selection:   P-value of the partial F < 0.15
   and default "F_OUT" values, set by the slstay option:
     - backward elimination: P-value of the partial F < 0.10
     - stepwise selection:   P-value of the partial F < 0.15
*/
/* Download (web-page residue, not part of the program) */