/* ===================================================================== */
/* Multiple Regression: Useful SAS Procedures (nine example programs)    */
/* Quantitative analysis of California rainfall.                         */
/*                                                                       */
/* Variables, in INPUT order:                                            */
/*   number    observation number                                        */
/*   city      city name, a qualitative (non-numeric) variable           */
/*   precip    precipitation (response variable)                         */
/*   altitude  altitude                                                  */
/*   latitude  latitude                                                  */
/*   distance  distance from the sea                                     */
/* ===================================================================== */

/* NOTE(review): the data rows below were reassembled from a copy of     */
/* this file whose columns had been scrambled; every value comes from    */
/* that copy, but the row assignment should be confirmed against the     */
/* original data source. Rows 3 and 18 carry identical precip/altitude   */
/* values (18.20, 4152); verify that this is intended.                   */
DATA calirain;
    INPUT number city $ precip altitude latitude distance;
    DATALINES;
1 Eureka 39.57 43 40.8 1
2 RedBluff 23.27 341 40.2 97
3 Thermal 18.20 4152 33.8 70
4 FortBragg 37.48 74 39.4 1
5 SodaSprings 49.26 6752 39.3 150
6 SanFrancisco 21.82 52 37.8 5
7 Sacramento 18.07 25 38.5 80
8 SanJose 14.17 95 37.4 28
9 GiantForest 42.63 6360 36.6 145
10 Salinas 13.85 74 36.7 12
11 Fresno 9.44 331 36.7 114
12 PtPiedras 19.33 57 35.7 1
13 PasaRobles 15.67 740 35.7 31
14 Bakersfield 6.00 489 35.4 75
15 Bishop 5.73 4108 37.3 198
16 Mineral 47.82 4850 40.4 142
17 SantaBarbara 17.95 120 34.4 1
18 Susanville 18.20 4152 40.3 198
19 TuleLake 10.03 4036 41.9 140
20 Needles 4.63 913 34.8 192
21 Burbank 14.74 699 34.2 47
22 LosAngeles 15.02 312 34.1 16
23 LongBeach 12.36 50 33.8 12
24 LosBanos 8.26 125 37.8 74
25 Blythe 4.05 268 33.6 155
26 SanDiego 9.94 19 32.7 5
27 Daggett 4.25 2105 34.1 85
28 DeathValley 1.66 -178 36.5 194
29 CrescentCity 74.87 35 41.7 1
30 Colusa 15.95 60 39.2 91
;
RUN;

/* 1. PROC REG: basic regression. */
PROC REG DATA=calirain;
    MODEL precip = altitude latitude distance / ALL;
RUN;

/* 2. PROC GLM: output needed for further inference.                     */
/* Can you interpret what the Type I SS section and the Type III SS      */
/* section are saying?                                                   */
PROC GLM DATA=calirain;
    MODEL precip = altitude latitude distance;
RUN;

/* 3. Testing. */
PROC REG DATA=calirain;
    MODEL precip = altitude latitude distance;
    /* Test H0: beta1 = beta3 = 0, given X2 (latitude) in the model. */
    TEST altitude=0, distance=0;
RUN;

/* 4. INFERENCES ABOUT THE RESPONSE VARIABLE                             */
/* (1) Estimate the mean precipitation for cities of altitude 100 feet,  */
/*     latitude 40 degrees, and 70 miles from the coast.                 */
/* (2) Predict the precipitation of a new city of altitude 100 feet,     */
/*     latitude 40 degrees, and 70 miles from the coast.                 */

/* One extra observation holding only the X values of interest; number,  */
/* city and precip are entered as missing (.).                           */
DATA Xvalues;
    INPUT number city $ precip altitude latitude distance;
    CARDS;
. . . 100 40 70
;
RUN;

/* Append the extra row to calirain. From here on calirain holds 31      */
/* rows, the last one with a missing response.                           */
DATA calirain;
    SET calirain Xvalues;
RUN;

/* The options CLM and CLI will give us CIs for the mean of Y and PIs    */
/* for Y, for the values of X1, X2, X3 in the data set.                  */
PROC REG DATA=calirain;
    MODEL precip = altitude latitude distance / CLM CLI ALPHA=.10;
RUN;

/* 5. Produce residual plots for this multiple regression.               */
/* The standard SAS plots are somewhat crude.                            */
PROC REG DATA=calirain;
    MODEL precip = altitude latitude distance / P R;
    OUTPUT OUT=NEW P=PRED R=RES;
RUN;

PROC PLOT DATA=NEW;
    PLOT RES*PRED='+' / VREF=0;
RUN;

PROC UNIVARIATE DATA=NEW PLOT NORMAL;
    VAR RES;
RUN;

/* 6. The INSIGHT procedure produces somewhat nicer-looking plots.       */
/* For the Q-Q plot, choose "Residual Normal Q-Q" under the Graphs menu. */
/* NOTE(review): PROC INSIGHT is a legacy interactive procedure;         */
/* confirm it is available in the target SAS installation.               */
PROC INSIGHT;
    OPEN calirain;
    FIT precip = altitude latitude distance;
RUN;

/* 7. Getting Variance Inflation Factors and Influence Statistics:       */
/* just add some options to the MODEL statement.                         */
PROC REG DATA=calirain;
    MODEL precip = altitude latitude distance / VIF INFLUENCE;
RUN;

/* 8. SAS automatic variable selection guides.                           */
/* Using the C(p) and Adjusted R^2 criteria to find the best model(s).   */
/* NOTE(review): PROC RSQUARE and PROC STEPWISE are legacy procedures;   */
/* the same analyses appear to be available through PROC REG with        */
/* SELECTION=RSQUARE and SELECTION=STEPWISE -- confirm before relying    */
/* on these two steps.                                                   */
PROC RSQUARE DATA=calirain;
    MODEL precip = altitude latitude distance / cp adjrsq;
RUN;

PROC STEPWISE DATA=calirain;
    MODEL precip = altitude latitude distance / f b stepwise;
RUN;

/* 9. Model selection.                                                   */
/* NOTE(review): the variables sqrttl, lnarea, lnelev, lndistn, lndistsc */
/* and lnarean are not created anywhere in this file, and the two        */
/* PROC REG steps below have no DATA= option, so they run against the    */
/* most recently created data set. Supply the intended data set before   */
/* running this section.                                                 */

/* All possible models with several criteria: R-sq, adjusted R-sq,       */
/* Cp, AIC.                                                              */
proc reg;
    model sqrttl = lnarea lnelev lndistn lndistsc lnarean
          / selection=rsquare adjrsq cp aic;
run;

/* Mechanical sequential selection. */
proc reg;
    model sqrttl = lnarea lnelev lndistn lndistsc lnarean / selection=backward;
    model sqrttl = lnarea lnelev lndistn lndistsc lnarean / selection=forward;
    model sqrttl = lnarea lnelev lndistn lndistsc lnarean / selection=forward slentry=0.4;
    model sqrttl = lnarea lnelev lndistn lndistsc lnarean / selection=stepwise;
run;

/* Default "F_IN" values, set by the SLENTRY= option:                    */
/*   - forward selection    : P-value of the partial F < 0.50            */
/*   - stepwise selection   : P-value of the partial F < 0.15            */
/* and default "F_OUT" values, set by the SLSTAY= option:                */
/*   - backward elimination : P-value of the partial F < 0.10            */
/*   - stepwise selection   : P-value of the partial F < 0.15            */