Uploaded by Brian Cheney

S631 Lec00

advertisement
> datain <- read.csv('carprice.csv')
> datain
   Car Age Miles Price
1    1   5    57    85
2    2   4    40   103
3    3   6    77    70
4    4   5    60    82
5    5   5    49    89
6    6   5    47    98
7    7   6    58    66
8    8   6    39    95
9    9   2     8   169
10  10   7    69    70
11  11   7    89    48
> Age <- datain[,2]
> price <- datain[,4]
> plot(price)
> t.test(price,mu=85,conf.level=0.95)
One Sample t-test
data: price
t = 0.3871, df = 10, p-value = 0.7068
alternative hypothesis: true mean is not equal to 85
95 percent confidence interval:
67.70377 109.56895
sample estimates:
mean of x
88.63636
> plot(Age,price)
> lmout <- lm(price~Age)
> summary(lmout)
Call:
lm(formula = price ~ Age)
Residuals:
    Min      1Q  Median      3Q     Max
-12.162  -8.532  -5.162   8.946  21.099
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   195.47      15.24  12.826 4.36e-07 ***
Age           -20.26       2.80  -7.237 4.88e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 12.58 on 9 degrees of freedom
Multiple R-squared: 0.8534,
Adjusted R-squared: 0.8371
F-statistic: 52.38 on 1 and 9 DF, p-value: 4.882e-05
> predict.lm(lmout,newdata=data.frame(Age=6),interval="confidence")
      fit      lwr      upr
1 73.9009 64.16457 83.63723
> library(faraway)
> data(psid)
> head(psid,20)
   age educ sex income year person
1   31   12   M   6000   68      1
2   31   12   M   5300   69      1
3   31   12   M   5200   70      1
4   31   12   M   6900   71      1
5   31   12   M   7500   72      1
6   31   12   M   8000   73      1
7   31   12   M   8000   74      1
8   31   12   M   9600   75      1
9   31   12   M   9000   76      1
10  31   12   M   9000   77      1
11  31   12   M  23000   78      1
12  31   12   M  22000   79      1
13  31   12   M   8000   80      1
14  31   12   M  10000   81      1
15  31   12   M  21800   82      1
16  31   12   M  19000   83      1
24  29   16   M   7500   68      2
25  29   16   M   7300   69      2
26  29   16   M   9250   70      2
27  29   16   M  10300   71      2
> tail(psid)
     age educ sex income year person
1949  33    6   M   4050   84     85
1950  33    6   M   6000   85     85
1951  33    6   M   7000   86     85
1952  33    6   M  10000   87     85
1953  33    6   M  10000   88     85
1954  33    6   M   2000   89     85
> library(lattice)
Attaching package: 'lattice'
The following object(s) are masked from package:faraway :
melanoma
> xyplot(income~year|person,psid,type="l",subset=(person < 21),
strip=F)
> xyplot(log(income+100)~year|sex,psid,type="l",groups=person)
> lmod <- lm(log(income)~I(year-78),subset=(person==1),psid)
> coef(lmod)
 (Intercept) I(year - 78)
   9.3999568    0.0842667
> slopes <- rep(0,85); intercepts <- rep(0,85)
> for(i in 1:85){
+ lmod <- lm(log(income)~I(year-78),subset=(person==i),psid)
+ intercepts[i] <- coef(lmod)[1]
+ slopes[i] <- coef(lmod)[2]
+ }
> plot(intercepts,slopes,xlab="Intercept",ylab="Slope")
> psex <- psid$sex[match(1:85,psid$person)]
> boxplot(split(slopes,psex))
> boxplot(split(intercepts,psex))
> t.test(slopes[psex=="M"],slopes[psex=="F"])
Welch Two Sample t-test
data: slopes[psex == "M"] and slopes[psex == "F"]
t = -2.3786, df = 56.736, p-value = 0.02077
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.05916871 -0.00507729
sample estimates:
mean of x mean of y
0.05691046 0.08903346
> t.test(intercepts[psex=="M"],intercepts[psex=="F"])
Welch Two Sample t-test
data: intercepts[psex == "M"] and intercepts[psex == "F"]
t = 8.2199, df = 79.719, p-value = 3.065e-12
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.8738792 1.4322218
sample estimates:
mean of x mean of y
9.382325 8.229275
> library(lme4)
Loading required package: Matrix
Attaching package: 'Matrix'
The following object(s) are masked from package:stats :
xtabs
The following object(s) are masked from package:base :
colMeans,
colSums,
rcond,
rowMeans,
rowSums
> psid$cyear <- psid$year-78
> mmod <- lmer(log(income)~cyear*sex+age+educ+(cyear|person),psid)
> summary(mmod)
Linear mixed model fit by REML
Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
Data: psid
  AIC  BIC logLik deviance REMLdev
 3840 3894  -1910     3786    3820
Random effects:
 Groups   Name        Variance  Std.Dev. Corr
 person   (Intercept) 0.2816566 0.53071
          cyear       0.0024000 0.04899  0.187
 Residual             0.4672724 0.68357
Number of obs: 1661, groups: person, 85
Fixed effects:
             Estimate Std. Error t value
(Intercept)  6.674178   0.543334  12.284
cyear        0.085312   0.008999   9.480
sexM         1.150315   0.121293   9.484
age          0.010932   0.013524   0.808
educ         0.104212   0.021437   4.861
cyear:sexM  -0.026306   0.012238  -2.150
Correlation of Fixed Effects:
           (Intr) cyear  sexM   age    educ
cyear       0.020
sexM       -0.104 -0.098
age        -0.874  0.002 -0.026
educ       -0.597  0.000  0.008  0.167
cyear:sexM -0.003 -0.735  0.156 -0.010 -0.011
> qqmath(~resid(mmod)|sex,psid)
> xyplot(resid(mmod)~fitted(mmod)|cut(educ,c(0,8.5,12.5,20)),psid,
+ layout=c(3,1),xlab="Fitted",ylab="Residuals")
>
> mmod.female <- lmer(log(income)~cyear+age+educ+(cyear|
person),subset=(sex=="F"),psid)
> summary(mmod.female)
Linear mixed model fit by REML
Formula: log(income) ~ cyear + age + educ + (cyear | person)
Data: psid
Subset: (sex == "F")
  AIC  BIC logLik deviance REMLdev
 2084 2121  -1034     2048    2068
Random effects:
 Groups   Name        Variance  Std.Dev. Corr
 person   (Intercept) 0.3692110 0.607627
          cyear       0.0039091 0.062523 0.129
 Residual             0.7924775 0.890212
Number of obs: 732, groups: person, 39
Fixed effects:
             Estimate Std. Error t value
(Intercept)  7.802415   0.948153   8.229
cyear        0.085287   0.011534   7.394
age         -0.007056   0.021705  -0.325
educ         0.057316   0.040022   1.432
Correlation of Fixed Effects:
      (Intr) cyear  age
cyear  0.005
age   -0.872  0.005
educ  -0.703  0.000  0.282
> mmod.male <- lmer(log(income)~cyear+age+educ+(cyear|
person),subset=(sex=="M"),psid)
> summary(mmod.male)
Linear mixed model fit by REML
Formula: log(income) ~ cyear + age + educ + (cyear | person)
Data: psid
Subset: (sex == "M")
  AIC  BIC logLik deviance REMLdev
 1439 1478 -711.5     1399    1423
Random effects:
 Groups   Name        Variance  Std.Dev. Corr
 person   (Intercept) 0.1957672 0.442456
          cyear       0.0011361 0.033706 0.270
 Residual             0.2125482 0.461030
Number of obs: 929, groups: person, 46
Fixed effects:
            Estimate Std. Error t value
(Intercept) 7.067665   0.603116  11.719
cyear       0.058610   0.005687  10.306
age         0.025691   0.016245   1.581
educ        0.128151   0.022249   5.760
Correlation of Fixed Effects:
      (Intr) cyear  age
cyear  0.041
age   -0.896 -0.009
educ  -0.487 -0.011  0.064
Download