cp13Routput.doc

advertisement
R version 3.0.1 (2013-05-16) -- "Good Sport"
Copyright (C) 2013 The R Foundation for Statistical Computing
Platform: i386-w64-mingw32/i386 (32-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
[Previously saved workspace restored]
> #download data and save it in myRfolder and make that the default folder
>
> my.data=read.table("cp13.csv",header=TRUE,sep=",")
>
head(my.data)
class CumTemp CumMax CumMin CumRH CumDewpoin CumSoilTemp Lat Lng
1 A 44.27154 62.81672 25.6128 60.9397 30.38243 43.06657 47.8 120.2
2 A 44.27154 62.81672 25.6128 60.9397 30.38243 43.06657 47.8 120.2
3 A 44.27154 62.81672 25.6128 60.9397 30.38243 43.06657 47.8 120.2
4 A 44.27154 62.81672 25.6128 60.9397 30.38243 43.06657 47.8 120.2
5 A 44.27154 62.81672 25.6128 60.9397 30.38243 43.06657 47.8 120.2
6 A 44.27154 62.81672 25.6128 60.9397 30.38243 43.06657 47.8 120.2
Elevation gdd JULIAN_DATE
1 1329 11086.43 77
2 1329 11086.43 77
3 1329 11086.43 77
4 1329 11086.43 77
5 1329 11086.43 77
6 1329 11086.43 77
>
> my.x=my.data[,-1]
>
>
library(corrplot)
Warning message:
package ‘corrplot’ was built under R version 3.0.2
>
>
corr=cor(my.x)
>
corrplot(corr,order="original")
>
>
>
>
n=dim(my.data)
>
my.x=c(1:n,1)
Warning message:
In 1:n : numerical expression has 2 elements: only the first used
>
>
> #breaking data into training and testing sets
>
>
train=sample(my.x,5000,replace=FALSE,prob=NULL)
>
head(train)
[1] 5224 2734 1849 308 1739 3058
>
>
my.y=my.data$class
>
pred=my.data[,-1]
>
train.x=pred[train,]
>
train.y=my.y[train]
>
test.x=pred[-train,]
>
test.y=my.y[-train]
>
train.all=my.data[train,]
> test.all=my.data[-train,]
>
> #multinomial logit
>
> library(nnet)
> mlog <- multinom(class ~ ., data = train.all)
# weights: 104 (84 variable)
initial value 10397.207708
iter 10 value 10205.493563
iter 20 value 8485.363086
iter 30 value 6665.313011
iter 40 value 6402.421167
iter 50 value 5926.664806
iter 60 value 5174.186070
iter 70 value 4500.681225
iter 80 value 3114.919437
iter 90 value 2701.146321
iter 100 value 2631.398579
final value 2631.398579
stopped after 100 iterations
> summary(mlog)
Call:
multinom(formula = class ~ ., data = train.all)
Coefficients:
(Intercept) CumTemp CumMax CumMin CumRH CumDewpoin CumSoilTemp
B -4.082718 11.716682 0.8074156 0.07801086 5.510868 -12.324777 0.2118794
C 35.819674 23.574589 1.5233602 0.40217534 10.151525 -24.225141 -0.3020384
D 18.810541 18.883198 1.5056660 0.01517851 8.631093 -19.684373 0.6107711
E 5.261461 13.984482 2.0118651 -0.26947887 6.986282 -14.872287 0.3705393
F -8.405122 6.053086 2.0490853 -0.64337145 3.982722 -7.382469 0.5438371
G 3.710809 12.027716 2.4172049 -1.33529229 6.980774 -13.689703 -0.6273748
H -47.445001 16.267291 2.1872605 -2.00784404 9.132875 -18.280940 -0.6109551
Lat Lng Elevation gdd JULIAN_DATE
B 1.029814 -5.0849198 -0.008890343 -0.0001069588 0.4279532
C -5.378494 -7.1115418 -0.005732578 -0.0002983902 0.6682230
D -7.015750 -5.8704059 -0.004780789 -0.0002906053 1.5149656
E -12.626858 -3.0720915 -0.006477368 -0.0003737260 2.5334077
F -18.111667 1.2213621 -0.009668047 -0.0004586604 3.2005495
G -20.770285 -0.2505658 -0.019301648 -0.0006870581 4.0566654
H -19.702173 -2.2308178 -0.030386653 -0.0008192904 4.9957098
Std. Errors:
(Intercept) CumTemp CumMax CumMin CumRH CumDewpoin
B 4.536305e-05 0.001365763 0.003190469 0.001426795 0.004837417 0.002278447
C 3.259526e-05 0.001936056 0.005248913 0.002627472 0.004379510 0.001992957
D 3.991952e-05 0.001637002 0.001256976 0.001645287 0.004042845 0.001686277
E 3.800582e-05 0.001626186 0.003265220 0.003433076 0.005837160 0.002478425
F 3.760376e-05 0.001810763 0.002856680 0.002867838 0.005046856 0.002346899
G 4.117440e-05 0.001896329 0.002931549 0.004117792 0.007757743 0.003290464
H 3.725046e-05 0.001797835 0.002734966 0.001592752 0.003953796 0.001881260
CumSoilTemp Lat Lng Elevation gdd JULIAN_DATE
B 0.002093616 0.002341749 0.006040482 0.0007448971 1.546328e-05 0.008129836
C 0.001439359 0.001403167 0.003823417 0.0009273640 1.762234e-05 0.006680893
D 0.001668462 0.001605659 0.004649356 0.0009717575 1.966896e-05 0.003627431
E 0.001648283 0.001548978 0.004500438 0.0010625089 2.115235e-05 0.008352077
F 0.001663866 0.001556161 0.004421967 0.0011219566 2.235374e-05 0.007681050
G 0.001781908 0.001623278 0.004824136 0.0011799979 2.349066e-05 0.009216514
H 0.001777394 0.001608976 0.004397171 0.0012169813 2.504018e-05 0.003910490
Residual Deviance: 5262.797
AIC: 5430.797
> p=predict(mlog,data=train.all)
> table(p,train.all$class)
p A B C D E F G H
A 350 66 0 0 0 0 0 0
B 88 835 93 0 0 0 0 0
C 0 62 389 71 0 0 0 0
D 0 0 50 238 68 0 0 0
E 0 0 11 67 746 107 11 0
F 0 0 1 0 77 324 48 1
G 0 0 0 0 13 94 579 84
H 0 0 0 0 0 0 59 468
>
>
> #lda and qda
>
> library(MASS)
> qdamodel=qda(class~.,data=train.all)
> ldamodel=lda(class~.,data=train.all)
>
>
> qdamodel
Call:
qda(class ~ ., data = train.all)
Prior probabilities of groups:
A B C D E F G H
0.0876 0.1926 0.1088 0.0752 0.1808 0.1050 0.1394 0.1106
Group means:
CumTemp CumMax CumMin CumRH CumDewpoin CumSoilTemp Lat Lng
A 43.73530 63.53633 26.81068 61.92841 30.29465 42.41804 47.32945 120.0660
B 44.17370 66.33325 24.96429 61.91811 30.64263 43.47058 46.98972 120.0742
C 45.46430 72.00287 25.45387 59.26185 30.79643 44.54333 46.96562 119.9108
D 46.08698 75.39080 25.76250 58.86660 31.17715 45.15330 47.20878 120.0141
E 46.62413 76.14132 25.46819 59.24844 31.85029 46.03554 47.15000 120.0281
F 46.95592 76.45672 24.97241 59.10644 32.07004 46.89157 47.02705 120.0682
G 47.52047 76.96089 24.38851 58.62997 32.38107 47.53212 46.82898 119.9516
H 47.90790 78.52935 25.56750 56.77968 31.84055 47.94269 47.10199 119.9770
Elevation gdd JULIAN_DATE
A 1170.0753 15620.88 78.81050
B 959.8733 20753.15 81.76324
C 967.1268 12016.55 88.31434
D 1047.3005 16498.82 94.76862
E 1026.6184 17054.51 100.24668
F 1007.5810 19440.10 105.59429
G 931.3630 16567.70 108.87374
H 1001.2622 14583.84 116.03255
>
> ldamodel
Call:
lda(class ~ ., data = train.all)
Prior probabilities of groups:
A B C D E F G H
0.0876 0.1926 0.1088 0.0752 0.1808 0.1050 0.1394 0.1106
Group means:
CumTemp CumMax CumMin CumRH CumDewpoin CumSoilTemp Lat Lng
A 43.73530 63.53633 26.81068 61.92841 30.29465 42.41804 47.32945 120.0660
B 44.17370 66.33325 24.96429 61.91811 30.64263 43.47058 46.98972 120.0742
C 45.46430 72.00287 25.45387 59.26185 30.79643 44.54333 46.96562 119.9108
D 46.08698 75.39080 25.76250 58.86660 31.17715 45.15330 47.20878 120.0141
E 46.62413 76.14132 25.46819 59.24844 31.85029 46.03554 47.15000 120.0281
F 46.95592 76.45672 24.97241 59.10644 32.07004 46.89157 47.02705 120.0682
G 47.52047 76.96089 24.38851 58.62997 32.38107 47.53212 46.82898 119.9516
H 47.90790 78.52935 25.56750 56.77968 31.84055 47.94269 47.10199 119.9770
Elevation gdd JULIAN_DATE
A 1170.0753 15620.88 78.81050
B 959.8733 20753.15 81.76324
C 967.1268 12016.55 88.31434
D 1047.3005 16498.82 94.76862
E 1026.6184 17054.51 100.24668
F 1007.5810 19440.10 105.59429
G 931.3630 16567.70 108.87374
H 1001.2622 14583.84 116.03255
Coefficients of linear discriminants:
LD1 LD2 LD3 LD4
CumTemp -1.755947e+00 -7.715112e+00 -8.968144e+00 -2.384077e+00
CumMax -6.196011e-02 5.447284e-01 -1.284076e-01 1.086636e-01
CumMin 2.855111e-02 -3.346768e-02 -2.753460e-02 5.659243e-03
CumRH -7.001448e-01 -3.472984e+00 -4.007910e+00 -6.367493e-01
CumDewpoin 1.150656e+00 7.518022e+00 9.703088e+00 1.461902e+00
CumSoilTemp -7.041050e-02 9.892890e-02 4.573735e-01 -6.197001e-02
Lat 1.029448e+00 -1.479614e+00 -1.348601e+00 -1.166813e+00
Lng -1.200285e+00 7.019817e-01 4.720907e+00 8.816812e-02
Elevation 4.441296e-04 4.178674e-03 3.330116e-03 -4.810704e-03
gdd 5.175546e-05 -5.416208e-07 5.649604e-05 -2.207714e-06
JULIAN_DATE -2.823514e-01 -1.634762e-01 1.862307e-02 7.278004e-02
LD5 LD6 LD7
CumTemp 3.401037e+00 -1.371269e+00 3.333167e-01
CumMax 9.509635e-02 8.548111e-02 8.547886e-02
CumMin 5.616754e-02 4.844613e-01 2.984807e-01
CumRH 1.642646e+00 -3.415869e-01 6.198787e-01
CumDewpoin -3.457719e+00 4.446176e-01 -7.606336e-01
CumSoilTemp -1.664975e-01 5.351942e-01 2.136622e-01
Lat 2.205962e+00 -2.503575e+00 -2.200056e-01
Lng -1.480522e+00 1.097212e+00 2.458267e+00
Elevation -3.414073e-03 2.151482e-03 -2.742151e-03
gdd 6.123963e-05 3.234613e-05 -9.549238e-05
JULIAN_DATE -1.549974e-02 -1.038226e-02 1.400448e-02
Proportion of trace:
LD1 LD2 LD3 LD4 LD5 LD6 LD7
0.9230 0.0550 0.0131 0.0061 0.0024 0.0003 0.0001
>
> ldapred2=predict(ldamodel,test.all)
> ldapred1=predict(ldamodel,train.all)
>
>
>
> library(caret)
Loading required package: lattice
Loading required package: ggplot2
Find out what's changed in ggplot2 with
news(Version == "0.9.3.1", package = "ggplot2")
Warning messages:
1: package ‘caret’ was built under R version 3.0.2
2: package ‘ggplot2’ was built under R version 3.0.2
>
> confusionMatrix(data=ldapred2$class,test.all$class)
Loading required package: class
Confusion Matrix and Statistics
Reference
Prediction A B C D E F G H
A 23 7 0 0 0 0 0 0
B 11 78 9 0 0 0 0 0
C 0 6 31 3 0 0 0 0
D 0 0 9 28 5 0 0 0
E 0 0 1 8 57 4 1 0
F 0 0 0 0 3 24 8 0
G 0 0 0 0 1 5 41 9
H 0 0 0 0 0 0 8 41
Overall Statistics
Accuracy : 0.7672
95% CI : (0.7239, 0.8068)
No Information Rate : 0.2162
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.7288
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: A Class: B Class: C Class: D Class: E Class: F
Sensitivity 0.67647 0.8571 0.62000 0.71795 0.8636 0.72727
Specificity 0.98191 0.9394 0.97574 0.96335 0.9606 0.97165
Pos Pred Value 0.76667 0.7959 0.77500 0.66667 0.8028 0.68571
Neg Pred Value 0.97187 0.9598 0.95013 0.97098 0.9743 0.97668
Prevalence 0.08076 0.2162 0.11876 0.09264 0.1568 0.07838
Detection Rate 0.05463 0.1853 0.07363 0.06651 0.1354 0.05701
Detection Prevalence 0.07126 0.2328 0.09501 0.09976 0.1686 0.08314
Balanced Accuracy 0.82919 0.8983 0.79787 0.84065 0.9121 0.84946
Class: G Class: H
Sensitivity 0.70690 0.82000
Specificity 0.95868 0.97844
Pos Pred Value 0.73214 0.83673
Neg Pred Value 0.95342 0.97581
Prevalence 0.13777 0.11876
Detection Rate 0.09739 0.09739
Detection Prevalence 0.13302 0.11639
Balanced Accuracy 0.83279 0.89922
Warning message:
package ‘e1071’ was built under R version 3.0.2
>
>
>
>
> confusionMatrix(data=ldapred1$class,train.all$class)
Confusion Matrix and Statistics
Reference
Prediction A B C D E F G H
A 296 95 0 0 0 0 0 0
B 141 811 120 0 0 0 0 0
C 1 57 314 38 0 0 0 0
D 0 0 102 266 100 0 0 0
E 0 0 8 72 684 109 12 0
F 0 0 0 0 110 305 89 1
G 0 0 0 0 10 108 472 94
H 0 0 0 0 0 3 124 458
Overall Statistics
Accuracy : 0.7212
95% CI : (0.7085, 0.7336)
No Information Rate : 0.1926
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.6763
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: A Class: B Class: C Class: D Class: E Class: F
Sensitivity 0.6758 0.8422 0.5772 0.7074 0.7566 0.5810
Specificity 0.9792 0.9353 0.9785 0.9563 0.9509 0.9553
Pos Pred Value 0.7570 0.7565 0.7659 0.5684 0.7729 0.6040
Neg Pred Value 0.9692 0.9613 0.9499 0.9757 0.9465 0.9511
Prevalence 0.0876 0.1926 0.1088 0.0752 0.1808 0.1050
Detection Rate 0.0592 0.1622 0.0628 0.0532 0.1368 0.0610
Detection Prevalence 0.0782 0.2144 0.0820 0.0936 0.1770 0.1010
Balanced Accuracy 0.8275 0.8888 0.7778 0.8319 0.8538 0.7681
Class: G Class: H
Sensitivity 0.6772 0.8282
Specificity 0.9507 0.9714
Pos Pred Value 0.6901 0.7829
Neg Pred Value 0.9479 0.9785
Prevalence 0.1394 0.1106
Detection Rate 0.0944 0.0916
Detection Prevalence 0.1368 0.1170
Balanced Accuracy 0.8140 0.8998
>
>
> qdapred=predict(qdamodel,test.all)
> confusionMatrix(data=qdapred$class,test.all$class)
Confusion Matrix and Statistics
Reference
Prediction A B C D E F G H
A 30 15 0 0 0 0 0 0
B 4 65 1 0 0 0 0 0
C 0 11 35 3 0 0 0 0
D 0 0 12 26 2 0 0 0
E 0 0 2 10 59 6 1 0
F 0 0 0 0 3 24 7 0
G 0 0 0 0 2 3 41 8
H 0 0 0 0 0 0 9 42
Overall Statistics
Accuracy : 0.7648
95% CI : (0.7214, 0.8046)
No Information Rate : 0.2162
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.7283
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: A Class: B Class: C Class: D Class: E Class: F
Sensitivity 0.88235 0.7143 0.70000 0.66667 0.8939 0.72727
Specificity 0.96124 0.9848 0.96226 0.96335 0.9465 0.97423
Pos Pred Value 0.66667 0.9286 0.71429 0.65000 0.7564 0.70588
Neg Pred Value 0.98936 0.9259 0.95968 0.96588 0.9796 0.97674
Prevalence 0.08076 0.2162 0.11876 0.09264 0.1568 0.07838
Detection Rate 0.07126 0.1544 0.08314 0.06176 0.1401 0.05701
Detection Prevalence 0.10689 0.1663 0.11639 0.09501 0.1853 0.08076
Balanced Accuracy 0.92180 0.8496 0.83113 0.81501 0.9202 0.85075
Class: G Class: H
Sensitivity 0.70690 0.84000
Specificity 0.96419 0.97574
Pos Pred Value 0.75926 0.82353
Neg Pred Value 0.95368 0.97838
Prevalence 0.13777 0.11876
Detection Rate 0.09739 0.09976
Detection Prevalence 0.12827 0.12114
Balanced Accuracy 0.83554 0.90787
>
> #pls and enet
>
> library(pls)
Attaching package: ‘pls’
The following object is masked from ‘package:caret’:
R2
The following object is masked from ‘package:corrplot’:
corrplot
The following object is masked from ‘package:stats’:
loadings
Warning message:
package ‘pls’ was built under R version 3.0.2
> y=as.factor(train.y)
> plsdamodel=plsda(x=train.x,y=y,scale=TRUE,probMethod="Bayes",ncomp=4,type="class")
Warning message:
package ‘klaR’ was built under R version 3.0.2
> plspred=predict(plsdamodel,newdata=test.x)
> plsprob=predict(plsdamodel,newdata=test.x,type="prob")
> head(plspred)
[1] A A A A A A
Levels: A B C D E F G H
> head(plsprob)
[1] 0.8706257 0.8706257 0.8706257 0.8481500 0.8481500 0.8481500
> confusionMatrix(data=plspred,test.all$class)
Confusion Matrix and Statistics
Reference
Prediction A B C D E F G H
A 29 14 0 0 0 0 0 0
B 5 69 2 1 0 0 0 0
C 0 8 37 3 0 0 0 0
D 0 0 7 20 6 0 0 0
E 0 0 4 13 48 8 1 0
F 0 0 0 2 12 20 7 0
G 0 0 0 0 0 4 39 6
H 0 0 0 0 0 1 11 44
Overall Statistics
Accuracy : 0.7268
95% CI : (0.6816, 0.7689)
No Information Rate : 0.2162
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.684
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: A Class: B Class: C Class: D Class: E Class: F
Sensitivity 0.85294 0.7582 0.74000 0.51282 0.7273 0.60606
Specificity 0.96382 0.9758 0.97035 0.96597 0.9268 0.94588
Pos Pred Value 0.67442 0.8961 0.77083 0.60606 0.6486 0.48780
Neg Pred Value 0.98677 0.9360 0.96515 0.95103 0.9481 0.96579
Prevalence 0.08076 0.2162 0.11876 0.09264 0.1568 0.07838
Detection Rate 0.06888 0.1639 0.08789 0.04751 0.1140 0.04751
Detection Prevalence 0.10214 0.1829 0.11401 0.07838 0.1758 0.09739
Balanced Accuracy 0.90838 0.8670 0.85518 0.73939 0.8270 0.77597
Class: G Class: H
Sensitivity 0.67241 0.8800
Specificity 0.97245 0.9677
Pos Pred Value 0.79592 0.7857
Neg Pred Value 0.94892 0.9836
Prevalence 0.13777 0.1188
Detection Rate 0.09264 0.1045
Detection Prevalence 0.11639 0.1330
Balanced Accuracy 0.82243 0.9238
>
>
>
> #neural networks
>
> nnetmodel=nnet(class~.,data=train.all,size=8,decay=.2,linout=FALSE, entropy=TRUE)
# weights: 168
initial value 10319.643871
iter 10 value 10035.731138
iter 20 value 9932.903324
iter 30 value 9894.456230
iter 40 value 9872.421150
iter 50 value 9837.307868
iter 60 value 9791.826852
iter 70 value 9764.092295
iter 80 value 9699.186427
iter 90 value 9637.626226
iter 100 value 9551.141164
final value 9551.141164
stopped after 100 iterations
> nnetmodel
a 11-8-8 network with 168 weights
inputs: CumTemp CumMax CumMin CumRH CumDewpoin CumSoilTemp Lat Lng Elevation gdd
JULIAN_DATE
output(s): class
options were - softmax modelling decay=0.2
> nnetpred1=predict(nnetmodel,newdata=train.all,type="class")
> nnetpred2=predict(nnetmodel,newdata=test.all,type="class")
> table(nnetpred2,test.all$class)
nnetpred2 A B C D E F G H
B 31 80 17 23 40 27 36 21
C 3 11 32 13 16 6 15 26
E 0 0 1 3 10 0 0 0
G 0 0 0 0 0 0 7 3
>
> library(devtools)
WARNING: Rtools is required to build R packages, but is not currently installed.
Please download and install Rtools 3.1 from http://cran.r-project.org/bin/windows/Rtools/ and then run
find_rtools().
Warning message:
package ‘devtools’ was built under R version 3.0.2
>
source_url('https://gist.github.com/fawda123/7471137/raw/c720af2cea5f312717f020a09946800d55b8f45b/nnet_plot_update.r')
SHA-1 hash of file is 3e535ef9cbcad648f0fffb89a8879e12c09be1e7
>
>
> plot.nnet(nnetmodel)
Loading required package: scales
Loading required package: reshape
Loading required package: plyr
Attaching package: ‘reshape’
The following object is masked from ‘package:plyr’:
rename, round_any
The following object is masked from ‘package:class’:
condense
Warning messages:
1: package ‘scales’ was built under R version 3.0.2
2: package ‘reshape’ was built under R version 3.0.2
3: package ‘plyr’ was built under R version 3.0.2
>
> #naive bayes
> cl=naiveBayes(train.all[,2:8],train.all$class)
> table(predict(cl, train.all[,-1]), train.all[,1], dnn=list('predicted','actual'))
actual
predicted A B C D E F G H
A 331 251 0 0 0 0 0 0
B 82 644 147 9 0 0 0 0
C 25 67 255 75 43 0 0 0
D 0 1 88 118 84 30 5 0
E 0 0 49 160 514 214 204 30
F 0 0 0 3 95 50 52 23
G 0 0 5 11 168 198 362 243
H 0 0 0 0 0 33 74 257
> cl$tables$x1
NULL
>
>
>
>
>
>
>
>
Download