Kernel PCA Example

advertisement
Kernel PCA Example
>
>
>
>
>
>
>
>
>
>
>
>
#Here is Some Code for a Kernel PCA Example
#First prepare, examine, and plot a small fake data set
D<-matrix(1:20,nrow=10)
D[1,]<-c(2,2)
D[2,]<-c(3,3)
D[3,]<-c(2,3)
D[4,]<-c(3,2)
D[5,]<-c(2.5,2.5)
D[6:10,]<- -D[1:5,]
D
[,1] [,2]
[1,] 2.0 2.0
[2,] 3.0 3.0
[3,] 2.0 3.0
[4,] 3.0 2.0
[5,] 2.5 2.5
[6,] -2.0 -2.0
[7,] -3.0 -3.0
[8,] -2.0 -3.0
[9,] -3.0 -2.0
[10,] -2.5 -2.5
> plot(D,lwd=5)
>
> mean(D[,1])
[1] 0
> mean(D[,2])
[1] 0
>
> #Note that both columns have sample mean 0
>
> #Consider an eigen analysis of D'D
>
> eigen(t(D)%*%D)
$values
[1] 127   2

$vectors
          [,1]       [,2]
[1,] 0.7071068 -0.7071068
[2,] 0.7071068  0.7071068
1 >
>
>
>
>
>
>
>
>
#The eigenvectors of D'D for the already-centered D give
#principal component directions for the data set
#Principal component vectors can be obtained from the directions
PC1<-D%*%eigen(t(D)%*%D)$vectors[,1]
PC2<-D%*%eigen(t(D)%*%D)$vectors[,2]
cbind(PC1,PC2)
           [,1]       [,2]
 [1,]  2.828427  0.0000000
 [2,]  4.242641  0.0000000
 [3,]  3.535534  0.7071068
 [4,]  3.535534 -0.7071068
 [5,]  3.535534  0.0000000
 [6,] -2.828427  0.0000000
 [7,] -4.242641  0.0000000
 [8,] -3.535534 -0.7071068
 [9,] -3.535534  0.7071068
[10,] -3.535534  0.0000000
>
> #Now consider DD' and an eigen analysis of it
>
> DDprime<-D%*%t(D)
> DDprime
      [,1] [,2]  [,3]  [,4]  [,5] [,6] [,7]  [,8]  [,9] [,10]
 [1,]    8   12  10.0  10.0  10.0   -8  -12 -10.0 -10.0 -10.0
 [2,]   12   18  15.0  15.0  15.0  -12  -18 -15.0 -15.0 -15.0
 [3,]   10   15  13.0  12.0  12.5  -10  -15 -13.0 -12.0 -12.5
 [4,]   10   15  12.0  13.0  12.5  -10  -15 -12.0 -13.0 -12.5
 [5,]   10   15  12.5  12.5  12.5  -10  -15 -12.5 -12.5 -12.5
 [6,]   -8  -12 -10.0 -10.0 -10.0    8   12  10.0  10.0  10.0
 [7,]  -12  -18 -15.0 -15.0 -15.0   12   18  15.0  15.0  15.0
 [8,]  -10  -15 -13.0 -12.0 -12.5   10   15  13.0  12.0  12.5
 [9,]  -10  -15 -12.0 -13.0 -12.5   10   15  12.0  13.0  12.5
[10,]  -10  -15 -12.5 -12.5 -12.5   10   15  12.5  12.5  12.5
>
> eigen(DDprime)
$values
[1] 1.270000e+02 2.000000e+00 4.263256e-14 5.904563e-16 4.174461e-16
[6] 7.595154e-18 -5.551115e-17 -6.329458e-17 -6.983256e-17 -4.508266e-15
$vectors
            [,1]          [,2]        [,3]         [,4]        [,5]
 [1,] -0.2509823  0.000000e+00  0.96799167  0.000000000  0.00000000
 [2,] -0.3764735 -1.619131e-16 -0.09761261  0.044607815 -0.29527481
 [3,] -0.3137279 -5.000000e-01 -0.08134384 -0.069470082 -0.03532352
 [4,] -0.3137279  5.000000e-01 -0.08134384  0.001977324 -0.10046418
 [5,] -0.3137279 -1.488044e-16 -0.08134384 -0.312357425  0.84839641
 [6,]  0.2509823 -5.304109e-17  0.06507507 -0.916868254 -0.26456691
 [7,]  0.3764735 -1.017269e-17  0.09761261  0.029611890  0.30946883
 [8,]  0.3137279  5.000000e-01  0.08134384  0.118426334  0.10927950
 [9,]  0.3137279 -5.000000e-01  0.08134384  0.189873740  0.04413884
[10,]  0.3137279 -4.548468e-17  0.08134384  0.063339457  0.04515151
              [,6]        [,7]         [,8]        [,9]       [,10]
 [1,]  0.000000000  0.00000000  0.000000000  0.00000000  0.00000000
 [2,] -0.005326275 -0.05527900 -0.007062695 -0.00995344  0.86967197
 [3,]  0.142897277 -0.22609995 -0.683268309 -0.26402563 -0.17543719
 [4,]  0.021047023 -0.22609995 -0.334612968  0.66013443 -0.18855650
 [5,] -0.006623764  0.20030149 -0.004037545  0.08705168  0.17278743
 [6,] -0.065876853  0.01031938 -0.081259099  0.07525080  0.07360769
 [7,]  0.006549370 -0.83732379  0.009572193  0.01996475  0.22460505
 [8,] -0.272666821  0.22609995 -0.576919199 -0.35822940  0.17988532
 [9,] -0.394517075  0.22609995 -0.228263858  0.56593066  0.16676601
[10,]  0.862955141  0.22609995 -0.171690353  0.17935676  0.17733655
>
>
>
>
>
>
>
>
>
>
>
>
#Note that only two eigenvalues are non-zero, and they are the same
#as the eigenvalues of D'D
#See also that the eigenvectors times the roots of the eigenvalues
#are the principal component vectors of the data set (up to the
#arbitrary multiplication by -1)
z1<-sqrt(eigen(DDprime)$values[1])*eigen(DDprime)$vectors[,1]
z2<-sqrt(eigen(DDprime)$values[2])*eigen(DDprime)$vectors[,2]
cbind(z1,z2)
             z1            z2
 [1,] -2.828427  0.000000e+00
 [2,] -4.242641 -2.289797e-16
 [3,] -3.535534 -7.071068e-01
 [4,] -3.535534  7.071068e-01
 [5,] -3.535534 -2.104411e-16
 [6,]  2.828427 -7.501142e-17
 [7,]  4.242641 -1.438636e-17
 [8,]  3.535534  7.071068e-01
 [9,]  3.535534 -7.071068e-01
[10,]  3.535534 -6.432505e-17
>
> #Notice that the PC's quite naturally measure displacement of data
> #points from the origin first in the (1,1) (or (-1,-1)) direction
> #and second in the (-1,1) (or (1,-1)) direction
>
> #Now contrast the original analysis to a kernel PC analysis based on
> #the kernel function K(x,z)=exp(-c*||x-z||^2) for x,z both 2-vectors
> #(the constant c is the variable lambda, set to 2, in the code below)
>
> #Make and inspect the Gram matrix
>
> lambda<-2
> K<-matrix(1:100,nrow=10)
> for (i in 1:10) {
+
for (j in 1:10) {
+
K[i,j]<-exp(-lambda*((D[i,1]-D[j,1])^2+(D[i,2]-D[j,2])^2))
+
}
+
}
> round(K,3)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 1.000 0.018 0.135 0.135 0.368 0.000 0.000 0.000 0.000 0.000
[2,] 0.018 1.000 0.135 0.135 0.368 0.000 0.000 0.000 0.000 0.000
[3,] 0.135 0.135 1.000 0.018 0.368 0.000 0.000 0.000 0.000 0.000
[4,] 0.135 0.135 0.018 1.000 0.368 0.000 0.000 0.000 0.000 0.000
[5,] 0.368 0.368 0.368 0.368 1.000 0.000 0.000 0.000 0.000 0.000
[6,] 0.000 0.000 0.000 0.000 0.000 1.000 0.018 0.135 0.135 0.368
[7,] 0.000 0.000 0.000 0.000 0.000 0.018 1.000 0.135 0.135 0.368
[8,] 0.000 0.000 0.000 0.000 0.000 0.135 0.135 1.000 0.018 0.368
[9,] 0.000 0.000 0.000 0.000 0.000 0.135 0.135 0.018 1.000 0.368
[10,] 0.000 0.000 0.000 0.000 0.000 0.368 0.368 0.368 0.368 1.000
>
> #Now make and inspect a version of the Gram matrix appropriate for
> #a "centered" version of the (only implicitly defined) transformed
> #variables corresponding to the kernel
>
> J<-matrix(c(rep(1,100)),nrow=10)
> J
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    1    1    1    1    1    1    1    1    1     1
 [2,]    1    1    1    1    1    1    1    1    1     1
 [3,]    1    1    1    1    1    1    1    1    1     1
 [4,]    1    1    1    1    1    1    1    1    1     1
 [5,]    1    1    1    1    1    1    1    1    1     1
 [6,]    1    1    1    1    1    1    1    1    1     1
 [7,]    1    1    1    1    1    1    1    1    1     1
 [8,]    1    1    1    1    1    1    1    1    1     1
 [9,]    1    1    1    1    1    1    1    1    1     1
[10,]    1    1    1    1    1    1    1    1    1     1
> Ktilde<-K-(.1)*J%*%K-(.1)*K%*%J+(.01)*J%*%K%*%J
> round(Ktilde,3)
        [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  [,10]
 [1,]  0.851 -0.131 -0.014 -0.014  0.137 -0.149 -0.149 -0.149 -0.149 -0.231
 [2,] -0.131  0.851 -0.014 -0.014  0.137 -0.149 -0.149 -0.149 -0.149 -0.231
 [3,] -0.014 -0.014  0.851 -0.131  0.137 -0.149 -0.149 -0.149 -0.149 -0.231
 [4,] -0.014 -0.014 -0.131  0.851  0.137 -0.149 -0.149 -0.149 -0.149 -0.231
 [5,]  0.137  0.137  0.137  0.137  0.688 -0.231 -0.231 -0.231 -0.231 -0.312
 [6,] -0.149 -0.149 -0.149 -0.149 -0.231  0.851 -0.131 -0.014 -0.014  0.137
 [7,] -0.149 -0.149 -0.149 -0.149 -0.231 -0.131  0.851 -0.014 -0.014  0.137
 [8,] -0.149 -0.149 -0.149 -0.149 -0.231 -0.014 -0.014  0.851 -0.131  0.137
 [9,] -0.149 -0.149 -0.149 -0.149 -0.231 -0.014 -0.014 -0.131  0.851  0.137
[10,] -0.231 -0.231 -0.231 -0.231 -0.312  0.137  0.137  0.137  0.137  0.688
>
> #Have a look at an eigen analysis for the "centered" Gram matrix
>
> round(eigen(Ktilde)$values,3)
[1] 1.894 0.982 0.982 0.982 0.982 0.748 0.748 0.469 0.395 0.000
> round(eigen(Ktilde)$vectors,3)
        [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  [,10]
 [1,]  0.273 -0.164  0.000  0.308  0.615  0.004 -0.500 -0.158  0.225 -0.316
 [2,]  0.273  0.164  0.000 -0.308 -0.615  0.004 -0.500 -0.158  0.225 -0.316
 [3,]  0.273 -0.624  0.106  0.182 -0.257 -0.004  0.500 -0.158  0.225 -0.316
 [4,]  0.273  0.624 -0.106 -0.182  0.257 -0.004  0.500 -0.158  0.225 -0.316
 [5,]  0.449  0.000  0.000  0.000  0.000  0.000  0.000  0.632 -0.546 -0.316
 [6,] -0.273  0.282  0.080  0.602 -0.226  0.500  0.004 -0.158 -0.225 -0.316
 [7,] -0.273 -0.282 -0.080 -0.602  0.226  0.500  0.004 -0.158 -0.225 -0.316
 [8,] -0.273 -0.063 -0.694  0.097 -0.065 -0.500 -0.004 -0.158 -0.225 -0.316
 [9,] -0.273  0.063  0.694 -0.097  0.065 -0.500 -0.004 -0.158 -0.225 -0.316
[10,] -0.449  0.000  0.000  0.000  0.000  0.000  0.000  0.632  0.546 -0.316
>
4 >
>
>
>
>
>
>
>
+
+
+
+
#Here are the first 5 kernel PC vectors
#Only the first of these is interpretable, but it is interesting.
#The signs in the first PC break the points into the obvious
#"1st quadrant" and "3rd quadrant" groups. The magnitudes in the
#first kernel PC distinguish between the "center" points of the
#two groups and the "outside" points.
cbind(round(sqrt(eigen(Ktilde)$values[1])*eigen(Ktilde)$vectors[,1],3),
round(sqrt(eigen(Ktilde)$values[2])*eigen(Ktilde)$vectors[,2],3),
round(sqrt(eigen(Ktilde)$values[3])*eigen(Ktilde)$vectors[,3],3),
round(sqrt(eigen(Ktilde)$values[4])*eigen(Ktilde)$vectors[,4],3),
round(sqrt(eigen(Ktilde)$values[5])*eigen(Ktilde)$vectors[,5],3))
        [,1]   [,2]   [,3]   [,4]   [,5]
 [1,]  0.376 -0.162  0.000  0.305  0.610
 [2,]  0.376  0.162  0.000 -0.305 -0.610
 [3,]  0.376 -0.618  0.105  0.180 -0.255
 [4,]  0.376  0.618 -0.105 -0.180  0.255
 [5,]  0.618  0.000  0.000  0.000  0.000
 [6,] -0.376  0.280  0.079  0.597 -0.224
 [7,] -0.376 -0.280 -0.079 -0.597  0.224
 [8,] -0.376 -0.063 -0.688  0.096 -0.065
 [9,] -0.376  0.063  0.688 -0.096  0.065
[10,] -0.618  0.000  0.000  0.000  0.000
5 
Download