# Kernel PCA Example
#
# A small worked example that contrasts ordinary principal components
# analysis (PCA) with kernel PCA on a fake 10 x 2 data set.
#
# Objects created (names kept from the original session):
#   D        10 x 2 data matrix (already centered)
#   PC1, PC2 principal component vectors obtained from D'D
#   DDprime  10 x 10 matrix DD'
#   z1, z2   the same principal components recovered from DD'
#   lambda   scale parameter of the Gaussian kernel
#   K        10 x 10 Gram matrix for the kernel
#   J        10 x 10 matrix of ones used for centering
#   Ktilde   "centered" Gram matrix
#
# Bare expressions below are meant to auto-print, as in an interactive
# session or under Rscript.

# ---- 1. Prepare, examine, and plot a small fake data set ----

# Five points with positive coordinates, plus their mirror images
# through the origin.
group_1 <- rbind(c(2, 2), c(3, 3), c(2, 3), c(3, 2), c(2.5, 2.5))
D <- rbind(group_1, -group_1)
n <- nrow(D)
D
plot(D, lwd = 5)

# Note that both columns have sample mean 0, so D is already centered.
colMeans(D)

# ---- 2. Ordinary PCA from an eigen analysis of D'D ----

# The eigenvectors of D'D for the already-centered D give the principal
# component directions for the data set. Expected eigenvalues: 127 and 2,
# with directions (1, 1)/sqrt(2) and (-1, 1)/sqrt(2).
eig_DtD <- eigen(crossprod(D), symmetric = TRUE)
eig_DtD

# Principal component vectors are obtained by projecting the data onto
# the principal component directions.
PC1 <- D %*% eig_DtD$vectors[, 1]
PC2 <- D %*% eig_DtD$vectors[, 2]
cbind(PC1, PC2)

# ---- 3. The same analysis from DD' ----

DDprime <- tcrossprod(D)
DDprime

# Compute the decomposition once and reuse it.
eig_DDprime <- eigen(DDprime, symmetric = TRUE)
eig_DDprime

# Note that only two eigenvalues are non-zero (the rest are zero up to
# rounding error, and may even print as tiny negative numbers), and that
# they are the same as the eigenvalues of D'D.
#
# See also that the eigenvectors times the square roots of the
# eigenvalues are the principal component vectors of the data set (up to
# the arbitrary multiplication by -1). Only the first two eigenvalues are
# used, so no square root of a (numerically) negative value is taken.
z1 <- sqrt(eig_DDprime$values[1]) * eig_DDprime$vectors[, 1]
z2 <- sqrt(eig_DDprime$values[2]) * eig_DDprime$vectors[, 2]
cbind(z1, z2)

# Notice that the PCs quite naturally measure displacement of data points
# from the origin, first in the (1, 1) (or (-1, -1)) direction and second
# in the (-1, 1) (or (1, -1)) direction.

# ---- 4. Kernel PCA: make and inspect the Gram matrix ----

# Now contrast the original analysis with a kernel PC analysis based on
# the kernel function K(x, z) = exp(-lambda * ||x - z||^2) for x, z both
# 2-vectors.
lambda <- 2

# dist() returns the pairwise Euclidean distances between rows of D;
# squaring them gives ||x - z||^2. unname() drops the "1".."10" dimnames
# that as.matrix() adds, so K prints like a plain matrix.
K <- unname(exp(-lambda * as.matrix(dist(D))^2))

# Within each group of five points the off-diagonal entries are 0.018,
# 0.135 or 0.368; entries between the two groups round to 0.000.
round(K, 3)

# ---- 5. Center the Gram matrix ----

# Now make and inspect a version of the Gram matrix appropriate for a
# "centered" version of the (only implicitly defined) transformed
# variables corresponding to the kernel:
#   Ktilde = K - (1/n) J K - (1/n) K J + (1/n^2) J K J,
# where J is the n x n matrix of ones.
J <- matrix(1, nrow = n, ncol = n)
J

Ktilde <- K -
  (1 / n) * J %*% K -
  (1 / n) * K %*% J +
  (1 / n^2) * J %*% K %*% J
round(Ktilde, 3)

# ---- 6. Eigen analysis of the "centered" Gram matrix ----

# Ktilde is symmetric in exact arithmetic; saying so explicitly keeps
# eigen() on the symmetric solver even if rounding in the matrix products
# above leaves tiny asymmetries.
eig_Ktilde <- eigen(Ktilde, symmetric = TRUE)

# Expected eigenvalues (rounded):
#   1.894 0.982 0.982 0.982 0.982 0.748 0.748 0.469 0.395 0.000
round(eig_Ktilde$values, 3)
round(eig_Ktilde$vectors, 3)

# ---- 7. The first 5 kernel PC vectors ----

# Each kernel PC vector is an eigenvector of Ktilde scaled by the square
# root of its eigenvalue.
n_pc <- 5
pc_index <- seq_len(n_pc)
kernel_pcs <- sweep(
  eig_Ktilde$vectors[, pc_index],
  2,
  sqrt(eig_Ktilde$values[pc_index]),
  "*"
)
round(kernel_pcs, 3)

# Only the first of these is interpretable, but it is interesting.
# (Kernel PCs 2 through 5 share the numerically repeated eigenvalue
# 0.982, so the eigenvectors returned for them are an arbitrary basis of
# that eigenspace.)
#
# The signs in the first kernel PC break the points into the obvious
# "1st quadrant" and "3rd quadrant" groups (rows 1-5 versus rows 6-10).
# The magnitudes in the first kernel PC distinguish between the "center"
# points of the two groups (rows 5 and 10, magnitude 0.618) and the
# "outside" points (magnitude 0.376).