Macierz składająca się z wektorów własnych/loadings

df <- read.table("dane/Miasta.txt", header=TRUE)

df_std <- scale(df)

df_std <- data.frame(df_std)

X <- as.matrix(df_std)

\[X'X = PDP'\]

\(P = [v_1,..., v_p]\) - macierz składająca się z wektorów własnych

\(D = \textrm{diag}(\lambda_1, ..., \lambda_p)\)

\(X'X\) - symetryczna

\(P'\) - odwrotność \(P\)

eigen(t(X) %*% X)
## eigen() decomposition
## $values
## [1] 97.209164 28.963982  8.826854
## 
## $vectors
##            [,1]      [,2]         [,3]
## [1,]  0.4847566 0.8746251  0.006481871
## [2,] -0.6178516 0.3476678 -0.705256351
## [3,] -0.6190884 0.3378728  0.708922750
t(X) %*% X
##            Work     Price    Salary
## Work    45.0000 -20.34790 -20.57340
## Price  -20.3479  45.00000  36.17211
## Salary -20.5734  36.17211  45.00000
45*var(X)
##            Work     Price    Salary
## Work    45.0000 -20.34790 -20.57340
## Price  -20.3479  45.00000  36.17211
## Salary -20.5734  36.17211  45.00000
eigen.values <- eigen(t(X) %*% X)$values
eigen.vectors <- eigen(t(X) %*% X)$vectors
PCA <- princomp(X)

PCA$loadings
## 
## Loadings:
##        Comp.1 Comp.2 Comp.3
## Work    0.485  0.875       
## Price  -0.618  0.348 -0.705
## Salary -0.619  0.338  0.709
## 
##                Comp.1 Comp.2 Comp.3
## SS loadings     1.000  1.000  1.000
## Proportion Var  0.333  0.333  0.333
## Cumulative Var  0.333  0.667  1.000

Zmiana bazy

\(E = I_p\)

\(P = [v_1,..., v_p]\)

\(x_{E} = (x_{E})_{1} * (1, 0, 0)' + (x_{E})_{2} * (0, 1, 0)' + (x_{E})_{3} * (0, 0, 1)' = (x_{P})_{1} * v_1 + (x_{P})_{2} * v_2 + (x_{P})_{3} * v_3 = Px_{P}\)

x_{P} = P'x_{E}

x_E <- X[1,]

P <- PCA$loadings

t(P) %*% x_E
##              [,1]
## Comp.1 -0.5677451
## Comp.2 -0.7764563
## Comp.3  0.4129280
PCA$scores[1,]
##     Comp.1     Comp.2     Comp.3 
## -0.5677451 -0.7764563  0.4129280
print("***")
## [1] "***"
x_E
##       Work      Price     Salary 
## -0.9516497 -0.2103868  0.3818750
PCA$scores[1,] %*% t(P)
##            Work      Price   Salary
## [1,] -0.9516497 -0.2103868 0.381875
print("***")
## [1] "***"
x_E %*% t(P)
##            Work     Price    Salary
## [1,] -0.6428528 0.2455138 0.7887912

Zmienność

var(X)
##              Work      Price     Salary
## Work    1.0000000 -0.4521755 -0.4571867
## Price  -0.4521755  1.0000000  0.8038247
## Salary -0.4571867  0.8038247  1.0000000
sum(diag(var(X)))
## [1] 3
sum(eigen(var(X))$values)
## [1] 3
sum(diag(var(PCA$scores)))
## [1] 3
diag(var(PCA$scores))
##    Comp.1    Comp.2    Comp.3 
## 2.1602037 0.6436440 0.1961523
(nrow(X))/(nrow(X)-1)*PCA$sdev^2
##    Comp.1    Comp.2    Comp.3 
## 2.1602037 0.6436440 0.1961523
eigen.val <- eigen(t(X) %*% X)$values

eigen.val / sum(eigen.val)
## [1] 0.7200679 0.2145480 0.0653841
PCA$sdev^2 / sum(PCA$sdev^2)
##    Comp.1    Comp.2    Comp.3 
## 0.7200679 0.2145480 0.0653841

Przykład 2D

PCA2D <- princomp(df_std[,c("Work", "Price")])

v1 <- PCA2D$loadings[,1]
v2 <- PCA2D$loadings[,2]

x_E <- X[6, c("Work", "Price")]

plot(x_E[1], x_E[2], ylim=c(-1.2, 1.2), xlim=c(-1.2, 1.2), asp=1)
arrows(0,0,1,0)
text(1, 0, "e1", pos=1)
arrows(0,0,0,1)
text(0, 1, "e2", pos=1)

arrows(0,0,v1[1],v1[2], col="red")
text(v1[1],v1[2], "v1", pos=1, col="red")
arrows(0,0,v2[1],v2[2], col="red")
text(v2[1],v2[2], "v2", pos=1, col="red")

PCA2D$scores[6,]
##     Comp.1     Comp.2 
##  0.8322621 -0.0933925