library(MASS)
library(dplyr)
library(tidyr)
library(tidyverse)
Wczytanie danych:
Wina <- read.table("wine.data", sep=",")
Wina <- Wina %>% mutate(V1=as.factor(V1)) %>% select(V1, V2, V8)
Wykres rozproszenia:
plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])
LDA, siatka i predykcja na siatce:
lda.fit <- lda(V1 ~ ., data=Wina)
Wina.grid <- expand.grid(V2=seq(min(Wina$V2), max(Wina$V2), length.out=200),
V8=seq(min(Wina$V8), max(Wina$V8), length.out=200))
lda.predict <- predict(lda.fit, newdata=Wina.grid)$class
Wina.grid.long <- data.frame(V1=lda.predict,
Wina.grid)
head(Wina.grid.long)
## V1 V2 V8
## 1 2 11.03000 0.34
## 2 2 11.04910 0.34
## 3 2 11.06819 0.34
## 4 2 11.08729 0.34
## 5 2 11.10638 0.34
## 6 2 11.12548 0.34
Jeśli nie chcemy się męczyć:
plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])
points(Wina.grid.long[,c("V2", "V8")], col=Wina.grid.long[,"V1"])
Jeśli chcemy mieć same linie:
# przeksztaćĽă¸łcenie z tabelki "dćĽă¸ługiej" tzn. kolumny to (x, y, z) i w wierszach obserwacje,
# do "szerokiej" tzn. wiersze odpowiadajćĽă¸ą "x", kolumny "y", a warto㤼㹣ci tabelki to "z"
Wina.grid.wide <- Wina.grid.long %>%
pivot_wider(names_from=c("V8"), values_from = "V1") %>%
column_to_rownames(var="V2")
Wina.grid.wide[1:5, 1:5]
## 0.34 0.363819095477387 0.387638190954774 0.411457286432161
## 11.03 2 2 2 2
## 11.0490954773869 2 2 2 2
## 11.0681909547739 2 2 2 2
## 11.0872864321608 2 2 2 2
## 11.1063819095477 2 2 2 2
## 0.435276381909548
## 11.03 2
## 11.0490954773869 2
## 11.0681909547739 2
## 11.0872864321608 2
## 11.1063819095477 2
plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])
contour(x=as.numeric(rownames(Wina.grid.wide)),
y=as.numeric(colnames(Wina.grid.wide)),
z=as.matrix(Wina.grid.wide), add=TRUE,
drawlabels=FALSE, nlevels=2) # ćĽăą¦eby lepiej wyglćĽă¸ądaćĽă¸ło
A jeśli chcemy narysować dodatkowo poziomice rozkładu normalnego, to najpierw potrzebujemy trzech dwuwymiarowych gęstości rozkładu normalnego: \(N_1(\mu_1, \Sigma)\), \(N_2(\mu_2, \Sigma)\) i \(N_3(\mu_3, \Sigma)\).
Estymacja \(\Sigma\):
S1 <- cov(Wina[Wina$V1==1, c("V2", "V8")])
S2 <- cov(Wina[Wina$V1==2, c("V2", "V8")])
S3 <- cov(Wina[Wina$V1==3, c("V2", "V8")])
n1 <- sum(Wina$V1==1)
n2 <- sum(Wina$V1==2)
n3 <- sum(Wina$V1==3)
n <- nrow(Wina)
W <- (S1*(n1-1) + S2*(n2-1) + S3*(n3-1))/(n-2)
Estymacja średnich:
m.V2 <- tapply(Wina[, "V2"], Wina[, "V1"], mean)
m.V8 <- tapply(Wina[, "V8"], Wina[, "V1"], mean)
Czyli dostajemy gęstości (potrzebny pakiet mvtnorm
):
gestosc1 <- mvtnorm::dmvnorm(x=Wina.grid, mean=c(m.V2[1], m.V8[1]), sigma=W)
gestosc1.long <- data.frame(gestosc=gestosc1, Wina.grid)
gestosc1.wide <- gestosc1.long %>%
pivot_wider(names_from=c("V8"), values_from = "gestosc") %>%
column_to_rownames(var="V2")
gestosc2 <- mvtnorm::dmvnorm(x=Wina.grid, mean=c(m.V2[2], m.V8[2]), sigma=W)
gestosc2.long <- data.frame(gestosc=gestosc2, Wina.grid)
gestosc2.wide <- gestosc2.long %>%
pivot_wider(names_from=c("V8"), values_from = "gestosc") %>%
column_to_rownames(var="V2")
gestosc3 <- mvtnorm::dmvnorm(x=Wina.grid, mean=c(m.V2[3], m.V8[3]), sigma=W)
gestosc3.long <- data.frame(gestosc=gestosc3, Wina.grid)
gestosc3.wide <- gestosc3.long %>%
pivot_wider(names_from=c("V8"), values_from = "gestosc") %>%
column_to_rownames(var="V2")
A teraz wykres:
plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])
contour(x=as.numeric(rownames(Wina.grid.wide)),
y=as.numeric(colnames(Wina.grid.wide)),
z=as.matrix(Wina.grid.wide), add=TRUE, drawlabels=FALSE, nlevels=2)
contour(x=as.numeric(rownames(gestosc1.wide)),
y=as.numeric(colnames(gestosc1.wide)),
z=as.matrix(gestosc1.wide), add=TRUE, drawlabels=FALSE)
contour(x=as.numeric(rownames(gestosc2.wide)),
y=as.numeric(colnames(gestosc2.wide)),
z=as.matrix(gestosc2.wide), add=TRUE, drawlabels=FALSE, col="red")
contour(x=as.numeric(rownames(gestosc3.wide)),
y=as.numeric(colnames(gestosc3.wide)),
z=as.matrix(gestosc3.wide), add=TRUE, drawlabels=FALSE, col="green")
To samo w ggplot2
:
library(ggplot2)
Wina.grid.long$V1 <- as.numeric(Wina.grid.long$V1)
ggplot() +
geom_point(data=Wina, aes(x=V2, y=V8, color=V1)) +
geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=V1), size=1, bins=2, color="black") +
theme_bw()
Wina.grid.long$klasa1 <- ifelse(Wina.grid.long$V1==1, 1, 0)
Wina.grid.long$klasa2 <- ifelse(Wina.grid.long$V1==2, 1, 0)
Wina.grid.long$klasa3 <- ifelse(Wina.grid.long$V1==3, 1, 0)
ggplot() +
geom_point(data=Wina, aes(x=V2, y=V8, color=V1)) +
geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=klasa1), size=1, bins=1, color="red") +
geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=klasa2), size=1, bins=1, color="green") +
geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=klasa3), size=1, bins=1, color="blue") +
theme_bw()
ggplot() +
geom_point(data=Wina, aes(x=V2, y=V8, color=V1)) +
geom_contour(data=gestosc1.long, aes(x=V2, y=V8, z=gestosc), color="red") +
geom_contour(data=gestosc2.long, aes(x=V2, y=V8, z=gestosc), color="green") +
geom_contour(data=gestosc3.long, aes(x=V2, y=V8, z=gestosc), color="blue") +
theme_bw()