library(MASS)
library(dplyr)
library(tidyr)
library(tidyverse)

Wczytanie danych:

Wina <- read.table("wine.data", sep=",")
Wina <- Wina %>% mutate(V1=as.factor(V1)) %>% select(V1, V2, V8) 

Wykres rozproszenia:

plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])

LDA, siatka i predykcja na siatce:

lda.fit <- lda(V1 ~ ., data=Wina)

Wina.grid <- expand.grid(V2=seq(min(Wina$V2), max(Wina$V2), length.out=200),
                         V8=seq(min(Wina$V8), max(Wina$V8), length.out=200))

lda.predict <- predict(lda.fit, newdata=Wina.grid)$class

Wina.grid.long <- data.frame(V1=lda.predict,
                             Wina.grid)

head(Wina.grid.long)
##   V1       V2   V8
## 1  2 11.03000 0.34
## 2  2 11.04910 0.34
## 3  2 11.06819 0.34
## 4  2 11.08729 0.34
## 5  2 11.10638 0.34
## 6  2 11.12548 0.34

Jeśli nie chcemy się męczyć:

plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])

points(Wina.grid.long[,c("V2", "V8")], col=Wina.grid.long[,"V1"])

Jeśli chcemy mieć same linie:

# przekszta戼㸳cenie z tabelki "d戼㸳ugiej" tzn. kolumny to (x, y, z) i w wierszach obserwacje, 
# do "szerokiej" tzn. wiersze odpowiadaj戼㸹 "x", kolumny "y", a warto㤼㹣ci tabelki to "z"
Wina.grid.wide <- Wina.grid.long %>% 
  pivot_wider(names_from=c("V8"), values_from = "V1") %>%
  column_to_rownames(var="V2")

Wina.grid.wide[1:5, 1:5]
##                  0.34 0.363819095477387 0.387638190954774 0.411457286432161
## 11.03               2                 2                 2                 2
## 11.0490954773869    2                 2                 2                 2
## 11.0681909547739    2                 2                 2                 2
## 11.0872864321608    2                 2                 2                 2
## 11.1063819095477    2                 2                 2                 2
##                  0.435276381909548
## 11.03                            2
## 11.0490954773869                 2
## 11.0681909547739                 2
## 11.0872864321608                 2
## 11.1063819095477                 2
plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])

contour(x=as.numeric(rownames(Wina.grid.wide)),
        y=as.numeric(colnames(Wina.grid.wide)), 
        z=as.matrix(Wina.grid.wide), add=TRUE, 
        drawlabels=FALSE, nlevels=2) # 戼㹦eby lepiej wygl戼㸹da戼㸳o

A jeśli chcemy narysować dodatkowo poziomice rozkładu normalnego, to najpierw potrzebujemy trzech dwuwymiarowych gęstości rozkładu normalnego: \(N_1(\mu_1, \Sigma)\), \(N_2(\mu_2, \Sigma)\) i \(N_3(\mu_3, \Sigma)\).

Estymacja \(\Sigma\):

S1 <- cov(Wina[Wina$V1==1, c("V2", "V8")])
S2 <- cov(Wina[Wina$V1==2, c("V2", "V8")])
S3 <- cov(Wina[Wina$V1==3, c("V2", "V8")])
n1 <- sum(Wina$V1==1)
n2 <- sum(Wina$V1==2)
n3 <- sum(Wina$V1==3)
n <- nrow(Wina)
W <- (S1*(n1-1) + S2*(n2-1) + S3*(n3-1))/(n-2)

Estymacja średnich:

m.V2 <- tapply(Wina[, "V2"], Wina[, "V1"], mean)
m.V8 <- tapply(Wina[, "V8"], Wina[, "V1"], mean)

Czyli dostajemy gęstości (potrzebny pakiet mvtnorm):

gestosc1 <- mvtnorm::dmvnorm(x=Wina.grid, mean=c(m.V2[1], m.V8[1]), sigma=W)
gestosc1.long <- data.frame(gestosc=gestosc1, Wina.grid)
gestosc1.wide <- gestosc1.long %>% 
  pivot_wider(names_from=c("V8"), values_from = "gestosc") %>% 
  column_to_rownames(var="V2")

gestosc2 <- mvtnorm::dmvnorm(x=Wina.grid, mean=c(m.V2[2], m.V8[2]), sigma=W)
gestosc2.long <- data.frame(gestosc=gestosc2, Wina.grid)
gestosc2.wide <- gestosc2.long %>% 
  pivot_wider(names_from=c("V8"), values_from = "gestosc") %>% 
  column_to_rownames(var="V2")

gestosc3 <- mvtnorm::dmvnorm(x=Wina.grid, mean=c(m.V2[3], m.V8[3]), sigma=W)
gestosc3.long <- data.frame(gestosc=gestosc3, Wina.grid)
gestosc3.wide <- gestosc3.long %>% 
  pivot_wider(names_from=c("V8"), values_from = "gestosc") %>% 
  column_to_rownames(var="V2")

A teraz wykres:

plot(Wina[,c("V2", "V8")], col=Wina[,"V1"])

contour(x=as.numeric(rownames(Wina.grid.wide)),
        y=as.numeric(colnames(Wina.grid.wide)), 
        z=as.matrix(Wina.grid.wide), add=TRUE, drawlabels=FALSE, nlevels=2)

contour(x=as.numeric(rownames(gestosc1.wide)),
        y=as.numeric(colnames(gestosc1.wide)), 
        z=as.matrix(gestosc1.wide), add=TRUE, drawlabels=FALSE)
contour(x=as.numeric(rownames(gestosc2.wide)),
        y=as.numeric(colnames(gestosc2.wide)), 
        z=as.matrix(gestosc2.wide), add=TRUE, drawlabels=FALSE, col="red")
contour(x=as.numeric(rownames(gestosc3.wide)),
        y=as.numeric(colnames(gestosc3.wide)), 
        z=as.matrix(gestosc3.wide), add=TRUE, drawlabels=FALSE, col="green")

To samo w ggplot2:

library(ggplot2)

Wina.grid.long$V1 <- as.numeric(Wina.grid.long$V1)
ggplot() +
  geom_point(data=Wina, aes(x=V2, y=V8, color=V1)) +
  geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=V1), size=1, bins=2, color="black") +
  theme_bw()

Wina.grid.long$klasa1 <- ifelse(Wina.grid.long$V1==1, 1, 0)
Wina.grid.long$klasa2 <- ifelse(Wina.grid.long$V1==2, 1, 0)
Wina.grid.long$klasa3 <- ifelse(Wina.grid.long$V1==3, 1, 0)

ggplot() +
  geom_point(data=Wina, aes(x=V2, y=V8, color=V1)) +
  geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=klasa1), size=1, bins=1, color="red") +
  geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=klasa2), size=1, bins=1, color="green") +
  geom_contour(data=Wina.grid.long, aes(x=V2, y=V8, z=klasa3), size=1, bins=1, color="blue") +
  theme_bw()

ggplot() +
  geom_point(data=Wina, aes(x=V2, y=V8, color=V1)) +
  geom_contour(data=gestosc1.long, aes(x=V2, y=V8, z=gestosc), color="red") +
  geom_contour(data=gestosc2.long, aes(x=V2, y=V8, z=gestosc), color="green") +
  geom_contour(data=gestosc3.long, aes(x=V2, y=V8, z=gestosc), color="blue") +
  theme_bw()