1
問候箱圖只離羣值
我只想繪製異常值箱線圖 這是我的解決方案,但它似乎並不十分有效,或者優雅。 任何軟件包或更好的代碼。 正如你可以看到我打電話箱線兩次做到這一點 所以,如果我的數據集是非常大的比這將是壞
感謝
set.seed(1501)
y <- c(4, 0, 7, -5, rnorm(16))
x1 <- c("a", "a", "b", "b", sample(letters[1:5], 16, T))
lab_y <- sample(letters, 20)
datxx <- as.matrix(cbind(y,x1,lab_y))
boxplot_outlier<- function(dat){
bx <- boxplot(as.numeric(dat[,"y"]) ~ dat[,"x1"])
out_label <- c()
for (i in seq(bx$out)){
out_label[i] <- dat[which(dat[,"y"]==bx$out[i]),"lab_y"]
}
out_label
out_g <- c()
for (i in seq(bx$out)){
out_g[i] <- dat[which(dat[,"y"]==bx$out[i]),"x1"]
}
out_g
out_y <- c()
for (i in seq(bx$out)){
out_y[i] <- dat[which(dat[,"y"]==bx$out[i]),"y"]
}
out_y
out_all<-cbind(out_y,out_g,out_label)
out_all <- as.matrix(out_all)
out_g <- as.matrix(out_g)
colnames(out_g)[1]<-"x1"
out_g_x <- out_g[which(!duplicated(out_g[,"x1"]))]
out_g_x <- as.matrix(out_g_x)
colnames(out_g_x)[1]<-"x1"
datsub <- merge(dat,out_g_x,by=c("x1"))
datsub <- as.matrix(datsub)
bx2 <- boxplot(as.numeric(datsub[,"y"]) ~ datsub[,"x1"],data=datsub)
mynum <- cbind(as.numeric(c(1:nrow(out_g_x))),out_g_x)
mynumxx <- merge(x=out_g,y=mynum,by=c("x1"))
colnames(mynumxx)[2]<-"v1"
text(as.numeric(mynumxx[,"v1"])+0.2,as.numeric(out_all[,"out_y"]),out_all[,"out_label"])
}
boxplot_outlier(datxx)