這可能不是最有效率、也不是最優雅的做法,但可以滿足您的需求。以下假設您的原始欄位名稱為 `client`(客戶)和 `product`(產品):
library(stringr)
#' Count how often two products are bought by the same client
#'
#' Every combination of two rows of `df` is examined. A combination counts
#' when both rows share the same `client` and name two different products.
#' Identical (client, earlier product, later product) combinations are
#' collapsed into one, and each remaining combination is labelled with its
#' two product names in sorted order, separated by one space
#' (e.g. "pants shirt").
#'
#' Clients and products are compared directly as character values, so client
#' identifiers do not have to be numeric and product names may contain digits
#' or spaces.
#'
#' @param df A data frame with a `client` column (client identifier) and a
#'   `product` column (product name). Rows whose client or product is `NA`
#'   never form a pair.
#' @return A named list of two data frames, each with a factor column `Var1`
#'   (the pair label) and an integer column `Freq`:
#'   * `Unique.Customer`: number of distinct clients who bought both products.
#'   * `Total`: number of distinct (client, earlier product, later product)
#'     combinations. A client whose rows contain the two products in both
#'     orders therefore contributes 2.
#'
#'   Both data frames have zero rows when no pair exists.
Count.Sales <- function(df) {
  # Tabulate labels into the Var1/Freq layout of as.data.frame(table(x)).
  count_labels <- function(labels) {
    if (length(labels) == 0L) {
      return(data.frame(Var1 = factor(character(0)), Freq = integer(0)))
    }
    counts <- as.data.frame(table(labels))
    names(counts) <- c("Var1", "Freq")
    counts
  }

  if (is.null(df$client) || is.null(df$product)) {
    stop("`df` must have `client` and `product` columns", call. = FALSE)
  }

  client <- as.character(df$client)
  product <- as.character(df$product)
  n_rows <- length(client)

  # combn() needs at least two rows; fewer rows cannot form any pair.
  if (n_rows < 2L) {
    return(list(
      Unique.Customer = count_labels(character(0)),
      Total = count_labels(character(0))
    ))
  }

  # Pair up row indices rather than pasted "client+product" strings, so
  # nothing has to be parsed back out with regular expressions.
  row_pairs <- combn(seq_len(n_rows), 2L)
  earlier <- row_pairs[1L, ]
  later <- row_pairs[2L, ]

  # which() drops NA comparisons, so rows with missing values are ignored.
  keep <- which(
    client[earlier] == client[later] & product[earlier] != product[later]
  )
  customer <- client[earlier[keep]]
  first <- product[earlier[keep]]
  second <- product[later[keep]]

  # Order-independent label: the two product names, sorted, joined by " ".
  swap <- first > second
  pairs <- paste(ifelse(swap, second, first), ifelse(swap, first, second))

  # Collapse repeated (client, earlier, later) combinations to one row each.
  ordered_pairs <- unique(data.frame(
    customer = customer,
    first = first,
    second = second,
    pairs = pairs,
    stringsAsFactors = FALSE
  ))

  # One row per (pair, client) so each client is counted once per pair.
  per_customer <- unique(ordered_pairs[, c("pairs", "customer")])

  list(
    Unique.Customer = count_labels(per_customer$pairs),
    Total = count_labels(ordered_pairs$pairs)
  )
}
# Example call. `df` is not defined in this snippet; it must supply the
# `client` and `product` columns that Count.Sales() reads. The commented lines
# below are the printed result quoted with the example: a list with one
# frequency table per element ($Unique.Customer and $Total).
Count.Sales(df)
#$Unique.Customer
# Var1 Freq
#1 pants shirt 2
#2 pants shoes 1
#3 shirt shoes 1
#
#$Total
# Var1 Freq
#1 pants shirt 3
#2 pants shoes 1
#3 shirt shoes 1
關於如何用 R 編寫程式的問題在這裏屬於離題(off-topic),這類問題應該發佈在 [SO]。如果您可以稍等一下,我們會爲您遷移。 – gung
抱歉,我確實發錯了地方。是的,請將它遷移到它所屬的地方。 – DroppingOff
「tuplas」是什麼意思? – Zhanxiong