2017-05-29 35 views
2

Data.Table:我重新排列數據的代碼出錯 - R。我想把我的數據從下面這種形式:

Type Student Rt1 Rt2 Rt3 Rt4 Rt5 Rt6 Rt7 Rt8 Rt9 Rt10 Rt11 
1: SNR 789331 3.6 3.8 4.0 4.2 3.4 2.4 3.0 3.2 3.2 3.6 4.0 
2: SNR 805933 4.8 4.0 4.0 3.6 3.2 3.2 3.2 3.2 NaN NaN 3.2 
3: SNR 826523 4.4 4.2 4.2 4.4 4.6 4.6 NaN 4.6 NaN 4.2 4.2 
4: SNR 832929 3.8 3.8 3.8 4.0 3.6 NaN NaN NaN NaN NaN NaN 
5: SNR 838607 5.0 5.0 5.0 5.0 5.0 4.4 4.2 4.4 3.8 NaN 3.6 
6: SNR 841903 3.2 4.2 4.2 NaN 3.6 NaN 4.0 3.4 4.2 NaN 4.6 

重新排列成這樣:

Student Type timePeriod week Rating 
1 789331 SNR  Rt1 Jan11 3.6 
2 805933 SNR  Rt1 Jan11 4.8 
3 826523 SNR  Rt1 Jan11 4.4 
4 832929 SNR  Rt1 Jan11 3.8 
5 838607 SNR  Rt1 Jan11 5.0 
6 841903 SNR  Rt1 Jan11 3.2 

下面是我一直在嘗試使用的代碼。它生成的列和其他內容都沒有問題,但賦給 Rating 的值不正確。我不確定自己哪裡做錯了。

# Reshape `pulse` from wide form (one Rt* column per period) to long form by
# stacking one per-period data frame after another into `pulse1`.
#
# NOTE(review): `pulse` is a data.table (see the dput output further down), and
# inside a data.table's `[` the j argument `columnNumber+2` is evaluated as an
# ordinary arithmetic expression rather than used as a column index. On the
# first pass that is 1+2 = 3, which data.frame() recycles down every row --
# consistent with the constant Rating of 3 in the output shown below.
# Extracting the column by position with `pulse[[columnNumber+2]]` avoids this.
pulse1<-NULL 
    # Names of the rating columns, in column order.
    timePeriods<-c("Rt1", "Rt2", "Rt3", "Rt4", "Rt5", "Rt6", "Rt7", "Rt8", "Rt9", "Rt10", "Rt11") 
    # Week labels, paired position-by-position with timePeriods.
    weeks<-c("Jan11","Jan25","Feb1","Feb8", "Feb15", "Mar1", "Mar8", "Mar15","Mar22", "Mar29", "Apr5") 
    # Not referenced by the loop below; note that c() coerces 11 to the string "11".
    measureType<-c("Time", 11) 

    # One iteration per rating column; the +2 offset skips over Type and Student.
    for (columnNumber in 1:11) 
    { 
    # Build this period's rows: the ids plus the period name, week label and rating.
    temp.data<-data.frame(Student=pulse$Student, Type=pulse$Type, 
          timePeriod=timePeriods[columnNumber], week=weeks[columnNumber], 
          Rating=pulse[, columnNumber+2]) 
    # Append this period's rows (pulse1 is re-grown on every pass).
    pulse1<-rbind(pulse1, temp.data) 
    } 

此代碼產生的輸出是:

Student Type timePeriod week Rating 
1 789331 SNR  Rt1 Jan11  3 
2 805933 SNR  Rt1 Jan11  3 
3 826523 SNR  Rt1 Jan11  3 
4 832929 SNR  Rt1 Jan11  3 
5 838607 SNR  Rt1 Jan11  3 
6 841903 SNR  Rt1 Jan11  3 

謝謝!我把我的數據附在下面:

> dput (pulse) 
structure(list(Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = c("FYS", "SNR"), class = "factor"), Student = c(789331L, 
805933L, 826523L, 832929L, 838607L, 841903L, 843618L, 852125L, 
876406L, 879972L, 885650L, 888712L, 903303L, 796882L, 827911L, 
830271L, 831487L, 834598L, 836364L, 839802L, 855524L, 873527L, 
885409L, 894218L, 928026L, 932196L, 955389L, 956952L, 957206L, 
957759L, 959200L, 962490L, 968728L, 969005L, 971179L, 976863L, 
981621L, 952797L, 965873L, 967416L, 975424L), Rt1 = c(3.6, 4.8, 
4.4, 3.8, 5, 3.2, 4.4, 3.2, 3.6, 3.8, 4, 4.4, 3.6, NaN, NaN, 
NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, 
4, 3.8, 3, NaN, 3.6, NaN, 4.4, NaN, NaN, 3.6, 3.4, 4.2, NaN), 
    Rt2 = c(3.8, 4, 4.2, 3.8, 5, 4.2, 4.4, NaN, 4, 3.8, 4.4, 
    4, 3.8, 4.4, 4.2, 4.6, 4.4, 5, 4, 3.4, 5, 3.8, 4.8, 4.4, 
    4.6, 3.2, 5, 4.2, 4.4, 4.4, 3.4, 3.8, 3.8, 3.6, 4.8, 4.4, 
    4.8, NaN, 4.75, NaN, 4), Rt3 = c(4, 4, 4.2, 3.8, 5, 4.2, 
    4.6, 3.8, 4.2, 3.8, 4, NaN, 4.6, 4, 3.6, 4.8, 4.2, 3.8, 4, 
    2, 4.6, 3.8, 4.6, 4.4, 4.8, NaN, 4.6, NaN, 4, 4.4, NaN, 4.2, 
    3.6, 4.6, 4.4, 5, 4.6, NaN, 5, 4.2, 3.4), Rt4 = c(4.2, 3.6, 
    4.4, 4, 5, NaN, 4.4, 4, 4, NaN, 4, 4.2, 4, 4, NaN, 5, 4.6, 
    4, 4, 1.8, 4.6, 4.2, 4.8, 4.6, NaN, NaN, NaN, NaN, NaN, 4.4, 
    NaN, 4.2, 3.4, 4.4, NaN, 3.8, NaN, 4, 5, NaN, NaN), Rt5 = c(3.4, 
    3.2, 4.6, 3.6, 5, 3.6, 4.4, 3.8, 4, 4, 4.2, 4.4, NaN, 2.8, 
    3.4, 5, 4.4, 4.2, 3.6, 4.2, 4.2, 4, 4.4, 5, NaN, NaN, 4, 
    NaN, 4, 3.8, 3.2, 4.2, 3.4, NaN, 4.4, NaN, 5, 4.4, 4, 4.2, 
    NaN), Rt6 = c(2.4, 3.2, 4.6, NaN, 4.4, NaN, 4.4, 3.6, 2.4, 
    4.2, 4, 4.4, 3.4, 3.6, 3.4, 4.4, NaN, 4, 3.2, 2.2, 4.4, NaN, 
    4.4, 5, NaN, NaN, NaN, 3.2, 4.4, 4, 3, 4.6, 3, NaN, 4.25, 
    NaN, 4.2, 3.6, 3.8, 4.4, NaN), Rt7 = c(3, 3.2, NaN, NaN, 
    4.2, 4, 4.4, 3.6, 2.8, 4, 4.4, 4.6, 3.8, 2.8, NaN, 4.8, 4.2, 
    4, 3.6, 3, 4.8, 4.2, 4.2, 5, NaN, NaN, 4.4, 4.4, 4, 3.2, 
    NaN, NaN, 1, 4.4, 4.2, 3.6, 3.8, 4, 1.4, 4.6, 2.8), Rt8 = c(3.2, 
    3.2, 4.6, NaN, 4.4, 3.4, 4.2, 4, 3.8, 4, 4.2, 3.8, 3.6, 1.4, 
    NaN, NaN, 4.6, NaN, 3.6, 4.2, 4, 4.4, 4.4, NaN, NaN, NaN, 
    4.6, 4.2, 4.2, 3.2, 4, 3.6, 3, 4.6, 4.8, 3.6, 4.2, 4.2, 2.2, 
    5, NaN), Rt9 = c(3.2, NaN, NaN, NaN, 3.8, 4.2, 3.6, NaN, 
    3, 4, 3.8, 4.2, 3.8, 2.2, NaN, 5, 4.8, NaN, 3.4, 2.8, 5, 
    NaN, NaN, NaN, NaN, NaN, 4.4, NaN, 4, 3, NaN, 1, 3, NaN, 
    NaN, NaN, NaN, NaN, 1.6, NaN, NaN), Rt10 = c(3.6, NaN, 4.2, 
    NaN, NaN, NaN, 4, 3.4, 3.2, 4, 4, 4, 3.6, 2, NaN, NaN, 4.4, 
    4, 3.4, 1.8, 4.2, 3.8, 3.8, 4, NaN, NaN, NaN, 4.2, 3.8, 4.2, 
    4.2, 3.2, 1.6, 4.6, NaN, 4, 5, 4, 3.4, NaN, 3.6), Rt11 = c(4, 
    3.2, 4.2, NaN, 3.6, 4.6, 4.4, 4.6, 4.2, NaN, NaN, 4.6, 4.6, 
    4.2, NaN, 5, 4.6, 4.2, 4, 4, 4.6, 4.4, 3.6, 5, NaN, NaN, 
    NaN, NaN, 4.4, 4.6, NaN, NaN, 1.6, 4.6, 5, NaN, 5, 4, NaN, 
    NaN, NaN)), row.names = c(NA, -41L), class = c("data.table", 
"data.frame"), .Names = c("Type", "Student", "Rt1", "Rt2", "Rt3", 
"Rt4", "Rt5", "Rt6", "Rt7", "Rt8", "Rt9", "Rt10", "Rt11"), .internal.selfref = <pointer: 0x0000000007b30788>) 
+2

打印輸出和 `dput` 的輸出都表明,您處理的是一個 data.table 類的對象(除了繼承關係之外,它並不是普通的 data.frame)。MikeH. 顯然明白這一點,但如果您把這一點說得更明確,這個問題對將來可能經驗較少的用戶會更有用。 –

回答

3

一個可能的解決辦法是先用 melt 把數據集轉成長格式,然後再與 weeks 對照表合併:

# Lookup table pairing each rating column name with its week label.
weeks_time <- data.frame(
  timePeriod = paste0("Rt", 1:11),
  weeks = c(
    "Jan11", "Jan25", "Feb1", "Feb8", "Feb15", "Mar1",
    "Mar8", "Mar15", "Mar22", "Mar29", "Apr5"
  )
)

# Wide -> long: keep Student and Type as identifiers; every other column
# becomes a (timePeriod, Rating) pair.
pulse_m <- melt(
  pulse,
  id.vars = c("Student", "Type"),
  value.name = "Rating",
  variable.name = "timePeriod"
)

# Attach the week label; the tables share only the timePeriod column.
merge(pulse_m, weeks_time)

    # timePeriod Student Type Rating weeks 
    #1:  Rt1 789331 SNR 3.6 Jan11 
    #2:  Rt1 805933 SNR 4.8 Jan11 
    #3:  Rt1 826523 SNR 4.4 Jan11 
    #4:  Rt1 832929 SNR 3.8 Jan11 
    #5:  Rt1 838607 SNR 5.0 Jan11 
#---          
#447:  Rt11 981621 FYS 5.0 Apr5 
#448:  Rt11 952797 FYS 4.0 Apr5 
#449:  Rt11 965873 FYS NaN Apr5 
#450:  Rt11 967416 FYS NaN Apr5 
#451:  Rt11 975424 FYS NaN Apr5 

這可以讓你避免任何的循環。

1

下面是使用 dplyr 和 tidyr 的解法:

library(dplyr)
library(tidyr)

# Lookup table pairing each rating column name with its week label.
df.weeks <- data.frame(
  timePeriod = paste0("Rt", 1:11),
  week = c(
    "Jan11", "Jan25", "Feb1", "Feb8", "Feb15", "Mar1",
    "Mar8", "Mar15", "Mar22", "Mar29", "Apr5"
  )
)

# Reshape Rt1..Rt11 into (timePeriod, Rating) pairs, attach the week label,
# and put the columns in the requested order. Note: this overwrites `pulse`.
pulse <- pulse %>%
  gather(key = timePeriod, value = Rating, Rt1:Rt11) %>%
  merge(df.weeks) %>%
  select(Student, Type, timePeriod, week, Rating)