0

是否有一種高效的方法,可以將 R 數據框中的行展開爲列?這是我反覆遇到的問題:從 SQL 腳本取得的數據往往需要展開成多列。例如,在做時間序列預測時,我沒有使用 RNN,而是使用標準神經網絡。我打算預先把數據展平,讓網絡在每一行同時接收 t-1、t-2、t-3 等時間步的數據,以此取代循環網絡。參見下圖:如何將行展開爲 R 中的列

enter image description here

基本上,我這張手繪示意圖想表達的是:對於每一行,我想把前 n 行接在它的右側,其中 n 取決於我們想用多少個先前的時間步來預測當前行中的值。

我主要是在尋找一種聰明且高效的做法,最好使用 R 中現有的庫/函數。我可以用幾種語言編寫程序,但我的目的是找到一個現成的解決方案。我以前分別用 Java(非常快)和 R(慢得要命,花了 1 小時)實現過這個功能。

目前我有 3000 多行和 10 列。例如,如果我想使用 15 個先前的時間步,就會得到 10 + 15 * 10 列。我不知道 15 是否是個好選擇,因此我需要能夠快速測試 N = 5、10、15、20、25、50。

編輯

說實話,在 R 方面我還是個新手,所以我來尋求幫助,而不是再自己重寫一次這個自定義函數。

dput給出:

structure(list(Date = structure(c(10L, 9L, 8L, 7L, 6L, 5L, 4L, 
3L, 2L, 1L), .Label = c("6/10/2016", "6/13/2016", "6/14/2016", 
"6/15/2016", "6/16/2016", "6/17/2016", "6/20/2016", "6/21/2016", 
"6/22/2016", "6/23/2016"), class = "factor"), Bid = c(5.04, 4.97, 
4.96, 4.93, 4.84, 5.09, 5.05, 4.96, 5.08, 5), Ask = c(5.04, 4.97, 
4.96, 4.94, 4.84, 5.09, 5.06, 4.97, 5.08, 5.01), Opening.price = c(4.98, 
4.97, 4.95, 4.94, 4.92, 5.01, 5.01, 5.01, 4.95, 5.05), High.price = c(5.07, 
4.98, 4.97, 4.99, 4.93, 5.14, 5.06, 5.1, 5.13, 5.09), Low.price = c(4.94, 
4.91, 4.89, 4.92, 4.81, 5.01, 4.94, 4.94, 4.89, 4.97), Closing.price = c(5.04, 
4.97, 4.95, 4.94, 4.86, 5.08, 5.05, 4.94, 5.06, 4.98), Average.price = c(5.02, 
4.96, 4.94, 4.94, 4.87, 5.08, 5.01, 5, 5.01, 5.01), Total.volume = c(18997216L, 
17969939L, 21430529L, 20725035L, 66884495L, 32994371L, 24600829L, 
24439514L, 26540825L, 24756699L), Turnover = c(95382241.29, 89106913.2, 
105823382.96, 102379207.58, 325592595.95, 167697936.93, 123243137.11, 
122189815.88, 133063486.77, 124080799.95), Trades = c(9220L, 
9317L, 10075L, 10230L, 16446L, 13544L, 11888L, 10923L, 11981L, 
9696L)), .Names = c("Date", "Bid", "Ask", "Opening.price", "High.price", 
"Low.price", "Closing.price", "Average.price", "Total.volume", 
"Turnover", "Trades"), class = "data.frame", row.names = c(NA, 
-10L)) 

n = 2 時的預期結果(在右側追加前 2 個時間步的數據):

structure(list(Date = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L, 
1L), .Label = c("6/14/2016", "6/15/2016", "6/16/2016", "6/17/2016", 
"6/20/2016", "6/21/2016", "6/22/2016", "6/23/2016"), class = "factor"), 
Bid = c(5.04, 4.97, 4.96, 4.93, 4.84, 5.09, 5.05, 4.96), 
Ask = c(5.04, 4.97, 4.96, 4.94, 4.84, 5.09, 5.06, 4.97), 
Opening.price = c(4.98, 4.97, 4.95, 4.94, 4.92, 5.01, 5.01, 
5.01), High.price = c(5.07, 4.98, 4.97, 4.99, 4.93, 5.14, 
5.06, 5.1), Low.price = c(4.94, 4.91, 4.89, 4.92, 4.81, 5.01, 
4.94, 4.94), Closing.price = c(5.04, 4.97, 4.95, 4.94, 4.86, 
5.08, 5.05, 4.94), Average.price = c(5.02, 4.96, 4.94, 4.94, 
4.87, 5.08, 5.01, 5), Total.volume = c(18997216L, 17969939L, 
21430529L, 20725035L, 66884495L, 32994371L, 24600829L, 24439514L 
), Turnover = c(95382241.29, 89106913.2, 105823382.96, 102379207.58, 
325592595.95, 167697936.93, 123243137.11, 122189815.88), 
Trades = c(9220L, 9317L, 10075L, 10230L, 16446L, 13544L, 
11888L, 10923L), X1_Bid = c(4.97, 4.96, 4.93, 4.84, 5.09, 
5.05, 4.96, 5.08), X1_Ask = c(4.97, 4.96, 4.94, 4.84, 5.09, 
5.06, 4.97, 5.08), X1_Opening.price = c(4.97, 4.95, 4.94, 
4.92, 5.01, 5.01, 5.01, 4.95), X1_High.price = c(4.98, 4.97, 
4.99, 4.93, 5.14, 5.06, 5.1, 5.13), X1_Low.price = c(4.91, 
4.89, 4.92, 4.81, 5.01, 4.94, 4.94, 4.89), X1_Closing.price = c(4.97, 
4.95, 4.94, 4.86, 5.08, 5.05, 4.94, 5.06), X1_Average.price = c(4.96, 
4.94, 4.94, 4.87, 5.08, 5.01, 5, 5.01), X1_Total.volume = c(17969939L, 
21430529L, 20725035L, 66884495L, 32994371L, 24600829L, 24439514L, 
26540825L), X1_Turnover = c(89106913.2, 105823382.96, 102379207.58, 
325592595.95, 167697936.93, 123243137.11, 122189815.88, 133063486.77 
), X1_Trades = c(9317L, 10075L, 10230L, 16446L, 13544L, 11888L, 
10923L, 11981L), X2_Bid = c(4.96, 4.93, 4.84, 5.09, 5.05, 
4.96, 5.08, 5), X2_Ask = c(4.96, 4.94, 4.84, 5.09, 5.06, 
4.97, 5.08, 5.01), X2_Opening.price = c(4.95, 4.94, 4.92, 
5.01, 5.01, 5.01, 4.95, 5.05), X2_High.price = c(4.97, 4.99, 
4.93, 5.14, 5.06, 5.1, 5.13, 5.09), X2_Low.price = c(4.89, 
4.92, 4.81, 5.01, 4.94, 4.94, 4.89, 4.97), X2_Closing.price = c(4.95, 
4.94, 4.86, 5.08, 5.05, 4.94, 5.06, 4.98), X2_Average.price = c(4.94, 
4.94, 4.87, 5.08, 5.01, 5, 5.01, 5.01), X2_Total.volume = c(21430529L, 
20725035L, 66884495L, 32994371L, 24600829L, 24439514L, 26540825L, 
24756699L), X2_Turnover = c(105823382.96, 102379207.58, 325592595.95, 
167697936.93, 123243137.11, 122189815.88, 133063486.77, 124080799.95 
), X2_Trades = c(10075L, 10230L, 16446L, 13544L, 11888L, 
10923L, 11981L, 9696L)), .Names = c("Date", "Bid", "Ask", 
"Opening.price", "High.price", "Low.price", "Closing.price", 
"Average.price", "Total.volume", "Turnover", "Trades", "X1_Bid", 
"X1_Ask", "X1_Opening.price", "X1_High.price", "X1_Low.price", 
"X1_Closing.price", "X1_Average.price", "X1_Total.volume", "X1_Turnover", 
"X1_Trades", "X2_Bid", "X2_Ask", "X2_Opening.price", "X2_High.price", 
"X2_Low.price", "X2_Closing.price", "X2_Average.price", "X2_Total.volume", 
"X2_Turnover", "X2_Trades"), class = "data.frame", row.names = c(NA, 
-8L)) 

編輯2

這裏有人提出了一個關於 RNN 的問題:當直接輸入先前的時間點時,RNN 與經典網絡有何區別。(連結:rnn vs classic net)該問題的第一部分解釋了我希望如何在每一行中,把 x(t-1)、x(t-2)…… 放在我想預測的數據的右側。

+1

[從長到寬格式重塑數據](http://stackoverflow.com/questions/5890584)? – zx8754

+0

請使用'dput'來顯示一個小例子而不是圖像。 – akrun

+0

'rbind(unlist(mtcars [-1,]),unlist(mtcars [-nrow(mtcars),]))'? – alistaire

回答

0
# Build a "lagged" design matrix: every output row holds one row of the data
# followed by the next N - 1 rows, laid out side by side as extra columns.
#
# `x` is the data frame from the question (the `dput` output shown there).
# Only its numeric columns are kept, as a matrix, because operations on
# matrices tend to be faster than operations on data frames. Column 1 (Date)
# is dropped.
x1 <- as.matrix(x[, -1])

# Resample the example rows (with replacement) up to 3000 rows to match the
# size mentioned in the question. The draw is random, so the values in `x2`
# differ between runs; only the dimensions are fixed.
x2 <- x1[sample(seq_len(nrow(x1)), 3000, replace = TRUE), , drop = FALSE]

# Record some sizes:
#   N - window width (the row itself plus N - 1 following rows)
#   n - number of rows available
#   J - indices of the rows that have a complete window
N <- 50
n <- nrow(x2)
stopifnot(N >= 1, N <= n)
J <- seq_len(n - (N - 1))

# Create the new numeric matrix. Each shifted block is built once and all
# blocks are bound in a single cbind() call, so the result is not recopied on
# every iteration. seq_len(N) - 1L is 0, 1, ..., N - 1 and stays correct for
# N == 1 (a single, unshifted block).
blocks <- lapply(seq_len(N) - 1L, function(shift) {
  block <- x2[shift + J, , drop = FALSE]
  # Prefix shifted blocks with "X<shift>_" so the columns stay distinguishable
  # (e.g. "Bid", "X1_Bid", "X2_Bid", ...); the unshifted block keeps its names.
  if (shift > 0L && !is.null(colnames(block))) {
    colnames(block) <- paste0("X", shift, "_", colnames(block))
  }
  block
})
z <- do.call(cbind, blocks)

# With the example data (10 numeric columns) and N = 50:
dim(z)
# [1] 2951  500

# Each shift contributes ncol(x2) columns, so smaller windows can be tested
# by taking leading column ranges of z, e.g. z[, 1:50], z[, 1:100], ...
# (with 10 columns per shift these are windows of 5 and 10 rows).
+0

太棒了!謝謝!看來 cbind 就是我一直在尋找的東西。我不知道它可以這樣使用(我還得弄清楚它爲什麼能這樣運作)。另外還需要爲所有列標題加上索引以便區分,不過這不在我原本的問題範圍內。 – user979899