2014-11-04 29 views
2

我想用reshape2來格式化這90觀察dstat數據,以便我可以合理的方式繪製它,但我無法弄清楚如何翻譯所有的foo.5列成一個我可以輕鬆地用ggplot2繪圖的東西。使用R來重塑並繪製linux dstat CSV文件

> library(RCurl) 
> x <- getURL("https://gist.githubusercontent.com/hamiltont/27d9f93ad53d8978bccd/raw/ec6118805b9ab8606a303dc81450a6f760864cf9/dstat.csv") 
> dstat = read.csv(file="stats", skip=6, header=TRUE) 
> names(dstat) 
    [1] "usr" "sys" "idl" "wai" "hiq" 
    [6] "siq" "usr.1" "sys.1" "idl.1" "wai.1" 
[11] "hiq.1" "siq.1" "usr.2" "sys.2" "idl.2" 
[16] "wai.2" "hiq.2" "siq.2" "usr.3" "sys.3" 
[21] "idl.3" "wai.3" "hiq.3" "siq.3" "usr.4" 
[26] "sys.4" "idl.4" "wai.4" "hiq.4" "siq.4" 
[31] "usr.5" "sys.5" "idl.5" "wai.5" "hiq.5" 
[36] "siq.5" "usr.6" "sys.6" "idl.6" "wai.6" 
[41] "hiq.6" "siq.6" "usr.7" "sys.7" "idl.7" 
[46] "wai.7" "hiq.7" "siq.7" "usr.8" "sys.8" 
[51] "idl.8" "wai.8" "hiq.8" "siq.8" "usr.9" 
[56] "sys.9" "idl.9" "wai.9" "hiq.9" "siq.9" 
[61] "usr.10" "sys.10" "idl.10" "wai.10" "hiq.10" 
[66] "siq.10" "usr.11" "sys.11" "idl.11" "wai.11" 
[71] "hiq.11" "siq.11" "usr.12" "sys.12" "idl.12" 
[76] "wai.12" "hiq.12" "siq.12" "usr.13" "sys.13" 
[81] "idl.13" "wai.13" "hiq.13" "siq.13" "usr.14" 
[86] "sys.14" "idl.14" "wai.14" "hiq.14" "siq.14" 
[91] "usr.15" "sys.15" "idl.15" "wai.15" "hiq.15" 
[96] "siq.15" "usr.16" "sys.16" "idl.16" "wai.16" 
[101] "hiq.16" "siq.16" "usr.17" "sys.17" "idl.17" 
[106] "wai.17" "hiq.17" "siq.17" "usr.18" "sys.18" 
[111] "idl.18" "wai.18" "hiq.18" "siq.18" "usr.19" 
[116] "sys.19" "idl.19" "wai.19" "hiq.19" "siq.19" 
[121] "usr.20" "sys.20" "idl.20" "wai.20" "hiq.20" 
[126] "siq.20" "usr.21" "sys.21" "idl.21" "wai.21" 
[131] "hiq.21" "siq.21" "usr.22" "sys.22" "idl.22" 
[136] "wai.22" "hiq.22" "siq.22" "usr.23" "sys.23" 
[141] "idl.23" "wai.23" "hiq.23" "siq.23" "read" 
[146] "writ" "recv" "send" "recv.1" "send.1" 
[151] "recv.2" "send.2" "in." "out" "int" 
[156] "csw" "X20" "X21" "X23" "X76" 
[161] "X77" "X78" "X79" "X80" "X81" 
[166] "X82" "X83" "X1m" "X5m" "X15m" 
[171] "used" "buff" "cach" "free" "run" 
[176] "blk" "new" "read.1" "writ.1" "used.1" 
[181] "free.1" "epoch" "X.aio" "files" "inodes" 
[186] "msg" "sem" "shm" "pos" "lck" 
[191] "rea" "wri" "raw" "tot" "tcp" 
[196] "udp" "raw.1" "frg" "lis" "act" 
[201] "syn" "tim" "clo" "raw.2" "tot.1" 
[206] "tcp.1" "udp.1" "raw.3" "frg.1" "lis.1" 
[211] "act.1" "syn.1" "tim.1" "clo.1" "lis.2" 
[216] "act.2" "dgm" "str" "lis.3" "act.3" 
[221] "majpf" "minpf" "alloc" "free.2" "util" 
[226] "call" "retr" "refr" "call.1" "erca" 
[231] "erau" "ercl" "xdrc" 

我的第一個情節我只是喜歡來顯示所有的usr.*列,不同顏色的線,我會做用是這樣的:

ggplot(dstat, aes(x=observation, y=usr, color=usr_type) + 
    geom_line() 

如果我能說服我的數據幀看起來是這樣的:

observation | usr_type | value 
1    usr  22 
1    usr.1  32 
1    usr.2  12 
<snip> 
2    usr  21 
2    usr.1  20 
2    usr.2  20 

回答

5

有幾種方法可以做到這一點,但我會建議使用Stacked從我的「splitstackshape」包。

的基本辦法是這樣的:

library(splitstackshape) ## Also loads "data.table" 
DT <- as.data.table(dstat, keep.rownames = TRUE) 
Stacked(DT, id.vars = "rn", var.stubs = "usr", 
     sep = "var.stubs", keep.all = FALSE) 
#  rn .time_1 usr 
# 1: 1   0.027 
# 2: 1  .1 8.068 
# 3: 1  .10 0.022 
# 4: 1  .11 3.276 
# 5: 1  .12 0.003 
# ---     
# 2060: 9  .5 0.000 
# 2061: 9  .6 0.000 
# 2062: 9  .7 0.000 
# 2063: 9  .8 0.000 
# 2064: 9  .9 0.000 

但是,你還可以做更多有趣的事情,比如「棧」列的多個組。例如,要創建的「USR」,「SYS」,「外」變量「堆疊」版本list,你可以這樣做:

varsOfInterest <- c("usr", "sys", "wai") 
Stacked(DT, id.vars = "rn", var.stubs = varsOfInterest, 
     sep = "var.stubs", keep.all = FALSE) 
# $usr 
#  rn .time_1 usr 
# 1: 1   0.027 
# 2: 1  .1 8.068 
# 3: 1  .10 0.022 
# 4: 1  .11 3.276 
# 5: 1  .12 0.003 
# ---     
# 2060: 9  .5 0.000 
# 2061: 9  .6 0.000 
# 2062: 9  .7 0.000 
# 2063: 9  .8 0.000 
# 2064: 9  .9 0.000 
# 
# $sys 
#  rn .time_1 sys 
# 1: 1   0.010 
# 2: 1  .1 5.000 
# 3: 1  .10 0.009 
# 4: 1  .11 2.559 
# 5: 1  .12 0.001 
# ---     
# 2060: 9  .5 2.000 
# 2061: 9  .6 0.000 
# 2062: 9  .7 0.000 
# 2063: 9  .8 0.000 
# 2064: 9  .9 0.000 
# 
# $wai 
#  rn .time_1 wai 
# 1: 1   0.027 
# 2: 1  .1 0.036 
# 3: 1  .10 0.003 
# 4: 1  .11 0.038 
# 5: 1  .12 0.003 
# ---     
# 2060: 9  .5 0.000 
# 2061: 9  .6 0.000 
# 2062: 9  .7 0.000 
# 2063: 9  .8 0.000 
# 2064: 9  .9 0.000 

sep = "var.stubs"論點基本上說,沒有真正的分離器在列名中。這不是,確切地說對於您的數據集是正確的,但是它不必將每個集合的第一列重命名爲「usr.0」。如果數據已經以這種形式出現,那麼您可以使用.作爲sep的值。

+0

這看起來很完美,但我無法讓它像您在我的數據上顯示的那樣工作。當使用'sep =「var.stubs」'時,它返回名稱(x)< - c(「。var」,paste(「.time」,1:(length(x) - 1)中的錯誤,sep ='_「)): 'names'屬性[3]的長度必須與vector [1]'的長度相同,當我添加一個'usr.0'列並使用'sep =」。「'它完成時沒有錯誤但沒有提取如上所示的數據(它返回了'rn .time_1 .time_2 .time_3 .time_4 .time_5 usr 1:1 0.425 2:1 2.484'。任何想法爲什麼? – Hamy 2014-11-04 15:44:34

+0

@Hamy,我認爲你鏈接到的文件*是你的數據,你使用的是什麼版本的「splitstackshape」和「data.table」?什麼操作系統? – A5C1D2H2I1M1N2O1R2T1 2014-11-04 15:46:36

+0

這就是爲什麼我也很困惑:-P'splitstackshape version 1.2.0' ,'data.table version 1.9.2'和'OS X 10.9.5' – Hamy 2014-11-04 15:48:55

2
library(reshape2) 
dstat$obs<-1:nrow(dstat) 
u<-names(dstat)[grep("usr", names(dstat))] 
dstat1<-dstat[,c("obs",u)] 
dstat2<-melt(dstat1, id=c("obs")) 
dstat2<-dstat2[order(dstat2$obs),] 
1
dstat -tcmn --output node.csv #output dstat 

mail -a node.csv -s "Backup File" [email protected] < /dev/null #send as attachment 

RStudio & plotly下面

library(plotly) 
ds = read.table("C:\\Users\\tank.sui\\Desktop\\node.csv",header=TRUE,sep=",",skip=6) 
p <- plot_ly(x=strptime(date.time,"%m-%d %H:%M:%S"),y=idl,name="Test",data=ds) 
p 

截圖是在CPU空閒一個statictics。 cpu idl statistics