2016-04-14 75 views
0

我需要能夠從這個文件中,有許多JSON條目提取這些字段:你如何提取嵌套JSON數據數據中的R

sender: Hostname 
mem:used_p 
cpu: user_p 
load: load_5 

//

cat tmp.txt 

{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"} 

{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"} 

{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"} 

我可以提取有效載荷數據部分內的數據,如下所示:

df <- jsonlite::fromJSON(paste0("[",paste0(readLines("c:/tmp.txt"),collapse=","),"]"))$payloadData[c("timestamp","count")] 

但是payloadData部分中存在嵌套對象,如何額外在mem,cpu,load部分嵌套在json數據中的數據?

+1

IIRC有一個在fromJSON一個扁平化參數,它應該帶來訪問的最高級別的所有值從通常的子集方法。 (在家裏,如果沒有答案的話,會檢查tomorow) – Tensibai

+0

@Tensibai,謝謝flatten = TRUE解決了我的問題。 – user1471980

回答

0

一種方法是使用tidyjson

library(tidyjson) 
library(magrittr) 

json <- '{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}' 

json %>% 
    enter_object("payloadData") %>% 
    spread_values(send_host = jstring("sender", "hostname"), 
       mem_used_p = jstring("mem", "used_p"), 
       cpu_user_p = jstring("cpu", "user_p"), 
       load_load_5 = jstring("load","load5")) 

# document.id send_host   mem_used_p   cpu_user_p load_load_5 
# 1   1 server1 0.6242380717975277 0.18809078763071663   1.2 

或者,您可以堆疊中的每一組鍵:

payload <- json %>% enter_object("payloadData") 

sender_keys <- payload %>% 
    enter_object("sender") %>% 
    gather_keys() %>% 
    append_values_string() 

mem_keys <- payload %>% 
    enter_object("mem") %>% 
    gather_keys() %>% 
    append_values_string() 

cpu_keys <- payload %>% 
    enter_object("cpu") %>% 
    gather_keys() %>% 
    append_values_string() 

load_keys <- payload %>% 
    enter_object("load") %>% 
    gather_keys() %>% 
    append_values_string() 
+0

感謝您的留言。有一件事是我會閱讀一堆文件,並且文件條目就像我插入的JSON數據。你能夠修改你的代碼來反映閱讀文件,每行是json數據。 – user1471980

+0

簡答題,「不」 - 較長的答案,這不是你的問題。如果是,那麼你應該編輯問題或問一個新問題。最長的答案==只是將上面的結果保存到變量'row'中,然後將每個文件的行綁定在一起。你可能想要使用一個循環或一個'apply'函數。 – JasonAizkalns

+0

@JasonAizkalnz,我已更新原始帖子。 – user1471980