2014-10-02 40 views
0

我試圖用 weatherData 包從我的個人氣象站獲取每小時的總降水量。問題在於數據每五分鐘收集一次,同一個降水量值會重複出現,直到降水量發生變化為止。我已經嘗試過 `duplicated()` 函數,但在沒有降水的時段它會刪掉大量數據,這使我很難得到每小時降水量的匯總。

標題:在 R 中按小時匯總每 5 分鐘一次的降水量值

請參閱下面的代碼

## Load required libraries 

library(weatherData) 
library(ggplot2) 
library(scales) 
library(plyr) 
library(reshape2) 
library(gridExtra) 
library(lubridate) 
library(weathermetrics) 
library(zoo) 

# Get data for PWS using weatherData package 

pws <- getWeatherForDate("IPENANGB2", "2014-09-01","2014-09-30", station_type = "id",opt_detailed=T, opt_custom_columns=T, custom_columns=c(1,2,6,7,10)) 

# Rename columns 
colnames(pws)<-c("time","time1","tempc","wdd","wspd","prcp") 


## Adding date columns 

pws$time<-as.POSIXct(pws$time1,format="%Y-%m-%d %H:%M:%S",tz="Australia/Perth") 
pws$year <- as.numeric(format(pws$time,"%Y")) 
pws$date <-as.Date(pws$time,format="%Y-%m-%d",tz="Australia/Perth") 
pws$year <- as.numeric(as.POSIXlt(pws$date)$year+1900) 
pws$month <- as.numeric(as.POSIXlt(pws$date)$mon+1) 
pws$monthf <- factor(pws$month,levels=as.character(1:12),labels=c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"),ordered=TRUE) 
pws$weekday <- as.POSIXlt(pws$date)$wday 
pws$weekdayf <- factor(pws$weekday,levels=rev(0:6),labels=rev(c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")),ordered=TRUE) 
pws$yearmonth <- as.yearmon(pws$date) 
pws$yearmonthf <- factor(pws$yearmonth) 
pws$week <- as.numeric(format(as.Date(pws$date),"%W")) 
pws$weekf<- factor(pws$week) 
pws$jday<-yday(pws$date) 
pws$hour <- as.numeric(format(strptime(pws$time, format = "%Y-%m-%d %H:%M"),format = "%H")) 
pws$min <- as.numeric(format(strptime(pws$time, format = "%Y-%m-%d %H:%M"),format = "%M")) 

# Remove duplicate values 

pws.df <- pws[!duplicated(pws$prcp),] 

回答

1

假設你想獲得 tempc、wdd、wspd 和 prcp 的每小時平均值:

# used packages 
library(weatherData) 
library(lubridate) 
library(dplyr) 
library(stringr) 

# read data 
pws <- getWeatherForDate("IPENANGB2", 
         "2014-09-01", 
         "2014-09-30", 
         station_type = "id", 
         opt_detailed = T, 
         opt_custom_columns = T, 
         custom_columns = c(1, 2, 6, 7, 10)) 
# rename columns 
colnames(pws) <- c("time", "time1", "tempc", "wdd", "wspd", "prcp") 

# cleaning dataset and adding some columns 
useful_pws <- 
    pws %>% 
    select(2:6) %>% 
    filter(!str_detect(time1, "<br>")) %>% 
    mutate(time1 = ymd_hms(time1), 
      year = year(time1), 
      month = month(time1), 
      day = day(time1), 
      hour = hour(time1)) %>% 
    tbl_df() 

# summarising dataset 
useful_pws %>% 
    select(-time1) %>% 
    group_by(year, month, day, hour) %>% 
    summarise(tempc = mean(tempc, na.rm = TRUE), 
       wdd = mean(wdd, na.rm = TRUE), 
       wspd = mean(wspd, na.rm = TRUE), 
       prcp = mean(prcp, na.rm = TRUE)) 
+0

米哈謝謝!這很有幫助 – 2014-11-06 00:28:38