以下是一個數值變量的示例,其結果如下:
test BucketName
1 615.59148 01. 0 - 5,000
2 1135.42357 01. 0 - 5,000
3 5302.24208 02. 5,000 - 10,000
4 3794.23109 01. 0 - 5,000
5 2773.70667 01. 0 - 5,000
...
代碼如下:
#' Build ordered, human-readable labels for the intervals between breaks.
#'
#' Every pair of consecutive break points yields one label of the form
#' "<NN>. <start> - <finish>", e.g. "01. 0 - 5,000". The zero-padded ordinal
#' prefix makes the labels sort alphabetically in interval order, and the
#' interval bounds are printed with a thousands separator and never in
#' scientific notation.
#'
#' @param breaksVector Vector of break points, in the order in which the
#'   intervals should be numbered.
#' @return A character vector with `length(breaksVector) - 1` labels, or
#'   `character(0)` when fewer than two break points are supplied (there is
#'   no interval to label in that case).
generateLabelsForPivot <- function(breaksVector) {
  intervalCount <- length(breaksVector) - 1L

  # With fewer than two breaks there is no interval; return an empty result
  # instead of failing inside the sequence construction.
  if (intervalCount < 1L) {
    return(character(0))
  }

  # Interval i runs from breaksVector[i] to breaksVector[i + 1].
  startingPoints <- breaksVector[-length(breaksVector)]
  finishPoints <- breaksVector[-1L]

  # "%02d" left-pads single-digit ordinals with a zero ("01", ..., "09", "10").
  orderPrefixes <- sprintf("%02d. ", seq_len(intervalCount))

  # Format bounds like "1,000,000" rather than "1e+06".
  formatPoint <- function(points) {
    prettyNum(
      points,
      scientific = FALSE,
      big.mark = ",",
      preserve.width = "none"
    )
  }

  paste0(
    orderPrefixes,
    formatPoint(startingPoints),
    " - ",
    formatPoint(finishPoints)
  )
}
# Demo data: 100 uniform random values drawn between 0 and 100 * 100.
dataFrame <- data.frame(test = runif(n = 100, min = 0, max = 100 * 100))

# Break points delimiting the buckets, plus one generated label per interval.
GCV_breaks <- c(0, 5000, 10000, 20000, 30000, 1000000)
GCV_labels <- generateLabelsForPivot(breaksVector = GCV_breaks)
GCV_labels

# Assign every value of `test` to its labelled bucket and store the resulting
# factor as a new column of the data frame.
GCV_buckets <- cut(
  x = dataFrame[["test"]],
  breaks = GCV_breaks,
  labels = GCV_labels
)
dataFrame[["BucketName"]] <- GCV_buckets
您是否嘗試過使用'cut'? – jdharrison 2014-11-24 14:46:01
或'findInterval'(應該更快) – mnel 2014-11-24 22:54:28