如何在R中生成bin频率表?
在下面的示例中,如何将数据按 0.1 的区间宽度分箱(bin),并统计每个区间的频数?(标签:r, bin)
# Sample data from the question: 20 numeric values between 0 and 1.
x <- c(
  0.01, 0.34, 0.45, 0.67, 0.89, 0.12, 0.34, 0.45, 0.23, 0.45,
  0.34, 0.32, 0.45, 0.21, 0.55, 0.66, 0.99, 0.23, 0.012, 0.34
)
range frequency
0.1-0.2 a
0.2-0.3 b
0.3-0.4 c
................
................
................
................
试试看(@akrun 的回答):`table(cut(x, breaks = seq(0, 1, by = 0.1)))`
关于 @akrun 的解决方案,我想引用 `?cut` 文档中的一段有用说明,以供参考:
注意:与 `table(cut(x, br))` 相比,`hist(x, br, plot = FALSE)` 效率更高,占用内存更少。
因此,如果数据量很大,我更倾向于使用:
# Frequency table of `x` over bins of width 0.1 on [0, 1].
# `x` must already exist as a numeric vector with all values inside [0, 1].

# Bin edges 0, 0.1, ..., 1 and one "lower - upper" label per adjacent pair.
br <- seq(0, 1, by = 0.1)
ranges <- paste(head(br, -1), br[-1], sep = " - ")

# plot = FALSE makes hist() return the tallies without drawing anything;
# include.lowest = TRUE lets a value equal to the first break fall in bin 1.
freq <- hist(x, breaks = br, include.lowest = TRUE, plot = FALSE)
data.frame(range = ranges, frequency = freq$counts)
#        range frequency
#1     0 - 0.1         2
#2   0.1 - 0.2         1
#3   0.2 - 0.3         3
#4   0.3 - 0.4         5
#5   0.4 - 0.5         4
#6   0.5 - 0.6         1
#7   0.6 - 0.7         2
#8   0.7 - 0.8         0
#9   0.8 - 0.9         1
#10    0.9 - 1         1
Akrun 的回答很好,但在输出格式上还没有完全满足我的需求。
# Sample data, binned into the right-closed intervals (0,0.1], ..., (0.9,1].
x <- c(
  0.01, 0.34, 0.45, 0.67, 0.89, 0.12, 0.34, 0.45, 0.23, 0.45,
  0.34, 0.32, 0.45, 0.21, 0.55, 0.66, 0.99, 0.23, 0.012, 0.34
)
cuts <- cut(x, breaks = seq(0, 1, by = 0.1))

# Plain unnamed count vector, one entry per factor level of `cuts`.
counts <- as.vector(table(cuts))

# The display formatting: turn the "(a,b]" level labels produced by cut()
# into numeric lower/upper bound columns.
labs <- levels(cuts)
lower_bound <- as.numeric(sub("\\((.+),.*", "\\1", labs))
upper_bound <- as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs))
lable_matrix <- cbind(lower = lower_bound, upper = upper_bound)

cut_frame <- data.frame(lable_matrix, counts)
#   lower upper counts
#1    0.0   0.1      2
#2    0.1   0.2      1
#3    0.2   0.3      3
#4    0.3   0.4      5
#5    0.4   0.5      4
#6    0.5   0.6      1
#7    0.6   0.7      2
#8    0.7   0.8      0
#9    0.8   0.9      1
#10   0.9   1.0      1
以 @Colonel Beauvel 的回答为基础,
写了一个 bin 频率表函数(直方图表格)。
以下是我所做的:
# Build `freq_table`: a frequency table of `x` over consecutive fixed-width
# bins of width `len`, starting at min(x). Each row is the half-open interval
# [start, end) together with the number of values that fall inside it; empty
# bins between min(x) and max(x) are listed with count 0.
# `x` must already exist as a numeric vector.
x <- sort(x)  # sort() also drops NA values
len <- 0.002

if (length(x) == 0) {
  # Nothing to bin: return an empty table with the same columns.
  freq_table <- data.frame(
    start = numeric(0),
    end = numeric(0),
    count = integer(0)
  )
} else {
  start <- min(x)
  # Extend one bin width past max(x) so the largest value has a bin.
  breaks <- seq(start, max(x) + len, by = len)
  # findInterval() gives, for every value, the index i of the bin
  # [breaks[i], breaks[i + 1]) that contains it.
  bin_index <- findInterval(x, breaks)
  n_bins <- max(bin_index)
  bin_start <- breaks[seq_len(n_bins)]
  freq_table <- data.frame(
    start = bin_start,
    end = bin_start + len,
    count = tabulate(bin_index, nbins = n_bins)
  )
}
评论:`freq = hist(x, breaks = br, include.lowest = TRUE, plot = FALSE)` —— 原先写的 `break=br` 应该是 `breaks=br`。
评论:如何将此应用于数据框的所有列,并将结果存储在新的数据框中?
#' Bin frequency table (histogram counts as a data frame)
#'
#' @param x A numeric vector to be binned.
#' @param bins Passed to `hist()` as its `breaks` argument, e.g. a vector of
#'   break points or a single number of cells.
#' @return A data frame with one row per bin: `range` is a
#'   `"lower - upper"` label built from the breaks `hist()` actually used,
#'   and `frequency` is the count of values in that bin.
binFreqTable <- function(x, bins) {
  # plot = FALSE: only compute the histogram object, draw nothing.
  h <- hist(x, breaks = bins, include.lowest = TRUE, plot = FALSE)
  lower <- head(h$breaks, -1)
  upper <- h$breaks[-1]
  data.frame(
    range = paste(lower, upper, sep = " - "),
    frequency = h$counts
  )
}
> binFreqTable(x,c(0,.3,.6,1))
# range frequency
#1 0 - 0.3 6
#2 0.3 - 0.6 10
#3 0.6 - 1 4
> binFreqTable(x,5)
# range frequency
#1 0 - 0.2 3
#2 0.2 - 0.4 8
#3 0.4 - 0.6 5
#4 0.6 - 0.8 2
#5 0.8 - 1 2
> binFreqTable(x,seq(0,1,by=0.1))
# range frequency
#1 0 - 0.1 2
#2 0.1 - 0.2 1
#3 0.2 - 0.3 3
#4 0.3 - 0.4 5
#5 0.4 - 0.5 4
#6 0.5 - 0.6 1
#7 0.6 - 0.7 2
#8 0.7 - 0.8 0
#9 0.8 - 0.9 1
#10 0.9 - 1 1
# Build `freq_table`: a frequency table of `x` over consecutive fixed-width
# bins of width `len`, starting at min(x). Each row is the half-open interval
# [start, end) together with the number of values that fall inside it; empty
# bins between min(x) and max(x) are listed with count 0.
# `x` must already exist as a numeric vector.
x <- sort(x)  # sort() also drops NA values
len <- 0.002

if (length(x) == 0) {
  # Nothing to bin: return an empty table with the same columns.
  freq_table <- data.frame(
    start = numeric(0),
    end = numeric(0),
    count = integer(0)
  )
} else {
  start <- min(x)
  # Extend one bin width past max(x) so the largest value has a bin.
  breaks <- seq(start, max(x) + len, by = len)
  # findInterval() gives, for every value, the index i of the bin
  # [breaks[i], breaks[i + 1]) that contains it.
  bin_index <- findInterval(x, breaks)
  n_bins <- max(bin_index)
  bin_start <- breaks[seq_len(n_bins)]
  freq_table <- data.frame(
    start = bin_start,
    end = bin_start + len,
    count = tabulate(bin_index, nbins = n_bins)
  )
}