按类别计算R中的百分比
以下是我的数据示例:按类别计算R中的百分比,r,R,以下是我的数据示例: dfretret附上了一个可能的解决方案。也许有更好的办法,但我会“自发地”想出 问候 df_test <- read.table(header = TRUE, text = "book pen desk ipad 3 4 3 4 3 0 0 3 0 3 0 2 1 3 2 1 4 1 4 3 0 0 3 1 2 1 3 2 0
附上一个可能的解决方案。也许有更好的办法,但这是我一时想到的做法。
问候
# Example data: one column per item ("class"), one score per cell.
df_test <- read.table(header = TRUE, text =
"book pen desk ipad
3 4 3 4
3 0 0 3
0 3 0 2
1 3 2 1
4 1 4 3
0 0 3 1
2 1 3 2
0 2 1 0
4 2 2 2
0 1 2 1
1 4 1 4
2 0 1 3
4 3 2 0
4 0 4 2"
)

# Percentage of the entries of `x` whose value is one of `scores`.
# NA entries never match but still count towards the denominator.
pct_in <- function(x, scores) {
  sum(x %in% scores) / length(x) * 100
}

# Score bands: 0-1 = low, 2-3 = medium, 4-5 = high.
# lapply() over a data frame visits each column and keeps the column names,
# so every result is a named list with one percentage per column.
low <- lapply(df_test, pct_in, scores = 0:1)
medium <- lapply(df_test, pct_in, scores = 2:3)
high <- lapply(df_test, pct_in, scores = 4:5)

# One row per column of df_test. as.numeric() drops the list names so they
# are not picked up as row names, and keeps the columns numeric even when
# df_test has no columns at all.
df_test_final <- data.frame(
  Class = colnames(df_test),
  Low = as.numeric(unlist(low)),
  Medium = as.numeric(unlist(medium)),
  High = as.numeric(unlist(high))
)
df_test_final
附上一个可能的解决方案。也许有更好的办法,但这是我一时想到的做法。
问候
# Example data: one column per item ("class"), one score per cell.
df_test <- read.table(header = TRUE, text =
"book pen desk ipad
3 4 3 4
3 0 0 3
0 3 0 2
1 3 2 1
4 1 4 3
0 0 3 1
2 1 3 2
0 2 1 0
4 2 2 2
0 1 2 1
1 4 1 4
2 0 1 3
4 3 2 0
4 0 4 2"
)

# Percentage of the entries of `x` whose value is one of `scores`.
# NA entries never match but still count towards the denominator.
pct_in <- function(x, scores) {
  sum(x %in% scores) / length(x) * 100
}

# Score bands: 0-1 = low, 2-3 = medium, 4-5 = high.
# lapply() over a data frame visits each column and keeps the column names,
# so every result is a named list with one percentage per column.
low <- lapply(df_test, pct_in, scores = 0:1)
medium <- lapply(df_test, pct_in, scores = 2:3)
high <- lapply(df_test, pct_in, scores = 4:5)

# One row per column of df_test. as.numeric() drops the list names so they
# are not picked up as row names, and keeps the columns numeric even when
# df_test has no columns at all.
df_test_final <- data.frame(
  Class = colnames(df_test),
  Low = as.numeric(unlist(low)),
  Medium = as.numeric(unlist(medium)),
  High = as.numeric(unlist(high))
)
df_test_final
这里有一个 tidyverse 解决方案:
library(tidyverse)

# Example data: one column per item ("class"), one score per cell.
df <- read.table(header = TRUE, text =
"book pen desk ipad
3 4 3 4
3 0 0 3
0 3 0 2
1 3 2 1
4 1 4 3
0 0 3 1
2 1 3 2
0 2 1 0
4 2 2 2
0 1 2 1
1 4 1 4
2 0 1 3
4 3 2 0
4 0 4 2"
)

# Percentage of low / medium / high scores for every column of df.
# Score bands used here: 0-1 = low ("l"), 2 = medium ("m"), 3-4 = high ("h").
df %>%
  # Long format: one row per (Class, value) pair, whatever the column count.
  pivot_longer(
    everything(),
    names_to = "Class",
    values_to = "value"
  ) %>%
  mutate(category = case_when(
    value %in% 0:1 ~ "l",
    value == 2 ~ "m",
    value %in% 3:4 ~ "h"
  )) %>%
  count(Class, category) %>%
  # values_fill: a Class with no score in some band gets a count of 0 instead
  # of NA, which would otherwise make all three percentages of that row NA.
  pivot_wider(
    names_from = category,
    values_from = n,
    values_fill = list(n = 0L)
  ) %>%
  # Grouping by Class makes sum(h, m, l) the per-Class total. The result
  # stays grouped by Class; append ungroup() if later steps should not
  # inherit the grouping.
  group_by(Class) %>%
  transmute(
    Class = Class,
    High = h / sum(h, m, l) * 100,
    Medium = m / sum(h, m, l) * 100,
    Low = l / sum(h, m, l) * 100
  )
库(tidyverse)
df%
枢轴_更长(1:4,
name_to=“Class”,
值_to=“value”)%>%
当(值%0:1~“l”,
值==2~“m”,
%3:4~“h”)中的值%)%>%
分组依据(类别、类别)%>%
计数(类别)%>%
透视图(名称从=类别,值从=n)%>%
蜕变(阶级=阶级,
高=h/和(h,m,l)*100,
中等=米/总和(高、米、长)*100,
低=l/和(h,m,l)*100)
以及由此产生的表格:
# A tibble: 4 x 4
# Groups: Class [4]
Class High Medium Low
<chr> <dbl> <dbl> <dbl>
1 book 42.9 14.3 42.9
2 desk 35.7 28.6 35.7
3 ipad 35.7 28.6 35.7
4 pen 35.7 14.3 50
#一个tible:4 x 4
#组别:班级[4]
级别高中低
1第42.9 14.3 42.9册
2办公桌35.7 28.6 35.7
3 ipad 35.7 28.6 35.7
4笔35.7 14.3 50
这里有一个tidyverse
解决方案:
library(tidyverse)

# Example data: one column per item ("class"), one score per cell.
df <- read.table(header = TRUE, text =
"book pen desk ipad
3 4 3 4
3 0 0 3
0 3 0 2
1 3 2 1
4 1 4 3
0 0 3 1
2 1 3 2
0 2 1 0
4 2 2 2
0 1 2 1
1 4 1 4
2 0 1 3
4 3 2 0
4 0 4 2"
)

# Percentage of low / medium / high scores for every column of df.
# Score bands used here: 0-1 = low ("l"), 2 = medium ("m"), 3-4 = high ("h").
df %>%
  # Long format: one row per (Class, value) pair, whatever the column count.
  pivot_longer(
    everything(),
    names_to = "Class",
    values_to = "value"
  ) %>%
  mutate(category = case_when(
    value %in% 0:1 ~ "l",
    value == 2 ~ "m",
    value %in% 3:4 ~ "h"
  )) %>%
  count(Class, category) %>%
  # values_fill: a Class with no score in some band gets a count of 0 instead
  # of NA, which would otherwise make all three percentages of that row NA.
  pivot_wider(
    names_from = category,
    values_from = n,
    values_fill = list(n = 0L)
  ) %>%
  # Grouping by Class makes sum(h, m, l) the per-Class total. The result
  # stays grouped by Class; append ungroup() if later steps should not
  # inherit the grouping.
  group_by(Class) %>%
  transmute(
    Class = Class,
    High = h / sum(h, m, l) * 100,
    Medium = m / sum(h, m, l) * 100,
    Low = l / sum(h, m, l) * 100
  )
库(tidyverse)
df%
枢轴_更长(1:4,
name_to=“Class”,
值_to=“value”)%>%
当(值%0:1~“l”,
值==2~“m”,
%3:4~“h”)中的值%)%>%
分组依据(类别、类别)%>%
计数(类别)%>%
透视图(名称从=类别,值从=n)%>%
蜕变(阶级=阶级,
高=h/和(h,m,l)*100,
中等=米/总和(高、米、长)*100,
低=l/和(h,m,l)*100)
以及由此产生的表格:
# A tibble: 4 x 4
# Groups: Class [4]
Class High Medium Low
<chr> <dbl> <dbl> <dbl>
1 book 42.9 14.3 42.9
2 desk 35.7 28.6 35.7
3 ipad 35.7 28.6 35.7
4 pen 35.7 14.3 50
#一个tible:4 x 4
#组别:班级[4]
级别高中低
1第42.9 14.3 42.9册
2办公桌35.7 28.6 35.7
3 ipad 35.7 28.6 35.7
4笔35.7 14.3 50
我会先从 `?cut` 看起。这是一个“分箱”(binning)函数,它会把每个值分到 `[0,2)`、`[2,4)` 和 `[4,Inf)` 这几个区间中(左闭右开,因为 `right = FALSE`)。你可以先对单独一列运行 `cut(df[,1], c(0,2,4,Inf), right = FALSE)`(暂时不使用 `labels=`),确认它返回的因子(factor)与上面的描述一致。把 `c(0,2,4,Inf)` 改为 `c(0,2,3,Inf)`,就能覆盖你修改后的逻辑。你看出 `cut(…, right = FALSE)` 的工作规律了吗?
我会先从 `?cut` 看起。这是一个“分箱”(binning)函数,它会把每个值分到 `[0,2)`、`[2,4)` 和 `[4,Inf)` 这几个区间中(左闭右开,因为 `right = FALSE`)。你可以先对单独一列运行 `cut(df[,1], c(0,2,4,Inf), right = FALSE)`(暂时不使用 `labels=`),确认它返回的因子(factor)与上面的描述一致。把 `c(0,2,4,Inf)` 改为 `c(0,2,3,Inf)`,就能覆盖你修改后的逻辑。你看出 `cut(…, right = FALSE)` 的工作规律了吗?