如何使用Purr或Tidyverse中的其他函数重构此脚本？_R_Tidyverse

如何使用Purr或Tidyverse中的其他函数重构此脚本？

如何使用Purr或Tidyverse中的其他函数重构此脚本？,r,tidyverse,R,Tidyverse,这是清理数据之前的数据头。它显示了澳大利亚动物的适应率 head(df) # A tibble: 6 x 12 year animal_type outcome ACT NSW NT QLD SA TAS VIC WA Total <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>

这是清理数据之前的数据头。它显示了澳大利亚动物的适应率

head(df)
# A tibble: 6 x 12
   year animal_type outcome      ACT   NSW    NT   QLD    SA   TAS   VIC    WA Total
  <dbl> <chr>       <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1  1999 Dogs        Reclaimed    610  3140   205  1392  2329   516  7130     1 15323
2  1999 Dogs        Rehomed     1245  7525   526  5489  1105   480  4908   137 21415
3  1999 Dogs        Other         12   745   955   860   380   168  1001     6  4127
4  1999 Dogs        Euthanized   360  9221     9  9214  1701   599  5217    18 26339
5  1999 Cats        Reclaimed    111   201    22   206   157    31   884     0  1612
6  1999 Cats        Rehomed     1442  3913   269  3901  1055   752  3768    62 15162

我试图找出tidyverse中是否有一种方法可以在不必重复调用函数的情况下循环遍历列

library(tidyverse)

df <- readr:: read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_outcomes.csv')

ACT <- df %>% group_by(df$ACT) %>%  count(,outcome, outcome) %>% data.frame()
NSW <- df %>% group_by(df$NSW) %>%  count(,outcome, outcome) %>% data.frame()
NT <- df %>% group_by(df$NT) %>%  count(,outcome, outcome) %>% data.frame()
QLD <- df %>%group_by(df$QLD) %>% count(,outcome, outcome) %>% data.frame()
SA <- df %>% group_by(df$SA) %>% count(,outcome, outcome) %>% data.frame()
TAS <- df %>% group_by(df$TAS) %>% count(,outcome, outcome) %>% data.frame()
VIC <- df %>% group_by(df$VIC) %>% count(,outcome, outcome) %>% data.frame()
WA <- df %>% group_by(df$WA) %>% count(,outcome, outcome) %>% data.frame()

库（tidyverse）
df%计数（，结果，结果）%>%data.frame（）
新南威尔士州%分组依据（df$NSW）%>%计数（，结果，结果）%>%数据帧（）
NT%分组依据（df$NT）%%>%计数（，结果，结果）%%>%数据帧（）
QLD%分组依据（df$QLD）%%>%计数（，结果，结果）%%>%data.frame（）
SA%分组依据（df$SA）%%>%计数（，结果，结果）%%>%数据帧（）
TAS%分组依据（df$TAS）%%>%计数（，结果，结果）%%>%数据帧（）
维克%分组依据（df$VIC）%%>%计数（，结果，结果）%%>%数据帧（）
WA%分组依据（df$WA）%%>%计数（，结果，结果）%%>%数据帧（）

我听说有一个通用的工作流，您可以在其中编写简单的函数并将其映射到数据帧。如何跨列使用此函数

rate <- function(x)
{  
  home_found <- x %>% filter(x$outcome == "Rehomed") %>% count(,outcome)
  home_found <- home_found[2]
  home_not_found <- x %>% filter(x$outcome != "Rehomed") %>% select(n) %>% sum()
  home_not_found <- home_not_found[1]
  percent <- home_found / (home_found+ home_not_found)
  percent <- percent* 100
  return (percent)
}

ACT_val  <- rate(ACT)
NSW_val <- rate(NSW)
NT_Val <- rate(NT)
QLD_val <- rate(QLD)
SA_Val <- rate(SA)
TAS_Val <- rate(TAS)
VIC_VAL <- rate(VIC)
WA_Val <- rate(WA)


a <- rbind("ACT",ACT_val) 
b <- rbind("NSW",NSW_val) 
c <- rbind("NT", NT_Val)
d <- rbind("QLD",QLD_val)
e <- rbind("SA", SA_Val)
f <- rbind("TAS", TAS_Val)
g <- rbind("VIC", VIC_VAL)
h <- rbind("WA", WA_Val)

df <- cbind(a,b,c,d,e,f,g,h) %>% data.frame()
colnames(df) <- df[1,]
df <- df %>% t()
colnames(df) <- c("States", "Percent_Found")
df <- df %>% data.frame()

rate%计数（，结果）
主页\u找到%select（n）%%>%sum（）
家找不到我猜你想要这样的东西。我还将获得每个地区的15.06
。我还使用了dplyr\u 1.0.0

库（purrr）
图书馆（dplyr）
图书馆（tibble）
#指定要迭代的列
费率为%
map_dbl（~df%>%
group_by（！！.x）%>%#需要！！运算符来计算字符值
计数（结果）%>%
速率（））%>%
enframe（“状态”、“找到的百分比”）#将矢量转换为df

这与您发布的内容相匹配
#一个tible:8 x 2
找到的州百分比
第15.1条
2新南威尔士州15.1
3新界15.1
4昆士兰15.1
5 SA 15.1
6.15.1
7.15.1
8.15.1

我认为这不是你想要的。这会给出不同的数字，但也许类似的东西适合你。除非我不理解您的数据集，否则我认为您希望求和
，而不是计数

库（dplyr）
图书馆（tidyr）
df%>%
组别(结果)%>%
汇总于（所有（比率），总和，na.rm=TRUE）%>%
pivot_longer（cols=-outcome，names_to=“States”）%>%
按（州）分组%>%
变异（总百分比=值/总和（值））%>%
筛选（结果=“重新安置”）

你可以在需要的时候把东西拿出来
#一个tible:8 x 4
#集团：国家[8]
结果状态值总百分比
1《重新安置法》45678 0.341
2新南威尔士州重新安置194820 0.302
3重新安置新台币56228 0.409
4昆士兰州重新安置252229 0.294
5安置SA 62939 0.299
6个安置助教353900.378
7安置受害者201866 0.306
8重新安置WA 24781 0.422
当我复制/粘贴您的代码时，我不会得到与您相同的结果。你确定这是正确的吗？一般来说，更好的策略是使用tidyr:：pivot\u longer
将数据重塑为长格式，这样每个区域都有一行数据，而不是将数据分散到各个区域n。我使用的是R-3.6.1。我只是再次运行了那个脚本，得到了相同的结果。我会确保查看pivot_的时间更长。当我运行代码时，我会得到每个地区的15.0602409638554
。它可能更多地与dplyr版本有关，而不是与R版本有关。我用dplyr_1.0.0进行了测试。有count（，output）的地方看起来可疑，因为通常情况下，dplyr动词没有空参数值。我使用的是dplyr_0.8.4。也许我误解了“/%>%”运算符。我认为它将参数带到左边，并用作下一个函数的第一个参数。
rate <- function(x)
{  
  home_found <- x %>% filter(x$outcome == "Rehomed") %>% count(,outcome)
  home_found <- home_found[2]
  home_not_found <- x %>% filter(x$outcome != "Rehomed") %>% select(n) %>% sum()
  home_not_found <- home_not_found[1]
  percent <- home_found / (home_found+ home_not_found)
  percent <- percent* 100
  return (percent)
}

ACT_val  <- rate(ACT)
NSW_val <- rate(NSW)
NT_Val <- rate(NT)
QLD_val <- rate(QLD)
SA_Val <- rate(SA)
TAS_Val <- rate(TAS)
VIC_VAL <- rate(VIC)
WA_Val <- rate(WA)


a <- rbind("ACT",ACT_val) 
b <- rbind("NSW",NSW_val) 
c <- rbind("NT", NT_Val)
d <- rbind("QLD",QLD_val)
e <- rbind("SA", SA_Val)
f <- rbind("TAS", TAS_Val)
g <- rbind("VIC", VIC_VAL)
h <- rbind("WA", WA_Val)

df <- cbind(a,b,c,d,e,f,g,h) %>% data.frame()
colnames(df) <- df[1,]
df <- df %>% t()
colnames(df) <- c("States", "Percent_Found")
df <- df %>% data.frame()