R Mutate()基于另一列中长度不同的字符串向量创建新列

R Mutate()基于另一列中长度不同的字符串向量创建新列,r,dplyr,R,Dplyr,如何mutate()使用表示现有列中字符串的可变长度向量创建新变量 我与几个大洲的许多国家有一个数据框架。我想要mutate()一个名为contraction的新变量 head(chocolate_data_common_beans3, n = 10) company_location cocoa_percent rating 1 France 63 3.75 2 Fiji 72 3.50 3

如何
mutate()
使用表示现有列中字符串的可变长度向量创建新变量

我有一个数据框,其中包含来自几个大洲的许多国家。我想要用
mutate()
一个名为
continent
的新变量

head(chocolate_data_common_beans3, n = 10)

company_location cocoa_percent rating
1  France           63            3.75  
2  Fiji             72            3.50  
3  Ecuador          55            2.75  
4  U.S.A.           75            2.75  
5  U.S.A.           70            2.75  
6  U.S.A.           55            2.75  
7  Canada           72            3.75  
8  U.S.A.           85            3.50  
9  Australia        78            3.75  
10 Austria          70            3.75
以下是每个大陆的所有值

# Lookup vectors: the company_location values that belong to each continent.
# The vectors have different lengths; entries are kept exactly as given.

africa <- c(
  "South Africa", "Sao Tome", "Madagascar", "Ghana"
)

asia <- c(
  "Vietnam", "South Korea", "Singapore", "Russia",
  "Philippines", "Japan", "Israel", "India"
)

# NOTE(review): "Amsterdam" is a city rather than a country; presumably it
# mirrors a raw company_location value -- confirm against the data.
europe <- c(
  "Wales", "U.K.", "Switzerland", "Sweden", "Spain", "Scotland",
  "Portugal", "Poland", "Netherlands", "Lithuania", "Italy", "Ireland",
  "Iceland", "Hungary", "Germany", "France", "Finland", "Denmark",
  "Czech Republic", "Belgium", "Austria", "Amsterdam"
)

south_america <- c(
  "Venezuela", "Suriname", "Peru", "Ecuador", "Costa Rica",
  "Colombia", "Chile", "Brazil", "Bolivia", "Argentina"
)

# NOTE(review): "Niacragua" looks like a misspelling of "Nicaragua";
# presumably it is listed so that a misspelled raw value still matches --
# confirm against the data before removing it.
north_america <- c(
  "U.S.A.", "St. Lucia", "Puerto Rico", "Nicaragua", "Niacragua",
  "Mexico", "Martinique", "Honduras", "Guatemala", "Grenada",
  "Dominican Republic", "Canada"
)

oceania <- c(
  "New Zealand", "Fiji", "Australia"
)
我怎样才能做到这一点


您可以看到所有的代码。

我们可以在创建
键/值(key/value)
数据集后进行连接

library(tidyverse)
# Stack the named list of country vectors into a long lookup table
# (`values` = country, `ind` = continent name), right-join it onto the
# chocolate data so every data row is kept, then give `ind` a descriptive
# name.
stack(list(
  Africa = africa,
  Asia = asia,
  Europe = europe,
  `South America` = south_america,
  `North America` = north_america,
  Oceania = oceania
)) %>%
  right_join(
    chocolate_data_common_beans2,
    by = c("values" = "company_location")
  ) %>%
  rename(continent = ind)
#       values     continent cocoa_percent rating
#1     France        Europe            63   3.75
#2       Fiji       Oceania            72   3.50
#3    Ecuador South America            55   2.75
#4     U.S.A. North America            75   2.75
#5     U.S.A. North America            70   2.75
#6     U.S.A. North America            55   2.75
#7     Canada North America            72   3.75
#8     U.S.A. North America            85   3.50
#9  Australia       Oceania            78   3.75
#10   Austria        Europe            70   3.75

或者使用
enframe
而不是
stack

# Build a two-column lookup (continent, company_location) from the named
# list of country vectors and right-join it onto the chocolate data, so
# every data row is kept and gains a `continent` column.
list(
  Africa = africa,
  Asia = asia,
  Europe = europe,
  `South America` = south_america,
  `North America` = north_america,
  Oceania = oceania
) %>%
  enframe(name = "continent", value = "company_location") %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = company_location) %>%
  # Spell out the key instead of relying on the guessed common column
  # (which also prints a "Joining, by = ..." message).
  right_join(chocolate_data_common_beans2, by = "company_location")
注意:此方法的优点是不使用多个嵌套条件来更改值。我们只需要一个
连接

基准 在稍微大一点的数据集上

# Inflate the sample for benchmarking: every row index of
# chocolate_data_common_beans2 is repeated 1e5 times in a row.
dfN <- chocolate_data_common_beans2[
  rep(seq_len(nrow(chocolate_data_common_beans2)), each = 1e5),
]
library(microbenchmark)

# Benchmark candidate: add a `continent` column to `dfN` with a join.
#
# Builds a two-column lookup (continent, company_location) from the
# per-continent country vectors and right-joins it onto `dfN`, so every row
# of `dfN` is kept; countries absent from the lookup get NA as continent.
#
# Takes no arguments; reads the globals africa, asia, europe, south_america,
# north_america, oceania and dfN. Returns the joined table.
akrun <- function() {
  list(
    Africa = africa,
    Asia = asia,
    Europe = europe,
    `South America` = south_america,
    `North America` = north_america,
    Oceania = oceania
  ) %>%
    enframe(name = "continent", value = "company_location") %>%
    # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
    # omitted.
    unnest(cols = company_location) %>%
    # Spell out the key instead of relying on the guessed common column
    # (which also prints a "Joining, by = ..." message on every run).
    right_join(dfN, by = "company_location")
}

# Benchmark candidate: add a `continent` column to `dfN` with case_when().
#
# Each row's company_location is tested against the per-continent vectors in
# turn and receives the label of the first vector that contains it; rows
# matching none of them get NA.
#
# Takes no arguments; reads the globals dfN, africa, asia, europe,
# south_america, north_america and oceania. Returns the augmented table.
iod <- function() {
  mutate(
    dfN,
    continent = case_when(
      company_location %in% africa ~ "Africa",
      company_location %in% asia ~ "Asia",
      company_location %in% europe ~ "Europe",
      company_location %in% south_america ~ "South America",
      company_location %in% north_america ~ "North America",
      company_location %in% oceania ~ "Oceania"
    )
  )
}
# Run each candidate 10 times. With unit = "relative" the timings are printed
# as ratios rather than absolute times (in the output below, akrun() is the
# 1.0 baseline).
microbenchmark(akrun(), iod(), times = 10L, unit = "relative")
#  expr      min       lq     mean   median       uq      max neval cld
# akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10  a 
#   iod() 6.332611 6.201221 5.953432 6.125145 5.567748 5.751538    10   b
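dfN <- chocolate_data_common_beans2[rep(seq_len(nrow(chocolate_data_common_beans2)), each = 1e5),]
library(microbenchmark)

akrun <- function() {
  list(Africa = africa, Asia = asia, Europe = europe, 
     `South America` = south_america, `North America` = north_america,
     Oceania = oceania)  %>% 
     enframe(name = "continent", value = "company_location") %>% 
     unnest %>%
     right_join(dfN)


}

iod <- function() {
dfN %>%
     mutate(continent = case_when(
         company_location %in% africa ~ "Africa",
         company_location %in% asia ~ "Asia",
         company_location %in% europe ~ "Europe",
         company_location %in% south_america ~ "South America",
         company_location %in% north_america ~ "North America",
         company_location %in% oceania ~ "Oceania"
     ))


}
microbenchmark(akrun(), iod(), times = 10L, unit = "relative")
#  expr      min       lq     mean   median       uq      max neval cld
# akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10  a 
#   iod() 6.332611 6.201221 5.953432 6.125145 5.567748 5.751538    10   b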
数据
chocolate_data_common_beans2
%in%
的作用正好相反(这在语言上也更有意义:你问的是,这是列表中的一员吗):


明白了,我之前把这个问题想成了“我能在那一列中找到这个值吗?”谢谢你的帮助!
# Inflate the sample for benchmarking: every row index of
# chocolate_data_common_beans2 is repeated 1e5 times in a row.
dfN <- chocolate_data_common_beans2[
  rep(seq_len(nrow(chocolate_data_common_beans2)), each = 1e5),
]
library(microbenchmark)

# Benchmark candidate: add a `continent` column to `dfN` with a join.
#
# Builds a two-column lookup (continent, company_location) from the
# per-continent country vectors and right-joins it onto `dfN`, so every row
# of `dfN` is kept; countries absent from the lookup get NA as continent.
#
# Takes no arguments; reads the globals africa, asia, europe, south_america,
# north_america, oceania and dfN. Returns the joined table.
akrun <- function() {
  list(
    Africa = africa,
    Asia = asia,
    Europe = europe,
    `South America` = south_america,
    `North America` = north_america,
    Oceania = oceania
  ) %>%
    enframe(name = "continent", value = "company_location") %>%
    # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
    # omitted.
    unnest(cols = company_location) %>%
    # Spell out the key instead of relying on the guessed common column
    # (which also prints a "Joining, by = ..." message on every run).
    right_join(dfN, by = "company_location")
}

# Benchmark candidate: add a `continent` column to `dfN` with case_when().
#
# Each row's company_location is tested against the per-continent vectors in
# turn and receives the label of the first vector that contains it; rows
# matching none of them get NA.
#
# Takes no arguments; reads the globals dfN, africa, asia, europe,
# south_america, north_america and oceania. Returns the augmented table.
iod <- function() {
  mutate(
    dfN,
    continent = case_when(
      company_location %in% africa ~ "Africa",
      company_location %in% asia ~ "Asia",
      company_location %in% europe ~ "Europe",
      company_location %in% south_america ~ "South America",
      company_location %in% north_america ~ "North America",
      company_location %in% oceania ~ "Oceania"
    )
  )
}
# Run each candidate 10 times. With unit = "relative" the timings are printed
# as ratios rather than absolute times (in the output below, akrun() is the
# 1.0 baseline).
microbenchmark(akrun(), iod(), times = 10L, unit = "relative")
#  expr      min       lq     mean   median       uq      max neval cld
# akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10  a 
#   iod() 6.332611 6.201221 5.953432 6.125145 5.567748 5.751538    10   b
# Ten-row sample of the chocolate data as a base data.frame: company
# location (character), cocoa percentage (integer) and rating (double),
# with character row names "1" to "10".
chocolate_data_common_beans2 <- structure(
  list(
    company_location = c(
      "France", "Fiji", "Ecuador", "U.S.A.", "U.S.A.",
      "U.S.A.", "Canada", "U.S.A.", "Australia", "Austria"
    ),
    cocoa_percent = c(63L, 72L, 55L, 75L, 70L, 55L, 72L, 85L, 78L, 70L),
    rating = c(3.75, 3.5, 2.75, 2.75, 2.75, 2.75, 3.75, 3.5, 3.75, 3.75)
  ),
  class = "data.frame",
  row.names = as.character(1:10)
)
# Add a `continent` column with one membership test per continent: each row
# gets the label of the first country vector that contains its
# company_location; rows matching none of the vectors get NA.
#
# The pipeline starts from chocolate_data_common_beans2 because no object
# named `df` is created in the visible code; a bare `df` would resolve to
# the stats::df density function and mutate() would fail on it.
chocolate_data_common_beans2 %>%
  mutate(
    continent = case_when(
      company_location %in% africa ~ "Africa",
      company_location %in% asia ~ "Asia",
      company_location %in% europe ~ "Europe",
      company_location %in% south_america ~ "South America",
      company_location %in% north_america ~ "North America",
      company_location %in% oceania ~ "Oceania"
    )
  )

   company_location cocoa_percent rating     continent
1            France            63   3.75        Europe
2              Fiji            72   3.50       Oceania
3           Ecuador            55   2.75 South America
4            U.S.A.            75   2.75 North America
5            U.S.A.            70   2.75 North America
6            U.S.A.            55   2.75 North America
7            Canada            72   3.75 North America
8            U.S.A.            85   3.50 North America
9         Australia            78   3.75       Oceania
10          Austria            70   3.75        Europe