R Mutate()基于另一列中长度不同的字符串向量创建新列
如何R Mutate()基于另一列中长度不同的字符串向量创建新列,r,dplyr,R,Dplyr,如何mutate()使用表示现有列中字符串的可变长度向量创建新变量 我与几个大洲的许多国家有一个数据框架。我想要mutate()一个名为contraction的新变量 head(chocolate_data_common_beans3, n = 10) company_location cocoa_percent rating 1 France 63 3.75 2 Fiji 72 3.50 3
mutate()
使用表示现有列中字符串的可变长度向量创建新变量
我有一个数据框,其中包含来自几个大洲的许多国家。我想用 mutate()
创建一个名为 continent
的新变量
head(chocolate_data_common_beans3, n = 10)
company_location cocoa_percent rating
1 France 63 3.75
2 Fiji 72 3.50
3 Ecuador 55 2.75
4 U.S.A. 75 2.75
5 U.S.A. 70 2.75
6 U.S.A. 55 2.75
7 Canada 72 3.75
8 U.S.A. 85 3.50
9 Australia 78 3.75
10 Austria 70 3.75
以下是每个大陆的所有值
# Values of company_location, grouped by continent.
africa <- c(
  "South Africa", "Sao Tome", "Madagascar", "Ghana"
)
asia <- c(
  "Vietnam", "South Korea", "Singapore", "Russia",
  "Philippines", "Japan", "Israel", "India"
)
europe <- c(
  "Wales", "U.K.", "Switzerland", "Sweden",
  "Spain", "Scotland", "Portugal", "Poland",
  "Netherlands", "Lithuania", "Italy", "Ireland",
  "Iceland", "Hungary", "Germany", "France",
  "Finland", "Denmark", "Czech Republic", "Belgium",
  "Austria", "Amsterdam"
)
south_america <- c(
  "Venezuela", "Suriname", "Peru", "Ecuador", "Costa Rica",
  "Colombia", "Chile", "Brazil", "Bolivia", "Argentina"
)
# NOTE(review): "Niacragua" looks like a misspelling of "Nicaragua"; presumably
# it is kept on purpose to match a misspelled value in the data - confirm.
north_america <- c(
  "U.S.A.", "St. Lucia", "Puerto Rico", "Nicaragua",
  "Niacragua", "Mexico", "Martinique", "Honduras",
  "Guatemala", "Grenada", "Dominican Republic", "Canada"
)
oceania <- c(
  "New Zealand", "Fiji", "Australia"
)
我怎样才能做到这一点
完整代码如下。我们可以先创建一个
键/值(key/val)
数据集,然后再进行连接(join)
library(tidyverse)
# Stack the named continent vectors into a long lookup (columns `values` and
# `ind`), join it onto the chocolate data by country name, then rename the
# `ind` column to `continent`.
stack(list(
  Africa = africa,
  Asia = asia,
  Europe = europe,
  `South America` = south_america,
  `North America` = north_america,
  Oceania = oceania
)) %>%
  right_join(
    chocolate_data_common_beans2,
    by = c("values" = "company_location")
  ) %>%
  rename(continent = ind)
# values continent cocoa_percent rating
#1 France Europe 63 3.75
#2 Fiji Oceania 72 3.50
#3 Ecuador South America 55 2.75
#4 U.S.A. North America 75 2.75
#5 U.S.A. North America 70 2.75
#6 U.S.A. North America 55 2.75
#7 Canada North America 72 3.75
#8 U.S.A. North America 85 3.50
#9 Australia Oceania 78 3.75
#10 Austria Europe 70 3.75
或者使用
enframe
而不是stack
# Build a continent/company_location lookup from the named continent vectors
# and join it onto the chocolate data.
list(
  Africa = africa,
  Asia = asia,
  Europe = europe,
  `South America` = south_america,
  `North America` = north_america,
  Oceania = oceania
) %>%
  # One row per continent, with the country vector held in a list-column.
  enframe(name = "continent", value = "company_location") %>%
  # Name the list-column explicitly: a bare `unnest` with no column is
  # deprecated and warns in tidyr >= 1.0.0.
  unnest(company_location) %>%
  # Spell out the join key instead of relying on the implicit natural join,
  # which emits a "Joining, by = ..." message.
  right_join(chocolate_data_common_beans2, by = "company_location")
注意:此方法的优点是不使用多个嵌套条件来更改值。我们只需要一个连接
基准测试
在稍微大一点的数据集上
# Larger benchmark input: every row of the sample data repeated 1e5 times.
dfN <- chocolate_data_common_beans2[
  rep(seq_len(nrow(chocolate_data_common_beans2)), each = 1e5),
]
library(microbenchmark)
# Benchmark candidate: build a continent/company_location lookup from the
# continent vectors and join it onto `dfN`. Takes no arguments; reads `dfN`
# and the continent vectors from the enclosing environment and returns the
# joined table.
akrun <- function() {
  list(
    Africa = africa,
    Asia = asia,
    Europe = europe,
    `South America` = south_america,
    `North America` = north_america,
    Oceania = oceania
  ) %>%
    enframe(name = "continent", value = "company_location") %>%
    # Name the list-column explicitly: a bare `unnest` with no column is
    # deprecated and warns in tidyr >= 1.0.0.
    unnest(company_location) %>%
    # Explicit key avoids the "Joining, by = ..." message on every run.
    right_join(dfN, by = "company_location")
}
# Benchmark candidate: add a `continent` column to `dfN` with a single
# case_when() that tests company_location against each continent vector in
# turn. Takes no arguments; reads `dfN` and the continent vectors from the
# enclosing environment.
iod <- function() {
  mutate(
    dfN,
    continent = case_when(
      company_location %in% africa ~ "Africa",
      company_location %in% asia ~ "Asia",
      company_location %in% europe ~ "Europe",
      company_location %in% south_america ~ "South America",
      company_location %in% north_america ~ "North America",
      company_location %in% oceania ~ "Oceania"
    )
  )
}
# Time both approaches, 10 evaluations each, reported in relative units.
microbenchmark(
  akrun(),
  iod(),
  times = 10L,
  unit = "relative"
)
# expr min lq mean median uq max neval cld
# akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10 a
# iod() 6.332611 6.201221 5.953432 6.125145 5.567748 5.751538 10 b
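dfN <- chocolate_data_common_beans2[rep(seq_len(nrow(chocolate_data_common_beans2)), each = 1e5),]
library(microbenchmark)
akrun <- function() {
list(Africa = africa, Asia = asia, Europe = europe,
`South America` = south_america, `North America` = north_america,
Oceania = oceania) %>%
enframe(name = "continent", value = "company_location") %>%
unnest %>%
right_join(dfN)
}
iod <- function() {
dfN %>%
mutate(continent = case_when(
company_location %in% africa ~ "Africa",
company_location %in% asia ~ "Asia",
company_location %in% europe ~ "Europe",
company_location %in% south_america ~ "South America",
company_location %in% north_america ~ "North America",
company_location %in% oceania ~ "Oceania"
))
}
microbenchmark(akrun(), iod(), times = 10L, unit = "relative")
# expr min lq mean median uq max neval cld
# akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10 a
# iod() 6.332611 6.201221 5.953432 6.125145 5.567748 5.751538 10 b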
数据
chocolate_data_common_beans2
%in%
的作用正好相反(这在语言上也更有意义:你问的是“这个值是列表中的一员吗?”):
明白了,我之前把这个问题理解成了“我能在那一列中找到这个值吗?”谢谢你的帮助!
# Larger benchmark input: every row of the sample data repeated 1e5 times.
dfN <- chocolate_data_common_beans2[
  rep(seq_len(nrow(chocolate_data_common_beans2)), each = 1e5),
]
library(microbenchmark)
# Benchmark candidate: build a continent/company_location lookup from the
# continent vectors and join it onto `dfN`. Takes no arguments; reads `dfN`
# and the continent vectors from the enclosing environment and returns the
# joined table.
akrun <- function() {
  list(
    Africa = africa,
    Asia = asia,
    Europe = europe,
    `South America` = south_america,
    `North America` = north_america,
    Oceania = oceania
  ) %>%
    enframe(name = "continent", value = "company_location") %>%
    # Name the list-column explicitly: a bare `unnest` with no column is
    # deprecated and warns in tidyr >= 1.0.0.
    unnest(company_location) %>%
    # Explicit key avoids the "Joining, by = ..." message on every run.
    right_join(dfN, by = "company_location")
}
# Benchmark candidate: add a `continent` column to `dfN` with a single
# case_when() that tests company_location against each continent vector in
# turn. Takes no arguments; reads `dfN` and the continent vectors from the
# enclosing environment.
iod <- function() {
  mutate(
    dfN,
    continent = case_when(
      company_location %in% africa ~ "Africa",
      company_location %in% asia ~ "Asia",
      company_location %in% europe ~ "Europe",
      company_location %in% south_america ~ "South America",
      company_location %in% north_america ~ "North America",
      company_location %in% oceania ~ "Oceania"
    )
  )
}
# Time both approaches, 10 evaluations each, reported in relative units.
microbenchmark(
  akrun(),
  iod(),
  times = 10L,
  unit = "relative"
)
# expr min lq mean median uq max neval cld
# akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10 a
# iod() 6.332611 6.201221 5.953432 6.125145 5.567748 5.751538 10 b
# Reproducible 10-row sample of the chocolate data: a plain data.frame with
# character row names "1".."10" and columns company_location (character),
# cocoa_percent (integer) and rating (double).
chocolate_data_common_beans2 <- structure(
  list(
    company_location = c(
      "France", "Fiji", "Ecuador", "U.S.A.", "U.S.A.",
      "U.S.A.", "Canada", "U.S.A.", "Australia", "Austria"
    ),
    cocoa_percent = c(
      63L, 72L, 55L, 75L, 70L,
      55L, 72L, 85L, 78L, 70L
    ),
    rating = c(
      3.75, 3.5, 2.75, 2.75, 2.75,
      2.75, 3.75, 3.5, 3.75, 3.75
    )
  ),
  class = "data.frame",
  row.names = as.character(1:10)
)
# Add a `continent` column by testing company_location against each continent
# vector in turn.
# NOTE(review): `df` is not defined in the visible code; presumably it is the
# chocolate data frame shown earlier - confirm.
mutate(
  df,
  continent = case_when(
    company_location %in% africa ~ "Africa",
    company_location %in% asia ~ "Asia",
    company_location %in% europe ~ "Europe",
    company_location %in% south_america ~ "South America",
    company_location %in% north_america ~ "North America",
    company_location %in% oceania ~ "Oceania"
  )
)
company_location cocoa_percent rating continent
1 France 63 3.75 Europe
2 Fiji 72 3.50 Oceania
3 Ecuador 55 2.75 South America
4 U.S.A. 75 2.75 North America
5 U.S.A. 70 2.75 North America
6 U.S.A. 55 2.75 North America
7 Canada 72 3.75 North America
8 U.S.A. 85 3.50 North America
9 Australia 78 3.75 Oceania
10 Austria 70 3.75 Europe