purrr查找最小值,然后在
我有两个数据集。第一个包含城市列表及其与目的地的距离(以英里为单位)。第二个列表包含目的地。我想使用purrr将最近目的地的名称放入第一个数据集中的新列中 这是第一个数据集(包含合成数据/距离):purrr查找最小值,然后在,r,string,dplyr,purrr,pmap,R,String,Dplyr,Purrr,Pmap,我有两个数据集。第一个包含城市列表及其与目的地的距离(以英里为单位)。第二个列表包含目的地。我想使用purrr将最近目的地的名称放入第一个数据集中的新列中 这是第一个数据集(包含合成数据/距离): 库(tidyverse) 数据1我们可以收集为“长”格式,按“城市”分组,切片最小值为“val”的行,左键与“数据2”连接以获得“最近的” library(tidyverse) data1 %>% gather(key, val, starts_with("dist")) %>%
库(tidyverse)
数据1我们可以收集
为“长”格式,按“城市”分组,切片
最小值为“val”的行,左键与“数据2”连接以获得“最近的”
library(tidyverse)
data1 %>%
gather(key, val, starts_with("dist")) %>%
group_by(city) %>%
slice(which.min(val)) %>%
ungroup %>%
transmute(city, key = str_remove(key, 'dist_')) %>%
left_join(data2 %>%
mutate(key = word(destination, 1))) %>%
select(city, nearest = destination) %>%
left_join(data1)
使用tidyverse
的解决方案
library(tidyverse)
data3 <- data1 %>%
mutate(City = apply(data1 %>% select(-city), 1, function(x) names(x)[which.min(x)])) %>%
mutate(City = str_remove(City, "^dist_")) %>%
left_join(data2 %>%
separate(destination, into = c("City", "Country"), sep = " ", remove = FALSE),
by = "City") %>%
select(-City, -Country) %>%
rename(nearest = destination)
data3
# # A tibble: 3 x 6
# city dist_Rome dist_Miami dist_Singapore dist_Toronto nearest
# <chr> <dbl> <dbl> <dbl> <dbl> <chr>
# 1 Atlanta 1000 400 3000 900 Miami United States
# 2 Tokyo 2000 3000 600 3200 Singapore Singapore
# 3 Paris 300 1500 2000 1900 Rome Italy
库(tidyverse)
数据3%
mutate(City=apply(data1%>%select(-City),1,函数(x)名称(x)[which.min(x)])%>%
变异(城市=str_删除(城市,“^dist”))%>%
左联合(数据2%>%
分离(目的地,进入=c(“城市”、“国家”),sep=“”,remove=FALSE),
by=“City”)%%>%
选择(-City,-Country)%%>%
重命名(最近=目的地)
数据3
##tibble:3 x 6
#城市区罗马区迈阿密区新加坡区多伦多最近
#
#1亚特兰大1000 400 3000 900美国迈阿密
#2东京2000 3000 600 3200新加坡新加坡
#3巴黎300 1500 2000 1900意大利罗马
library(tidyverse)
solution <- tibble(city = c("Atlanta", "Tokyo", "Paris"),
dist_Rome = c(1000, 2000, 300),
dist_Miami = c(400, 3000, 1500),
dist_Singapore = c(3000, 600, 2000),
dist_Toronto = c(900, 3200, 1900),
nearest = c("Miami United States", "Singapore Singapore", "Rome Italy"))
library(tidyverse)
solution <- data1 %>%
mutate(nearest_hub = map(select(., contains("dist")), ~
case_when(which.min(c(...)) ~ data2$destination),
TRUE ~ "NA"))
Error in which.min(c(...)) :
(list) object cannot be coerced to type 'double'
library(tidyverse)
data1 %>%
gather(key, val, starts_with("dist")) %>%
group_by(city) %>%
slice(which.min(val)) %>%
ungroup %>%
transmute(city, key = str_remove(key, 'dist_')) %>%
left_join(data2 %>%
mutate(key = word(destination, 1))) %>%
select(city, nearest = destination) %>%
left_join(data1)
library(tidyverse)
data3 <- data1 %>%
mutate(City = apply(data1 %>% select(-city), 1, function(x) names(x)[which.min(x)])) %>%
mutate(City = str_remove(City, "^dist_")) %>%
left_join(data2 %>%
separate(destination, into = c("City", "Country"), sep = " ", remove = FALSE),
by = "City") %>%
select(-City, -Country) %>%
rename(nearest = destination)
data3
# # A tibble: 3 x 6
# city dist_Rome dist_Miami dist_Singapore dist_Toronto nearest
# <chr> <dbl> <dbl> <dbl> <dbl> <chr>
# 1 Atlanta 1000 400 3000 900 Miami United States
# 2 Tokyo 2000 3000 600 3200 Singapore Singapore
# 3 Paris 300 1500 2000 1900 Rome Italy