在dplyr管道中按范围和组连接
我有几个大数据帧,需要把另一张表中的数据附加到其中。数据结构模拟如下。
标签:r, join, dplyr
订单(tbl.orders):
# Simulated orders table: one row per order with a random quantity, product
# type and grade. The seed is fixed so the random draws are reproducible.
set.seed(2)
N <- 1e2
tbl.orders <- tibble(
  # seq_len() stays empty when N is 0, whereas 1:N would give c(1, 0).
  ID = seq_len(N),
  nb_products_ordered = sample(1:15, N, replace = TRUE),
  type = sample(c("keyboard", "mouse", "other"), N, replace = TRUE),
  grade = sample(LETTERS[1:5], N, replace = TRUE)
)
# A tibble: 100 x 4
ID nb_products_ordered type grade
<int> <int> <chr> <chr>
1 1 5 other A
2 2 15 keyboard A
3 3 6 other C
4 4 6 keyboard E
5 5 8 other C
# Fix the RNG seed so subsequent random draws are reproducible.
set.seed(2)
# Number of rows to simulate.
N <- 100
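tbl.orders %>%
  group_split(type) %>%
  lapply(., function(x)
  {
    # if contained in the price list
    if(x$type[1] %in% levels(factor(tbl.prices$type))){
      df.priceparameters <- tbl.prices %>%
        filter(type == x$type[1])
      x %>% mutate(
        per_unit_cost =
          as.numeric(
            as.character(
              cut(
                x[["nb_products_ordered"]],
                breaks = c(df.priceparameters$min_products_ordered, Inf),
                # return per_unit_cost
                labels = df.priceparameters$per_unit_cost,
                right = FALSE
              )))
      )
    }else{
      x %>% mutate(per_unit_cost = NA)
    }
  }) %>%
  do.call("rbind", .) %>%
  arrange(ID)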
结果
# A tibble: 100 x 5
ID nb_products_ordered type grade per_unit_cost
<int> <int> <chr> <chr> <dbl>
1 1 5 other A NA
2 2 15 keyboard A 10.9
3 3 6 other C NA
4 4 6 keyboard E 11
5 5 8 other C NA
# A tibble: 100 x 5
ID nb_products_ordered type grade per_unit_cost
1 1 5 other A NA
2 2 15 keyboard A 10.9
3 3 6 other C NA
4 4 6 keyboard E 11
5 5 8 other C NA
以下是我的尝试,看起来也有点复杂:
我们编写一个函数,为每个 ID 选出正确的价格行:
library(dplyr)
#' Flag the price-tier row that applies to one order
#'
#' Intended to be called inside `filter()` on data grouped by order ID after
#' the price table has been joined on, so each argument holds one value per
#' candidate price tier of a single order.
#'
#' @param type Character vector with the product type of each candidate row.
#' @param nb_products_ordered Numeric vector with the ordered quantity; only
#'   the first element is used.
#' @param min_products_ordered Numeric vector with the minimum quantity of
#'   each candidate tier; `NA` where no tier was joined.
#' @return A logical vector that is `TRUE` only for the tier whose minimum is
#'   the closest one not exceeding the ordered quantity. A single `TRUE`
#'   (keep every row) is returned when the type is "other" or when no tier
#'   can be applied, including the case where all minimums are `NA`.
select_row <- function(type, nb_products_ordered, min_products_ordered) {
  # `%in%` never yields NA, so a missing type cannot break the `if`.
  if (any(type %in% "other")) {
    return(TRUE)
  }

  # Distance between the ordered quantity and each tier's minimum.
  surplus <- nb_products_ordered[1] - min_products_ordered
  # Tiers without a minimum (NA) are never reachable; treating them as FALSE
  # avoids `if (NA)` errors for types that are absent from the price table.
  reachable <- !is.na(surplus) & surplus >= 0
  if (!any(reachable)) {
    return(TRUE)
  }

  reachable & surplus == min(surplus[reachable])
}
# Attach every price tier of the matching type to each order, then keep only
# the rows that select_row() flags within each order.
tbl.orders %>%
  left_join(tbl.prices, by = "type") %>%
  group_by(ID) %>%
  filter(select_row(type, nb_products_ordered, min_products_ordered)) %>%
  # Drop the per-ID grouping so later verbs operate on the whole table.
  ungroup()
这是找到正确价格类别的聪明方法,谢谢!我认为您的代码需要调整为 `tmp >= 0`,否则如果有人只订购了 1 件商品,该订单将不会被匹配到。
您是对的。我错过了。我已相应地更新了答案。
library(dplyr)
#' Flag the price-tier row that applies to one order
#'
#' Intended to be called inside `filter()` on data grouped by order ID after
#' the price table has been joined on, so each argument holds one value per
#' candidate price tier of a single order.
#'
#' @param type Character vector with the product type of each candidate row.
#' @param nb_products_ordered Numeric vector with the ordered quantity; only
#'   the first element is used.
#' @param min_products_ordered Numeric vector with the minimum quantity of
#'   each candidate tier; `NA` where no tier was joined.
#' @return A logical vector that is `TRUE` only for the tier whose minimum is
#'   the closest one not exceeding the ordered quantity. A single `TRUE`
#'   (keep every row) is returned when the type is "other" or when no tier
#'   can be applied, including the case where all minimums are `NA`.
select_row <- function(type, nb_products_ordered, min_products_ordered) {
  # `%in%` never yields NA, so a missing type cannot break the `if`.
  if (any(type %in% "other")) {
    return(TRUE)
  }

  # Distance between the ordered quantity and each tier's minimum.
  surplus <- nb_products_ordered[1] - min_products_ordered
  # Tiers without a minimum (NA) are never reachable; treating them as FALSE
  # avoids `if (NA)` errors for types that are absent from the price table.
  reachable <- !is.na(surplus) & surplus >= 0
  if (!any(reachable)) {
    return(TRUE)
  }

  reachable & surplus == min(surplus[reachable])
}
# Attach every price tier of the matching type to each order, then keep only
# the rows that select_row() flags within each order.
tbl.orders %>%
  left_join(tbl.prices, by = "type") %>%
  group_by(ID) %>%
  filter(select_row(type, nb_products_ordered, min_products_ordered)) %>%
  # Drop the per-ID grouping so later verbs operate on the whole table.
  ungroup()