R-在两个数据帧中连续比较行并返回一个值
我有以下两个数据帧:R-在两个数据帧中连续比较行并返回一个值,r,if-statement,compare,dataframe,R,If Statement,Compare,Dataframe,我有以下两个数据帧: df1 <- data.frame(month=c("1","1","1","1","2","2","2","3","3","3","3","3"), temp=c("10","15","16","25","13","17","20","5","16","25","30","37")) df2 <- data.frame(period=c("1","1","1","1","1","1","1","1","2","2","2","2
# Example input. df1 has 12 rows with columns month and temp; df2 has 26 rows
# with columns period, max_temp and group. Every value is written as a quoted
# string rather than as a number.
df1 <- data.frame(
  month = rep(c("1", "2", "3"), times = c(4, 3, 5)),
  temp = c(
    "10", "15", "16", "25",
    "13", "17", "20",
    "5", "16", "25", "30", "37"
  )
)
df2 <- data.frame(
  period = rep(c("1", "2", "3"), times = c(8, 6, 12)),
  max_temp = c(
    "9", "13", "16", "18", "30", "37", "38", "39",
    "10", "15", "16", "25", "30", "32",
    "8", "10", "12", "14", "16", "18", "19", "25", "28", "30", "35", "40"
  ),
  group = rep(
    c("1", "2", "3", "4", "5", "6", "7", "8"),
    times = c(3, 3, 4, 2, 5, 3, 4, 2)
  )
)
我一直在尝试用 ifelse 函数来解决,需要一些帮助或新的思路。谢谢。

如前所述,我觉得计算 new_group 的过程很难理解。据我所知,您是想在 df1 中创建一个名为 new_group 的变量。对于 df1 的第 i 行,new_group 的值取自 df2 中满足以下全部条件的第一行的 group 值:
- 行索引为 i 或更大
- period 值与 df1$month[i] 相同
- max_temp 值不小于 df1$temp[i]
我通过对 df1 的行索引调用 sapply 来实现这一点:
# Find the df2 group that applies to one row of df1.
#
# Reads `df1` and `df2` from the enclosing environment. For row `idx` of df1,
# considers the rows of df2 at position `idx` or later and returns the `group`
# value of the first one whose `period` equals df1$month[idx] and whose
# `max_temp` is not less than df1$temp[idx] (both compared as numbers).
#
# idx: a single row position in df1.
# Returns: one element of df2$group, or NA when no candidate row qualifies.
fxn <- function(idx) {
  # Candidate rows in df2. Filtering seq_len() gives an empty set when idx is
  # past the last row, where idx:nrow(df2) would count downward instead.
  rows <- seq_len(nrow(df2))
  pm <- rows[rows >= idx]

  # The columns may hold text or factors, so normalise before comparing.
  temp <- as.numeric(as.character(df1$temp[idx]))
  same_period <- as.character(df2$period[pm]) == as.character(df1$month[idx])
  within_max <- temp <= as.numeric(as.character(df2$max_temp[pm]))

  # which() drops comparisons that came out NA (missing period or max_temp),
  # so an incomplete row cannot hide a later genuine match.
  m <- pm[which(same_period & within_max)]

  # Group of the first matching row; m[1] is NA when nothing matched.
  df2$group[m[1]]
}
# Apply the lookup to every row of df1. seq_len() keeps the index vector empty
# when df1 has no rows, whereas seq(nrow(df1)) would yield c(1, 0).
df1$new_group <- sapply(seq_len(nrow(df1)), fxn)
df1
# month temp new_group
# 1 1 10 1
# 2 1 15 1
# 3 1 16 1
# 4 1 25 2
# 5 2 13 3
# 6 2 17 4
# 7 2 20 4
# 8 3 5 5
# 9 3 16 6
# 10 3 25 7
# 11 3 30 7
# 12 3 37 8
fxn=函数(idx){
#df2中的潜在匹配索引
pm=idx:nrow(df2)
#df2中的匹配索引
m=pm[df2$period[pm]==df1$month[idx]&
as.numeric(as.character(df1$temp[idx]))library(data.table)
dt1
您是有意将数据存为字符串的吗?数据文件实际上是制表符分隔的文本文件,我用read.table将其作为数据帧读入R。作为R新手,我不知道数据是以字符串形式存储的。数字两边的引号说明它们是字符串。另外请注意,用read.table(..., stringsAsFactors=TRUE)(这是默认值)读入时,这些字符串会被转换为因子。
我给出的data.frame代码只是为了重现我在R中使用的数据帧。Cheers,谢谢你的指点!谢谢你提供的有用代码。new_group的值不是计算出来的,而是直接取df2$group的值放入df1$new_group列。希望这样说更清楚。谢谢。是的,我发布的代码正是这样做的。看起来你刚接触SO不久,还没有接受你之前问题的好答案。如果我的解决方案或@RicardoSaporta的解决方案解决了你的问题,记得点击绿色对勾接受它。
# Find the df2 group that applies to one row of df1.
#
# Reads `df1` and `df2` from the enclosing environment. For row `idx` of df1,
# considers the rows of df2 at position `idx` or later and returns the `group`
# value of the first one whose `period` equals df1$month[idx] and whose
# `max_temp` is not less than df1$temp[idx] (both compared as numbers).
#
# idx: a single row position in df1.
# Returns: one element of df2$group, or NA when no candidate row qualifies.
fxn <- function(idx) {
  # Candidate rows in df2. Filtering seq_len() gives an empty set when idx is
  # past the last row, where idx:nrow(df2) would count downward instead.
  rows <- seq_len(nrow(df2))
  pm <- rows[rows >= idx]

  # The columns may hold text or factors, so normalise before comparing.
  temp <- as.numeric(as.character(df1$temp[idx]))
  same_period <- as.character(df2$period[pm]) == as.character(df1$month[idx])
  within_max <- temp <= as.numeric(as.character(df2$max_temp[pm]))

  # which() drops comparisons that came out NA (missing period or max_temp),
  # so an incomplete row cannot hide a later genuine match.
  m <- pm[which(same_period & within_max)]

  # Group of the first matching row; m[1] is NA when nothing matched.
  df2$group[m[1]]
}
# Apply the lookup to every row of df1. seq_len() keeps the index vector empty
# when df1 has no rows, whereas seq(nrow(df1)) would yield c(1, 0).
df1$new_group <- sapply(seq_len(nrow(df1)), fxn)
df1
# month temp new_group
# 1 1 10 1
# 2 1 15 1
# 3 1 16 1
# 4 1 25 2
# 5 2 13 3
# 6 2 17 4
# 7 2 20 4
# 8 3 5 5
# 9 3 16 6
# 10 3 25 7
# 11 3 30 7
# 12 3 37 8
library(data.table)
# Key both tables on their month/period column so that dt1[dt2] joins on it.
dt1 <- data.table(df1, key = "month")
dt2 <- data.table(df2, key = "period")
## add a row index
dt1[, rn1 := .I]
# Pair each dt1 row with the dt2 rows of the same month, then, per dt1 row
# (rn1), take the group of the first paired row whose max_temp is at least
# temp. The columns hold numbers written as text, so they are converted before
# comparing; a plain `temp <= max_temp` would not compare them numerically.
# which(...)[1L] is NA when nothing qualifies, so new_group becomes NA instead
# of min() warning about an empty argument.
joined <- dt1[dt2, allow.cartesian = TRUE]
joined[
  ,
  new_group := group[
    which(
      as.numeric(as.character(temp)) <= as.numeric(as.character(max_temp))
    )[1L]
  ],
  by = "rn1"
]
# One row per original dt1 row.
dt3 <- unique(joined, by = "rn1")
## Keep only the columns you want
dt3[, c("month", "temp", "max_temp", "new_group"), with = FALSE]
month temp max_temp new_group
1: 1 1 19 1
2: 1 3 19 1
3: 1 4 19 1
4: 1 7 19 1
5: 2 2 1 3
6: 2 5 1 3
7: 2 6 1 4
8: 3 10 18 5
9: 3 4 18 5
10: 3 7 18 5
11: 3 8 18 5
12: 3 9 18 5