R 基于条件和按组计数创建新列
我的数据集如下:R 基于条件和按组计数创建新列,r,data.table,conditional-statements,R,Data.table,Conditional Statements,我的数据集如下: DT <- structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22), Household = c(1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, 12, 13, 13, 13), Main = c(1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
DT <- structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22), Household = c(1, 1,
1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, 12, 13, 13,
13), Main = c(1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
1, 0, 1, 0, 1, 0), Variable = c(0.298830253959918, 0.696637114803799,
0.222731978341587, 0.641245156419456, 0.145562076370066, 0.191365477844148,
0.881263670583021, 0.314050677064662, 0.0770074657918196, 0.299236771626852,
0.872886923047494, 0.289628283880055, 0.270138675264831, 0.910147226969786,
0.582434831286225, 0.791627720726525, 0.852872802843926, 0.599723929185799,
0.558087460306338, 0.862531226564633, 0.361200983684113, 0.765999001124532
)), row.names = c(NA, -22L), class = c("tbl_df", "tbl", "data.frame"
))
# create a var with the nr of hh members
DTattempt <- setDT(DT)[, count:= .N, by=Household]
# if there is only 1 hh member, the value of New_Var = 0
DTattempt <- setDT(DTattempt)[count == 1, New_Var:= 0, by=Household]
# Now I try to fill in the value for the other rows, but here I get stuck
# I do not know how to tell R that it should only sum the other variables
DTattempt <- setDT(DTattempt)[count > 1, New_Var:= ifelse(Main==1, sum(Variable, by=Household), sum(Variable, by=Household), by=Household]
预期结果:
DTnew <- structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22), Household = c(1, 1,
1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, 12, 13, 13,
13), Main = c(1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
1, 0, 1, 0, 1, 0), Variable = c(0.29883025, 0.69663711, 0.22273198,
0.64124516, 0.14556208, 0.19136548, 0.88126367, 0.31405068, 0.07700747,
0.29923677, 0.87288692, 0.28962828, 0.27013868, 0.91014723, 0.58243483,
0.79162772, 0.8528728, 0.59972393, 0.55808746, 0.86253123, 0.36120098,
0.765999), New_Var = c(0.91936909, 0.29883025, 0.29883025, 0.14556208,
0.64124516, 0.88126367, 0.19136548, 0.07700747, 0.31405068, 0,
0, 0, 0.91014723, 0.27013868, 0, 0, 0, 0.55808746, 0.59972393,
0.36120098, 1.62853023, 0.36120098)), row.names = c(NA, -22L), class = c("tbl_df",
"tbl", "data.frame"))
DTnew我知道我的解决方案可能并不完美,但它应该可以工作
library(tidyverse)
DT2 <- DT %>% arrange(Household, Main) %>%
mutate(new = ifelse(Main == 0, as.complex(0+1i), 1)) %>%
group_by(Household) %>%
mutate(new = sum(new*Variable)) %>%
group_by(Household, Main) %>%
mutate(new = ifelse(Main == 1, Im(new), Re(new))) %>%
ungroup() %>% arrange(ID)
库(tidyverse)
DT2%安排(家庭,主要)%>%
变异(new=ifelse(Main==0,as.complex(0+1i),1))%>%
组别(住户)%>%
变异(新=和(新*变量))%>%
按(家庭、主要)划分的组别%>%
变异(新=ifelse(主==1,Im(新),Re(新)))%>%
解组()%>%arrange(ID)
我知道我的解决方案可能并不完美,但它应该可以工作
library(tidyverse)
DT2 <- DT %>% arrange(Household, Main) %>%
mutate(new = ifelse(Main == 0, as.complex(0+1i), 1)) %>%
group_by(Household) %>%
mutate(new = sum(new*Variable)) %>%
group_by(Household, Main) %>%
mutate(new = ifelse(Main == 1, Im(new), Re(new))) %>%
ungroup() %>% arrange(ID)
库(tidyverse)
DT2%安排(家庭,主要)%>%
变异(new=ifelse(Main==0,as.complex(0+1i),1))%>%
组别(住户)%>%
变异(新=和(新*变量))%>%
按(家庭、主要)划分的组别%>%
变异(新=ifelse(主==1,Im(新),Re(新)))%>%
解组()%>%arrange(ID)
下一步工作:
setDT(DT)
DT[,nm:=.N,由=(家庭,主要)]
[,newVar:=总和(变量),由=(家庭,主要)]
[,New_Var:=总和(newVar/nm)-newVar,由=(住户)]
[,c(“nm”,“newVar”):=list(NULL,NULL)]
ID家庭主要变量新变量
1: 1 1 1 0.29883025 0.91936909
2: 2 1 0 0.69663711 0.29883025
3: 3 1 0 0.22273198 0.29883025
4: 4 2 0 0.64124516 0.14556208
5: 5 2 1 0.14556208 0.64124516
6: 6 3 1 0.19136548 0.88126367
7: 7 3 0 0.88126367 0.19136548
8: 8 4 1 0.31405068 0.07700747
9: 9 4 0 0.07700747 0.31405068
10: 10 5 0 0.29923677 0.00000000
11: 11 6 1 0.87288692 0.00000000
12: 12 7 1 0.28962828 0.00000000
13: 13 8 1 0.27013868 0.91014723
14: 14 8 0 0.91014723 0.27013868
15: 15 9 1 0.58243483 0.00000000
16: 16 10 1 0.79162772 0.00000000
17: 17 11 1 0.85287280 0.00000000
18: 18 12 0 0.59972393 0.55808746
19: 19 12 1 0.55808746 0.59972393
20: 20 13 0 0.86253123 0.36120098
21: 21 13 1 0.36120098 1.62853023
22: 22 13 0 0.76599900 0.36120098
下一步工作:
setDT(DT)
DT[,nm:=.N,由=(家庭,主要)]
[,newVar:=总和(变量),由=(家庭,主要)]
[,New_Var:=总和(newVar/nm)-newVar,由=(住户)]
[,c(“nm”,“newVar”):=list(NULL,NULL)]
ID家庭主要变量新变量
1: 1 1 1 0.29883025 0.91936909
2: 2 1 0 0.69663711 0.29883025
3: 3 1 0 0.22273198 0.29883025
4: 4 2 0 0.64124516 0.14556208
5: 5 2 1 0.14556208 0.64124516
6: 6 3 1 0.19136548 0.88126367
7: 7 3 0 0.88126367 0.19136548
8: 8 4 1 0.31405068 0.07700747
9: 9 4 0 0.07700747 0.31405068
10: 10 5 0 0.29923677 0.00000000
11: 11 6 1 0.87288692 0.00000000
12: 12 7 1 0.28962828 0.00000000
13: 13 8 1 0.27013868 0.91014723
14: 14 8 0 0.91014723 0.27013868
15: 15 9 1 0.58243483 0.00000000
16: 16 10 1 0.79162772 0.00000000
17: 17 11 1 0.85287280 0.00000000
18: 18 12 0 0.59972393 0.55808746
19: 19 12 1 0.55808746 0.59972393
20: 20 13 0 0.86253123 0.36120098
21: 21 13 1 0.36120098 1.62853023
22: 22 13 0 0.76599900 0.36120098
非常感谢!用第三条线来解决这个问题非常酷。真是一个奇妙的方法!非常感谢你!用第三条线来解决这个问题非常酷。真是一个奇妙的方法!非常感谢你!接受了另一个答案,因为它稍微不那么复杂,而且是一个data.table解决方案。一个小评论,答案第10行应该是零(因为这个家庭只有一个成员)。哦!那样的话就容易多了。我将编辑答案。谢谢您的指点。对于交换值,我使用了复数,它可以被视为二维数。非常感谢!接受了另一个答案,因为它稍微不那么复杂,而且是一个data.table解决方案。一个小评论,答案第10行应该是零(因为这个家庭只有一个成员)。哦!那样的话就容易多了。我将编辑答案。谢谢你指出。为了交换值,我使用了复数,它可以被视为二维数