R 创建一个新列,该列的值为1,条件是按组的差异

R 创建一个新列,该列的值为1,条件是按组的差异,r,data.table,R,Data.table,我的数据集如下: DT <- structure(list(year = structure(c(1993, 1993, 1993, 1997, 1997, 1997, 1999, 1999, 1999, 2003, 2003, 2005, 2005, 2005, 2009, 2009, 2009, 2011, 2011, 2011, 2015, 2015, 2017, 2017, 2017), comment = "year"), State = str

我的数据集如下:

DT <- structure(list(year = structure(c(1993, 1993, 1993, 1997, 1997, 
1997, 1999, 1999, 1999, 2003, 2003, 2005, 2005, 2005, 2009, 2009, 
2009, 2011, 2011, 2011, 2015, 2015, 2017, 2017, 2017), comment = "year"), 
    State = structure(c("Kansas", "Kansas", "Kansas", "Kansas", 
    "Kansas", "Kansas", "Kansas", "Kansas", "Kansas", "Kansas", 
    "Kansas", "Kansas", "Kansas", "Kansas", "Kansas", "Kansas", 
    "Kansas", "Kansas", "Kansas", "Kansas", "Kansas", "Kansas", 
    "Kansas", "Kansas", "Kansas"), class = "AsIs", comment = "state"), 
    State_Abbr = structure(c("KS", "KS", "KS", "KS", "KS", "KS", 
    "KS", "KS", "KS", "KS", "KS", "KS", "KS", "KS", "KS", "KS", 
    "KS", "KS", "KS", "KS", "KS", "KS", "KS", "KS", "KS"), class = "AsIs", comment = "state_po"), 
    Party = structure(c("republican", "democrat", "Other", "republican", 
    "democrat", "Other", "republican", "democrat", "Other", "republican", 
    "Other", "democrat", "republican", "Other", "republican", 
    "democrat", "Other", "democrat", "republican", "Other", "republican", 
    "Other", "democrat", "republican", "Other"), class = "AsIs", comment = "party"), 
    Percentage = structure(c(0.626967802302283, 0.310289787269175, 
    0.0627424104285421, 0.620238525135418, 0.344369476385061, 
    0.035391998479521, 0.652661584410013, 0.315878201849193, 
    0.0314602137407939, 0.825223659651155, 0.174776340348845, 
    0.274872411697912, 0.691627798218281, 0.033499790083807, 
    0.600583964516102, 0.364584658335329, 0.0348313771485682, 
    0.263785496339944, 0.70094378363408, 0.0352707200259761, 
    0.531464769317622, 0.468535230682378, 0.322381278217064, 
    0.621752543886607, 0.0558661778963293), comment = "totalvotes"), 
    cyear = structure(c(1992L, 1992L, 1992L, 1996L, 1996L, 1996L, 
    1998L, 1998L, 1998L, 2002L, 2002L, 2004L, 2004L, 2004L, 2008L, 
    2008L, 2008L, 2010L, 2010L, 2010L, 2014L, 2014L, 2016L, 2016L, 
    2016L), comment = "year")), row.names = c(NA, -25L), class = c("data.table", 
"data.frame"))
我做错了什么?

这是否有效:

> library(dplyr)
> DT %>% group_by(State, Party) %>% mutate(newcol = case_when(
+                                                       cyear - lag(cyear) > 2 ~ 1, 
+                                                       TRUE ~ 0))
# A tibble: 25 x 7
# Groups:   State, Party [3]
    year State    State_Abbr Party      Percentage cyear newcol
   <dbl> <I<chr>> <I<chr>>   <I<chr>>        <dbl> <int>  <dbl>
 1  1993 Kansas   KS         republican     0.627   1992      0
 2  1993 Kansas   KS         democrat       0.310   1992      0
 3  1993 Kansas   KS         Other          0.0627  1992      0
 4  1997 Kansas   KS         republican     0.620   1996      1
 5  1997 Kansas   KS         democrat       0.344   1996      1
 6  1997 Kansas   KS         Other          0.0354  1996      1
 7  1999 Kansas   KS         republican     0.653   1998      0
 8  1999 Kansas   KS         democrat       0.316   1998      0
 9  1999 Kansas   KS         Other          0.0315  1998      0
10  2003 Kansas   KS         republican     0.825   2002      1
# ... with 15 more rows
> 
>库(dplyr)
>DT%>%由(国家、缔约方)分组%>%变异(当(
+cyear-lag(cyear)>2~1,
+真的(0)
#一个tibble:25x7
#集团:国家、缔约方[3]
年度州州党百分比cyear newcol
1 1993堪萨斯州共和党0.627 1992 0
2 1993堪萨斯州民主党人0.310 1992 0
3 1993堪萨斯其他0.0627 1992 0
4 1997堪萨斯州共和党人0.620 1996 1
5 1997堪萨斯州民主党人0.344 1996 1
6 1997堪萨斯其他0.0354 1996 1
7 1999堪萨斯州共和党0.653 1998 0
8 1999堪萨斯州民主党人0.316 1998 0
9 1999堪萨斯州其他0.0315 1998 0
10 2003堪萨斯州共和党0.825 2002 1
# ... 还有15行
> 

您可以尝试在
j
中检查条件:

library(data.table)

DT[, newcolumn := as.integer(year - shift(year) > 2), .(State, Party)]

在我的实际数据中,整列显示为
0
。我不知道发生了什么是的Ronak Shah的回答确实有效。。也许这是个线索。嗯,奇怪。但很高兴你找到了解决办法
> library(dplyr)
> DT %>% group_by(State, Party) %>% mutate(newcol = case_when(
+                                                       cyear - lag(cyear) > 2 ~ 1, 
+                                                       TRUE ~ 0))
# A tibble: 25 x 7
# Groups:   State, Party [3]
    year State    State_Abbr Party      Percentage cyear newcol
   <dbl> <I<chr>> <I<chr>>   <I<chr>>        <dbl> <int>  <dbl>
 1  1993 Kansas   KS         republican     0.627   1992      0
 2  1993 Kansas   KS         democrat       0.310   1992      0
 3  1993 Kansas   KS         Other          0.0627  1992      0
 4  1997 Kansas   KS         republican     0.620   1996      1
 5  1997 Kansas   KS         democrat       0.344   1996      1
 6  1997 Kansas   KS         Other          0.0354  1996      1
 7  1999 Kansas   KS         republican     0.653   1998      0
 8  1999 Kansas   KS         democrat       0.316   1998      0
 9  1999 Kansas   KS         Other          0.0315  1998      0
10  2003 Kansas   KS         republican     0.825   2002      1
# ... with 15 more rows
> 
library(data.table)

DT[, newcolumn := as.integer(year - shift(year) > 2), .(State, Party)]