在r中，先计算值数，再计算另一个值 df_R

在r中，先计算值数，再计算另一个值 df

在r中，先计算值数，再计算另一个值 df,r,R,根据更新的数据，我们可能需要滚动paste df <- data.frame(Year = c("May","June","July"), D1 = c(0,0,0), D2 = c(0,0,0), D3 = c(0,0,0), D4 = c(0,0,1),

根据更新的数据，我们可能需要滚动

paste

df <- data.frame(Year = c("May","June","July"), 
                 D1 = c(0,0,0), 
                 D2 = c(0,0,0), 
                 D3 = c(0,0,0), 
                 D4 = c(0,0,1), 
                 D5 = c(0,1,1),
                 D6 = c(0,1,1),
                 D7 = c(0,0,0),
                 D8 = c(0,0,0),
                 D9 = c(0,0,0),
                 D10 = c(0,0,0),
                 D11 = c(0,0,0),
                 D12 = c(0,0,0),
                 D13 = c(0,0,0), 
                 D14 = c(1,1,0), 
                 D15 = c(1,0,0),
                 D16 = c(0,1,0),
                 D17 = c(1,1,1),
                 D18 = c(0,0,0),
                 D19 = c(0,0,0),
                 D20 = c(0,0,0),
                 D21 = c(0,1,0),
                 D22 = c(0,0,0),
                 D23 = c(0,1,0), 
                 D24 = c(0,0,0), 
                 D25 = c(0,0,0),
                 D26 = c(1,0,0),
                 D27 = c(0,0,0),
                 D28 = c(1,0,1),
                 D29 = c(1,0,0),
                 D30 = c(1,1,0),
                 D31 = c(0,1,1)
                 )

或者使用

tidyverse

table(apply(df[-1], 1, rollapply, width = 2, paste, collapse=""))

根据更新的数据，我们可能需要滚动

粘贴
df <- data.frame(Year = c("May","June","July"), 
                 D1 = c(0,0,0), 
                 D2 = c(0,0,0), 
                 D3 = c(0,0,0), 
                 D4 = c(0,0,1), 
                 D5 = c(0,1,1),
                 D6 = c(0,1,1),
                 D7 = c(0,0,0),
                 D8 = c(0,0,0),
                 D9 = c(0,0,0),
                 D10 = c(0,0,0),
                 D11 = c(0,0,0),
                 D12 = c(0,0,0),
                 D13 = c(0,0,0), 
                 D14 = c(1,1,0), 
                 D15 = c(1,0,0),
                 D16 = c(0,1,0),
                 D17 = c(1,1,1),
                 D18 = c(0,0,0),
                 D19 = c(0,0,0),
                 D20 = c(0,0,0),
                 D21 = c(0,1,0),
                 D22 = c(0,0,0),
                 D23 = c(0,1,0), 
                 D24 = c(0,0,0), 
                 D25 = c(0,0,0),
                 D26 = c(1,0,0),
                 D27 = c(0,0,0),
                 D28 = c(1,0,1),
                 D29 = c(1,0,0),
                 D30 = c(1,1,0),
                 D31 = c(0,1,1)
                 )


或者使用tidyverse

table(apply(df[-1], 1, rollapply, width = 2, paste, collapse=""))

使用gregexpr

library(runner)
library(janitor)
library(dplyr)
library(tidyr)

df %>% 
    rowwise %>%
    summarise(out = list(table(runner(c_across(starts_with('D')),
          f = function(x) paste(x, collapse=""), k = 2))), .groups = 'drop') %>%
    unnest_wider(c(out))  %>%
    adorn_totals() 
#     0 00 01 10 11
#     1 19  4  4  3
#     1 16  6  5  3
#     1 21  4  3  2
# Total 56 14 12  8

使用gregexpr

library(runner)
library(janitor)
library(dplyr)
library(tidyr)

df %>% 
    rowwise %>%
    summarise(out = list(table(runner(c_across(starts_with('D')),
          f = function(x) paste(x, collapse=""), k = 2))), .groups = 'drop') %>%
    unnest_wider(c(out))  %>%
    adorn_totals() 
#     0 00 01 10 11
#     1 19  4  4  3
#     1 16  6  5  3
#     1 21  4  3  2
# Total 56 14 12  8

正如您已经认识到的，问题在于行比较。。。然后我们可以将数据从宽格式改为长格式
警告：在您的数据中，由于格式较宽，六月与五月/七月有31天。

  ind values
1  00      1
2  01      5
3  10      4
4  11      1

重塑_df%
tidyr:：pivot_更长（cols=D1:D31，name_to=“date”，values_to=“value”）%>%
变异（索引=if_else（值！=滞后（值），1，0））%>%
替换_na（列表（索引=0））%>%
变异（指数组=总和（指数））
重塑_df%>%
分组依据（指数分组）%>%
总结（第一个月=第一（年），
第一个日期=第一个（日期），
第一个值=第一个（值），
长度=n（）

此数据的结果
reshape_df <- df %>%
  tidyr::pivot_longer(cols = D1:D31, names_to = "date", values_to = "value") %>%
  mutate(index = if_else(value != lag(value), 1, 0)) %>%
  replace_na(list(index = 0)) %>%
  mutate(index_group = cumsum(index))

reshape_df %>%
  group_by(index_group) %>%
  summarize(first_month = first(Year),
            first_date = first(date),
            first_value = first(value),
            length = n())

索引组首个月首个日期首个值长度模式
1 5月10日D1 0 13 0->1
2 5月11日D14 1 2 1->0
2016年5月12日D10 01->1
2017年5月4日至3日1->0
5月4日D18 0 8 0->1
5月6日D26 1->0
2007年5月6日D20 01 0->1
8 5月27日D28 1 3 1->0
9 5月8日D31 0 5 0->1
6月10日9日D5 1 2 1->0
#…还有18排
正如您已经认识到的，问题在于行比较。。。然后我们可以将数据从宽格式改为长格式
警告：在您的数据中，由于格式较宽，六月与五月/七月有31天。

  ind values
1  00      1
2  01      5
3  10      4
4  11      1

重塑_df%
tidyr:：pivot_更长（cols=D1:D31，name_to=“date”，values_to=“value”）%>%
变异（索引=if_else（值！=滞后（值），1，0））%>%
替换_na（列表（索引=0））%>%
变异（指数组=总和（指数））
重塑_df%>%
分组依据（指数分组）%>%
总结（第一个月=第一（年），
第一个日期=第一个（日期），
第一个值=第一个（值），
长度=n（）

此数据的结果
reshape_df <- df %>%
  tidyr::pivot_longer(cols = D1:D31, names_to = "date", values_to = "value") %>%
  mutate(index = if_else(value != lag(value), 1, 0)) %>%
  replace_na(list(index = 0)) %>%
  mutate(index_group = cumsum(index))

reshape_df %>%
  group_by(index_group) %>%
  summarize(first_month = first(Year),
            first_date = first(date),
            first_value = first(value),
            length = n())

索引组首个月首个日期首个值长度模式
1 5月10日D1 0 13 0->1
2 5月11日D14 1 2 1->0
2016年5月12日D10 01->1
2017年5月4日至3日1->0
5月4日D18 0 8 0->1
5月6日D26 1->0
2007年5月6日D20 01 0->1
8 5月27日D28 1 3 1->0
9 5月8日D31 0 5 0->1
6月10日9日D5 1 2 1->0
#…还有18排
它无法在我的大数据框上工作，每个数据框都有12个月28天。你能解释一下你的代码在做什么吗？这样我就能知道它在哪里关闭了。x指的是什么？@user10256905它是一个lambda函数值，即~
等于函数（x）
。使用map
，它将是.x
@user10256905您可以查看我的最新帖子吗非常感谢，我尝试了带有zoo软件包的帖子，效果非常好！它不能在我的大数据框架上工作，每个框架有12个月28天。你能解释一下你的代码在做什么吗？这样我就能知道它在哪里关闭了。x指的是什么？@user10256905它是一个lambda函数值，即~
等于函数（x）
。使用map
，它将是.x
@user10256905您可以查看我的最新帖子吗非常感谢，我尝试了带有zoo软件包的帖子，效果非常好！你能检查一下更新的解决方案吗。我认为早期使用str_extract
或str_count
的解决方案是基于这样一种逻辑，即一旦计算/提取了位置上的元素，就不会再次使用它。通过一次使用2个元素滚动paste
，它解决了这个问题。或者您可以使用lag
以及粘贴
，然后使用表进行计数
是否可以检查更新的解决方案。我认为早期使用str_extract
或str_count
的解决方案是基于这样一种逻辑，即一旦计算/提取了位置上的元素，就不会再次使用它。通过一次使用2个元素滚动paste
，它解决了这个问题。或者您也可以使用lag
粘贴table
进行计数