捕获R中从2到11个条目的所有连续值

捕获R中从2到11个条目的所有连续值,r,R,最近@AntoniosK帮助我找到了如何使用以下代码捕获连续数据点的第一个和最后一个值 library(tidyverse) f = function(x) { DF %>% gather(M,First,-ID) %>% # reshape data group_by(ID) %>% # for each ID mutate(Last = lead(First, x-1)) %>%

最近@AntoniosK帮助我找到了如何使用以下代码捕获连续数据点的第一个和最后一个值

library(tidyverse)

# Pair every value with the value (x - 1) positions after it within the same ID
# and keep only the pairs whose start and end are both observed.
#
# Arguments:
#   x  Window length: Last is taken x - 1 rows after First.
#
# Reads the data frame `DF` from the enclosing environment; it is not defined
# in this snippet (presumably one row per ID and one column per month, with "."
# marking a missing value -- confirm against the caller).
#
# Returns a data frame with columns ID, First and Last.
f <- function(x) {
  # Long format: one row per (ID, original column); values land in First.
  long <- gather(DF, M, First, -ID)

  # Within each ID, look ahead x - 1 rows to find the end of the window.
  paired <- long %>%
    group_by(ID) %>%
    mutate(Last = lead(First, n = x - 1)) %>%
    ungroup()

  # Keep windows that start and end on a real value. Rows whose Last is NA
  # (no row x - 1 positions ahead) are discarded by filter() as well.
  kept <- filter(paired, First != ".", Last != ".")

  # Sort by ID for readability and drop the column-name helper.
  select(arrange(kept, ID), -M)
}

# Run f() once per window length (2 to 11) and stack the results into a single
# table; consec_months records which window length produced each row.
RDF <- data.frame(consec_months = 2:11) %>%
  mutate(d = map(consec_months, f)) %>%
  unnest(d) %>%    # name the list-column: a bare unnest() is deprecated
  as_tibble()      # tibble only for nicer printing (tbl_df() is deprecated)
例如,对于ID 1,连续3个值的期望结果如下:

ID  cons_months M1      
1   3   5   15  2
1   3   15  2   20
1   3   4   2   7
1   3   2   7   8
# Example data: one row per ID and one column per month (M2..M13).
# "." marks a month without an observation, so every column that contains a
# "." is read as character rather than integer.
df <- read.table(
  text = "
ID M2  M3  M4  M5  M6  M7  M8  M9  M10 M11 M12 M13
1   5   15  2   20  .   .   .   4   2   7   8   .
2   2   8   7   6   5   4   7   4   7   9   9   8
3   .   .   .   .   .   .   .   .   .   .   7   7
4   6   7   8   6   .   .   .   .   .   7   6   5
",
  header = TRUE,
  stringsAsFactors = FALSE
)
library(tidyverse)

# Return every run of `l` consecutive observed values, one run per row.
#
# Arguments:
#   l     Number of consecutive values in each window.
#   data  Wide data frame with an `ID` column and one column per time point,
#         where "." marks a missing value. Defaults to the global `df`.
#
# Returns a data frame with `ID` plus columns X1..Xl holding the values of each
# window of `l` consecutive entries that contains no ".".
f <- function(l, data = df) {
  data %>%
    gather(M, First, -ID) %>%   # reshape to long format
    arrange(ID) %>%
    group_by(ID) %>%
    mutate(
      vec = list(First),        # the ID's whole series, repeated on each row
      pos = row_number()        # position of this row's value inside vec
    ) %>%
    ungroup() %>%
    filter(First != ".") %>%    # a window cannot start on a missing value
    mutate(
      # Window of l values starting at pos, as a one-row data frame (X1..Xl).
      # Indexing past the end of vec yields NA, which turns flag into NA.
      values = map2(
        pos, vec,
        ~ data.frame(t(.y[.x:(.x + l - 1)]), stringsAsFactors = FALSE)
      ),
      # Number of "." entries inside the window.
      flag = map_int(values, ~ sum(. == "."))
    ) %>%
    unnest(values) %>%
    filter(flag == 0) %>%       # NA flags (truncated windows) are dropped too
    select(ID, matches("X"))
}
# Apply f() for every window length from 2 to 11 and stack the results.
# Shorter windows have fewer X columns, so those columns are filled with NA.
data.frame(consec_months = 2:11) %>%
  mutate(d = map(consec_months, f)) %>%
  unnest(d) %>%
  as_tibble()
# # A tibble: 87 x 13
#   consec_months    ID X1    X2    X3    X4    X5    X6    X7    X8    X9    X10   X11  
#           <int> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1             2     1 5     15    NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 2             2     1 15    2     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 3             2     1 2     20    NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 4             2     1 4     2     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 5             2     1 2     7     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 6             2     1 7     8     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 7             2     2 2     8     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 8             2     2 8     7     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 9             2     2 7     6     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 10            2     2 6     5     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# # ... with 77 more rows

显然,不同的步骤数意味着列中存储的值数不同。因此,如果您想将所有内容存储在一个表中,某些列将具有
NA
值。

又一次搞定了,非常感谢。我可以用 consec_months 这个变量来区分不同的连续长度。——如果需要我更新答案来进一步帮助你,请告诉我;如果你愿意,也可以给答案投票或接受它 :) ——我该如何绘制各列平均值之间的差异?例如 mean(X2) - mean(X1),依此类推。
ID  cons_months M1      
1   3   5   15  2
1   3   15  2   20
1   3   4   2   7
1   3   2   7   8
# Example data: one row per ID and one column per month (M2..M13).
# "." marks a month without an observation, so every column that contains a
# "." is read as character rather than integer.
df <- read.table(
  text = "
ID M2  M3  M4  M5  M6  M7  M8  M9  M10 M11 M12 M13
1   5   15  2   20  .   .   .   4   2   7   8   .
2   2   8   7   6   5   4   7   4   7   9   9   8
3   .   .   .   .   .   .   .   .   .   .   7   7
4   6   7   8   6   .   .   .   .   .   7   6   5
",
  header = TRUE,             # TRUE/FALSE instead of the reassignable T/F
  stringsAsFactors = FALSE
)

library(tidyverse)

# Return every run of `l` consecutive observed values, one run per row.
#
# Arguments:
#   l     Number of consecutive values in each window.
#   data  Wide data frame with an `ID` column and one column per time point,
#         where "." marks a missing value. Defaults to the global `df`, which
#         is what this function previously read implicitly.
#
# Returns a data frame with `ID` plus columns X1..Xl holding the values of each
# window of `l` consecutive entries that contains no ".".
f <- function(l, data = df) {
  data %>%
    gather(M, First, -ID) %>%   # reshape to long format
    arrange(ID) %>%
    group_by(ID) %>%
    mutate(
      vec = list(First),        # the ID's whole series, repeated on each row
      pos = row_number()        # position of this row's value inside vec
    ) %>%
    ungroup() %>%
    filter(First != ".") %>%    # a window cannot start on a missing value
    mutate(
      # Window of l values starting at pos, as a one-row data frame (X1..Xl).
      # Indexing past the end of vec yields NA, which turns flag into NA.
      values = map2(
        pos, vec,
        ~ data.frame(t(.y[.x:(.x + l - 1)]), stringsAsFactors = FALSE)
      ),
      # Number of "." entries inside the window. Kept as a plain integer
      # column so that only `values` has to be unnested (passing several
      # columns to unnest() positionally is deprecated).
      flag = map_int(values, ~ sum(. == "."))
    ) %>%
    unnest(values) %>%
    filter(flag == 0) %>%       # NA flags (truncated windows) are dropped too
    select(ID, matches("X"))
}

# Apply f() for every window length from 2 to 11 and stack the results.
# Shorter windows have fewer X columns, so those columns are filled with NA.
data.frame(consec_months = 2:11) %>%
  mutate(d = map(consec_months, f)) %>%
  unnest(d) %>%    # name the list-column: a bare unnest() is deprecated
  as_tibble()      # replaces the deprecated tbl_df()

# # A tibble: 87 x 13
#   consec_months    ID X1    X2    X3    X4    X5    X6    X7    X8    X9    X10   X11  
#           <int> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1             2     1 5     15    NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 2             2     1 15    2     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 3             2     1 2     20    NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 4             2     1 4     2     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 5             2     1 2     7     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 6             2     1 7     8     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 7             2     2 2     8     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 8             2     2 8     7     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 9             2     2 7     6     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# 10            2     2 6     5     NA    NA    NA    NA    NA    NA    NA    NA    NA   
# # ... with 77 more rows