捕获R中从2到11个条目的所有连续值
最近 @AntoniosK 帮助我找到了如何使用以下代码捕获连续数据点的第一个值和最后一个值:
library(tidyverse)
# Pair each value with the value located x - 1 rows later within the same ID.
#
# x: span of the window; `First` is the value that opens it, `Last` the value
#    that closes it.
# Reads the table `DF` from the enclosing environment: every column except ID
# is stacked into `First`.
# Returns the columns ID, First and Last, ordered by ID. Rows are dropped when
# either end is "." or when `Last` is NA because the window runs past the
# final value of that ID.
f <- function(x) {
  # long format: one row per (ID, original column) combination
  stacked <- gather(DF, M, First, -ID)

  # look ahead x - 1 rows inside each ID, then release the grouping
  by_id <- group_by(stacked, ID)
  with_last <- ungroup(mutate(by_id, Last = lead(First, n = x - 1)))

  # keep only the pairs where neither end is the "." placeholder
  both_ends <- filter(with_last, !(First == "." | Last == "."))

  # sort by ID for display and discard the helper column M
  select(arrange(both_ends, ID), -M)
}
# Run f() once per window length from 2 to 11 and stack the results into a
# single tibble; consec_months records which window length produced each row.
RDF <- data.frame(consec_months = 2:11) %>%
  mutate(d = map(consec_months, f)) %>% # one result table per window length
  unnest(d) %>% # name the list-column: calling unnest() without one is deprecated
  as_tibble() # replaces the deprecated tbl_df(); only for visualisation purposes
对于 ID 1,连续 3 个值的结果如下:
ID cons_months M1
1 3 5 15 2
1 3 15 2 20
1 3 4 2 7
1 3 2 7 8
# Example data: one row per ID, columns M2..M13; "." marks a missing value.
df <- read.table(text = "
ID M2 M3 M4 M5 M6 M7 M8 M9 M10 M11 M12 M13
1 5 15 2 20 . . . 4 2 7 8 .
2 2 8 7 6 5 4 7 4 7 9 9 8
3 . . . . . . . . . . 7 7
4 6 7 8 6 . . . . . 7 6 5
", header = TRUE, stringsAsFactors = FALSE)
library(tidyverse)
# Collect every run of `l` consecutive recorded values for each ID.
#
# l: window length (number of consecutive values per run).
# Reads the table `df` from the enclosing environment; "." marks a missing
# value there.
# Returns ID plus one column per window position (X1, X2, ...), one row per
# window. Windows that contain a "." or run past the last value of an ID are
# dropped.
f <- function(l) {
  df %>%
    gather(M, First, -ID) %>% # reshape data: one row per ID and original column
    arrange(ID) %>% # arrange by ID
    group_by(ID) %>% # for each ID
    mutate(
      vec = list(First), # the ID's full vector of values, repeated on each row
      pos = row_number() # position of this row's value inside that vector
    ) %>%
    ungroup() %>% # forget the grouping
    filter(First != ".") %>% # a window cannot start at a "." value
    mutate(
      # one-row data frame with the l values starting at pos; positions past
      # the end of vec come back as NA
      values = map2(pos, vec, function(start, v) {
        data.frame(t(v[start:(start + l - 1)]), stringsAsFactors = FALSE)
      }),
      # number of "." inside the window (NA when the window overruns vec);
      # map_int() gives a plain integer column, so only `values` needs unnesting
      flag = map_int(values, function(w) sum(w == "."))
    ) %>%
    unnest(values) %>% # spread each window into its own columns
    filter(flag == 0) %>% # keep windows with no "."; NA flags are dropped too
    select(ID, matches("X")) # keep columns of interest
}
# apply the function to every window length from 2 to 11 and stack the results;
# consec_months records which window length produced each row
data.frame(consec_months = 2:11) %>%
  mutate(d = map(consec_months, f)) %>% # one result table per window length
  unnest(d) %>% # name the list-column: calling unnest() without one is deprecated
  as_tibble() # replaces the deprecated tbl_df()
# # A tibble: 87 x 13
# consec_months ID X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11
# <int> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 2 1 5 15 NA NA NA NA NA NA NA NA NA
# 2 2 1 15 2 NA NA NA NA NA NA NA NA NA
# 3 2 1 2 20 NA NA NA NA NA NA NA NA NA
# 4 2 1 4 2 NA NA NA NA NA NA NA NA NA
# 5 2 1 2 7 NA NA NA NA NA NA NA NA NA
# 6 2 1 7 8 NA NA NA NA NA NA NA NA NA
# 7 2 2 2 8 NA NA NA NA NA NA NA NA NA
# 8 2 2 8 7 NA NA NA NA NA NA NA NA NA
# 9 2 2 7 6 NA NA NA NA NA NA NA NA NA
# 10 2 2 6 5 NA NA NA NA NA NA NA NA NA
# # ... with 77 more rows
显然,连续值的个数不同,意味着每一行需要存储的值的个数也不同。因此,如果您想把所有结果存放在同一张表中,某些列就会出现
NA
值。
又一次完美解决了,非常感谢你。我可以用 consec_months 这个变量来区分不同的情况。
如果需要我更新答案来帮助你,请告诉我。如果您愿意,也可以为这个答案投票或接受它 :)
我该如何绘制各列平均值之间的差异?例如 mean(X2) - mean(X1),依此类推。
ID cons_months M1
1 3 5 15 2
1 3 15 2 20
1 3 4 2 7
1 3 2 7 8
# Example data: one row per ID, columns M2..M13; "." marks a missing value.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
df <- read.table(text = "
ID M2 M3 M4 M5 M6 M7 M8 M9 M10 M11 M12 M13
1 5 15 2 20 . . . 4 2 7 8 .
2 2 8 7 6 5 4 7 4 7 9 9 8
3 . . . . . . . . . . 7 7
4 6 7 8 6 . . . . . 7 6 5
", header = TRUE, stringsAsFactors = FALSE)
library(tidyverse)
# Collect every run of `l` consecutive recorded values for each ID.
#
# l: window length (number of consecutive values per run).
# Reads the table `df` from the enclosing environment; "." marks a missing
# value there.
# Returns ID plus one column per window position (X1, X2, ...), one row per
# window. Windows that contain a "." or run past the last value of an ID are
# dropped.
f <- function(l) {
  df %>%
    gather(M, First, -ID) %>% # reshape data: one row per ID and original column
    arrange(ID) %>% # arrange by ID
    group_by(ID) %>% # for each ID
    mutate(
      vec = list(First), # the ID's full vector of values, repeated on each row
      pos = row_number() # position of this row's value inside that vector
    ) %>%
    ungroup() %>% # forget the grouping
    filter(First != ".") %>% # a window cannot start at a "." value
    mutate(
      # one-row data frame with the l values starting at pos; positions past
      # the end of vec come back as NA
      values = map2(pos, vec, function(start, v) {
        data.frame(t(v[start:(start + l - 1)]), stringsAsFactors = FALSE)
      }),
      # number of "." inside the window (NA when the window overruns vec);
      # map_int() gives a plain integer column, so only `values` needs unnesting
      flag = map_int(values, function(w) sum(w == "."))
    ) %>%
    unnest(values) %>% # spread each window into its own columns
    filter(flag == 0) %>% # keep windows with no "."; NA flags are dropped too
    select(ID, matches("X")) # keep columns of interest
}
# apply the function to every window length from 2 to 11 and stack the results;
# consec_months records which window length produced each row
data.frame(consec_months = 2:11) %>%
  mutate(d = map(consec_months, f)) %>% # one result table per window length
  unnest(d) %>% # name the list-column: calling unnest() without one is deprecated
  as_tibble() # replaces the deprecated tbl_df()
# # A tibble: 87 x 13
# consec_months ID X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11
# <int> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 2 1 5 15 NA NA NA NA NA NA NA NA NA
# 2 2 1 15 2 NA NA NA NA NA NA NA NA NA
# 3 2 1 2 20 NA NA NA NA NA NA NA NA NA
# 4 2 1 4 2 NA NA NA NA NA NA NA NA NA
# 5 2 1 2 7 NA NA NA NA NA NA NA NA NA
# 6 2 1 7 8 NA NA NA NA NA NA NA NA NA
# 7 2 2 2 8 NA NA NA NA NA NA NA NA NA
# 8 2 2 8 7 NA NA NA NA NA NA NA NA NA
# 9 2 2 7 6 NA NA NA NA NA NA NA NA NA
# 10 2 2 6 5 NA NA NA NA NA NA NA NA NA
# # ... with 77 more rows