R 基于条件重命名列值
标签:r, tidyverse
我有一个由三列组成的数据集:time、jar 和 measurement_type。
对于每个不同的 measurement_type,jar 列包含三个测量系列(1、2 和 3)以及两个标记为 blank(空白)的测量系列。我想重命名 blank,使每个 measurement_type 中的第一个空白系列称为 blank1,第二个称为 blank2。期望的结果见下面数据中的 new_column 列。
有什么想法吗?
# Example data: 64 readings split over two measurement types ("a", "b").
# `jar` holds the raw label of each reading; `new_column` holds the desired
# result, in which every consecutive run of "blank" is numbered per
# measurement type.
df <- data.frame(
  time = c(
    "2021-04-02 23:40:20", "2021-04-02 23:41:15", "2021-04-02 23:42:10",
    "2021-04-02 23:43:05", "2021-04-02 23:44:55", "2021-04-02 23:45:50",
    "2021-04-02 23:46:45", "2021-04-02 23:47:40", "2021-04-02 23:48:35",
    "2021-04-02 23:49:30", "2021-04-02 23:50:25", "2021-04-02 23:52:15",
    "2021-04-03 00:36:15", "2021-04-03 00:37:10", "2021-04-03 00:39:00",
    "2021-04-03 00:39:55", "2021-04-03 00:56:25", "2021-04-03 00:57:20",
    "2021-04-03 00:58:15", "2021-04-03 00:59:10", "2021-04-03 01:00:05",
    "2021-04-03 01:01:00", "2021-04-03 01:02:50", "2021-04-03 01:03:45",
    "2021-04-03 01:04:40", "2021-04-03 01:05:35", "2021-04-03 01:06:30",
    "2021-04-03 01:23:54", "2021-04-03 01:24:49", "2021-04-03 01:25:44",
    "2021-04-03 01:26:39", "2021-04-03 01:28:29", "2021-04-03 01:29:24",
    "2021-04-03 01:30:19", "2021-04-03 01:31:14", "2021-04-03 01:32:09",
    "2021-04-03 01:33:04", "2021-04-03 01:33:59", "2021-04-03 01:35:49",
    "2021-04-03 01:36:44", "2021-04-03 01:37:39", "2021-04-03 01:38:34",
    "2021-04-03 01:39:29", "2021-04-03 01:48:39", "2021-04-03 01:49:34",
    "2021-04-03 01:50:29", "2021-04-03 01:58:44", "2021-04-03 01:59:39",
    "2021-04-03 02:00:34", "2021-04-03 02:01:29", "2021-04-03 02:11:34",
    "2021-04-03 02:12:29", "2021-04-03 02:18:54", "2021-04-03 02:19:49",
    "2021-04-03 02:20:44", "2021-04-03 02:21:39", "2021-04-03 02:22:34",
    "2021-04-03 02:23:29", "2021-04-03 02:24:24", "2021-04-03 02:25:19",
    "2021-04-03 02:26:14", "2021-04-03 02:27:09", "2021-04-03 02:28:04",
    "2021-04-03 02:28:59"
  ),
  # Labels are stored as consecutive runs, so spell out each run once together
  # with its length (the lengths sum to 64).
  jar = rep(
    c("blank", "1", "2", "blank", "3", "blank", "1", "2", "blank", "3"),
    times = c(7L, 5L, 5L, 7L, 7L, 7L, 8L, 6L, 5L, 7L)
  ),
  measurement_type = rep(c("a", "b"), times = c(31L, 33L)),
  # Same run layout as `jar`, with the blank runs already numbered.
  new_column = rep(
    c("blank1", "1", "2", "blank2", "3", "blank1", "1", "2", "blank2", "3"),
    times = c(7L, 5L, 5L, 7L, 7L, 7L, 8L, 6L, 5L, 7L)
  ),
  # Keep every column as character regardless of the R version's default.
  stringsAsFactors = FALSE
)
df
可能有更简短的解决方案,但我会使用 data.table::rleid() 函数,它非常适合检测由连续重复值构成的分组。
为了得到与示例一致的编号,先把该值转换为因子,再转换为数值:
library(dplyr)

# Label each consecutive run of "blank" rows within a measurement_type as
# blank1, blank2, ... in `new_column`; all other rows keep their jar label.
df %>%
  group_by(measurement_type) %>%
  mutate(
    # Run id of every stretch of identical consecutive jar values.
    run_id = data.table::rleid(jar),
    # Keep the run id for blank rows only, then compress the surviving ids to
    # 1, 2, ... through the factor's integer codes (NA stays NA).
    blank_no = as.integer(factor(if_else(jar == "blank", run_id, NA_integer_))),
    new_column = case_when(
      jar == "blank" ~ paste0(jar, blank_no),
      TRUE ~ jar
    )
  ) %>%
  ungroup() %>%
  select(-run_id, -blank_no)
可能有更简短的解决方案,但我会使用 data.table::rleid() 函数,它非常适合检测由连续重复值构成的分组。
为了得到与示例一致的编号,先把该值转换为因子,再转换为数值:
library(dplyr)

# Label each consecutive run of "blank" rows within a measurement_type as
# blank1, blank2, ... in `new_column`; all other rows keep their jar label.
df %>%
  group_by(measurement_type) %>%
  mutate(
    # Run id of every stretch of identical consecutive jar values.
    run_id = data.table::rleid(jar),
    # Keep the run id for blank rows only, then compress the surviving ids to
    # 1, 2, ... through the factor's integer codes (NA stays NA).
    blank_no = as.integer(factor(if_else(jar == "blank", run_id, NA_integer_))),
    new_column = case_when(
      jar == "blank" ~ paste0(jar, blank_no),
      TRUE ~ jar
    )
  ) %>%
  ungroup() %>%
  select(-run_id, -blank_no)
一个仅使用 dplyr 的解决方案。这个方法应该也可行:
library(dplyr)

# Number each consecutive run of "blank" rows within a measurement_type
# (blank1, blank2, ...) and write the label back into `jar`. Non-blank rows
# are left untouched. The helper columns and the question's hand-made
# `new_column` are dropped from the result.
df <- df %>%
  group_by(measurement_type) %>%
  mutate(
    # %in% never returns NA, so a missing jar simply counts as "not blank".
    is_blank = jar %in% "blank",
    # A run starts on a blank row whose predecessor (within the group) is not
    # blank. `default = FALSE` makes the first row of a group start a run only
    # when that row itself is blank, so a group that begins with a real jar
    # still labels its first blank run as blank1.
    flag = is_blank & !lag(is_blank, default = FALSE),
    # Running count of run starts = index of the blank run a row belongs to.
    sequence = cumsum(flag),
    jar = if_else(is_blank, paste0(jar, sequence), jar)
  ) %>%
  # Do not leak the grouping into downstream code.
  ungroup() %>%
  select(-new_column, -is_blank, -flag, -sequence)
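library(dplyr)
df <- df %>%
group_by(measurement_type) %>%
mutate(flag = if_else(jar != lag(jar) & jar=="blank", 1, 0, missing = 1),
sequence = cumsum(flag)) %>%
mutate(jar = case_when(jar=="blank" ~ paste(jar,sequence, sep = ""),
TRUE ~ jar)) %>%
select(-new_column, -flag, -sequence)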
输出:
time jar measurement_type
<chr> <chr> <chr>
1 2021-04-02 23:40:20 blank1 a
2 2021-04-02 23:41:15 blank1 a
3 2021-04-02 23:42:10 blank1 a
4 2021-04-02 23:43:05 blank1 a
5 2021-04-02 23:44:55 blank1 a
6 2021-04-02 23:45:50 blank1 a
7 2021-04-02 23:46:45 blank1 a
8 2021-04-02 23:47:40 1 a
9 2021-04-02 23:48:35 1 a
10 2021-04-02 23:49:30 1 a
11 2021-04-02 23:50:25 1 a
12 2021-04-02 23:52:15 1 a
13 2021-04-03 00:36:15 2 a
14 2021-04-03 00:37:10 2 a
15 2021-04-03 00:39:00 2 a
16 2021-04-03 00:39:55 2 a
17 2021-04-03 00:56:25 2 a
18 2021-04-03 00:57:20 blank2 a
19 2021-04-03 00:58:15 blank2 a
20 2021-04-03 00:59:10 blank2 a
21 2021-04-03 01:00:05 blank2 a
22 2021-04-03 01:01:00 blank2 a
23 2021-04-03 01:02:50 blank2 a
24 2021-04-03 01:03:45 blank2 a
25 2021-04-03 01:04:40 3 a
26 2021-04-03 01:05:35 3 a
27 2021-04-03 01:06:30 3 a
28 2021-04-03 01:23:54 3 a
29 2021-04-03 01:24:49 3 a
30 2021-04-03 01:25:44 3 a
31 2021-04-03 01:26:39 3 a
32 2021-04-03 01:28:29 blank1 b
33 2021-04-03 01:29:24 blank1 b
34 2021-04-03 01:30:19 blank1 b
35 2021-04-03 01:31:14 blank1 b
36 2021-04-03 01:32:09 blank1 b
37 2021-04-03 01:33:04 blank1 b
38 2021-04-03 01:33:59 blank1 b
39 2021-04-03 01:35:49 1 b
40 2021-04-03 01:36:44 1 b
41 2021-04-03 01:37:39 1 b
42 2021-04-03 01:38:34 1 b
43 2021-04-03 01:39:29 1 b
44 2021-04-03 01:48:39 1 b
45 2021-04-03 01:49:34 1 b
46 2021-04-03 01:50:29 1 b
47 2021-04-03 01:58:44 2 b
48 2021-04-03 01:59:39 2 b
49 2021-04-03 02:00:34 2 b
50 2021-04-03 02:01:29 2 b
51 2021-04-03 02:11:34 2 b
52 2021-04-03 02:12:29 2 b
53 2021-04-03 02:18:54 blank2 b
54 2021-04-03 02:19:49 blank2 b
55 2021-04-03 02:20:44 blank2 b
56 2021-04-03 02:21:39 blank2 b
57 2021-04-03 02:22:34 blank2 b
58 2021-04-03 02:23:29 3 b
59 2021-04-03 02:24:24 3 b
60 2021-04-03 02:25:19 3 b
61 2021-04-03 02:26:14 3 b
62 2021-04-03 02:27:09 3 b
63 2021-04-03 02:28:04 3 b
64 2021-04-03 02:28:59 3 b
时间震击器测量\u类型
2021-04-02 23:40:20布兰克1A
2021-04-02 23:41:15 a
3 2021-04-02 23:42:10 blank1a
42021-04-0223:43:05布兰克1A
52021-04-0223:44:55布兰克1A
6 2021-04-02 23:45:50布兰克1A
2021-04-02 23:46:45布兰克1A
8 2021-04-02 23:47:40 1A
9 2021-04-02 23:48:35 1A
102021-04-0223:49:30甲
11 2021-04-02 23:50:25 1A
12 2021-04-02 23:52:15 1A
132021-04-0300:36:152A
14 2021-04-03 00:37:102 a
152021-04-0300:39:002A
16 2021-04-03 00:39:55 2 a
172021-04-0300:56:252A
182021-04-0300:57:20A
19 2021-04-03 00:58:15 blank2A
2021-04-03 00:59:10布兰克2A
21 2021-04-03 01:00:05布兰克2A
22 2021-04-03 01:01:00 blank2A
23 2021-04-03 01:02:50 blank2A
24 2021-04-03 01:03:45布兰克2A
25 2021-04-03 01:04:40甲
26 2021-04-03 01:05:35甲
27 2021-04-03 01:06:30甲
2021-04-0301:23:54甲
29 2021-04-03 01:24:49甲
302021-04-0301:25:44甲
31 2021-04-03 01:26:39甲
32 2021-04-03 01:28:29 b
332021-04-0301:29:24B
34 2021-04-03 01:30:19 b
35 2021-04-03 01:31:14 b
36 2021-04-03 01:32:09空白1 b
37 2021-04-03 01:33:04空白1 b
38 2021-04-03 01:33:59 b
39 2021-04-03 01:35:49 1b
40 2021-04-03 01:36:44 1b
412021-04-0301:37:391b
422021-04-0301:38:341b
432021-04-0301:39:291b
44 2021-04-03 01:48:39 1b
452021-04-0301:49:341b
46 2021-04-03 01:50:29 1b
472021-04-0301:58:442b
48 2021-04-03 01:59:39 2 b
492021-04-0302:00:342b
502021-04-0302:01:292b
51 2021-04-03 02:11:34 2 b
52 2021-04-03 02:12:29 2 b
53 2021-04-03 02:18:54 b
54 2021-04-03 02:19:49 b
55 2021-04-03 02:20:44布兰克2b
56 2021-04-03 02:21:39 b
57 2021-04-03 02:22:34 b
58 2021-04-03 02:23:29 3 b
59 2021-04-03 02:24:24 b
60 2021-04-03 02:25:19 b
61 2021-04-03 02:26:14 3 b
622021-04-0302:27:093b
632021-04-0302:28:043b
64 2021-04-03 02:28:59 3 b
一个仅使用 dplyr 的解决方案。这个方法应该也可行:
library(dplyr)

# Number each consecutive run of "blank" rows within a measurement_type
# (blank1, blank2, ...) and write the label back into `jar`. Non-blank rows
# are left untouched. The helper columns and the question's hand-made
# `new_column` are dropped from the result.
df <- df %>%
  group_by(measurement_type) %>%
  mutate(
    # %in% never returns NA, so a missing jar simply counts as "not blank".
    is_blank = jar %in% "blank",
    # A run starts on a blank row whose predecessor (within the group) is not
    # blank. `default = FALSE` makes the first row of a group start a run only
    # when that row itself is blank, so a group that begins with a real jar
    # still labels its first blank run as blank1.
    flag = is_blank & !lag(is_blank, default = FALSE),
    # Running count of run starts = index of the blank run a row belongs to.
    sequence = cumsum(flag),
    jar = if_else(is_blank, paste0(jar, sequence), jar)
  ) %>%
  # Do not leak the grouping into downstream code.
  ungroup() %>%
  select(-new_column, -is_blank, -flag, -sequence)
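library(dplyr)
df <- df %>%
group_by(measurement_type) %>%
mutate(flag = if_else(jar != lag(jar) & jar=="blank", 1, 0, missing = 1),
sequence = cumsum(flag)) %>%
mutate(jar = case_when(jar=="blank" ~ paste(jar,sequence, sep = ""),
TRUE ~ jar)) %>%
select(-new_column, -flag, -sequence)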
输出:
time jar measurement_type
<chr> <chr> <chr>
1 2021-04-02 23:40:20 blank1 a
2 2021-04-02 23:41:15 blank1 a
3 2021-04-02 23:42:10 blank1 a
4 2021-04-02 23:43:05 blank1 a
5 2021-04-02 23:44:55 blank1 a
6 2021-04-02 23:45:50 blank1 a
7 2021-04-02 23:46:45 blank1 a
8 2021-04-02 23:47:40 1 a
9 2021-04-02 23:48:35 1 a
10 2021-04-02 23:49:30 1 a
11 2021-04-02 23:50:25 1 a
12 2021-04-02 23:52:15 1 a
13 2021-04-03 00:36:15 2 a
14 2021-04-03 00:37:10 2 a
15 2021-04-03 00:39:00 2 a
16 2021-04-03 00:39:55 2 a
17 2021-04-03 00:56:25 2 a
18 2021-04-03 00:57:20 blank2 a
19 2021-04-03 00:58:15 blank2 a
20 2021-04-03 00:59:10 blank2 a
21 2021-04-03 01:00:05 blank2 a
22 2021-04-03 01:01:00 blank2 a
23 2021-04-03 01:02:50 blank2 a
24 2021-04-03 01:03:45 blank2 a
25 2021-04-03 01:04:40 3 a
26 2021-04-03 01:05:35 3 a
27 2021-04-03 01:06:30 3 a
28 2021-04-03 01:23:54 3 a
29 2021-04-03 01:24:49 3 a
30 2021-04-03 01:25:44 3 a
31 2021-04-03 01:26:39 3 a
32 2021-04-03 01:28:29 blank1 b
33 2021-04-03 01:29:24 blank1 b
34 2021-04-03 01:30:19 blank1 b
35 2021-04-03 01:31:14 blank1 b
36 2021-04-03 01:32:09 blank1 b
37 2021-04-03 01:33:04 blank1 b
38 2021-04-03 01:33:59 blank1 b
39 2021-04-03 01:35:49 1 b
40 2021-04-03 01:36:44 1 b
41 2021-04-03 01:37:39 1 b
42 2021-04-03 01:38:34 1 b
43 2021-04-03 01:39:29 1 b
44 2021-04-03 01:48:39 1 b
45 2021-04-03 01:49:34 1 b
46 2021-04-03 01:50:29 1 b
47 2021-04-03 01:58:44 2 b
48 2021-04-03 01:59:39 2 b
49 2021-04-03 02:00:34 2 b
50 2021-04-03 02:01:29 2 b
51 2021-04-03 02:11:34 2 b
52 2021-04-03 02:12:29 2 b
53 2021-04-03 02:18:54 blank2 b
54 2021-04-03 02:19:49 blank2 b
55 2021-04-03 02:20:44 blank2 b
56 2021-04-03 02:21:39 blank2 b
57 2021-04-03 02:22:34 blank2 b
58 2021-04-03 02:23:29 3 b
59 2021-04-03 02:24:24 3 b
60 2021-04-03 02:25:19 3 b
61 2021-04-03 02:26:14 3 b
62 2021-04-03 02:27:09 3 b
63 2021-04-03 02:28:04 3 b
64 2021-04-03 02:28:59 3 b
时间震击器测量\u类型
2021-04-02 23:40:20布兰克1A
2021-04-02 23:41:15 a
3 2021-04-02 23:42:10 blank1a
42021-04-0223:43:05布兰克1A
52021-04-0223:44:55布兰克1A
6 2021-04-02 23:45:50布兰克1A
2021-04-02 23:46:45布兰克1A
8 2021-04-02 23:47:40 1A
9 2021-04-02 23:48:35 1A
102021-04-0223:49:30甲
11 2021-04-02 23:50:25 1A
12 2021-04-02 23:52:15 1A
132021-04-0300:36:152A
14 2021-04-03 00:37:102 a
152021-04-0300:39:002A
16 2021-04-03 00:39:55 2 a
172021-04-0300:56:252A
182021-04-0300:57:20A
19 2021-04-03 00:58:15 bl