R 如何基于两列重新组织数据

R 如何基于两列重新组织数据,r,R,我有一个数据如下 df<- structure(list(data1 = c(0.013818378, 0.014362551, 0.014647562, 0.0136627, 0.015510173, 0.006818502, 0.006683564, 0.006655434, 0.006691479, 0.00666666, 0.014507653, 0.017446481, 0.014021427, 0.013963069, 0.020706391, 0.007104358,

我有一个数据如下

# Example data: two measurements (data1, data2) taken at three time points
# (1, 10, 17) for ten places (B02-B06, C02-C06), giving 30 rows in total.
df <- data.frame(
  data1 = c(
    # time = 1
    0.013818378, 0.014362551, 0.014647562, 0.0136627, 0.015510173,
    0.006818502, 0.006683564, 0.006655434, 0.006691479, 0.00666666,
    # time = 10
    0.014507653, 0.017446481, 0.014021427, 0.013963069, 0.020706391,
    0.007104358, 0.006809539, 0.006680631, 0.009059533, 0.006681197,
    # time = 17
    0.015691738, 0.016709763, 0.015761994, 0.016062111, 0.015917196,
    0.006816436, 0.006809539, 0.006680631, 0.009059533, 0.006681197
  ),
  data2 = c(
    # time = 1
    0.045378058, 0.041371486, 0.046058451, 0.040479177, 0.051143336,
    0.016131932, 0.014399847, 0.014950329, 0.016408355, 0.015886182,
    # time = 10
    0.046151342, 0.05265521, 0.046046663, 0.040515428, 0.086865434,
    0.019222881, 0.016926183, 0.016703444, 0.081352865, 0.132841645,
    # time = 17
    0.051641343, 0.059851738, 0.04830957, 0.047550067, 0.049228835,
    0.015154055, 0.016926183, 0.016703444, 0.081352865, 0.132841645
  ),
  # Each time point covers one full pass over the ten places.
  time = rep(c(1L, 10L, 17L), each = 10L),
  # Integer codes 1..10 repeat for every time point; labels become the levels.
  place = factor(
    rep(seq_len(10L), times = 3L),
    labels = c("B02", "B03", "B04", "B05", "B06",
               "C02", "C03", "C04", "C05", "C06")
  )
)
例如,time 为 10 时的 data1 应整理为:

B   0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
C   0.007104358 0.006809539 0.006680631 0.009059533 0.006681197

etc etc

我们可以在字母和数字之间进行拆分,将 `place` 列分为两列,然后将数据扩展为“宽”格式:

library(dplyr)
library(tidyr)
# Reshape data1 to wide format: one row per (time, grp) pair and one column
# per place number.
df %>%
  # data2 is not part of this output, so drop it up front.
  select(-data2) %>%
  # Split e.g. "B02" into grp = "B" and number = "02" at the zero-width
  # boundary between the letter and the digits.
  separate(col = place, into = c("grp", "number"),
           sep = "(?<=[A-Z])(?=[0-9])") %>%
  spread(key = number, value = data1)
# time grp          02          03          04          05          06
#1    1   B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2    1   C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3   10   B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4   10   C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5   17   B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6   17   C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197

当有多个 `value` 列时,输出应该是什么样子还不清楚。来自 data.table 的 `dcast` 可以处理多个 `value.var`:

library(data.table)
# Convert df to a data.table by reference.
setDT(df)
# Add grp / number columns by splitting place at the zero-width boundary
# between the letter and the digits.
df[, c("grp", "number") := tstrsplit(place, "(?<=[A-Z])(?=[0-9])", perl = TRUE)]
# Cast to wide format; both value columns are spread across the place numbers.
dcast(df, formula = grp + time ~ number, value.var = c("data1", "data2"))
library(data.table)

setDT(df)[, c("grp", "number") := tstrsplit(place, "(?<=[A-Z])(?=[0-9])", perl = TRUE)]

我们通过在字母和数字之间拆分,将“place”列分隔为两列,并将其扩展为“wide”格式

library(dplyr)
library(tidyr)
# Reshape data1 to wide format: one row per (time, grp) pair and one column
# per place number.
df %>%
  # data2 is not part of this output, so drop it up front.
  select(-data2) %>%
  # Split e.g. "B02" into grp = "B" and number = "02" at the zero-width
  # boundary between the letter and the digits.
  separate(col = place, into = c("grp", "number"),
           sep = "(?<=[A-Z])(?=[0-9])") %>%
  spread(key = number, value = data1)
# time grp          02          03          04          05          06
#1    1   B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2    1   C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3   10   B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4   10   C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5   17   B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6   17   C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197

当有多个 `value` 列时,不清楚输出应该是什么样子。来自 data.table 的 `dcast` 可以处理多个 `value.var`:

library(data.table)
# Convert df to a data.table by reference.
setDT(df)
# Add grp / number columns by splitting place at the zero-width boundary
# between the letter and the digits.
df[, c("grp", "number") := tstrsplit(place, "(?<=[A-Z])(?=[0-9])", perl = TRUE)]
# Cast to wide format; both value columns are spread across the place numbers.
dcast(df, formula = grp + time ~ number, value.var = c("data1", "data2"))
library(data.table)

setDT(df)[, c("grp", "number") := tstrsplit(place, "(?<=[A-Z])(?=[0-9])", perl = TRUE)]

你的问题有点不清楚,但我认为这就是你想要的:

library(tidyverse)

# Build one wide table per measurement (data1, data2), returned as a named
# list.
df %>%
  # Extract the digits into `column` first: `place` is overwritten with just
  # its letter in the next step.
  mutate(column = str_extract(place, "[0-9]+")) %>%
  mutate(place = str_extract(place, "[A-Z]")) %>%
  # Stack data1 / data2 into long form: `data` holds the source column name,
  # `val` holds the measurement.
  gather(key = "data", value = "val", data1, data2) %>%
  # One column per place number.
  spread(key = column, value = val) %>%
  # One list element per measurement name.
  split(f = .[["data"]])
将生成以下格式:

$data1
   time place  data          02          03          04          05          06
1     1     B data1 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
3     1     C data1 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
5    10     B data1 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
7    10     C data1 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
9    17     B data1 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
11   17     C data1 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197

$data2
   time place  data         02         03         04         05         06
2     1     B data2 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
4     1     C data2 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
6    10     B data2 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
8    10     C data2 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
10   17     B data2 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
12   17     C data2 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165

你的问题有点不清楚,但我认为这就是你想要的:

library(tidyverse)

# Build one wide table per measurement (data1, data2), returned as a named
# list.
df %>%
  # Extract the digits into `column` first: `place` is overwritten with just
  # its letter in the next step.
  mutate(column = str_extract(place, "[0-9]+")) %>%
  mutate(place = str_extract(place, "[A-Z]")) %>%
  # Stack data1 / data2 into long form: `data` holds the source column name,
  # `val` holds the measurement.
  gather(key = "data", value = "val", data1, data2) %>%
  # One column per place number.
  spread(key = column, value = val) %>%
  # One list element per measurement name.
  split(f = .[["data"]])
将生成以下格式:

$data1
   time place  data          02          03          04          05          06
1     1     B data1 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
3     1     C data1 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
5    10     B data1 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
7    10     C data1 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
9    17     B data1 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
11   17     C data1 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197

$data2
   time place  data         02         03         04         05         06
2     1     B data2 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
4     1     C data2 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
6    10     B data2 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
8    10     C data2 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
10   17     B data2 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
12   17     C data2 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165

你能告诉我当列数超过 2 时该怎么做吗?例如 data1、data2、data3……各有不同的名称?我想每次只处理一个 data。

@NikolasService 我不清楚输出格式应该是什么样。如果输出是单个数据集,则可以使用 `dcast`。请查看更新后的帖子。

你能演示一下当列数超过 2 时如何执行此操作吗?例如 data1、data2、data3……具有各种名称?我希望每次只处理一个 data。

@NikolasService 我不清楚输出格式应该是什么样。如果输出是单个数据集,则可以使用 `dcast`。请查看更新后的帖子。

这不是我的答案,但我喜欢它。

这不是我的答案,但我喜欢。谢谢