R 如何拆分数据框的列并对其进行重塑?

R 如何拆分数据框的列并对其进行重塑?,r,split,reshape,R,Split,Reshape,我正在尝试重新构造数据,以便将包含多个值的列展开以匹配我在下面列出的所需输出?我曾多次尝试使用tidyr::spread()和tidyr::gather(),但都没有成功。有什么想法吗 dat <- data.frame("name" = c("a", "b", "c"), "count" = c("2003=22; 2004=32", "2003=34; 2005=45",

我正在尝试重新构造数据,将包含多个值的列展开,以匹配下面列出的所需输出。我曾多次尝试使用
tidyr::spread()
和
tidyr::gather()
,但都没有成功。有什么想法吗?

# Example input: one row per name. `count` packs several "year=value" pairs
# into a single string, with the pairs separated by "; ".
dat <- data.frame(
  name = c("a", "b", "c"),
  count = c(
    "2003=22; 2004=32",
    "2003=34; 2005=45",
    "2005=32; 2006=67"
  )
)


name            count
a               2003=22; 2004=32
b               2003=34; 2005=45
c               2005=32; 2006=67

下面是一个使用 base R 和 reshape2 的解决方案:

# Base R + reshape2: pull every "year=value" pair out of `count`, stack the
# pairs in long format, then cast to one column per year.
library(reshape2)

# strsplit() copes with any number of ";"-separated pairs per row, including
# a row that holds a single pair and therefore has no separator at all.
# as.character() guards against `count` being a factor (the data.frame()
# default before R 4.0).
pairs <- strsplit(as.character(dat$count), ";", fixed = TRUE)

# Long format: one row per pair, with `name` repeated once per pair.
df <- data.frame(
  name = rep(dat$name, lengths(pairs)),
  get1 = trimws(unlist(pairs)),
  stringsAsFactors = FALSE
)

# Text before "=" is the year; text after it is the value.
df$years <- trimws(sub("=.*$", "", df$get1))
df$values <- trimws(sub("^[^=]*=", "", df$get1))

# Wide format: years that are absent for a name are filled with NA.
outdf <- dcast(df, name ~ years, value.var = "values")
outdf
# name 2003 2004 2005 2006
# 1    a   22   32 <NA> <NA>
# 2    b   34 <NA>   45 <NA>
# 3    c <NA> <NA>   32   67

这是一个使用 base R 和 reshape2 的解决方案:

# Base R + reshape2: pull every "year=value" pair out of `count`, stack the
# pairs in long format, then cast to one column per year.
library(reshape2)

# strsplit() copes with any number of ";"-separated pairs per row, including
# a row that holds a single pair and therefore has no separator at all.
# as.character() guards against `count` being a factor (the data.frame()
# default before R 4.0).
pairs <- strsplit(as.character(dat$count), ";", fixed = TRUE)

# Long format: one row per pair, with `name` repeated once per pair.
df <- data.frame(
  name = rep(dat$name, lengths(pairs)),
  get1 = trimws(unlist(pairs)),
  stringsAsFactors = FALSE
)

# Text before "=" is the year; text after it is the value.
df$years <- trimws(sub("=.*$", "", df$get1))
df$values <- trimws(sub("^[^=]*=", "", df$get1))

# Wide format: years that are absent for a name are filled with NA.
outdf <- dcast(df, name ~ years, value.var = "values")
outdf
# name 2003 2004 2005 2006
# 1    a   22   32 <NA> <NA>
# 2    b   34 <NA>   45 <NA>
# 3    c <NA> <NA>   32   67

可能有一种更聪明、更简洁的方法,但这是可行的:

library(tidyr)
library(dplyr)  # select() is exported by dplyr, not by tidyr

dat %>%
  # Split the two "; "-separated pairs into their own columns.
  separate(count, sep = "; ", into = c("c1", "c2")) %>%
  # Stack c1/c2 so that each row holds a single "year=value" pair.
  gather(Var, Val, -name) %>%
  # Break each pair into its year and its value.
  separate(Val, sep = "=", into = c("year", "value")) %>%
  # Var only records which slot (c1/c2) the pair came from; drop it so that
  # spread() produces one row per name.
  select(-Var) %>%
  # One column per year; years that are absent for a name become NA.
  spread(year, value)

  name 2003 2004 2005 2006
1    a   22   32 <NA> <NA>
2    b   34 <NA>   45 <NA>
3    c <NA> <NA>   32   67
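library(tidyr)
dat %>% 
  separate(count, sep = "; ", into = c("c1", "c2")) %>% 
  gather(Var, Val, -name) %>% 
  separate(Val, sep = "=", into = c("year", "value")) %>% 
  select(-Var) %>% 
  spread(year, value)

  name 2003 2004 2005 2006
1    a   22   32 <NA> <NA>
2    b   34 <NA>   45 <NA>
3    c <NA> <NA>   32   67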

注意,这会产生“宽”格式的数据;执行 spread() 之前的“长”格式数据可能更容易处理。

可能有一种更聪明、更简洁的方法,但这是可行的:

library(tidyr)
library(dplyr)  # select() is exported by dplyr, not by tidyr

dat %>%
  # Split the two "; "-separated pairs into their own columns.
  separate(count, sep = "; ", into = c("c1", "c2")) %>%
  # Stack c1/c2 so that each row holds a single "year=value" pair.
  gather(Var, Val, -name) %>%
  # Break each pair into its year and its value.
  separate(Val, sep = "=", into = c("year", "value")) %>%
  # Var only records which slot (c1/c2) the pair came from; drop it so that
  # spread() produces one row per name.
  select(-Var) %>%
  # One column per year; years that are absent for a name become NA.
  spread(year, value)

  name 2003 2004 2005 2006
1    a   22   32 <NA> <NA>
2    b   34 <NA>   45 <NA>
3    c <NA> <NA>   32   67
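library(tidyr)
dat %>% 
  separate(count, sep = "; ", into = c("c1", "c2")) %>% 
  gather(Var, Val, -name) %>% 
  separate(Val, sep = "=", into = c("year", "value")) %>% 
  select(-Var) %>% 
  spread(year, value)

  name 2003 2004 2005 2006
1    a   22   32 <NA> <NA>
2    b   34 <NA>   45 <NA>
3    c <NA> <NA>   32   67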

注意,这会产生“宽”格式的数据;执行 spread() 之前的“长”格式数据可能更容易处理。

这里有一种使用
extract
+
bind_rows
+
spread
的方法:

library(dplyr)  # select(), bind_rows()
library(tidyr)  # extract(), spread()

dat %>%
  # Capture both "year=value" pairs. POSIX classes such as [:digit:] are only
  # valid inside a bracket expression, hence [[:digit:]].
  extract(
    count,
    into = c("year1", "value1", "year2", "value2"),
    regex = "([[:digit:]]+)=([[:digit:]]+);\\s*([[:digit:]]+)=([[:digit:]]+)"
  ) %>%
  # Stack the first and the second pair as (name, year, value) rows. The
  # braces stop magrittr from also inserting `.` as bind_rows()'s first
  # argument.
  {
    bind_rows(
      select(., name, year = year1, value = value1),
      select(., name, year = year2, value = value2)
    )
  } %>%
  # One column per year; years that are absent for a name become NA.
  spread(year, value)

  name 2003 2004 2005 2006
1    a   22   32 <NA> <NA>
2    b   34 <NA>   45 <NA>
3    c <NA> <NA>   32   67

下面是一种使用
extract
+
bind_rows
+
spread
的方法:

library(dplyr)  # select(), bind_rows()
library(tidyr)  # extract(), spread()

dat %>%
  # Capture both "year=value" pairs. POSIX classes such as [:digit:] are only
  # valid inside a bracket expression, hence [[:digit:]].
  extract(
    count,
    into = c("year1", "value1", "year2", "value2"),
    regex = "([[:digit:]]+)=([[:digit:]]+);\\s*([[:digit:]]+)=([[:digit:]]+)"
  ) %>%
  # Stack the first and the second pair as (name, year, value) rows. The
  # braces stop magrittr from also inserting `.` as bind_rows()'s first
  # argument.
  {
    bind_rows(
      select(., name, year = year1, value = value1),
      select(., name, year = year2, value = value2)
    )
  } %>%
  # One column per year; years that are absent for a name become NA.
  spread(year, value)

  name 2003 2004 2005 2006
1    a   22   32 <NA> <NA>
2    b   34 <NA>   45 <NA>
3    c <NA> <NA>   32   67

如果您想要一个不依赖任何额外的包、基于
strsplit()
和
reshape()
的 base R 方法:


如果您想要一个不依赖任何额外的包、基于
strsplit()
和
reshape()
的 base R 方法:


您可以使用
tidyr
中的
separate_rows
和
separate
:

library(tidyr)

# One row per "year=value" pair.
pair_rows <- separate_rows(dat, count, sep = "; ")

# Break each pair into its year (`key`) and its value (`val`).
key_val <- separate(pair_rows, count, sep = "=", into = c("key", "val"))

# Cast back to wide format: one column per year.
spread(key_val, key, val)
#   name 2003 2004 2005 2006
# 1    a   22   32 <NA> <NA>
# 2    b   34 <NA>   45 <NA>
# 3    c <NA> <NA>   32   67

您可以使用
tidyr
中的
separate_rows
和
separate
:

library(tidyr)

# One row per "year=value" pair.
pair_rows <- separate_rows(dat, count, sep = "; ")

# Break each pair into its year (`key`) and its value (`val`).
key_val <- separate(pair_rows, count, sep = "=", into = c("key", "val"))

# Cast back to wide format: one column per year.
spread(key_val, key, val)
#   name 2003 2004 2005 2006
# 1    a   22   32 <NA> <NA>
# 2    b   34 <NA>   45 <NA>
# 3    c <NA> <NA>   32   67