Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/macos/9.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
重塑data.frame,使包含多个要素的列变为多个二进制列_R - Fatal编程技术网

重塑data.frame,使包含多个要素的列变为多个二进制列

重塑data.frame,使包含多个要素的列变为多个二进制列,r,R,我有一个这样的数据帧 df <-data.frame(id = c(1,2), value = c(25,24), features = c("A,B,D,F","C,B,E")) print(df) id,value,features 1,25,"A,B,D,F" 2,24,"C,B,E" 我猜第一步是确定df$features列中的唯一值,但一旦我有了该列表,我不确定创建最终数据集的有效(即向量化)方法是什么 这感觉

我有一个这样的数据帧

# Example input: one row per id; `features` holds a comma-separated list.
df <- data.frame(
  id = c(1, 2),
  value = c(25, 24),
  features = c("A,B,D,F", "C,B,E")
)

print(df)

id,value,features
1,25,"A,B,D,F"
2,24,"C,B,E"
我猜第一步是确定
df$features
列中的唯一值,但一旦我有了该列表,我不确定创建最终数据集的有效(即向量化)方法是什么


这感觉像是对
dplyr
reshape2
的操作,但我不确定如何实现这一点。

这是经过适当转换后的
merge
的另一个用例

library(reshape2)

# Long form: split every feature string on commas and stack the pieces, so
# `values` holds a single feature and `ind` the id it came from.
f <- with(
  df,
  stack(setNames(strsplit(as.character(features), ","), id))
)

# Wide form: one row per id, one column per feature, counting occurrences.
d <- dcast(f, ind ~ values, fun.aggregate = length, value.var = "ind")

# Re-attach the id/value columns by matching `id` against the stacked index.
out <- merge(df[, 1:2], d, by.x = "id", by.y = "ind")

print(out)
你可以做:

library(splitstackshape)
library(qdapTools)

# Split `features` on commas into separate columns, keeping the pieces as
# character (no type conversion). FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
df1 <- data.frame(cSplit(df, "features", sep = ",", type.convert = FALSE))

# Transpose the split columns so each original row becomes a column, tabulate
# the features found in each of them, and bind the result back to id/value.
cbind(df1[1:2], mtabulate(as.data.frame(t(df1[-c(1, 2)]))))

#   id value A B C D E F
#1:  1    25 1 1 0 1 0 1
#2:  2    24 0 1 1 0 1 0
dplyr/tidyr解决方案

library(dplyr)
library(tidyr)

# Expand to one row per (id, value, feature). This places no limit on how many
# features a row may carry and needs no placeholder column names.
df %>%
  separate_rows(features, sep = ",") %>%
  # A feature repeated within one row still counts once (binary indicator).
  distinct() %>%
  # Indicator column that becomes the cell value after widening.
  mutate(n = 1) %>%
  # One column per feature, sorted by name; features absent from a row are
  # filled with 0, so the set of feature columns is never hard-coded.
  pivot_wider(
    names_from = features,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  )

另一个使用
splitstackshape
data.table
(安装说明):


df$features
是否始终具有相同的长度?否,
df$features
的长度不同。-我将编辑示例以澄清这一点。还有
cSplit_e
cSplit_e(df, "features", ",", type = "character", fill = 0)
。您的软件包太疯狂了。
# Cross-tabulate the stacked table: every row gets a `count` of 1, which
# xtabs() sums per `ind` (rows) and `values` (columns).
d <- xtabs(count ~ ind + values, data = transform(f, count = 1))

# as.data.frame.matrix() keeps the wide layout; the ids end up in the row
# names, so merge on those.
out <- merge(
  df[, 1:2],
  as.data.frame.matrix(d),
  by.x = "id",
  by.y = "row.names"
)
library(splitstackshape)
library(qdapTools)

# Split `features` on commas into separate columns, keeping the pieces as
# character (no type conversion). FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
df1 <- data.frame(cSplit(df, "features", sep = ",", type.convert = FALSE))

# Transpose the split columns so each original row becomes a column, tabulate
# the features found in each of them, and bind the result back to id/value.
cbind(df1[1:2], mtabulate(as.data.frame(t(df1[-c(1, 2)]))))

#   id value A B C D E F
#1:  1    25 1 1 0 1 0 1
#2:  2    24 0 1 1 0 1 0
library(dplyr)
library(tidyr)

# Expand to one row per (id, value, feature). This places no limit on how many
# features a row may carry and needs no placeholder column names.
df %>%
  separate_rows(features, sep = ",") %>%
  # A feature repeated within one row still counts once (binary indicator).
  distinct() %>%
  # Indicator column that becomes the cell value after widening.
  mutate(n = 1) %>%
  # One column per feature, sorted by name; features absent from a row are
  # filled with 0, so the set of feature columns is never hard-coded.
  pivot_wider(
    names_from = features,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  )
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a less clear message.
library(splitstackshape)
library(data.table) # v1.9.5+

# Long form: one row per (id, value, feature).
ans <- cSplit(df, "features", sep = ",", direction = "long")

# Wide form: one column per feature, counting occurrences per id/value pair.
# value.var is named explicitly so dcast() does not have to guess it.
dcast(
  ans,
  id + value ~ features,
  fun.aggregate = length,
  value.var = "features"
)
#    id value A B C D E F
# 1:  1    25 1 1 0 1 0 1
# 2:  2    24 0 1 1 0 1 0
# Expand `features` into one indicator column per distinct value, treating the
# values as character labels and filling absent ones with 0.
cSplit_e(df, split.col = "features", sep = ",", type = "character", fill = 0)
##   id value features features_A features_B features_C features_D features_E features_F
## 1  1    25  A,B,D,F          1          1          0          1          0          1
## 2  2    24    C,B,E          0          1          1          0          1          0