基于R中的主题创建包含0或1的向量

基于R中的主题创建包含0或1的向量,r,vector,dataframe,R,Vector,Dataframe,我得到了下面的data.frame ARTICLE <- c("I'M ARTICLE #1","I'M ARTICLE #2","I'M ARTICLE #3","I'M ARTICLE #4") SUBJECT.1 <- c("POLLUTION", "ACQUIRED", "INSIDER TRADING", "MERGERS & ACQUISITIONS") SUBJECT.2 <- c("FRAUD", "POLLUTION & DAMAGES",

我得到了下面的
data.frame

# Example data: four articles, each tagged with three free-text subjects.
ARTICLE <- c(
  "I'M ARTICLE #1",
  "I'M ARTICLE #2",
  "I'M ARTICLE #3",
  "I'M ARTICLE #4"
)
SUBJECT.1 <- c(
  "POLLUTION",
  "ACQUIRED",
  "INSIDER TRADING",
  "MERGERS & ACQUISITIONS"
)
SUBJECT.2 <- c(
  "FRAUD",
  "POLLUTION & DAMAGES",
  "FRAUD & INSIDER TRADING",
  "OIL SPILLS"
)
SUBJECT.3 <- c(
  "OIL",
  "BIOFUELS",
  "OIL SPILLS & WASTE",
  "EMISSIONS"
)

# One row per article; the column names are taken from the vector names.
mydf <- data.frame(ARTICLE, SUBJECT.1, SUBJECT.2, SUBJECT.3)
# Print the data frame for inspection.
mydf

#          ARTICLE              SUBJECT.1               SUBJECT.2          SUBJECT.3
# 1 I'M ARTICLE #1              POLLUTION                   FRAUD                OIL
# 2 I'M ARTICLE #2               ACQUIRED     POLLUTION & DAMAGES           BIOFUELS
# 3 I'M ARTICLE #3        INSIDER TRADING FRAUD & INSIDER TRADING OIL SPILLS & WASTE
# 4 I'M ARTICLE #4 MERGERS & ACQUISITIONS              OIL SPILLS          EMISSIONS
输出应如下所示:

#          ARTICLE              SUBJECT.1               SUBJECT.2          SUBJECT.3 POLLUTION OILSPILLS MERGERS FRAUD
# 1 I'M ARTICLE #1              POLLUTION                   FRAUD                OIL         1         0       0     1
# 2 I'M ARTICLE #2               ACQUIRED     POLLUTION & DAMAGES           BIOFUELS         1         0       1     0
# 3 I'M ARTICLE #3        INSIDER TRADING FRAUD & INSIDER TRADING OIL SPILLS & WASTE         1         1       0     1
# 4 I'M ARTICLE #4 MERGERS & ACQUISITIONS              OIL SPILLS          EMISSIONS         1         1       1     0
因为我完全不知道该从何入手,所以还没能做出任何尝试。

谢谢大家!

# Build one 0/1 indicator per topic for every article (row of mydf) by
# searching all subject columns for topic keywords.
# Result columns, in order: POLLUTION, OIL SPILLS, MERGERS, FRAUD.
# mydf[-1] drops the ARTICLE column so only the subject columns are searched.
# apply(..., 1, ...) yields one column per input row, so t() is needed to
# get back one row per article.
t(apply(mydf[-1], 1, function(x) {
  as.numeric(c(
    any(grepl("POLLUTION|EMISSION|WASTE", x)),
    any(grepl("OIL\\sSPILL", x)),
    any(grepl("MERGER|ACQUI", x)),
    # Fourth topic is FRAUD; it needs its own pattern rather than a repeat
    # of the merger pattern, which would only duplicate the third column.
    any(grepl("FRAUD", x))
  ))
}))
#-------
#      [,1] [,2] [,3] [,4]
# [1,]    1    0    0    1
# [2,]    1    0    1    0
# [3,]    1    1    0    1
# [4,]    1    1    1    0

# .Last.value is the value of the previous top-level expression, i.e. the
# indicator matrix above; append it to the original data.
cbind(mydf, .Last.value)
#          ARTICLE              SUBJECT.1               SUBJECT.2
# 1 I'M ARTICLE #1              POLLUTION                   FRAUD
# 2 I'M ARTICLE #2               ACQUIRED     POLLUTION & DAMAGES
# 3 I'M ARTICLE #3        INSIDER TRADING FRAUD & INSIDER TRADING
# 4 I'M ARTICLE #4 MERGERS & ACQUISITIONS              OIL SPILLS
#            SUBJECT.3 1 2 3 4
# 1                OIL 1 0 0 1
# 2           BIOFUELS 1 0 1 0
# 3 OIL SPILLS & WASTE 1 1 0 1
# 4          EMISSIONS 1 1 1 0
也许还有更优雅的方法,但这个办法足够直观,连我这只“脑袋不太灵光的小熊”也能把它写出来。给新增的列命名非常简单,就留作“读者的练习”吧。

以下代码在“BondedDust”的答案基础上稍加修改:


“因为我不知道怎么做,所以我真的什么都不能尝试。”有趣的是,只要在“提问”框中输入你这个问题的确切标题,“可能已经有你答案的问题”列表就会把你指向
grepl
。感谢您的努力。@thelatemail:我总是忘记那个搜索功能,因为它过去对我来说并不怎么管用。谢谢你的努力。
# Build one 0/1 indicator per topic for every article (row of mydf) by
# searching all subject columns for topic keywords.
# Result columns, in order: POLLUTION, OIL SPILLS, MERGERS, FRAUD.
# mydf[-1] drops the ARTICLE column so only the subject columns are searched.
# apply(..., 1, ...) yields one column per input row, so t() is needed to
# get back one row per article.
t(apply(mydf[-1], 1, function(x) {
  as.numeric(c(
    any(grepl("POLLUTION|EMISSION|WASTE", x)),
    any(grepl("OIL\\sSPILL", x)),
    any(grepl("MERGER|ACQUI", x)),
    # Fourth topic is FRAUD; it needs its own pattern rather than a repeat
    # of the merger pattern, which would only duplicate the third column.
    any(grepl("FRAUD", x))
  ))
}))
#-------
#      [,1] [,2] [,3] [,4]
# [1,]    1    0    0    1
# [2,]    1    0    1    0
# [3,]    1    1    0    1
# [4,]    1    1    1    0

# .Last.value is the value of the previous top-level expression, i.e. the
# indicator matrix above; append it to the original data.
cbind(mydf, .Last.value)
#          ARTICLE              SUBJECT.1               SUBJECT.2
# 1 I'M ARTICLE #1              POLLUTION                   FRAUD
# 2 I'M ARTICLE #2               ACQUIRED     POLLUTION & DAMAGES
# 3 I'M ARTICLE #3        INSIDER TRADING FRAUD & INSIDER TRADING
# 4 I'M ARTICLE #4 MERGERS & ACQUISITIONS              OIL SPILLS
#            SUBJECT.3 1 2 3 4
# 1                OIL 1 0 0 1
# 2           BIOFUELS 1 0 1 0
# 3 OIL SPILLS & WASTE 1 1 0 1
# 4          EMISSIONS 1 1 1 0
# Named regular expressions: each name becomes the name of an indicator
# column, each value is the pattern searched for in the subject columns.
vec1 <- c(
  POLLUTION = "POLLUTION|EMISSION|WASTE",
  OILSPILLS = "OIL SPILL",
  MERGERS   = "MERGER|ACQUI",
  FRAUD     = "FRAUD|CRIME"
)

# For every pattern, check each row of mydf (ARTICLE column excluded) for at
# least one matching subject; adding 0 converts the logical result to 0/1.
sapply(vec1, function(pattern) {
  apply(mydf[, -1], 1, function(subjects) any(grepl(pattern, subjects)))
}) + 0

# .Last.value is the indicator result computed just above; append it to the
# original data.
cbind(mydf, .Last.value)