在 R 中创建一个数据框,其中包含每个句子的标记信息

制作一个数据帧,其中包含R中每个句子的标记信息,r,R,我有几个带有意见标签的句子,我想要一个数据框来显示信息 test = c("I very/AD very/AD like/POS the voice/FE","I really really/AD hate/NEG you/FE","I love/POS and adore/POS him although he is rude/NEG") > test [1] "I very/AD very/AD like/POS the voice/FE" [2] "I r

我有几个带有意见标签的句子,我想要一个数据框来显示信息

# Example sentences. A token of the form "word/TAG" carries an opinion tag:
# POS, NEG, AD or FE; tokens without a slash are untagged.
test <- c(
  "I very/AD very/AD like/POS the voice/FE",
  "I really really/AD hate/NEG you/FE",
  "I love/POS and adore/POS him although he is rude/NEG"
)

> test
[1] "I very/AD very/AD like/POS the voice/FE"             
[2] "I really really/AD hate/NEG you/FE"                  
[3] "I love/POS and adore/POS him although he is rude/NEG"


> test = strsplit(test, ' ')
[[1]]
[1] "I" "very/AD" "very/AD" "like/POS" "the" "voice/FE"

[[2]]
[1] "I" "really" "really/AD" "hate/NEG" "you/FE"   

[[3]]
[1] "I" "love/POS" "and" "adore/POS" "him" "although" "he" "is" "rude/NEG"
有了上面的数据,我想让一个数据框包含每个句子的标签信息,如下所示

  POS NEG AD FE
1   1   0  2 voice
2   0   1  1 you
3   2   1  0
  POS NEG AD    FE
1   3   2  3 voice
2   3   2  3   you
现在,我所能做的只是创建一个包含如下汇总信息的数据框

  POS NEG AD FE
1   1   0  2 voice
2   0   1  1 you
3   2   1  0
  POS NEG AD    FE
1   3   2  3 voice
2   3   2  3   you
我可以通过下面的代码来实现(我把这些代码写出来,以便您可以从中获得一些提示)

再次说明,我的目标是创建一个数据框,其中包含每个句子的重要标记信息。(答案中的代码应当不受句子数量的限制,因为实际数据可能包含更多句子)

我们可以试试

library(stringr)

# Tags that become the columns of the result, in display order.
tag_levels <- c("POS", "NEG", "AD", "FE")

# Count the tags of each sentence. Tokens have the form "word/TAG"; tokens
# without a slash carry no tag and are skipped. A factor with fixed levels
# keeps a zero for every absent tag, and vapply() yields one integer vector of
# length four per sentence even when a sentence has no tagged token at all.
tag_counts <- vapply(strsplit(test, " "), function(tokens) {
  tagged <- grep("/", tokens, fixed = TRUE, value = TRUE)
  tags <- factor(sub("^.*/", "", tagged), levels = tag_levels)
  as.integer(table(tags))
}, integer(length(tag_levels)))

# One row per sentence. A data frame (rather than a matrix) lets the counts
# stay integer while the FE column holds text.
res <- as.data.frame(t(tag_counts))
names(res) <- tag_levels

# FE shows the first word tagged "/FE" instead of a count; sentences without
# such a word get an empty string.
fe_word <- str_extract(test, "\\w+(?=/FE)")
fe_word[is.na(fe_word)] <- ""
res$FE <- fe_word
res

我们可以试试

library(stringr)

# Tags that become the columns of the result, in display order.
tag_levels <- c("POS", "NEG", "AD", "FE")

# Count the tags of each sentence. Tokens have the form "word/TAG"; tokens
# without a slash carry no tag and are skipped. A factor with fixed levels
# keeps a zero for every absent tag, and vapply() yields one integer vector of
# length four per sentence even when a sentence has no tagged token at all.
tag_counts <- vapply(strsplit(test, " "), function(tokens) {
  tagged <- grep("/", tokens, fixed = TRUE, value = TRUE)
  tags <- factor(sub("^.*/", "", tagged), levels = tag_levels)
  as.integer(table(tags))
}, integer(length(tag_levels)))

# One row per sentence. A data frame (rather than a matrix) lets the counts
# stay integer while the FE column holds text.
res <- as.data.frame(t(tag_counts))
names(res) <- tag_levels

# FE shows the first word tagged "/FE" instead of a count; sentences without
# such a word get an empty string.
fe_word <- str_extract(test, "\\w+(?=/FE)")
fe_word[is.na(fe_word)] <- ""
res$FE <- fe_word

res

如果需要 data.frame,可以使用 `plyr::ldply` 函数:

# Build one data-frame row per sentence and let plyr::ldply() stack the rows.
# Each row holds the number of tokens tagged /POS, /NEG and /AD, plus the
# first word tagged /FE ("" when the sentence has none).
plyr::ldply(test, function(sentence) {
  # Split once and keep the character vector of tokens. Calling grepl() on
  # the list returned by strsplit() tests the whole sentence as one string,
  # so it can never count more than one match.
  tokens <- strsplit(sentence, " ", fixed = TRUE)[[1]]

  # Number of tokens that end in "/<tag>".
  count_tag <- function(tag) sum(endsWith(tokens, paste0("/", tag)))

  fe_words <- sub("/FE$", "", tokens[endsWith(tokens, "/FE")])

  data.frame(
    POS = count_tag("POS"),
    NEG = count_tag("NEG"),
    AD = count_tag("AD"),
    FE = if (length(fe_words) == 0L) "" else fe_words[[1]]
  )
})
ldply(test,
function(t){

FE

如果需要 data.frame,可以使用 `plyr::ldply` 函数:

# Build one data-frame row per sentence and let plyr::ldply() stack the rows.
# Each row holds the number of tokens tagged /POS, /NEG and /AD, plus the
# first word tagged /FE ("" when the sentence has none).
plyr::ldply(test, function(sentence) {
  # Split once and keep the character vector of tokens. Calling grepl() on
  # the list returned by strsplit() tests the whole sentence as one string,
  # so it can never count more than one match.
  tokens <- strsplit(sentence, " ", fixed = TRUE)[[1]]

  # Number of tokens that end in "/<tag>".
  count_tag <- function(tag) sum(endsWith(tokens, paste0("/", tag)))

  fe_words <- sub("/FE$", "", tokens[endsWith(tokens, "/FE")])

  data.frame(
    POS = count_tag("POS"),
    NEG = count_tag("NEG"),
    AD = count_tag("AD"),
    FE = if (length(fe_words) == 0L) "" else fe_words[[1]]
  )
})
ldply(test,
function(t){

FE
`stringr` 中的 `str_count` 可以是一种方法,即

library(stringr)

# Tags to count; in the text each one is attached to a word as "/TAG".
tags <- c("POS", "NEG", "AD", "FE")

# One column per tag, one row per sentence. Matching "/TAG" at a word boundary
# avoids counting the same letters inside an ordinary word, and cbind() keeps
# the result a matrix even for a single sentence. The result is stored in m1
# because the follow-up step fills in its FE column.
m1 <- do.call(cbind, lapply(setNames(tags, tags), function(tag) {
  str_count(test, paste0("/", tag, "\\b"))
}))
m1
#     POS NEG AD FE
#[1,]   1   0  2  1
#[2,]   0   1  1  1
#[3,]   2   1  0  0
要在 `FE` 列中得到对应的单词,则

# Replace the FE count in m1 by the tagged word(s). m1 is expected to be the
# per-sentence count matrix with a column named "FE" (as printed below).
# The words are looked up sentence by sentence, so rows and words stay aligned
# even if a sentence has several "/FE" tokens (joined with ", "). Rows without
# any "/FE" token keep their count.
fe_words <- vapply(strsplit(test, " "), function(tokens) {
  tagged <- grep("/FE", tokens, value = TRUE)
  paste(sub("/.*", "", tagged), collapse = ", ")
}, character(1))
has_fe <- nzchar(fe_words)
m1[has_fe, "FE"] <- fe_words[has_fe]
m1
#     POS NEG AD  FE     
#[1,] "1" "0" "2" "voice"
#[2,] "0" "1" "1" "you"  
#[3,] "2" "1" "0" "0"  

m1[,'FE']
`stringr` 中的 `str_count` 可以是一种方法,即

library(stringr)

# Tags to count; in the text each one is attached to a word as "/TAG".
tags <- c("POS", "NEG", "AD", "FE")

# One column per tag, one row per sentence. Matching "/TAG" at a word boundary
# avoids counting the same letters inside an ordinary word, and cbind() keeps
# the result a matrix even for a single sentence. The result is stored in m1
# because the follow-up step fills in its FE column.
m1 <- do.call(cbind, lapply(setNames(tags, tags), function(tag) {
  str_count(test, paste0("/", tag, "\\b"))
}))
m1
#     POS NEG AD FE
#[1,]   1   0  2  1
#[2,]   0   1  1  1
#[3,]   2   1  0  0
要在 `FE` 列中得到对应的单词,则

# Replace the FE count in m1 by the tagged word(s). m1 is expected to be the
# per-sentence count matrix with a column named "FE" (as printed below).
# The words are looked up sentence by sentence, so rows and words stay aligned
# even if a sentence has several "/FE" tokens (joined with ", "). Rows without
# any "/FE" token keep their count.
fe_words <- vapply(strsplit(test, " "), function(tokens) {
  tagged <- grep("/FE", tokens, value = TRUE)
  paste(sub("/.*", "", tagged), collapse = ", ")
}, character(1))
has_fe <- nzchar(fe_words)
m1[has_fe, "FE"] <- fe_words[has_fe]
m1
#     POS NEG AD  FE     
#[1,] "1" "0" "2" "voice"
#[2,] "0" "1" "1" "you"  
#[3,] "2" "1" "0" "0"  

m1[,'FE']

如何处理 FE 变量中的列表?

@Rcoding 已修复该问题

如何处理 FE 变量中的列表?

@Rcoding 已修复该问题