在 R 中创建一个数据框,其中包含每个句子的标签信息
我有几个带有意见标签的句子,我想要一个数据框来显示信息制作一个数据帧,其中包含R中每个句子的标记信息,r,R,我有几个带有意见标签的句子,我想要一个数据框来显示信息 test = c("I very/AD very/AD like/POS the voice/FE","I really really/AD hate/NEG you/FE","I love/POS and adore/POS him although he is rude/NEG") > test [1] "I very/AD very/AD like/POS the voice/FE" [2] "I r
# Example sentences. Tagged tokens have the form word/TAG; the tags that occur
# in this sample are POS, NEG, AD and FE.
test <- c(
  "I very/AD very/AD like/POS the voice/FE",
  "I really really/AD hate/NEG you/FE",
  "I love/POS and adore/POS him although he is rude/NEG"
)
> test
[1] "I very/AD very/AD like/POS the voice/FE"
[2] "I really really/AD hate/NEG you/FE"
[3] "I love/POS and adore/POS him although he is rude/NEG"
> test = strsplit(test, ' ')
[[1]]
[1] "I" "very/AD" "very/AD" "like/POS" "the" "voice/FE"
[[2]]
[1] "I" "really" "really/AD" "hate/NEG" "you/FE"
[[3]]
[1] "I" "love/POS" "and" "adore/POS" "him" "although" "he" "is" "rude/NEG"
有了上面的数据,我想让一个数据框包含每个句子的标签信息,如下所示
POS NEG AD FE
1 1 0 2 voice
2 0 1 1 you
3 2 1 0
POS NEG AD FE
1 3 2 3 voice
2 3 2 3 you
现在,我所能做的只是创建一个包含如下汇总信息的数据框
POS NEG AD FE
1 1 0 2 voice
2 0 1 1 you
3 2 1 0
POS NEG AD FE
1 3 2 3 voice
2 3 2 3 you
我可以通过使用下面的代码来实现(我编写了这些代码,以防您使用它们来获得提示)
再次说明,我的目标是创建一个数据框,其中包含每个句子的重要标签信息。(答案中的代码应当适用于任意数量的句子,因为实际测试数据可能包含更多句子。)
我们可以试试
library(stringr)

# Tags to count; this vector also fixes the column order of `res`.
tag_levels <- c("POS", "NEG", "AD", "FE")

# One row per sentence, one column per tag. A tagged token looks like
# "word/TAG"; tokens without "/" and tags outside `tag_levels` are ignored.
# Counting through a factor with fixed levels yields a zero for every absent
# tag, so a sentence without any tagged token gives a row of zeros instead of
# an error. vapply() pins the result to exactly one number per tag.
res <- t(vapply(
  strsplit(test, " "),
  function(tokens) {
    tagged <- grep("/", tokens, fixed = TRUE, value = TRUE)
    tags <- sub(".*/", "", tagged)
    counts <- table(factor(tags, levels = tag_levels))
    setNames(as.numeric(counts), tag_levels)
  },
  setNames(numeric(length(tag_levels)), tag_levels)
))

# Replace the FE count (4th column) with the first word tagged /FE in each
# sentence; sentences without such a word get NA. Assigning text coerces the
# whole matrix to character.
res[, 4] <- str_extract(test, "\\w+(?=/FE)")
res
我们可以试试
library(stringr)

# Tags to count; this vector also fixes the column order of `res`.
tag_levels <- c("POS", "NEG", "AD", "FE")

# One row per sentence, one column per tag. A tagged token looks like
# "word/TAG"; tokens without "/" and tags outside `tag_levels` are ignored.
# Counting through a factor with fixed levels yields a zero for every absent
# tag, so a sentence without any tagged token gives a row of zeros instead of
# an error. vapply() pins the result to exactly one number per tag.
res <- t(vapply(
  strsplit(test, " "),
  function(tokens) {
    tagged <- grep("/", tokens, fixed = TRUE, value = TRUE)
    tags <- sub(".*/", "", tagged)
    counts <- table(factor(tags, levels = tag_levels))
    setNames(as.numeric(counts), tag_levels)
  },
  setNames(numeric(length(tag_levels)), tag_levels)
))

# Replace the FE count (4th column) with the first word tagged /FE in each
# sentence; sentences without such a word get NA. Assigning text coerces the
# whole matrix to character.
res[, 4] <- str_extract(test, "\\w+(?=/FE)")
res
如果需要 data.frame,可以使用 plyr::ldply 函数:
# Build a one-row data.frame per sentence and let ldply() stack the rows.
plyr::ldply(test, function(sentence) {
  # Split once into tokens. The counts must be taken over this character
  # vector: calling grepl() on the list returned by strsplit() tests the
  # sentence as a single element, so no count could ever exceed 1.
  tokens <- strsplit(sentence, " ", fixed = TRUE)[[1]]

  # Number of tokens carrying the given tag. Matching "/TAG" instead of the
  # bare tag name keeps untagged words containing those letters out.
  count_tag <- function(tag) {
    sum(grepl(paste0("/", tag), tokens, fixed = TRUE))
  }

  # Words tagged /FE, with the tag stripped.
  fe_tokens <- tokens[grepl("/FE", tokens, fixed = TRUE)]
  fe_words <- gsub("/FE", "", fe_tokens, fixed = TRUE)

  data.frame(
    POS = count_tag("POS"),
    NEG = count_tag("NEG"),
    AD = count_tag("AD"),
    # Only the first /FE word is reported; "" marks a sentence without one.
    FE = if (length(fe_words) == 0) "" else fe_words[1]
  )
})
ldply(test,
function(t){
FE
如果需要 data.frame,可以使用 plyr::ldply 函数:
# Build a one-row data.frame per sentence and let ldply() stack the rows.
plyr::ldply(test, function(sentence) {
  # Split once into tokens. The counts must be taken over this character
  # vector: calling grepl() on the list returned by strsplit() tests the
  # sentence as a single element, so no count could ever exceed 1.
  tokens <- strsplit(sentence, " ", fixed = TRUE)[[1]]

  # Number of tokens carrying the given tag. Matching "/TAG" instead of the
  # bare tag name keeps untagged words containing those letters out.
  count_tag <- function(tag) {
    sum(grepl(paste0("/", tag), tokens, fixed = TRUE))
  }

  # Words tagged /FE, with the tag stripped.
  fe_tokens <- tokens[grepl("/FE", tokens, fixed = TRUE)]
  fe_words <- gsub("/FE", "", fe_tokens, fixed = TRUE)

  data.frame(
    POS = count_tag("POS"),
    NEG = count_tag("NEG"),
    AD = count_tag("AD"),
    # Only the first /FE word is reported; "" marks a sentence without one.
    FE = if (length(fe_words) == 0) "" else fe_words[1]
  )
})
ldply(test,
function(t){
FE
stringr 中的 str_count 可以是一种方法,即
library(stringr)

# Count the occurrences of each tag per sentence (rows = sentences, columns =
# tags). Matching "/TAG" rather than the bare tag name keeps ordinary words
# that merely contain those letters from being counted. The result is stored
# in `m1` because the FE-replacement step that follows operates on `m1`.
m1 <- vapply(
  c("POS", "NEG", "AD", "FE"),
  function(tag) str_count(test, paste0("/", tag)),
  integer(length(test))
)
m1
# POS NEG AD FE
#[1,] 1 0 2 1
#[2,] 0 1 1 1
#[3,] 2 1 0 0
要在 FE 列中获取对应的单词,则
# Collect every token tagged /FE across all sentences, in sentence order.
fe_tokens <- unlist(lapply(strsplit(test, " "), function(tokens) {
  grep("/FE", tokens, value = TRUE)
}))

# Strip the tag, keeping only the word in front of the slash.
fe_words <- gsub("/.*", "", fe_tokens)

# Rows whose FE count is exactly 1 get the word in place of the count; other
# rows keep their count. Assigning text turns the matrix into character.
has_single_fe <- m1[, "FE"] == 1
m1[, "FE"] <- replace(m1[, "FE"], has_single_fe, fe_words)
m1
# POS NEG AD FE
#[1,] "1" "0" "2" "voice"
#[2,] "0" "1" "1" "you"
#[3,] "2" "1" "0" "0"
m1[,'FE']
stringr 中的 str_count 可以是一种方法,即
library(stringr)

# Count the occurrences of each tag per sentence (rows = sentences, columns =
# tags). Matching "/TAG" rather than the bare tag name keeps ordinary words
# that merely contain those letters from being counted. The result is stored
# in `m1` because the FE-replacement step that follows operates on `m1`.
m1 <- vapply(
  c("POS", "NEG", "AD", "FE"),
  function(tag) str_count(test, paste0("/", tag)),
  integer(length(test))
)
m1
# POS NEG AD FE
#[1,] 1 0 2 1
#[2,] 0 1 1 1
#[3,] 2 1 0 0
要在 FE 列中获取对应的单词,则
# Collect every token tagged /FE across all sentences, in sentence order.
fe_tokens <- unlist(lapply(strsplit(test, " "), function(tokens) {
  grep("/FE", tokens, value = TRUE)
}))

# Strip the tag, keeping only the word in front of the slash.
fe_words <- gsub("/.*", "", fe_tokens)

# Rows whose FE count is exactly 1 get the word in place of the count; other
# rows keep their count. Assigning text turns the matrix into character.
has_single_fe <- m1[, "FE"] == 1
m1[, "FE"] <- replace(m1[, "FE"], has_single_fe, fe_words)
m1
# POS NEG AD FE
#[1,] "1" "0" "2" "voice"
#[2,] "0" "1" "1" "you"
#[3,] "2" "1" "0" "0"
m1[,'FE']
如何处理 FE 变量中的列表?
@Rcoding 已修复该问题
如何处理 FE 变量中的列表?
@Rcoding 已修复该问题