Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/76.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用R为每行应用多个参数_R - Fatal编程技术网

使用R为每行应用多个参数

使用R为每行应用多个参数,r,R,我需要帮助,从段落中获得具有给定关键字的句子,并去除不必要的信息 下面是我拥有的文件的示例 Heading Years Text Head1 2015 <rrrt> I am a boy and I <rrr2> like a girl <t44> from my class. She is pretty. /rr /r /r I am cute. Head2 2015 She is cute. Sh

我需要帮助,从段落中获得具有给定关键字的句子,并去除不必要的信息

下面是我拥有的文件的示例

Heading     Years    Text
Head1       2015     <rrrt> I am a boy and I <rrr2> like a girl <t44> from my class. She is pretty. /rr /r    /r I am cute.
Head2       2015     She is cute. She is beautiful.
Head3       2014         Hi, I am Jane. I play guitar. May is my friend. 
先谢谢你

更新:

Heading     Text
Apple       "Jane is pretty." Good afternoon
Orange      Tom said she is pretty. Also she is kind hearted. Tom listened in class.
Pear        Added Lim, He is a great guy...and clever. Mary turned her head away.
我想要得到的输出是:

Heading     Text
Apple       "Jane is pretty."
Orange      Tom said she is pretty. Also she is kind hearted.
Pear        Added Lim, He is a great guy...and clever.

我想捕捉人们说的话。谢谢。

我们可以在每个句子的结尾处拆分 "Text" 列(`strsplit`),得到一个 `list`;用 `grep` 提取其中包含 "am" 的句子;再用 `stack` 把这个 `list` 转换为 `data.frame`;最后用 `merge` 与原始数据集合并。

# Split every Text entry into sentences: break on the whitespace that
# follows a literal period.
sentence_list <- strsplit(df1$Text, "(?<=[.])(?=\\s*)\\s+", perl = TRUE)

# Keep only the sentences containing the standalone word "am", and label
# each list element with the Heading of the row it came from.
am_sentences <- lapply(
  sentence_list,
  function(s) grep("\\bam\\b", s, value = TRUE)
)
names(am_sentences) <- df1$Heading

# stack() yields columns (values, ind); put the heading column first and
# rename both columns to match the original data (Heading, Text).
df2 <- stack(am_sentences)[c("ind", "values")]
colnames(df2) <- colnames(df1)[c(1, 3)]

# Re-attach Years by joining on the shared Heading column.
res <- merge(df1[1:2], df2)
res
#  Heading Years                                        Text
#1   Head1  2015 I am a boy and I like a girl from my class.
#2   Head1  2015                                  I am cute.
#3   Head3  2014                              Hi, I am Jane.
数据
df1

我们可以在每个句子的结尾处拆分 "Text" 列(`strsplit`),得到一个 `list`;用 `grep` 提取其中包含 "am" 的句子;再用 `stack` 把这个 `list` 转换为 `data.frame`;最后用 `merge` 与原始数据集合并。

# Step 1: one character vector of sentences per row of df1 (split on the
# whitespace following a period).
pieces <- strsplit(df1$Text, "(?<=[.])(?=\\s*)\\s+", perl = TRUE)

# Step 2: within each row, retain the sentences that contain the word "am";
# name the list by Heading so stack() can carry the heading along.
hits <- setNames(
  lapply(pieces, grep, pattern = "\\bam\\b", value = TRUE),
  df1$Heading
)

# Step 3: long format, heading column first, then reuse the original
# column names for Heading and Text.
stacked <- stack(hits)
df2 <- stacked[2:1]
colnames(df2) <- colnames(df1)[c(1, 3)]

# Step 4: bring Years back in via a merge on Heading.
res <- merge(df1[1:2], df2)
res
#  Heading Years                                        Text
#1   Head1  2015 I am a boy and I like a girl from my class.
#2   Head1  2015                                  I am cute.
#3   Head3  2014                              Hi, I am Jane.
数据
df1
head
head hi@RonakShah,谢谢你的帮助。我在运行for循环时出错。错误是:
[.data.frame
(df,a,1:2)中的错误:对象“a”未找到Ohhk。我需要将变量声明为数组。我已更新了代码。它可以工作,感谢您的帮助:)hi@RonakShah,感谢您的帮助。我在运行for循环时出错。错误是:
[.data.frame
(df,a,1:2)中的错误:object'a'没有找到Ohhk。我需要声明一个变量为数组。我已经更新了代码。它可以工作了,谢谢你的帮助:)@akrun它可以工作了!非常感谢:)@poppp你能再描述一点吗。它不是很清楚。另外,这个字符串的dput将有助于获得正确的结构。@poppp你能把它作为一个新问题发布吗更新的帖子没有显示任何模式。在第一行中,我们在
之间提取< /代码>,但这不是第二和第三的情况。也请考虑把它作为一个单独的问题。“第一个Poppp是很容易的,但是第二和第三是没有任何模式的。你可以把第一行作为一个新的问题张贴吗?”AkRun,它工作!非常感谢你:“Poppp你能再描述一点吗?还不太清楚。ing将有助于获得正确的结构。@poppp你能将其作为新问题发布吗。@poppp更新后的帖子没有显示任何模式。在第一行中,我们在
之间提取,但这不是第二和第三的情况。也请考虑把它作为一个单独的问题来发布。@ POPPP第一个很简单,但是第二和第三没有任何模式。
# Clean the raw text before splitting it into sentences: drop "<...>" tags
# (e.g. <rrrt>, <t44>) and "/r", "/rr" markers, each together with any
# whitespace that follows. Plain "<" and ">" are used so the pattern means
# the same thing in every regex engine ("\\<" / "\\>" are word boundaries
# outside perl = TRUE).
v1 <- gsub("<[^>]+>\\s*|/r+\\s*", "", df1N$Text, perl = TRUE)

# NOTE(review): the original page had a stray output fragment here
# ("Hi, I am Jane. Head3") which is not R code; kept only as this comment.

# Split the cleaned text into sentences (on whitespace after a period),
# keep the sentences containing the word "am", and label each list element
# with its Heading.
sentences <- strsplit(v1, "(?<=[.])(?=\\s*)\\s+", perl = TRUE)
am_sentences <- setNames(
  lapply(sentences, grep, pattern = "\\bam\\b", value = TRUE),
  df1N$Heading
)

# stack() returns (values, ind); reverse to (ind, values) and rename to the
# original Heading / Text column names.
df2N <- stack(am_sentences)[2:1]
colnames(df2N) <- colnames(df1N)[c(1, 3)]

# Join Years back on by Heading.
res1 <- merge(df1N[1:2], df2N)
res1
#  Heading Years                                        Text
#1   Head1  2015 I am a boy and I like a girl from my class.
#2   Head1  2015                                  I am cute.
#3   Head3  2014                              Hi, I am Jane.
# Example data (already cleaned): one row per heading with its year and the
# free text to search. Text is kept as character, not factor.
df1 <- data.frame(
  Heading = c("Head1", "Head2", "Head3"),
  Years = c(2015L, 2015L, 2014L),
  Text = c(
    "I am a boy and I like a girl from my class. She is pretty. I am cute.",
    "She is cute. She is beautiful.",
    "Hi, I am Jane. I play guitar. May is my friend."
  ),
  stringsAsFactors = FALSE
)

# Example data (raw): same rows as the cleaned example, but the first Text
# entry still contains the "<...>" tags and "/r" markers to be stripped.
df1N <- data.frame(
  Heading = c("Head1", "Head2", "Head3"),
  Years = c(2015L, 2015L, 2014L),
  Text = c(
    "<rrrt> I am a boy and I <rrr2> like a girl <t44> from my class. She is pretty. /rr /r    /r I am cute.",
    "She is cute. She is beautiful.",
    "Hi, I am Jane. I play guitar. May is my friend."
  ),
  stringsAsFactors = FALSE
)
# Example input. data.frame() names the columns after these vectors
# (head, years, Text).
head <- c("Head1", "Head2", "Head3")
years <- c(2015, 2015, 2014)
Text <- c(
  "I am a boy and I like a girl from my class. She is pretty. I am cute.",
  "She is cute. She is beautiful.",
  "Hi, I am Jane. I play guitar. May is my friend."
)
df <- data.frame(head, years, Text)

# strsplit() does not work on factors, so make sure Text is character.
# This has to come AFTER `df` is created: before that, `df` is the base
# function stats::df and `df$Text` fails.
df$Text <- as.character(df$Text)

# Split each row's text into sentences at literal periods, remembering
# which row of `df` every sentence came from.
sentences_by_row <- strsplit(df$Text, "[.]")
words <- unlist(sentences_by_row)
source_row <- rep(seq_along(sentences_by_row), lengths(sentences_by_row))

# Keep the sentences containing the standalone word "am" (word boundaries
# avoid hits inside words such as "name").
has_am <- grepl("\\bam\\b", words)
test <- words[has_am]

# Row index in `df` for each kept sentence. Taken from the split itself
# rather than by searching df$Text for the sentence, so regex
# metacharacters or repeated sentences cannot produce wrong or multiple
# matches, and an empty result is handled without a loop.
a <- source_row[has_am]

newdf <- data.frame(df[a, 1:2], test)

newdf
#head years                                       test
#1   Head1  2015 I am a boy and I like a girl from my class
#1.1 Head1  2015                                  I am cute
#3   Head3  2014                              Hi, I am Jane