R 从两列中提取数据
我有一个像这样的数据框。它指的是单词及其结构。R 从两列中提取数据(标签:r, subset, R, Subset)。我有一个像这样的数据框。它指的是单词及其结构 df。有趣的问题。我可能只是重新发明了用于查找这些结构的算法,但它似乎有效: df <- data.frame( word=c("pokkoitta", "demna", "ningatinggo", "tengkeam", "bampana", "njam"), structure=c("CvC:vvC:v", "CvCCv", "CvCvCvNCv", "CvNCvvC", "CvNCvCv", "NCvC"), stringsAsFactors=FALSE)
有趣的问题。我可能只是重新发明了用于查找这些结构的算法,但它似乎有效:
#' Split words into segments and label each segment with its structure class.
#'
#' The patterns are applied in row order, so multi-letter units (nasal +
#' consonant clusters, digraphs, geminates) must be listed before the
#' single-letter classes that would otherwise consume their letters.
#'
#' Two working copies of the words are kept in lock-step: in one, every match
#' is replaced by the padded match itself; in the other, by the padded label.
#' Both replacements are exactly 6 characters wide, so match positions found
#' in one copy are valid in the other.
#'
#' @param words Character vector of words. The patterns used below only match
#'   lower-case letters, and the upper-case labels written into the working
#'   copy rely on that to avoid being re-matched.
#' @param patterns Data frame with columns `str` (structure label) and `rex`
#'   (regular expression for that label), in priority order.
#' @return A list with one character matrix per word, with columns `word` and
#'   `structure` and one row per segment.
segment_words <- function(words, patterns) {
  xs <- xw <- words
  for (i in seq_len(nrow(patterns))) {
    rx <- gregexpr(patterns[["rex"]][i], xs)
    mc <- regmatches(xs, rx)
    # lapply() always yields a list. sapply() would collapse to a matrix when
    # every word has the same number of matches, which scrambled the
    # replacements across words.
    mc <- lapply(mc, function(m) {
      if (length(m) == 0L) m else format(paste("", m), width = 6)
    })
    regmatches(xw, rx) <- mc
    regmatches(xs, rx) <- paste("", format(patterns[["str"]][i], width = 5))
  }
  phon <- trimws(cbind(word = xw, structure = xs))
  # Each row becomes list(word = <segments>, structure = <labels>).
  phon <- apply(phon, 1, strsplit, " +")
  lapply(phon, function(x) do.call(cbind, x))
}

# Sample data: words and their expected structure strings.
df <- data.frame(
  word = c("pokkoitta", "demna", "ningatinggo", "tengkeam", "bampana", "njam"),
  structure = c("CvC:vvC:v", "CvCCv", "CvCvCvNCv", "CvNCvvC", "CvNCvCv", "NCvC"),
  stringsAsFactors = FALSE
)

# Structure labels and their regular expressions, most specific first.
pat <- data.frame(
  str = c("NC", "C", "C:", "C", "v:", "v"),
  rex = c(
    "nj|ngk|ngg|nc|nt|nd|mp|mb",
    "ng|sy|kh",
    "([b-df-hj-np-tv-xz])\\1+",
    "[b-df-hj-np-tv-z]",
    "(a|e|i|o|u)\\1+",
    "a|e|i|o|u"
  ),
  stringsAsFactors = FALSE
)

phon <- segment_words(df[["word"]], pat)
head(phon, 3)
# [[1]]
# word structure
# [1,] "p" "C"
# [2,] "o" "v"
# [3,] "kk" "C:"
# [4,] "o" "v"
# [5,] "i" "v"
# [6,] "tt" "C:"
# [7,] "a" "v"
#
# [[2]]
# word structure
# [1,] "d" "C"
# [2,] "e" "v"
# [3,] "m" "C"
# [4,] "n" "C"
# [5,] "a" "v"
#
# [[3]]
# word structure
# [1,] "n" "C"
# [2,] "i" "v"
# [3,] "ng" "C"
# [4,] "a" "v"
# [5,] "t" "C"
# [6,] "i" "v"
# [7,] "ngg" "NC"
# [8,] "o" "v"
有趣的问题。我可能只是重新发明了用于查找这些结构的算法,但它似乎有效:
#' Split words into segments and label each segment with its structure class.
#'
#' The patterns are applied in row order, so multi-letter units (nasal +
#' consonant clusters, digraphs, geminates) must be listed before the
#' single-letter classes that would otherwise consume their letters.
#'
#' Two working copies of the words are kept in lock-step: in one, every match
#' is replaced by the padded match itself; in the other, by the padded label.
#' Both replacements are exactly 6 characters wide, so match positions found
#' in one copy are valid in the other.
#'
#' @param words Character vector of words. The patterns used below only match
#'   lower-case letters, and the upper-case labels written into the working
#'   copy rely on that to avoid being re-matched.
#' @param patterns Data frame with columns `str` (structure label) and `rex`
#'   (regular expression for that label), in priority order.
#' @return A list with one character matrix per word, with columns `word` and
#'   `structure` and one row per segment.
segment_words <- function(words, patterns) {
  xs <- xw <- words
  for (i in seq_len(nrow(patterns))) {
    rx <- gregexpr(patterns[["rex"]][i], xs)
    mc <- regmatches(xs, rx)
    # lapply() always yields a list. sapply() would collapse to a matrix when
    # every word has the same number of matches, which scrambled the
    # replacements across words.
    mc <- lapply(mc, function(m) {
      if (length(m) == 0L) m else format(paste("", m), width = 6)
    })
    regmatches(xw, rx) <- mc
    regmatches(xs, rx) <- paste("", format(patterns[["str"]][i], width = 5))
  }
  phon <- trimws(cbind(word = xw, structure = xs))
  # Each row becomes list(word = <segments>, structure = <labels>).
  phon <- apply(phon, 1, strsplit, " +")
  lapply(phon, function(x) do.call(cbind, x))
}

# Sample data: words and their expected structure strings.
df <- data.frame(
  word = c("pokkoitta", "demna", "ningatinggo", "tengkeam", "bampana", "njam"),
  structure = c("CvC:vvC:v", "CvCCv", "CvCvCvNCv", "CvNCvvC", "CvNCvCv", "NCvC"),
  stringsAsFactors = FALSE
)

# Structure labels and their regular expressions, most specific first.
pat <- data.frame(
  str = c("NC", "C", "C:", "C", "v:", "v"),
  rex = c(
    "nj|ngk|ngg|nc|nt|nd|mp|mb",
    "ng|sy|kh",
    "([b-df-hj-np-tv-xz])\\1+",
    "[b-df-hj-np-tv-z]",
    "(a|e|i|o|u)\\1+",
    "a|e|i|o|u"
  ),
  stringsAsFactors = FALSE
)

phon <- segment_words(df[["word"]], pat)
head(phon, 3)
# [[1]]
# word structure
# [1,] "p" "C"
# [2,] "o" "v"
# [3,] "kk" "C:"
# [4,] "o" "v"
# [5,] "i" "v"
# [6,] "tt" "C:"
# [7,] "a" "v"
#
# [[2]]
# word structure
# [1,] "d" "C"
# [2,] "e" "v"
# [3,] "m" "C"
# [4,] "n" "C"
# [5,] "a" "v"
#
# [[3]]
# word structure
# [1,] "n" "C"
# [2,] "i" "v"
# [3,] "ng" "C"
# [4,] "a" "v"
# [5,] "t" "C"
# [6,] "i" "v"
# [7,] "ngg" "NC"
# [8,] "o" "v"
请添加一个样本数据集,并按字母之间的顺序列出匹配规则。您是否使用了任何工具来查找这些结构?我认为没有一个简单的正则表达式可以解决这个问题,因为长度不是一一对应的。
请添加一个样本数据集,并按字母之间的顺序列出匹配规则。你没有使用任何工具来查找结构吗?我不认为有一个简单的正则表达式能解决这个问题,因为长度并非一一对应。