R:循环遍历某一列中的代码列表

R循环通过列中的代码列表,r,for-loop,split,data.table,R,For Loop,Split,Data.table,我有一个很大的数据集,我需要根据区域列将其拆分为多个数据集,然后为每个区域执行一些代码集,并获得每个区域的输出数据。我试图使用for循环来实现这一点,但它似乎不起作用。如何才能做到这一点。下面是我正在尝试的代码- for (a in c('10','11','14','20','30','40','50','61','64')) { paste0("data3_add_area",a) <- data3_add[AREACODE == a,] paste0("in_add_area

我有一个很大的数据集,需要根据区域列将其拆分为多个数据集,然后对每个区域执行同一组代码,并得到每个区域的输出数据。我试图用 for 循环来实现这一点,但它似乎不起作用。怎样才能做到这一点?下面是我正在尝试的代码:

# Attempted per-area loop from the question (kept as posted; it does not run).
# The intent is to build one subset of each input per area code and store a
# per-area result under a name derived from the code.
for (a in c('10','11','14','20','30','40','50','61','64'))
{
  # paste0() yields a character string, not a variable, so using it as the
  # target of `<-` does not create an object called data3_add_area<a>.
  paste0("data3_add_area",a) <- data3_add[AREACODE == a,]
  paste0("in_add_area",a) <- in_add[AREA_CODE == a,]

  # `$` is applied to the string returned by paste0(), not to a table; this is
  # the "$ operator is invalid for atomic vectors" situation.
  source1 <- paste0("data3_add_area",a)$name
  source2 <- paste0("in_add_area",a)$name
  # Placeholder, not R code: presumably the elided per-area processing that
  # produces `output` -- TODO confirm.
  .....
  # Same problem as above: a string cannot be the target of an assignment.
  paste0("match_",a) <- output

}
for (a in c('10','11','14','20','30','40','50','61','64'))
{

paste0("data3_add_area", a) …

这是一个完整的脚本,我稍微更改了输入集,以演示距离矩阵包含多行和一列的情况:

library("stringdist")
library("data.table")

# ---- Example inputs ----

# First source: five addresses with their area code and year.
Address1 <- c(
  "786, GALI NO 5, XYZ",
  "rambo, 45, strret 4, atlast, pqr",
  "23/4, 23RD FLOOR, STREET 2, ABC-E, PQR",
  "45-B, GALI NO5, XYZ",
  "HECTIC, 99 STREET, PQR"
)
AREACODE <- c("10", "10", "14", "20", "30")
Year1 <- 2001:2005

# Second source: the same five addresses listed twice (ten rows), spread over
# more area codes than the first source.
Address2 <- rep(
  c(
    "abc, pqr, xyz",
    "786, GALI NO 4 XYZ",
    "45B, GALI NO 5, XYZ",
    "del, 546, strret2, towards east, pqr",
    "23/4, STREET 2, PQR"
  ),
  times = 2
)
Year2 <- 2001:2010
AREA_CODE <- c("10", "10", "10", "20", "30", "40", "50", "61", "64", "99")

data1 <- data.table(Address1 = Address1, Year1 = Year1, AREACODE = AREACODE)
data2 <- data.table(Address2 = Address2, Year2 = Year2, AREA_CODE = AREA_CODE)
# zero-padded, six-digit row identifier added by reference
data2[, unique_id := sprintf("%06d", seq_len(nrow(data2)))]

# stringdist distance methods to evaluate
methods <- c(
  "osa", "lv", "dl", "hamming", "lcs",
  "qgram", "cosine", "jaccard", "jw"
)

# ---- Split by area code ----

# one data.table per area code, held in a list named by the code
sdata1 <- split(data1, f = data1[["AREACODE"]])
sdata2 <- split(data2, f = data2[["AREA_CODE"]])

# every area code that occurs in at least one of the two inputs
codes <- unique(c(names(sdata1), names(sdata2)))


#' Pairwise string distances between two character vectors, per method.
#'
#' Comparison is case-insensitive: both inputs are lower-cased first.
#'
#' @param x Character vector; element `i` corresponds to column `i` of every
#'   returned matrix.
#' @param y Character vector; element `j` corresponds to row `j` of every
#'   returned matrix.
#' @param methods Character vector of distance method names, each passed as
#'   the `method` argument of `stringdistmatrix()`.
#' @return A list named by `methods`. Each element is a
#'   `length(y)` x `length(x)` matrix whose `[j, i]` cell is the distance
#'   between `tolower(x[i])` and `tolower(y[j])`.
dist_calc <- function(x, y, methods) {
  # lower-case once, instead of once per cell and per method
  x_lower <- tolower(x)
  y_lower <- tolower(y)
  has_no_pairs <- length(x) == 0L || length(y) == 0L

  distance_methods <- list()
  for (m in methods) {
    distance_methods[[m]] <- if (has_no_pairs) {
      # nothing to compare: keep the documented shape without calling out
      matrix(NA, ncol = length(x), nrow = length(y))
    } else {
      # stringdistmatrix(a, b) has one row per element of `a` and one column
      # per element of `b`; transpose so rows follow `y` and columns follow
      # `x` while the argument order of each comparison stays (x, y)
      t(stringdistmatrix(x_lower, y_lower, method = m))
    }
  }

  # a named list of matrices, one per method
  distance_methods
}


# For every area code, pair each Address2 entry with its closest Address1
# entry under every distance method. lapply() over a self-named vector keeps
# the codes as list names and always returns a list.
distances <- lapply(setNames(nm = codes), function(code) {
  x <- sdata1[[code]]$Address1
  y <- sdata2[[code]]$Address2

  # skip codes that are absent from either input; `||` because both operands
  # are scalar
  if (is.null(x) || is.null(y)) {
    return(NULL)
  }

  # one matrix per method: rows follow y, columns follow x
  dist_mats <- dist_calc(x, y, methods)

  # for each method, find the minimum distance and its column for every row
  method_mins <- lapply(setNames(nm = methods), function(meth) {
    mat <- dist_mats[[meth]]

    # which.min() returns integer(0) for a row with no non-NA value; map that
    # to NA so the result is always one integer per row
    min_col <- vapply(seq_len(nrow(mat)), function(r) {
      idx <- which.min(mat[r, ])
      if (length(idx) == 0L) NA_integer_ else idx[[1L]]
    }, integer(1))
    min_val <- apply(mat, 1, min)

    # the string, its best match and the distance between them
    data.table(name = y, match_name = x[min_col], adist = min_val)
  })

  # combine into a single data.table, keeping the method name as a column
  rbindlist(method_mins, idcol = "method")
})

# drop the NULL entries explicitly before binding instead of relying on
# rbindlist() to skip them
all_distances <- rbindlist(
  Filter(function(x) !is.null(x), distances),
  idcol = "AREACODE"
)

#    AREACODE  method                                 name                       match_name       adist
# 1:       10     osa                        abc, pqr, xyz              786, GALI NO 5, XYZ 12.00000000
# 2:       10     osa                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  2.00000000
# 3:       10     osa                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
# 4:       10      lv                        abc, pqr, xyz              786, GALI NO 5, XYZ 12.00000000
# 5:       10      lv                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  2.00000000
# 6:       10      lv                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
# 7:       10      dl                        abc, pqr, xyz              786, GALI NO 5, XYZ 12.00000000
# 8:       10      dl                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  2.00000000
# 9:       10      dl                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
#10:       10 hamming                        abc, pqr, xyz              786, GALI NO 5, XYZ         Inf
#11:       10 hamming                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ         Inf
#12:       10 hamming                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
#13:       10     lcs                        abc, pqr, xyz              786, GALI NO 5, XYZ 18.00000000
#14:       10     lcs                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  3.00000000
#15:       10     lcs                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  6.00000000
#16:       10   qgram                        abc, pqr, xyz              786, GALI NO 5, XYZ 16.00000000
#17:       10   qgram                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  3.00000000
#18:       10   qgram                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  6.00000000
#19:       10  cosine                        abc, pqr, xyz rambo, 45, strret 4, atlast, pqr  0.31400566
#20:       10  cosine                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  0.04653741
#21:       10  cosine                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  0.08784068
#22:       10 jaccard                        abc, pqr, xyz rambo, 45, strret 4, atlast, pqr  0.63157895
#23:       10 jaccard                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  0.12500000
#24:       10 jaccard                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  0.29411765
#25:       10      jw                        abc, pqr, xyz              786, GALI NO 5, XYZ  0.42535425
#26:       10      jw                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  0.05360624
#27:       10      jw                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  0.10526316
#28:       20     osa del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 29.00000000
#29:       20      lv del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 29.00000000
#30:       20      dl del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 29.00000000
#31:       20 hamming del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ         Inf
#32:       20     lcs del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 41.00000000
#33:       20   qgram del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 35.00000000
#34:       20  cosine del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ  0.46394373
#35:       20 jaccard del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ  0.72000000
#36:       20      jw del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ  0.54863548
#37:       30     osa                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 11.00000000
#38:       30      lv                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 11.00000000
#39:       30      dl                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 11.00000000
#40:       30 hamming                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR         Inf
#41:       30     lcs                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 15.00000000
#42:       30   qgram                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 13.00000000
#43:       30  cosine                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR  0.21927994
#44:       30 jaccard                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR  0.50000000
#45:       30      jw                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR  0.31607428
#    AREACODE  method                                 name                       match_name       adist
library("stringdist")
library("data.table")

Address1 …
您能提供一些可复现的数据吗?
dput(head(data2_add, 20))
例如,可能还有 in_add 的
。您可能是在寻找
split
,它可以根据某个向量的值将 data.frame 拆分为由 data.frame 组成的列表。
myList …
@Frank,感谢您指出。我已经更新了示例代码。感谢您的解决方案。这很有趣,但照着复制并不像我想的那么简单。是否有必要把这些区域放进一个列表?能否告诉我为什么需要使用双方括号 [[ ]]?我已经添加了想要对每个区域执行的代码。您能建议我需要在哪些地方添加 [[ ]] 才能得到每个区域的输出吗?很抱歉,我一直在尝试复制,但遇到了一些错误,例如:$ 运算符对原子向量无效。您可以查看这个问题,了解何时使用
[
以及何时使用
[[
感谢您复制代码。我尝试运行您的代码,但在代码的最后一行出现错误:"rbindlist(distance, idcol = "AREACODE"): attempt to set index 5/5 in SET_STRING_ELT"。我在网上搜索了这个错误,看到有帖子建议更新 data.table;我更新之后仍然出现这个错误。您知道是什么导致了这个错误吗?
我在 data.table 1.10.4、R 3.2.5、Win 10 中看到了同样的错误。
rbindlist(Filter(function(x) !is.null(x), distance), idcol = "AREACODE")
我想这样就能正常工作了,@user1412
谢谢 @Frank,看起来我使用的是一个过时的包版本,其中
NULL
会被悄悄删除,感谢您的关注。
#install.packages('stringdist')
library(stringdist)

# stringdist distance methods to compare
distance.methods <- c("osa", "lv", "dl", "hamming", "lcs", "qgram", "cosine",
                      "jaccard", "jw")

# One distance matrix per method: rows index source1.devices, columns index
# source2.devices. seq_along() keeps the loops empty when an input has no
# names, where 1:length() would iterate over c(1, 0).
dist.methods <- list()
for (m in seq_along(distance.methods)) {
  dist.name.enh <- matrix(NA, ncol = length(source2.devices$name),
                          nrow = length(source1.devices$name))
  for (i in seq_along(source2.devices$name)) {
    for (j in seq_along(source1.devices$name)) {
      dist.name.enh[j, i] <- stringdist(
        tolower(source2.devices[i, ]$name),
        tolower(source1.devices[j, ]$name),
        method = distance.methods[m]
      )
      #adist.enhance(source2.devices[i,]$name,source1.devices[j,]$name)
    }
  }
  dist.methods[[distance.methods[m]]] <- dist.name.enh
}

# For every method and every source1 row, record the first source2 row that
# attains the row minimum. Rows are collected in a preallocated list and bound
# once, instead of growing a data.frame with rbind() on every iteration.
match.rows <- vector(
  "list",
  length(dist.methods) * length(source1.devices$name)
)
k <- 0L
for (m in seq_along(dist.methods)) {
  dist.matrix <- as.matrix(dist.methods[[distance.methods[m]]])
  min.name.enh <- apply(dist.matrix, 1, base::min)
  for (i in seq_len(nrow(dist.matrix))) {
    s2.i <- match(min.name.enh[i], dist.matrix[i, ])
    s1.i <- i
    k <- k + 1L
    match.rows[[k]] <- data.frame(
      s2.i = s2.i,
      s1.i = s1.i,
      s2name = source2.devices[s2.i, ]$name,
      s1name = source1.devices[s1.i, ]$name,
      adist = min.name.enh[i],
      method = distance.methods[m]
    )
  }
}
# The earlier version prepended each new row, so the newest row came first;
# rev() reproduces that row order. The result is NULL when there are no rows.
match.s1.s2.enh <- do.call(rbind, rev(match.rows))

# Let's have a look at the results
library(reshape2)
matched.names.matrix <- dcast(
  match.s1.s2.enh,
  s2.i + s1.i + s2name + s1name ~ method,
  value.var = "adist"
)
View(matched.names.matrix)
library("stringdist")
library("data.table")

# ---- Example inputs ----

# First source: five addresses with their area code and year.
Address1 <- c(
  "786, GALI NO 5, XYZ",
  "rambo, 45, strret 4, atlast, pqr",
  "23/4, 23RD FLOOR, STREET 2, ABC-E, PQR",
  "45-B, GALI NO5, XYZ",
  "HECTIC, 99 STREET, PQR"
)
AREACODE <- c("10", "10", "14", "20", "30")
Year1 <- 2001:2005

# Second source: the same five addresses listed twice (ten rows), spread over
# more area codes than the first source.
Address2 <- rep(
  c(
    "abc, pqr, xyz",
    "786, GALI NO 4 XYZ",
    "45B, GALI NO 5, XYZ",
    "del, 546, strret2, towards east, pqr",
    "23/4, STREET 2, PQR"
  ),
  times = 2
)
Year2 <- 2001:2010
AREA_CODE <- c("10", "10", "10", "20", "30", "40", "50", "61", "64", "99")

data1 <- data.table(Address1 = Address1, Year1 = Year1, AREACODE = AREACODE)
data2 <- data.table(Address2 = Address2, Year2 = Year2, AREA_CODE = AREA_CODE)
# zero-padded, six-digit row identifier added by reference
data2[, unique_id := sprintf("%06d", seq_len(nrow(data2)))]

# stringdist distance methods to evaluate
methods <- c(
  "osa", "lv", "dl", "hamming", "lcs",
  "qgram", "cosine", "jaccard", "jw"
)

# ---- Split by area code ----

# one data.table per area code, held in a list named by the code
sdata1 <- split(data1, f = data1[["AREACODE"]])
sdata2 <- split(data2, f = data2[["AREA_CODE"]])

# every area code that occurs in at least one of the two inputs
codes <- unique(c(names(sdata1), names(sdata2)))


#' Pairwise string distances between two character vectors, per method.
#'
#' Comparison is case-insensitive: both inputs are lower-cased first.
#'
#' @param x Character vector; element `i` corresponds to column `i` of every
#'   returned matrix.
#' @param y Character vector; element `j` corresponds to row `j` of every
#'   returned matrix.
#' @param methods Character vector of distance method names, each passed as
#'   the `method` argument of `stringdistmatrix()`.
#' @return A list named by `methods`. Each element is a
#'   `length(y)` x `length(x)` matrix whose `[j, i]` cell is the distance
#'   between `tolower(x[i])` and `tolower(y[j])`.
dist_calc <- function(x, y, methods) {
  # lower-case once, instead of once per cell and per method
  x_lower <- tolower(x)
  y_lower <- tolower(y)
  has_no_pairs <- length(x) == 0L || length(y) == 0L

  distance_methods <- list()
  for (m in methods) {
    distance_methods[[m]] <- if (has_no_pairs) {
      # nothing to compare: keep the documented shape without calling out
      matrix(NA, ncol = length(x), nrow = length(y))
    } else {
      # stringdistmatrix(a, b) has one row per element of `a` and one column
      # per element of `b`; transpose so rows follow `y` and columns follow
      # `x` while the argument order of each comparison stays (x, y)
      t(stringdistmatrix(x_lower, y_lower, method = m))
    }
  }

  # a named list of matrices, one per method
  distance_methods
}


# For every area code, pair each Address2 entry with its closest Address1
# entry under every distance method. lapply() over a self-named vector keeps
# the codes as list names and always returns a list.
distances <- lapply(setNames(nm = codes), function(code) {
  x <- sdata1[[code]]$Address1
  y <- sdata2[[code]]$Address2

  # skip codes that are absent from either input; `||` because both operands
  # are scalar
  if (is.null(x) || is.null(y)) {
    return(NULL)
  }

  # one matrix per method: rows follow y, columns follow x
  dist_mats <- dist_calc(x, y, methods)

  # for each method, find the minimum distance and its column for every row
  method_mins <- lapply(setNames(nm = methods), function(meth) {
    mat <- dist_mats[[meth]]

    # which.min() returns integer(0) for a row with no non-NA value; map that
    # to NA so the result is always one integer per row
    min_col <- vapply(seq_len(nrow(mat)), function(r) {
      idx <- which.min(mat[r, ])
      if (length(idx) == 0L) NA_integer_ else idx[[1L]]
    }, integer(1))
    min_val <- apply(mat, 1, min)

    # the string, its best match and the distance between them
    data.table(name = y, match_name = x[min_col], adist = min_val)
  })

  # combine into a single data.table, keeping the method name as a column
  rbindlist(method_mins, idcol = "method")
})

# drop the NULL entries explicitly before binding instead of relying on
# rbindlist() to skip them
all_distances <- rbindlist(
  Filter(function(x) !is.null(x), distances),
  idcol = "AREACODE"
)

#    AREACODE  method                                 name                       match_name       adist
# 1:       10     osa                        abc, pqr, xyz              786, GALI NO 5, XYZ 12.00000000
# 2:       10     osa                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  2.00000000
# 3:       10     osa                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
# 4:       10      lv                        abc, pqr, xyz              786, GALI NO 5, XYZ 12.00000000
# 5:       10      lv                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  2.00000000
# 6:       10      lv                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
# 7:       10      dl                        abc, pqr, xyz              786, GALI NO 5, XYZ 12.00000000
# 8:       10      dl                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  2.00000000
# 9:       10      dl                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
#10:       10 hamming                        abc, pqr, xyz              786, GALI NO 5, XYZ         Inf
#11:       10 hamming                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ         Inf
#12:       10 hamming                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  3.00000000
#13:       10     lcs                        abc, pqr, xyz              786, GALI NO 5, XYZ 18.00000000
#14:       10     lcs                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  3.00000000
#15:       10     lcs                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  6.00000000
#16:       10   qgram                        abc, pqr, xyz              786, GALI NO 5, XYZ 16.00000000
#17:       10   qgram                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  3.00000000
#18:       10   qgram                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  6.00000000
#19:       10  cosine                        abc, pqr, xyz rambo, 45, strret 4, atlast, pqr  0.31400566
#20:       10  cosine                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  0.04653741
#21:       10  cosine                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  0.08784068
#22:       10 jaccard                        abc, pqr, xyz rambo, 45, strret 4, atlast, pqr  0.63157895
#23:       10 jaccard                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  0.12500000
#24:       10 jaccard                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  0.29411765
#25:       10      jw                        abc, pqr, xyz              786, GALI NO 5, XYZ  0.42535425
#26:       10      jw                   786, GALI NO 4 XYZ              786, GALI NO 5, XYZ  0.05360624
#27:       10      jw                  45B, GALI NO 5, XYZ              786, GALI NO 5, XYZ  0.10526316
#28:       20     osa del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 29.00000000
#29:       20      lv del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 29.00000000
#30:       20      dl del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 29.00000000
#31:       20 hamming del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ         Inf
#32:       20     lcs del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 41.00000000
#33:       20   qgram del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ 35.00000000
#34:       20  cosine del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ  0.46394373
#35:       20 jaccard del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ  0.72000000
#36:       20      jw del, 546, strret2, towards east, pqr              45-B, GALI NO5, XYZ  0.54863548
#37:       30     osa                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 11.00000000
#38:       30      lv                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 11.00000000
#39:       30      dl                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 11.00000000
#40:       30 hamming                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR         Inf
#41:       30     lcs                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 15.00000000
#42:       30   qgram                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR 13.00000000
#43:       30  cosine                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR  0.21927994
#44:       30 jaccard                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR  0.50000000
#45:       30      jw                  23/4, STREET 2, PQR           HECTIC, 99 STREET, PQR  0.31607428
#    AREACODE  method                                 name                       match_name       adist