Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/database/10.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 如何构建一个循环来导入许多数据帧并以相同的方式修改它们?_R_Database_Loops - Fatal编程技术网

R 如何构建一个循环来导入许多数据帧并以相同的方式修改它们?

R 如何构建一个循环来导入许多数据帧并以相同的方式修改它们?,r,database,loops,R,Database,Loops,从1996年1月到2018年12月,我每月有一个新闻数据帧,即264个数据帧。它们都有相同的结构,我需要以相同的方式修改它们。例如,我的一个数据帧的代码大致如下: #import data (only one column $V1) News.9601 <- read.delim("D:/Reuters/1996/News.RTRS.199601.0210.txt.gz", header=FALSE,quote = "") #split the first columns in mor

从1996年1月到2018年12月,我每月有一个新闻数据帧,即264个数据帧。它们都有相同的结构,我需要以相同的方式修改它们。例如,我的一个数据帧的代码大致如下:

# Import data (a single column, V1). The object name is lower-case everywhere:
# R is case-sensitive, so mixing `News.9601` and `news.9601` fails with
# "object not found".
news.9601 <- read.delim(
  "D:/Reuters/1996/News.RTRS.199601.0210.txt.gz",
  header = FALSE,
  quote = ""
)

# Split the first column: keep the piece that follows the first "mimeType"
# marker. vapply() yields a plain character column (NA when the marker is
# absent); a list column would make write.csv() fail later on.
news.9601$v2 <- vapply(
  strsplit(as.character(news.9601$V1), "\"mimeType\""),
  `[`,
  character(1),
  2L
)

# Select only those news that include "R:" (NA never matches).
news.9601 <- news.9601[grepl("R:", news.9601$v2), , drop = FALSE]

# Select only those news that include certain "tags" in v2. Every element of
# `tags_split` is collapsed into one alternation regex; the row indices hit by
# each tag group are concatenated in tag-group order, which reproduces the row
# order of the former rbind(A1, ..., A30) without assign() or a fixed count.
tag_hits <- lapply(tags_split, function(tag_group) {
  tag_rx <- gsub("[[:space:]]", "", paste(tag_group, collapse = "|"))
  which(grepl(tag_rx, news.9601$v2, perl = TRUE))
})
news.9601 <- news.9601[unique(unlist(tag_hits)), , drop = FALSE]

# Drop rows whose content is duplicated.
news.9601 <- news.9601[!duplicated(news.9601), , drop = FALSE]

# Some text analysis: `rx.app` is a matching rule; `approach` is the number of
# its matches in v2. regmatches() must receive the same vector that gregexpr()
# searched, so v2 is used for both.
# NOTE(review): the original passed an undefined `title_body` here - confirm
# that v2 is the intended text.
news.9601$approach <- lengths(
  regmatches(news.9601$v2, gregexpr(rx.app, news.9601$v2, perl = TRUE))
)

write.csv(news.9601, file = "news.9601.csv")
rm(news.9601, tag_hits)
#导入数据(仅一列$V1)
新闻0.9601%
变异(v2=lapply(strsplit(如.character(V1),“\”mimeType\”,“[”,2))
#仅选择包含“R:”的新闻:
news.9601=news.9601[grepl('R:',news.9601$v2),]
#仅选择在$v2中包含某些“标记”的新闻
(我在1:30){
tags_split1=粘贴(tags_split[[i]],collapse=“|”))
tags_split1=gsub(“[:space:][]”,“”,tags_split1)
nam=粘贴(“A”,i,sep=”“)
赋值(nam,news.9601[grepl(tags_split1,news.9601$v2,perl=T),]
)
}
news.9601=rbind(A1、A2、A3、A4、A5、A6、A7、A8、A9、A10、A11、A12、A13、A14、A15、A16、A17、,
A18、A19、A20、A21、A22、A23、A24、A25、A26、A27、A28、A29、A30)
news.9601=news.9601[!复制(news.9601),]
#一些文本分析,rx.app是一个匹配规则
news.9601=news.9601%>%

变异(方法

类似的方法应该有效:

# Full paths of every entry found directly inside the folder (no recursion
# into sub-directories).
files <- list.files(
  path = "folderpath/",
  full.names = TRUE,
  recursive = FALSE
)

这里是您可以使用的代码,使用文件列表

#' Import one gzipped news file, filter it, and save the result as CSV.
#'
#' Reads the single-column file, extracts the text following the first
#' "mimeType" marker into `v2`, keeps rows whose `v2` contains "R:" and matches
#' at least one tag group, drops duplicated rows, counts the matches of
#' `pattern` per row into `approach`, and writes
#' `<file name without .txt.gz>.csv` into the working directory.
#'
#' @param mypath Directory prefix. It is pasted directly in front of `mylist`,
#'   so it must end with a path separator (e.g. "D:/Reuters/1996/").
#' @param mylist Name of ONE file inside `mypath`; iterate over many files
#'   with `lapply(file_names, Format_Funtion, mypath = mypath)`.
#' @param tags List of character vectors; each element is collapsed into one
#'   alternation regex. Defaults to the global `tags_split`.
#' @param pattern Perl-compatible regex whose matches in `v2` are counted.
#'   Defaults to the global `rx.app`.
#' @return The name of the CSV file written, invisibly (the data itself is not
#'   returned, so looping over many files does not accumulate them in memory).
Format_Funtion <- function(mypath, mylist, tags = tags_split,
                           pattern = rx.app) {
  stopifnot(is.character(mylist), length(mylist) == 1L)

  mydata <- read.delim(paste0(mypath, mylist), header = FALSE, quote = "")

  # Split the first column: keep the piece after the first "mimeType" marker.
  # vapply() yields a plain character column (NA when the marker is absent);
  # a list column would make write.csv() fail.
  pieces <- strsplit(as.character(mydata$V1), "\"mimeType\"")
  mydata$v2 <- vapply(pieces, `[`, character(1), 2L)

  # Select only those news that include "R:" (NA never matches).
  mydata <- mydata[grepl("R:", mydata$v2), , drop = FALSE]

  # Select only those news that include certain tags in v2. Row indices are
  # concatenated in tag-group order, which reproduces the row order of the
  # former rbind(A1, ..., A30) without assign() or a hard-coded group count.
  hits <- lapply(tags, function(tag_group) {
    tag_rx <- gsub("[[:space:]]", "", paste(tag_group, collapse = "|"))
    which(grepl(tag_rx, mydata$v2, perl = TRUE))
  })
  mydata <- mydata[unique(unlist(hits)), , drop = FALSE]

  # Drop rows whose content is duplicated.
  mydata <- mydata[!duplicated(mydata), , drop = FALSE]

  # Some text analysis: count the matches of `pattern` in v2. regmatches()
  # must receive the same vector that gregexpr() searched.
  # NOTE(review): the original passed an undefined `title_body` here - confirm
  # that v2 is the intended text.
  mydata$approach <- lengths(
    regmatches(mydata$v2, gregexpr(pattern, mydata$v2, perl = TRUE))
  )

  # Escape the dots and anchor the suffix so only a trailing ".txt.gz" is
  # removed from the file name.
  out_file <- paste0(sub("\\.txt\\.gz$", "", mylist), ".csv")
  write.csv(mydata, file = out_file)
  invisible(out_file)
}

Format_function

@tdel 谢谢,这可能行得通,你为什么不把它作为答案发布呢?

我会发布答案,谢谢。

使用你的循环,我怎样才能将每个文件保存在一个单独的名称下?只需这样做,例如 `write.csv(file, file = "file.csv")` 就行了?

只需像这样使用 `paste0`:`write.csv(df, paste0(file, ".csv"))`,其中 `df` 是您要保存的最终数据帧,`paste0` 中的 `file` 用于以不同的名称保存每个文件。

谢谢!然后是 `lapply(mylist, Format_Function)`,是吗?

是的,很抱歉输入错误。请告诉我这是否有效;我写的时候没有数据,无法检查这是否适合你。此外,如果所有文件都在同一个文件夹中,上面的 `list.files` 方法也有效。
#' Import one gzipped news file, filter it, and save the result as CSV.
#'
#' Reads the single-column file, extracts the text following the first
#' "mimeType" marker into `v2`, keeps rows whose `v2` contains "R:" and matches
#' at least one tag group, drops duplicated rows, counts the matches of
#' `pattern` per row into `approach`, and writes
#' `<file name without .txt.gz>.csv` into the working directory.
#'
#' @param mypath Directory prefix. It is pasted directly in front of `mylist`,
#'   so it must end with a path separator (e.g. "D:/Reuters/1996/").
#' @param mylist Name of ONE file inside `mypath`; iterate over many files
#'   with `lapply(file_names, Format_Funtion, mypath = mypath)`.
#' @param tags List of character vectors; each element is collapsed into one
#'   alternation regex. Defaults to the global `tags_split`.
#' @param pattern Perl-compatible regex whose matches in `v2` are counted.
#'   Defaults to the global `rx.app`.
#' @return The name of the CSV file written, invisibly (the data itself is not
#'   returned, so looping over many files does not accumulate them in memory).
Format_Funtion <- function(mypath, mylist, tags = tags_split,
                           pattern = rx.app) {
  stopifnot(is.character(mylist), length(mylist) == 1L)

  mydata <- read.delim(paste0(mypath, mylist), header = FALSE, quote = "")

  # Split the first column: keep the piece after the first "mimeType" marker.
  # vapply() yields a plain character column (NA when the marker is absent);
  # a list column would make write.csv() fail.
  pieces <- strsplit(as.character(mydata$V1), "\"mimeType\"")
  mydata$v2 <- vapply(pieces, `[`, character(1), 2L)

  # Select only those news that include "R:" (NA never matches).
  mydata <- mydata[grepl("R:", mydata$v2), , drop = FALSE]

  # Select only those news that include certain tags in v2. Row indices are
  # concatenated in tag-group order, which reproduces the row order of the
  # former rbind(A1, ..., A30) without assign() or a hard-coded group count.
  hits <- lapply(tags, function(tag_group) {
    tag_rx <- gsub("[[:space:]]", "", paste(tag_group, collapse = "|"))
    which(grepl(tag_rx, mydata$v2, perl = TRUE))
  })
  mydata <- mydata[unique(unlist(hits)), , drop = FALSE]

  # Drop rows whose content is duplicated.
  mydata <- mydata[!duplicated(mydata), , drop = FALSE]

  # Some text analysis: count the matches of `pattern` in v2. regmatches()
  # must receive the same vector that gregexpr() searched.
  # NOTE(review): the original passed an undefined `title_body` here - confirm
  # that v2 is the intended text.
  mydata$approach <- lengths(
    regmatches(mydata$v2, gregexpr(pattern, mydata$v2, perl = TRUE))
  )

  # Escape the dots and anchor the suffix so only a trailing ".txt.gz" is
  # removed from the file name.
  out_file <- paste0(sub("\\.txt\\.gz$", "", mylist), ".csv")
  write.csv(mydata, file = out_file)
  invisible(out_file)
}
# Process every gzipped text file of one year's folder.
mypath <- "D:/Reuters/1996/"

# Bare file names only (no directory part): the function prepends `mypath`
# itself. The dots are escaped so that only a real ".txt.gz" suffix matches.
mylist <- list.files(path = mypath, pattern = "\\.txt\\.gz$")

# lapply() takes the vector to iterate over first and the function second;
# `mypath` is forwarded by name to every call. The function is referenced by
# the name it is actually defined under (`Format_Funtion`). invisible() keeps
# the list of per-file return values from being printed.
invisible(lapply(mylist, Format_Funtion, mypath = mypath))