R-将循环中的向量绑定到一个data.frame中
我正在处理一个大型数据集。我的目标是获得特定国家随时间发生的特定事件(如数据集中编码的)的总和。这个数据集太大了,我必须用一个函数按月加载它。 数据来自GDELT数据集,可在此处获得:我已将csv转换为Rdata,以便更快地读取和写入。这是一个包含57个不同变量的数据集R-将循环中的向量绑定到一个data.frame中,r,R,我正在处理一个大型数据集。我的目标是获得特定国家随时间发生的特定事件(如数据集中编码的)的总和。这个数据集太大了,我必须用一个函数按月加载它。 数据来自GDELT数据集,可在此处获得:我已将csv转换为Rdata,以便更快地读取和写入。这是一个包含57个不同变量的数据集 # Create empty dataframes for all countries to later store data in. Countries <- c("MAR","DZA","TUN","LBY","EGY
# Create empty dataframes for all countries to later store data in.
Countries <- c("MAR","DZA","TUN","LBY","EGY","ISR",
"JOR","SYR","TUR","GEO","UKR","RUS","BLR")
loadNames <- function(CountryName) {
a <- data.frame()
assign(CountryName, a, pos = .GlobalEnv)
}
lapply(Countries,loadNames)
loadMonth <- function(MonthName) {
pb <- txtProgressBar(min = 0, max = total, initial = 0, char = "=", style = 1, width = 10)
# Load the month.
load(paste("/Users/mennoschellekens/Dropbox/HCSS-workinprogress/GDELT/Rdata/",MonthName,".RData", sep = ""), envir=environment())
colnames(Month) <- names(Header.57)
# Create a subset of relevant data for faster looping.
y <- subset(Month, ((Actor1CountryCode == "SYR" | Actor1CountryCode =="MAR" | Actor1CountryCode =="DZA" | Actor1CountryCode == "TUN" | Actor1CountryCode == "LBY" | Actor1CountryCode == "EGY" | Actor1CountryCode == "ISR" | Actor1CountryCode == "JOR" | Actor1CountryCode == "TUR" | Actor1CountryCode == "GEO" | Actor1CountryCode == "UKR" | Actor1CountryCode == "RUS" | Actor1CountryCode == "BLR") & (Actor2CountryCode == "SYR" | Actor2CountryCode == "MAR" | Actor2CountryCode == "DZA" | Actor2CountryCode == "TUN" | Actor2CountryCode == "LBY" | Actor2CountryCode == "EGY" | Actor2CountryCode == "ISR" | Actor2CountryCode == "JOR" | Actor2CountryCode == "TUR" | Actor2CountryCode == "GEO" | Actor2CountryCode == "UKR" | Actor2CountryCode == "RUS" | Actor2CountryCode == "BLR")))
#Define the events I want.
QuadCat <- c(1,2,3,4)
# Define the countries I want.
CountryString <- c("MAR","DZA", "TUN","LBY","EGY","ISR",
"JOR","SYR","TUR","GEO","UKR","RUS","BLR")
CountryData <- c(MAR,DZA,TUN,LBY,EGY,ISR,JOR,SYR,TUR,GEO,UKR,RUS,BLR)
# I want to check the above events for each country, using the function 'Check Events' with an embedded 'for loop'.
CheckEvents <- function(CountryData,CountryString) {
x <- subset(y, ((Actor1CountryCode == CountryString) & (Actor2CountryCode == CountryString)))
# This is the problem:
for (Y in QuadCat) {
e[[Y]] <- (sum(x$QuadClass == Y))
e <- rbind(CountryData,c(e))
assign(CountryString, as.data.frame(e), pos = .GlobalEnv)
}
}
mapply(CheckEvents, CountryData = CountryData, CountryString = CountryString)
} ###### END
#为所有国家/地区创建空数据框,以便以后在其中存储数据。
国家我认为下面的列表将满足您的需求。然后,您可以根据需要对其进行重塑。然而,您的示例代码令人难以置信地困惑,并且涉及一些潜在的危险元素,例如在函数内部使用assign
和subset
。您似乎还有很多多余的代码。我想所有这些都是给你带来麻烦的原因
Countries <- c("MAR","DZA","TUN","LBY","EGY","ISR",
"JOR","SYR","TUR","GEO","UKR","RUS","BLR")
loadMonth <- function(MonthName) {
pb <- txtProgressBar(min = 0, max = total, initial = 0, char = "=", style = 1, width = 10)
# Load the month
load(paste("/Users/mennoschellekens/Dropbox/HCSS-workinprogress/GDELT/Rdata/",MonthName,".RData", sep = ""), envir=environment())
colnames(Month) <- names(Header.57)
y <- Month[(Month$Actor1CountryCode %in% Countries) &
(Month$Actor2CountryCode %in% Countries),] # subset
CheckEvents <- function(CountryString) {
x <- y[(y$Actor1CountryCode == CountryString) &
(y$Actor2CountryCode == CountryString),]
# 1:4 below was your values of QuadCat
sapply(1:4, function(Y) sum(x$QuadClass == Y)) # should return a vector
}
# build list of vectors from `CheckEvents`, one for each country
out <- lapply(Countries, CheckEvents)
names(out) <- Countries
return(out)
}
loadMonth("January") # get the list for one month; not sure how you have months named
未定义您试图获取其子集的对象月份。加载名称函数似乎也不必要,或者至少效率低下。因为它是用Rdata格式化的,加载它已经在我的工作区中创建了对象“月份”。因此,这段代码对我来说是有效的,没有定义对象。你能发布一些数据,或者至少发布一些类似的数据吗?另外,e
在哪里定义?请阅读关于如何提供最小可复制示例的说明。这非常有帮助,谢谢!我有一个后续问题。如何保存每个国家/地区的矢量,每个矢量(对应于每个月)成为一行?我希望以162行的对象“SYR”结束(我的162个月)。尝试类似于do.call(rbind,object)
的操作,其中object
是loadMonth()函数的输出。