R 按年度/十年创建每个项目的计数
标签:r, data.table
我在 data.table 中有如下数据:
> x<-df[sample(nrow(df), 10),]
> x
> Importer Exporter Date
1: Ecuador United Kingdom 2004-01-13
2: Mexico United States 2013-11-19
3: Australia United States 2006-08-11
4: United States United States 2009-05-04
5: India United States 2007-07-16
6: Guatemala Guatemala 2014-07-02
7: Israel Israel 2000-02-22
8: India United States 2014-02-11
9: Peru Peru 2007-03-26
10: Poland France 2014-09-15
到目前为止,我已经尝试了这篇帖子中建议的 aggregate 和 data.table 两种方法,但它们似乎都只能给出每年(或者我更感兴趣的每十年)进口商/出口商的总数,而不是每个国家各自的计数:
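> x$Decade <- year(x$Date) - year(x$Date) %% 10   # 注:原文此行在抓取/翻译时损坏,此处根据上下文重构
> importer_per_yr <- aggregate(Importer ~ Decade, FUN = length, data = x)
> importer_per_yr
  Decade Importer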
2 2000 6
3 2010 4
考虑到aggregate使用公式接口,我尝试添加另一个条件,但出现以下错误:
> importer_per_yr<-aggregate(Importer~ Decade + unique(Importer), FUN=length, data=x)
Error in model.frame.default(formula = Importer ~ Decade + :
variable lengths differ (found for 'unique(Importer)')
> importer_per_yr
我们可以使用 data.table 的方法来实现这一点:先用赋值运算符 `:=` 创建 “Decade”(十年)列,然后通过指定度量列(measure),用 `melt` 将数据从“宽”格式转换为“长”格式,再用 `dcast` 将其重塑回“宽”格式,其中 `fun.aggregate` 取 `length`:
# Tag every record with the first year of its decade (e.g. 2004 -> 2000).
# `:=` adds the column to `x` by reference.
x[, Decade := 10 * (year(Date) %/% 10)]

# Stack the Importer and Exporter columns into a single Country column
# (long format), then spread back to wide format with one row per
# Decade/Country pair, counting how often the country appears in each role.
dcast(
  melt(
    x,
    measure.vars = c("Importer", "Exporter"),
    value.name = "Country"
  ),
  Decade + Country ~ variable,
  fun.aggregate = length
)
# Decade Country Importer Exporter
# 1: 2000 Australia 1 0
# 2: 2000 Ecuador 1 0
# 3: 2000 India 1 0
# 4: 2000 Israel 1 1
# 5: 2000 Peru 1 1
# 6: 2000 United Kingdom 0 1
# 7: 2000 United States 1 3
# 8: 2010 France 0 1
# 9: 2010 Guatemala 1 1
#10: 2010 India 1 0
#11: 2010 Mexico 1 0
#12: 2010 Poland 1 0
#13: 2010 United States 0 2
我认为在 base R 中,可以将 `with` 与 `aggregate` 配合使用来实现:
# Build the sample trade records from inline CSV text. Character columns are
# read as factors and surrounding white space is stripped from each field.
my.data <- read.csv(
  text = '
Importer, Exporter, Date
Ecuador, United Kingdom, 2004-01-13
Mexico, United States, 2013-11-19
Australia, United States, 2006-08-11
United States, United States, 2009-05-04
India, United States, 2007-07-16
Guatemala, Guatemala, 2014-07-02
Israel, Israel, 2000-02-22
India, United States, 2014-02-11
Peru, Peru, 2007-03-26
Poland, France, 2014-09-15
',
  header = TRUE,
  stringsAsFactors = TRUE,
  strip.white = TRUE
)

# Parse the ISO-formatted date column into a Date vector.
my.data[["my.Date"]] <- as.Date(my.data[["Date"]], format = "%Y-%m-%d")

# Append the numeric year, month and day of every record.
my.data[["year"]] <- as.numeric(format(my.data[["my.Date"]], format = "%Y"))
my.data[["month"]] <- as.numeric(format(my.data[["my.Date"]], format = "%m"))
my.data[["day"]] <- as.numeric(format(my.data[["my.Date"]], format = "%d"))

# First year of the decade each record falls in (e.g. 2004 -> 2000).
my.data[["my.decade"]] <- 10 * (my.data[["year"]] %/% 10)

# Count the rows in each decade/country group, once with the country taken
# from the Importer column and once from the Exporter column. The grouping
# column is renamed to a shared "country" key so the two tables can be joined.
importer.count <- setNames(
  aggregate(
    cbind(count = Importer) ~ my.decade + Importer,
    data = my.data,
    FUN = NROW
  ),
  c("my.decade", "country", "importer.count")
)
exporter.count <- setNames(
  aggregate(
    cbind(count = Exporter) ~ my.decade + Exporter,
    data = my.data,
    FUN = NROW
  ),
  c("my.decade", "country", "exporter.count")
)

# Full outer join: keep decade/country pairs that occur in only one role.
my.counts <- merge(
  x = importer.count,
  y = exporter.count,
  by = c("my.decade", "country"),
  all = TRUE
)

# A pair missing from one side of the join means a count of zero for that role.
my.counts$importer.count <- replace(
  my.counts$importer.count, is.na(my.counts$importer.count), 0
)
my.counts$exporter.count <- replace(
  my.counts$exporter.count, is.na(my.counts$exporter.count), 0
)

my.counts
# my.decade country importer.count exporter.count
# 1 2000 Australia 1 0
# 2 2000 Ecuador 1 0
# 3 2000 India 1 0
# 4 2000 Israel 1 1
# 5 2000 Peru 1 1
# 6 2000 United States 1 3
# 7 2000 United Kingdom 0 1
# 8 2010 Guatemala 1 1
# 9 2010 India 1 0
# 10 2010 Mexico 1 0
# 11 2010 Poland 1 0
# 12 2010 United States 0 2
# 13 2010 France 0 1
my.data
# Tag every record with the first year of its decade (e.g. 2004 -> 2000).
# `:=` adds the column to `x` by reference.
x[, Decade := 10 * (year(Date) %/% 10)]

# Stack the Importer and Exporter columns into a single Country column
# (long format), then spread back to wide format with one row per
# Decade/Country pair, counting how often the country appears in each role.
dcast(
  melt(
    x,
    measure.vars = c("Importer", "Exporter"),
    value.name = "Country"
  ),
  Decade + Country ~ variable,
  fun.aggregate = length
)
# Decade Country Importer Exporter
# 1: 2000 Australia 1 0
# 2: 2000 Ecuador 1 0
# 3: 2000 India 1 0
# 4: 2000 Israel 1 1
# 5: 2000 Peru 1 1
# 6: 2000 United Kingdom 0 1
# 7: 2000 United States 1 3
# 8: 2010 France 0 1
# 9: 2010 Guatemala 1 1
#10: 2010 India 1 0
#11: 2010 Mexico 1 0
#12: 2010 Poland 1 0
#13: 2010 United States 0 2
# Build the sample trade records from inline CSV text. Character columns are
# read as factors and surrounding white space is stripped from each field.
my.data <- read.csv(
  text = '
Importer, Exporter, Date
Ecuador, United Kingdom, 2004-01-13
Mexico, United States, 2013-11-19
Australia, United States, 2006-08-11
United States, United States, 2009-05-04
India, United States, 2007-07-16
Guatemala, Guatemala, 2014-07-02
Israel, Israel, 2000-02-22
India, United States, 2014-02-11
Peru, Peru, 2007-03-26
Poland, France, 2014-09-15
',
  header = TRUE,
  stringsAsFactors = TRUE,
  strip.white = TRUE
)

# Parse the ISO-formatted date column into a Date vector.
my.data[["my.Date"]] <- as.Date(my.data[["Date"]], format = "%Y-%m-%d")

# Append the numeric year, month and day of every record.
my.data[["year"]] <- as.numeric(format(my.data[["my.Date"]], format = "%Y"))
my.data[["month"]] <- as.numeric(format(my.data[["my.Date"]], format = "%m"))
my.data[["day"]] <- as.numeric(format(my.data[["my.Date"]], format = "%d"))

# First year of the decade each record falls in (e.g. 2004 -> 2000).
my.data[["my.decade"]] <- 10 * (my.data[["year"]] %/% 10)

# Count the rows in each decade/country group, once with the country taken
# from the Importer column and once from the Exporter column. The grouping
# column is renamed to a shared "country" key so the two tables can be joined.
importer.count <- setNames(
  aggregate(
    cbind(count = Importer) ~ my.decade + Importer,
    data = my.data,
    FUN = NROW
  ),
  c("my.decade", "country", "importer.count")
)
exporter.count <- setNames(
  aggregate(
    cbind(count = Exporter) ~ my.decade + Exporter,
    data = my.data,
    FUN = NROW
  ),
  c("my.decade", "country", "exporter.count")
)

# Full outer join: keep decade/country pairs that occur in only one role.
my.counts <- merge(
  x = importer.count,
  y = exporter.count,
  by = c("my.decade", "country"),
  all = TRUE
)

# A pair missing from one side of the join means a count of zero for that role.
my.counts$importer.count <- replace(
  my.counts$importer.count, is.na(my.counts$importer.count), 0
)
my.counts$exporter.count <- replace(
  my.counts$exporter.count, is.na(my.counts$exporter.count), 0
)

my.counts
# my.decade country importer.count exporter.count
# 1 2000 Australia 1 0
# 2 2000 Ecuador 1 0
# 3 2000 India 1 0
# 4 2000 Israel 1 1
# 5 2000 Peru 1 1
# 6 2000 United States 1 3
# 7 2000 United Kingdom 0 1
# 8 2010 Guatemala 1 1
# 9 2010 India 1 0
# 10 2010 Mexico 1 0
# 11 2010 Poland 1 0
# 12 2010 United States 0 2
# 13 2010 France 0 1