R 按年度/十年创建每个项目的计数_R_Data.table

R 按年度/十年创建每个项目的计数

R 按年度/十年创建每个项目的计数,r,data.table,R,Data.table,我在data.table中有如下数据： > x<-df[sample(nrow(df), 10),] > x > Importer Exporter Date 1: Ecuador United Kingdom 2004-01-13 2: Mexico

我在data.table中有如下数据：

> x<-df[sample(nrow(df), 10),]
> x      

>                   Importer                 Exporter       Date

 1:                 Ecuador                  United Kingdom 2004-01-13
 2:                  Mexico                   United States 2013-11-19
 3:               Australia                   United States 2006-08-11
 4:           United States                   United States 2009-05-04
 5:                   India                   United States 2007-07-16
 6:               Guatemala                       Guatemala 2014-07-02
 7:                  Israel                          Israel 2000-02-22
 8:                   India                   United States 2014-02-11
 9:                    Peru                            Peru 2007-03-26
10:                  Poland                          France 2014-09-15

到目前为止，我已经尝试了这篇帖子中建议的 aggregate 和 data.table 方法，但这两种方法似乎都只能给出每年（或者我更感兴趣的每十年）进口商/出口商的总数：

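> x$Decade <- year(x$Date) - year(x$Date) %% 10
> importer_per_yr <- aggregate(Importer ~ Decade, FUN = length, data = x)
> importer_per_yr
  Decade                Importer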
2   2000                       6
3   2010                       4

考虑到aggregate使用公式接口，我尝试添加另一个条件，但出现以下错误：

> importer_per_yr<-aggregate(Importer~ Decade + unique(Importer), FUN=length, data=x)
Error in model.frame.default(formula = Importer ~ Decade +  : 
  variable lengths differ (found for 'unique(Importer)')

> importer_per_yr

我们可以使用 data.table 方法来实现这一点：先通过赋值（:=）创建“Decade”（十年）列，然后通过指定度量列（measure），用 melt 将数据从“宽”格式转换为“长”格式，再用 dcast 将其重塑回“宽”格式，并将 fun.aggregate 指定为 length。

# Add a Decade column to `x` by reference: the year of each Date rounded
# down to the start of its decade (e.g. 2004 -> 2000).
x[, Decade := {
  yr <- year(Date)
  yr - yr %% 10
}]

# Stack Importer and Exporter into a single Country column (long format),
# then spread back to wide with one row per Decade/Country pair; each cell
# holds the number of long-format rows for that role.
dcast(
  melt(
    x,
    measure.vars = c("Importer", "Exporter"),
    value.name = "Country"
  ),
  Decade + Country ~ variable,
  fun.aggregate = length
)
#     Decade        Country Importer Exporter
# 1:   2000      Australia        1        0
# 2:   2000        Ecuador        1        0
# 3:   2000          India        1        0
# 4:   2000         Israel        1        1
# 5:   2000           Peru        1        1
# 6:   2000 United Kingdom        0        1
# 7:   2000  United States        1        3
# 8:   2010         France        0        1
# 9:   2010      Guatemala        1        1
#10:   2010          India        1        0
#11:   2010         Mexico        1        0
#12:   2010         Poland        1        0
#13:   2010  United States        0        2

我认为在 base R 中，with 可以与 aggregate 配合使用：
# Rebuild the ten sample trade records from inline CSV text. Character
# fields are read as factors and surrounding whitespace is stripped.
my.data <- read.csv(
  text = '
        Importer,             Exporter,           Date
         Ecuador,       United Kingdom,     2004-01-13
          Mexico,        United States,     2013-11-19
       Australia,        United States,     2006-08-11
   United States,        United States,     2009-05-04
           India,        United States,     2007-07-16
       Guatemala,            Guatemala,     2014-07-02
          Israel,               Israel,     2000-02-22
           India,        United States,     2014-02-11
            Peru,                 Peru,     2007-03-26
          Poland,               France,     2014-09-15
',
  header = TRUE,
  stringsAsFactors = TRUE,
  strip.white = TRUE
)

# Parse the Date column into a Date vector, then break it into numeric parts.
my.data$my.Date <- as.Date(my.data$Date, format = "%Y-%m-%d")
my.data$year <- as.numeric(format(my.data$my.Date, format = "%Y"))
my.data$month <- as.numeric(format(my.data$my.Date, format = "%m"))
my.data$day <- as.numeric(format(my.data$my.Date, format = "%d"))

# Round each year down to the first year of its decade (2004 -> 2000).
my.data$my.decade <- (my.data$year %/% 10) * 10

# Count rows per decade/country, once for each trade role. The cbind() on the
# left-hand side only serves to name the resulting value column "count".
importer.count <- aggregate(
  cbind(count = Importer) ~ my.decade + Importer,
  data = my.data,
  FUN = NROW
)
exporter.count <- aggregate(
  cbind(count = Exporter) ~ my.decade + Exporter,
  data = my.data,
  FUN = NROW
)

# Give both tables the same key column names so they can be joined.
names(importer.count) <- c("my.decade", "country", "importer.count")
names(exporter.count) <- c("my.decade", "country", "exporter.count")

# Full outer join: keep decade/country pairs that appear in only one role.
my.counts <- merge(
  importer.count,
  exporter.count,
  by = c("my.decade", "country"),
  all = TRUE
)

# A pair missing from one side of the join means zero records in that role.
my.counts$importer.count <- replace(
  my.counts$importer.count, is.na(my.counts$importer.count), 0
)
my.counts$exporter.count <- replace(
  my.counts$exporter.count, is.na(my.counts$exporter.count), 0
)

my.counts

#    my.decade        country importer.count exporter.count
# 1       2000      Australia              1              0
# 2       2000        Ecuador              1              0
# 3       2000          India              1              0
# 4       2000         Israel              1              1
# 5       2000           Peru              1              1
# 6       2000  United States              1              3
# 7       2000 United Kingdom              0              1
# 8       2010      Guatemala              1              1
# 9       2010          India              1              0
# 10      2010         Mexico              1              0
# 11      2010         Poland              1              0
# 12      2010  United States              0              2
# 13      2010         France              0              1

my.data
# Add a Decade column to `x` by reference: the year of each Date rounded
# down to the start of its decade (e.g. 2004 -> 2000).
x[, Decade := {
  yr <- year(Date)
  yr - yr %% 10
}]

# Stack Importer and Exporter into a single Country column (long format),
# then spread back to wide with one row per Decade/Country pair; each cell
# holds the number of long-format rows for that role.
dcast(
  melt(
    x,
    measure.vars = c("Importer", "Exporter"),
    value.name = "Country"
  ),
  Decade + Country ~ variable,
  fun.aggregate = length
)
#     Decade        Country Importer Exporter
# 1:   2000      Australia        1        0
# 2:   2000        Ecuador        1        0
# 3:   2000          India        1        0
# 4:   2000         Israel        1        1
# 5:   2000           Peru        1        1
# 6:   2000 United Kingdom        0        1
# 7:   2000  United States        1        3
# 8:   2010         France        0        1
# 9:   2010      Guatemala        1        1
#10:   2010          India        1        0
#11:   2010         Mexico        1        0
#12:   2010         Poland        1        0
#13:   2010  United States        0        2

# Rebuild the ten sample trade records from inline CSV text. Character
# fields are read as factors and surrounding whitespace is stripped.
my.data <- read.csv(
  text = '
        Importer,             Exporter,           Date
         Ecuador,       United Kingdom,     2004-01-13
          Mexico,        United States,     2013-11-19
       Australia,        United States,     2006-08-11
   United States,        United States,     2009-05-04
           India,        United States,     2007-07-16
       Guatemala,            Guatemala,     2014-07-02
          Israel,               Israel,     2000-02-22
           India,        United States,     2014-02-11
            Peru,                 Peru,     2007-03-26
          Poland,               France,     2014-09-15
',
  header = TRUE,
  stringsAsFactors = TRUE,
  strip.white = TRUE
)

# Parse the Date column into a Date vector, then break it into numeric parts.
my.data$my.Date <- as.Date(my.data$Date, format = "%Y-%m-%d")
my.data$year <- as.numeric(format(my.data$my.Date, format = "%Y"))
my.data$month <- as.numeric(format(my.data$my.Date, format = "%m"))
my.data$day <- as.numeric(format(my.data$my.Date, format = "%d"))

# Round each year down to the first year of its decade (2004 -> 2000).
my.data$my.decade <- (my.data$year %/% 10) * 10

# Count rows per decade/country, once for each trade role. The cbind() on the
# left-hand side only serves to name the resulting value column "count".
importer.count <- aggregate(
  cbind(count = Importer) ~ my.decade + Importer,
  data = my.data,
  FUN = NROW
)
exporter.count <- aggregate(
  cbind(count = Exporter) ~ my.decade + Exporter,
  data = my.data,
  FUN = NROW
)

# Give both tables the same key column names so they can be joined.
names(importer.count) <- c("my.decade", "country", "importer.count")
names(exporter.count) <- c("my.decade", "country", "exporter.count")

# Full outer join: keep decade/country pairs that appear in only one role.
my.counts <- merge(
  importer.count,
  exporter.count,
  by = c("my.decade", "country"),
  all = TRUE
)

# A pair missing from one side of the join means zero records in that role.
my.counts$importer.count <- replace(
  my.counts$importer.count, is.na(my.counts$importer.count), 0
)
my.counts$exporter.count <- replace(
  my.counts$exporter.count, is.na(my.counts$exporter.count), 0
)

my.counts

#    my.decade        country importer.count exporter.count
# 1       2000      Australia              1              0
# 2       2000        Ecuador              1              0
# 3       2000          India              1              0
# 4       2000         Israel              1              1
# 5       2000           Peru              1              1
# 6       2000  United States              1              3
# 7       2000 United Kingdom              0              1
# 8       2010      Guatemala              1              1
# 9       2010          India              1              0
# 10      2010         Mexico              1              0
# 11      2010         Poland              1              0
# 12      2010  United States              0              2
# 13      2010         France              0              1