R 在数据帧中插入缺失的时间观测值_R_Time Series_Dplyr_Forecasting

R 在数据帧中插入缺失的时间观测值

R 在数据帧中插入缺失的时间观测值,r,time-series,dplyr,forecasting,R,Time Series,Dplyr,Forecasting,我有一个数据框： zz <- "Product Quarter Million AAA 2013-Q3 81.1 AAA 2013-Q4 50.5 AAA 2014-Q1 81.9 AAA 2014-Q4 78.3 BBB 2013-Q3 29.9 BBB 2013-Q4 17 BBB 2014-Q3 87.4 BBB 2014-Q4 63 CCC 2013-Q4 41.1 CCC 2014-Q1 59.1 CCC 2014-Q2 110.7 CCC 2014-Q3 127"

我有一个数据框：

# Sample data: quarterly figures (in millions) per product. Some
# Product/Quarter combinations are intentionally absent.
zz <- "Product    Quarter   Million
AAA 2013-Q3 81.1
AAA 2013-Q4 50.5
AAA 2014-Q1 81.9
AAA 2014-Q4 78.3
BBB 2013-Q3 29.9
BBB 2013-Q4 17
BBB 2014-Q3 87.4
BBB 2014-Q4 63
CCC 2013-Q4 41.1
CCC 2014-Q1 59.1
CCC 2014-Q2 110.7
CCC 2014-Q3 127"

# Parse the whitespace-delimited text (first line is the header), then
# drop the temporary string so that only `df` is left behind.
df <- read.table(header = TRUE, text = zz)
rm(zz)

您可以使用 reshape2 软件包执行此操作：

library(reshape2)

# Complete the Product x Quarter grid by casting to wide form (one column
# per quarter; combinations absent from the input become NA) and melting
# straight back to long form.
# - value.var is named explicitly so dcast() does not have to guess the
#   value column.
# - variable.name / value.name restore the original column names instead of
#   melt()'s default "variable" / "value".
# Add `fill = 0` to the dcast() call if the gaps should be 0 rather than NA.
df <- melt(
  dcast(df, Product ~ Quarter, value.var = "Million"),
  id.vars = "Product",
  variable.name = "Quarter",
  value.name = "Million"
)

library(reshape2)
df <- melt(dcast(df, Product ~ Quarter))

您可以使用 reshape2 软件包：
library(reshape2)

# Complete the Product x Quarter grid by casting to wide form (one column
# per quarter; combinations absent from the input become NA) and melting
# straight back to long form.
# - value.var is named explicitly so dcast() does not have to guess the
#   value column.
# - variable.name / value.name restore the original column names instead of
#   melt()'s default "variable" / "value".
# Add `fill = 0` to the dcast() call if the gaps should be 0 rather than NA.
df <- melt(
  dcast(df, Product ~ Quarter, value.var = "Million"),
  id.vars = "Product",
  variable.name = "Quarter",
  value.name = "Million"
)

library(reshape2)
df <- melt(dcast(df, Product ~ Quarter))

您可以尝试：
library(data.table)
# One chained expression, read left to right:
#   1. setDT() turns df into a data.table and setkey() keys it by
#      Product and Quarter.
#   2. [CJ(...)] joins against the cross product of the distinct products
#      and quarters, so every combination gets a row (Million is NA where
#      df had no observation).
#   3. [!df, Million := 0] assigns 0 to the rows that have no match in df.
#   4. The trailing [] prints the result (see the output below).
setkey(setDT(df), Product, Quarter)[CJ(unique(Product), unique(Quarter))][!df, Million:=0][]

#    Product Quarter Million
# 1:     AAA 2013-Q3    81.1
# 2:     AAA 2013-Q4    50.5
# 3:     AAA 2014-Q1    81.9
# 4:     AAA 2014-Q2     0.0
# 5:     AAA 2014-Q3     0.0
# 6:     AAA 2014-Q4    78.3
# 7:     BBB 2013-Q3    29.9
# 8:     BBB 2013-Q4    17.0
# 9:     BBB 2014-Q1     0.0
#10:     BBB 2014-Q2     0.0
#11:     BBB 2014-Q3    87.4
#12:     BBB 2014-Q4    63.0
#13:     CCC 2013-Q3     0.0
#14:     CCC 2013-Q4    41.1
#15:     CCC 2014-Q1    59.1
#16:     CCC 2014-Q2   110.7
#17:     CCC 2014-Q3   127.0
#18:     CCC 2014-Q4     0.0

您可以尝试：
library(data.table)
# One chained expression, read left to right:
#   1. setDT() turns df into a data.table and setkey() keys it by
#      Product and Quarter.
#   2. [CJ(...)] joins against the cross product of the distinct products
#      and quarters, so every combination gets a row (Million is NA where
#      df had no observation).
#   3. [!df, Million := 0] assigns 0 to the rows that have no match in df.
#   4. The trailing [] prints the result (see the output below).
setkey(setDT(df), Product, Quarter)[CJ(unique(Product), unique(Quarter))][!df, Million:=0][]

#    Product Quarter Million
# 1:     AAA 2013-Q3    81.1
# 2:     AAA 2013-Q4    50.5
# 3:     AAA 2014-Q1    81.9
# 4:     AAA 2014-Q2     0.0
# 5:     AAA 2014-Q3     0.0
# 6:     AAA 2014-Q4    78.3
# 7:     BBB 2013-Q3    29.9
# 8:     BBB 2013-Q4    17.0
# 9:     BBB 2014-Q1     0.0
#10:     BBB 2014-Q2     0.0
#11:     BBB 2014-Q3    87.4
#12:     BBB 2014-Q4    63.0
#13:     CCC 2013-Q3     0.0
#14:     CCC 2013-Q4    41.1
#15:     CCC 2014-Q1    59.1
#16:     CCC 2014-Q2   110.7
#17:     CCC 2014-Q3   127.0
#18:     CCC 2014-Q4     0.0

以下两种解决方案均假设每个季度至少出现在一种产品中，如问题中所示：
1）xtabs：此解决方案不需要任何额外的软件包：
 # Cross-tabulate Million over every Quarter x Product pair; cells with no
 # observation sum to 0.
 xt <- xtabs(Million ~ Quarter + Product, data = df)
 # Back to long form, with the columns put in Product, Quarter, Million order.
 as.data.frame(xt, responseName = "Million")[
   c("Product", "Quarter", "Million")
 ]

   Product Quarter Million
1      AAA 2013-Q3    81.1
2      AAA 2013-Q4    50.5
3      AAA 2014-Q1    81.9
4      AAA 2014-Q2     0.0
5      AAA 2014-Q3     0.0
6      AAA 2014-Q4    78.3
7      BBB 2013-Q3    29.9
8      BBB 2013-Q4    17.0
9      BBB 2014-Q1     0.0
10     BBB 2014-Q2     0.0
11     BBB 2014-Q3    87.4
12     BBB 2014-Q4    63.0
13     CCC 2013-Q3     0.0
14     CCC 2013-Q4    41.1
15     CCC 2014-Q1    59.1
16     CCC 2014-Q2   110.7
17     CCC 2014-Q3   127.0
18     CCC 2014-Q4     0.0

如果宽格式也可以接受，则可以进一步缩短为：
# Wide layout: one row per quarter, one column per product.
xtabs(Million ~ Quarter + Product, data = df)

结果为：
         Product
Quarter     AAA   BBB   CCC
  2013-Q3  81.1  29.9   0.0
  2013-Q4  50.5  17.0  41.1
  2014-Q1  81.9   0.0  59.1
  2014-Q2   0.0   0.0 110.7
  2014-Q3   0.0  87.4 127.0
  2014-Q4  78.3  63.0   0.0

> df_full
   Product Quarter Million
1      AAA 2013-Q3    81.1
2      AAA 2013-Q4    50.5
3      AAA 2014-Q1    81.9
4      AAA 2014-Q2      NA
5      AAA 2014-Q3      NA
6      AAA 2014-Q4    78.3
7      BBB 2013-Q3    29.9
8      BBB 2013-Q4    17.0
9      BBB 2014-Q1      NA
10     BBB 2014-Q2      NA
11     BBB 2014-Q3    87.4
12     BBB 2014-Q4    63.0
13     CCC 2013-Q3      NA
14     CCC 2013-Q4    41.1
15     CCC 2014-Q1    59.1
16     CCC 2014-Q2   110.7
17     CCC 2014-Q3   127.0
18     CCC 2014-Q4      NA

2）zoo：将 df 转换为 zoo 对象 z，然后将每个 NA 替换为零，并使用 fortify.zoo 和 melt = TRUE 参数将其转换回长格式：
library(zoo)

# Build a zoo object from df: column 2 (Quarter) is the index, taken as-is
# through FUN = identity, and column 1 (Product) splits the values into one
# series per product, which leaves NA where a product has no observation.
z <- read.zoo(df, index = 2, FUN = identity, split = 1, header = TRUE)
# Replace those NAs with 0.
z <- na.fill(z, 0)
# Melt back to long form and swap the first two columns, so the order
# matches df (Product, Quarter, Million).
df_full <- fortify.zoo(z, melt = TRUE, name = "Product")[, c(2, 1, 3)]
# Reuse the column names of the original data frame.
names(df_full) <- names(df)

如果宽格式的 zoo 对象即可满足需要，则可省略最后两行（即设置 df_full 及其列名的两行），直接使用 z：

> z
         AAA  BBB   CCC
2013-Q3 81.1 29.9   0.0
2013-Q4 50.5 17.0  41.1
2014-Q1 81.9  0.0  59.1
2014-Q2  0.0  0.0 110.7
2014-Q3  0.0 87.4 127.0
2014-Q4 78.3 63.0   0.0

以下两种解决方案均假设每个季度至少出现在一种产品中，如问题中所示：
1）xtabs：此解决方案不需要任何额外的软件包：
 # Cross-tabulate Million over every Quarter x Product pair; cells with no
 # observation sum to 0.
 xt <- xtabs(Million ~ Quarter + Product, data = df)
 # Back to long form, with the columns put in Product, Quarter, Million order.
 as.data.frame(xt, responseName = "Million")[
   c("Product", "Quarter", "Million")
 ]

   Product Quarter Million
1      AAA 2013-Q3    81.1
2      AAA 2013-Q4    50.5
3      AAA 2014-Q1    81.9
4      AAA 2014-Q2     0.0
5      AAA 2014-Q3     0.0
6      AAA 2014-Q4    78.3
7      BBB 2013-Q3    29.9
8      BBB 2013-Q4    17.0
9      BBB 2014-Q1     0.0
10     BBB 2014-Q2     0.0
11     BBB 2014-Q3    87.4
12     BBB 2014-Q4    63.0
13     CCC 2013-Q3     0.0
14     CCC 2013-Q4    41.1
15     CCC 2014-Q1    59.1
16     CCC 2014-Q2   110.7
17     CCC 2014-Q3   127.0
18     CCC 2014-Q4     0.0

如果宽格式也可以接受，则可以进一步缩短为：
# Wide layout: one row per quarter, one column per product.
xtabs(Million ~ Quarter + Product, data = df)

结果为：
         Product
Quarter     AAA   BBB   CCC
  2013-Q3  81.1  29.9   0.0
  2013-Q4  50.5  17.0  41.1
  2014-Q1  81.9   0.0  59.1
  2014-Q2   0.0   0.0 110.7
  2014-Q3   0.0  87.4 127.0
  2014-Q4  78.3  63.0   0.0

> df_full
   Product Quarter Million
1      AAA 2013-Q3    81.1
2      AAA 2013-Q4    50.5
3      AAA 2014-Q1    81.9
4      AAA 2014-Q2      NA
5      AAA 2014-Q3      NA
6      AAA 2014-Q4    78.3
7      BBB 2013-Q3    29.9
8      BBB 2013-Q4    17.0
9      BBB 2014-Q1      NA
10     BBB 2014-Q2      NA
11     BBB 2014-Q3    87.4
12     BBB 2014-Q4    63.0
13     CCC 2013-Q3      NA
14     CCC 2013-Q4    41.1
15     CCC 2014-Q1    59.1
16     CCC 2014-Q2   110.7
17     CCC 2014-Q3   127.0
18     CCC 2014-Q4      NA

2）zoo：将 df 转换为 zoo 对象 z，然后将每个 NA 替换为零，并使用 fortify.zoo 和 melt = TRUE 参数将其转换回长格式：
library(zoo)

# Build a zoo object from df: column 2 (Quarter) is the index, taken as-is
# through FUN = identity, and column 1 (Product) splits the values into one
# series per product, which leaves NA where a product has no observation.
z <- read.zoo(df, index = 2, FUN = identity, split = 1, header = TRUE)
# Replace those NAs with 0.
z <- na.fill(z, 0)
# Melt back to long form and swap the first two columns, so the order
# matches df (Product, Quarter, Million).
df_full <- fortify.zoo(z, melt = TRUE, name = "Product")[, c(2, 1, 3)]
# Reuse the column names of the original data frame.
names(df_full) <- names(df)

如果宽格式的 zoo 对象即可满足需要，则可省略最后两行（即设置 df_full 及其列名的两行），直接使用 z：

> z
         AAA  BBB   CCC
2013-Q3 81.1 29.9   0.0
2013-Q4 50.5 17.0  41.1
2014-Q1 81.9  0.0  59.1
2014-Q2  0.0  0.0 110.7
2014-Q3  0.0 87.4 127.0
2014-Q4 78.3 63.0   0.0

试试这个
# Enumerate every Product x Quarter pair: table() yields one row per
# combination (its Freq count column is only a by-product here).
Values <- as.data.frame(table(df$Product, df$Quarter))
Values <- Values[order(Values$Var1, Values$Var2), ]
names(Values)[1:2] <- c("Product", "Quarter")

# Left-join the observed figures onto the full grid; pairs without an
# observation come back with NA in Million, which is then set to 0.
data <- merge(Values, df, by = c("Product", "Quarter"), all.x = TRUE)
data[is.na(data)] <- 0
# Drop the helper Freq column.
data <- data[, c("Product", "Quarter", "Million")]

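Values = as.data.frame(table(df$Product,df$Quarter))
Values = Values[with(Values, order(Var1, Var2)), ]
colnames(Values)[1] = 'Product'
colnames(Values)[2] = 'Quarter'
data = merge(x = Values, y = df, by =c("Product","Quarter"), all.x=TRUE)
data[is.na(data)] <- 0
data = data[,c(1,2,4)]

试试这个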
# Enumerate every Product x Quarter pair: table() yields one row per
# combination (its Freq count column is only a by-product here).
Values <- as.data.frame(table(df$Product, df$Quarter))
Values <- Values[order(Values$Var1, Values$Var2), ]
names(Values)[1:2] <- c("Product", "Quarter")

# Left-join the observed figures onto the full grid; pairs without an
# observation come back with NA in Million, which is then set to 0.
data <- merge(Values, df, by = c("Product", "Quarter"), all.x = TRUE)
data[is.na(data)] <- 0
# Drop the helper Freq column.
data <- data[, c("Product", "Quarter", "Million")]

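Values = as.data.frame(table(df$Product,df$Quarter))
Values = Values[with(Values, order(Var1, Var2)), ]
colnames(Values)[1] = 'Product'
colnames(Values)[2] = 'Quarter'
data = merge(x = Values, y = df, by =c("Product","Quarter"), all.x=TRUE)
data[is.na(data)] <- 0
data = data[,c(1,2,4)]

对于 R 用户来说，这可能是一个过于冗长的解决方案，但它使用的是 dplyr：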

# The pipeline below relies on dplyr (%>%, right_join, arrange, mutate,
# coalesce), so load it explicitly.
library(dplyr)

# Every product that occurs in the data.
product <- unique(df$Product)
# Every quarter that must appear in the result, including quarters for
# which some products have no observation.
quarter <- c("2013-Q3", "2013-Q4", "2014-Q1", "2014-Q2", "2014-Q3", "2014-Q4")
# Full Product x Quarter grid. expand.grid() turns character vectors into
# factors by default; keep them as character so the join keys have the
# same type as in df.
df2 <- expand.grid(
  Product = product,
  Quarter = quarter,
  stringsAsFactors = FALSE
)

# Join the observations onto the full grid, sort, and add a Value column
# in which the gaps (NA in Million) are replaced by 0.
df %>%
  right_join(df2, by = c("Product", "Quarter")) %>%  # keep every grid row
  arrange(Product, Quarter) %>%
  mutate(Value = coalesce(Million, 0))

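# 数据框中的所有产品
product <- unique(df$Product)
# 您需要的所有季度
quarter <- c('2013-Q3', '2013-Q4', '2014-Q1', '2014-Q2', '2014-Q3', '2014-Q4')
# 将它们组合起来
df2 <- expand.grid(Product=product, Quarter = quarter)

# 将 df 与产品和季度的所有可能组合进行连接
df %>%
  right_join(df2) %>%                               # 在这里连接
  arrange(Product, Quarter) %>%                     # 这里是排序
  mutate(Value = ifelse(is.na(Million),0, Million)) # 将 NA 替换为 0

对于 R 用户来说，这个解决方案可能有点过于冗长，但它使用的是 dplyr：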

# The pipeline below relies on dplyr (%>%, right_join, arrange, mutate,
# coalesce), so load it explicitly.
library(dplyr)

# Every product that occurs in the data.
product <- unique(df$Product)
# Every quarter that must appear in the result, including quarters for
# which some products have no observation.
quarter <- c("2013-Q3", "2013-Q4", "2014-Q1", "2014-Q2", "2014-Q3", "2014-Q4")
# Full Product x Quarter grid. expand.grid() turns character vectors into
# factors by default; keep them as character so the join keys have the
# same type as in df.
df2 <- expand.grid(
  Product = product,
  Quarter = quarter,
  stringsAsFactors = FALSE
)

# Join the observations onto the full grid, sort, and add a Value column
# in which the gaps (NA in Million) are replaced by 0.
df %>%
  right_join(df2, by = c("Product", "Quarter")) %>%  # keep every grid row
  arrange(Product, Quarter) %>%
  mutate(Value = coalesce(Million, 0))

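# 数据框中的所有产品
product <- unique(df$Product)
# 您需要的所有季度
quarter <- c('2013-Q3', '2013-Q4', '2014-Q1', '2014-Q2', '2014-Q3', '2014-Q4')
# 将它们组合起来
df2 <- expand.grid(Product=product, Quarter = quarter)

# 将 df 与产品和季度的所有可能组合进行连接
df %>%
  right_join(df2) %>%                               # 在这里连接
  arrange(Product, Quarter) %>%                     # 这里是排序
  mutate(Value = ifelse(is.na(Million),0, Million)) # 将 NA 替换为 0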
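另外：with(df, CJ(Product = unique(Product), Quarter = unique(Quarter)))[, Million := 0][df, Million := i.Million][]
另外：with(df, CJ(Product = unique(Product), Quarter = unique(Quarter)))[, Million := 0][df, Million := i.Million][]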