R 重新排列数据:从水年转换为日历年
我有一个表格,其中包含来自流量计的数据,如下所示:R 重新排列数据:从水年转换为日历年,r,date,time-series,tidyr,R,Date,Time Series,Tidyr,我有一个表格,其中包含来自流量计的数据,如下所示: Water.Year May Jun Jul Aug Sep Oct Nov Dec Jan Feb Mar Apr 1 1953-1954 55.55 43.62 30.46 26.17 26.76 41.74 19.92 41.25 28.77 20.96 12.47 10.51 2 1954-1955 23.49 81.35 46.71 29.33 67.83 133
Water.Year May Jun Jul Aug Sep Oct Nov Dec Jan Feb Mar Apr
1 1953-1954 55.55 43.62 30.46 26.17 26.76 41.74 19.92 41.25 28.77 20.96 12.47 10.51
2 1954-1955 23.49 81.35 46.71 29.33 67.83 133.30 37.62 30.16 21.07 19.38 13.87 10.63
3 1955-1956 9.87 51.59 55.36 63.03 154.08 98.15 104.06 32.85 22.89 17.30 15.68 10.88
> data <- structure(list(Water.Year = structure(1:6, .Label = c("1953-1954", "1954-1955", "1955-1956", "1956-1957", "1957-1958", "1958-1959", "1959-1960", "1960-1961", "1961-1962", "1962-1963", "1963-1964", "1964-1965", "1965-1966", "1966-1967", "1967-1968", "1968-1969", "1969-1970", "1970-1971", "1971-1972", "1972-1973", "1973-1974", "1974-1975", "1975-1976", "1976-1977", "1977-1978", "1978-1979", "1979-1980", "1980-1981", "1981-1982", "1982-1983", "1983-1984", "1984-1985", "1985-1986", "1986-1987", "1987-1988", "1988-1989", "1989-1990", "1990-1991", "1991-1992", "1992-1993", "1993-1994", "1994-1995", "1995-1996", "1996-1997", "1997-1998", "1998-1999", "1999-2000", "2000-2001"), class = "factor"), May = c(55.55, 23.49, 9.87, 18.03, 17.46, 11.37), Jun = c(43.62, 81.35, 51.59, 28.61, 15.14, 29.48), Jul = c(30.46, 46.71, 55.36, 24.36, 20.09, 19.48), Ago = c(26.17, 29.33, 63.03, 22.01, 16.97, 16.86), Set = c(26.76, 67.83, 154.08, 28.51, 27.24, 21.01), Oct = c(41.74, 133.3, 98.15, 53.72, 35.78, 19.78), Nov = c(19.92, 37.62, 104.06, 115.78, 20.35, 18.69), Dic = c(41.25, 30.16, 32.85, 32.04, 22, 18.86), Ene = c(28.77, 21.07, 22.89, 25.44, 13.27, 14.89), Feb = c(20.96, 19.38, 17.3, 14.53, 10.37, 10.4), Mar = c(12.47, 13.87, 15.68, 10.78, 8.77, 8.79), Abr = c(10.51, 10.63, 10.88, 9.33, 7.69, 8.99)), .Names = c("Water.Year", "May", "Jun", "Jul", "Ago", "Set", "Oct", "Nov", "Dic", "Ene", "Feb", "Mar", "Abr"), row.names = c(NA, 6L), class = "data.frame")
现在,我计划使用tidyr中的“聚集”来完成其余的转换,但我一直在思考如何创建日历。年份列使用year1表示五月到十二月,使用year2表示一月到四月
任何帮助都将不胜感激。好的,这个怎么样。这是重塑和基础R之间的混搭 你发布数据集后,我就使用了它。谢谢你提供它
data <- structure(list(Water.Year = structure(1:6, .Label = c("1953-1954", "1954-1955", "1955-1956", "1956-1957", "1957-1958", "1958-1959", "1959-1960", "1960-1961", "1961-1962", "1962-1963", "1963-1964", "1964-1965", "1965-1966", "1966-1967", "1967-1968", "1968-1969", "1969-1970", "1970-1971", "1971-1972", "1972-1973", "1973-1974", "1974-1975", "1975-1976", "1976-1977", "1977-1978", "1978-1979", "1979-1980", "1980-1981", "1981-1982", "1982-1983", "1983-1984", "1984-1985", "1985-1986", "1986-1987", "1987-1988", "1988-1989", "1989-1990", "1990-1991", "1991-1992", "1992-1993", "1993-1994", "1994-1995", "1995-1996", "1996-1997", "1997-1998", "1998-1999", "1999-2000", "2000-2001"), class = "factor"), May = c(55.55, 23.49, 9.87, 18.03, 17.46, 11.37), Jun = c(43.62, 81.35, 51.59, 28.61, 15.14, 29.48), Jul = c(30.46, 46.71, 55.36, 24.36, 20.09, 19.48), Ago = c(26.17, 29.33, 63.03, 22.01, 16.97, 16.86), Set = c(26.76, 67.83, 154.08, 28.51, 27.24, 21.01), Oct = c(41.74, 133.3, 98.15, 53.72, 35.78, 19.78), Nov = c(19.92, 37.62, 104.06, 115.78, 20.35, 18.69), Dic = c(41.25, 30.16, 32.85, 32.04, 22, 18.86), Ene = c(28.77, 21.07, 22.89, 25.44, 13.27, 14.89), Feb = c(20.96, 19.38, 17.3, 14.53, 10.37, 10.4), Mar = c(12.47, 13.87, 15.68, 10.78, 8.77, 8.79), Abr = c(10.51, 10.63, 10.88, 9.33, 7.69, 8.99)), .Names = c("Water.Year", "May", "Jun", "Jul", "Ago", "Set", "Oct", "Nov", "Dic", "Ene", "Feb", "Mar", "Abr"), row.names = c(NA, 6L), class = "data.frame")
data好的,我刚刚意识到您在structure()
中提供的月份可能是另一种语言。我将继续使用我创建的数据,它使用英文版的Months。这样人们就可以用英语看到相应的解决方案
library(tidyr) # for separate function
library(reshape2) # for melt function
library(dplyr) # for pipe operator and arrange function
# Reproducible Data
weather = structure(list(Water.Year = structure(1:3, .Label = c("1953-1954",
"1954-1955", "1955-1956"), class = "factor"),
May = c(55.55, 23.49, 9.87),
Jun = c(43.62, 81.35, 51.59),
Jul = c(30.46, 46.71, 55.36),
Aug = c(26.17, 29.33, 63.03),
Sep = c(26.76, 67.83, 154.08),
Oct = c(41.74, 133.3, 98.15),
Nov = c(19.92, 37.62, 104.06),
Dec = c(41.25, 30.16, 32.85),
Jan = c(28.77, 21.07, 22.89),
Feb = c(20.96, 19.38, 17.3), Mar = c(12.47, 13.87, 15.68),
Apr = c(10.51, 10.63, 10.88)), .Names = c("Water.Year", "May",
"Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Jan", "Feb",
"Mar", "Apr"), class = "data.frame", row.names = c(NA, -3L))
编码从这里开始:
df = separate(weather, Water.Year, c("year1","year2"))
# Split into two datasets
columns1 = c("year1", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Dec")
df1 = subset(df, select = c(year1, May:Dec))
df2 = subset(df, select = c(year2, Jan:Apr))
longdf1 = melt(df1, variable.name = "Month", id.vars = "year1",
value.name = "Flow.Measurement")
names(longdf1)[1] = "Calendar.Year"
longdf2 = melt(df2, variable.name = "Month", id.vars = "year2",
value.name = "Flow.Measurement")
names(longdf2)[1] = "Calendar.Year"
# Combine the two datasets
final_df = rbind(longdf1, longdf2)
# Releveling the Month
final_df$Month = factor(final_df$Month, levels = month.abb)
final_df = arrange(final_df, Calendar.Year, Month)
最终数据帧:
> final_df
Calendar.Year Month Flow.Measurement
1 1953 May 55.55
2 1953 Jun 43.62
3 1953 Jul 30.46
4 1953 Aug 26.17
5 1953 Sep 26.76
6 1953 Oct 41.74
7 1953 Nov 19.92
8 1953 Dec 41.25
9 1954 Jan 28.77
10 1954 Feb 20.96
11 1954 Mar 12.47
12 1954 Apr 10.51
13 1954 May 23.49
14 1954 Jun 81.35
15 1954 Jul 46.71
16 1954 Aug 29.33
17 1954 Sep 67.83
18 1954 Oct 133.30
19 1954 Nov 37.62
20 1954 Dec 30.16
21 1955 Jan 21.07
22 1955 Feb 19.38
23 1955 Mar 13.87
24 1955 Apr 10.63
25 1955 May 9.87
26 1955 Jun 51.59
27 1955 Jul 55.36
28 1955 Aug 63.03
29 1955 Sep 154.08
30 1955 Oct 98.15
31 1955 Nov 104.06
32 1955 Dec 32.85
33 1956 Jan 22.89
34 1956 Feb 17.30
35 1956 Mar 15.68
36 1956 Apr 10.88
另一个想法(使用@useR数据和英语月)
我们将Water.Year
列分为Year1
和Year2
,并使用gather()
将数据重新格式化为长格式。然后,对于每组,我们使用match()
和month.abb
检查月份是否大于或等于5(五月),并用if_else()指定相应的年份。最后,我们删除不必要的列,并按Year
和Month
## A tibble: 36 × 3
# Year Month Value
# <chr> <fctr> <dbl>
#1 1953 May 55.55
#2 1953 Jun 43.62
#3 1953 Jul 30.46
#4 1953 Aug 26.17
#5 1953 Sep 26.76
#6 1953 Oct 41.74
#7 1953 Nov 19.92
#8 1953 Dec 41.25
#9 1954 Jan 28.77
#10 1954 Feb 20.96
## ... with 26 more rows
##一个tible:36×3
#年月价值
#
#1953年5月1日55.55
#1953年6月2日43.62
#3 1953年7月30日46
#4 1953年8月26日17
#1953年5月76日9月26日
#6 1953年10月41日74
#1953年7月1992年11月19日
#1953年12月8日41.25
#9 1954年1月28日77
#10 1954年2月20日96
## ... 还有26行
我决定使用我得到的所有答案中的一些片段。
这是我写的代码:
library(dplyr)
library(tidyr)
#separate the year column into two years
df_years <- df %>%
separate(Water.Year, c("Year1", "Year2"))
#create two different dataframes for each section of the year
df1 <- subset(df_years, select = c(Year1, May:Dec))
df2 <- subset(df_years, select = c(Year2, Jan:Apr))
#rename both years' columns using the same name
colnames(df2)[1] <- "Year"
colnames(df1)[1] <- "Year"
#join both dataframes
cleandata <- full_join(df1, df2, by = "Year")
#sort months chronologically
cleandata <- cleandata[, c("Year", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")]
#convert to tidy data set
cleandata <- gather(cleandata, "Month", "Flow", 2:13)
#sort by year and month
cleandata <- arrange(cleandata, Year, Month)
库(dplyr)
图书馆(tidyr)
#将年列分为两年
df_年%
单独(水年,c(“第1年”、“第2年”))
#为一年中的每个部分创建两个不同的数据帧
df1请使用dput(数据)
发布可重复的问题。如果不在库中手动键入数据,就无法复制您提供的数据,因为它是显示的。library(reforme2);熔化(df)
使用dplyr的绝佳解决方案
!但是你的最终数据的顺序有点混乱,因为你的“1953年12月”没有在“1954年1月”之后。看看我的解决方案。@useR哦,对了!添加了因子()
部分。谢谢
library(dplyr)
library(tidyr)
df %>%
separate(Water.Year, c("Year1", "Year2")) %>%
gather(Month, Value, -(Year1:Year2)) %>%
group_by(Year1, Year2) %>%
mutate(Year = if_else(match(Month, month.abb) >= 5, Year1, Year2),
Month = factor(Month, levels = month.abb)) %>%
ungroup() %>%
select(Year, Month, Value) %>%
arrange(Year, Month)
## A tibble: 36 × 3
# Year Month Value
# <chr> <fctr> <dbl>
#1 1953 May 55.55
#2 1953 Jun 43.62
#3 1953 Jul 30.46
#4 1953 Aug 26.17
#5 1953 Sep 26.76
#6 1953 Oct 41.74
#7 1953 Nov 19.92
#8 1953 Dec 41.25
#9 1954 Jan 28.77
#10 1954 Feb 20.96
## ... with 26 more rows
library(dplyr)
library(tidyr)
#separate the year column into two years
df_years <- df %>%
separate(Water.Year, c("Year1", "Year2"))
#create two different dataframes for each section of the year
df1 <- subset(df_years, select = c(Year1, May:Dec))
df2 <- subset(df_years, select = c(Year2, Jan:Apr))
#rename both years' columns using the same name
colnames(df2)[1] <- "Year"
colnames(df1)[1] <- "Year"
#join both dataframes
cleandata <- full_join(df1, df2, by = "Year")
#sort months chronologically
cleandata <- cleandata[, c("Year", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")]
#convert to tidy data set
cleandata <- gather(cleandata, "Month", "Flow", 2:13)
#sort by year and month
cleandata <- arrange(cleandata, Year, Month)