使用pivot_longer将数据形状从宽改为长
我想使用 pivot_longer 将下面的数据框从宽格式重塑为长格式。标签:r, tidyverse, tidyr
# A tibble: 5 x 7
PizzaNumber Topping_1 Category_1 Topping_2 Category_2 Topping_3 Category_3
<int> <fct> <fct> <fct> <fct> <fct> <fct>
1 1 cheese vegetarian ham carnivorous tomato vegetarian
2 2 spinach vegetarian tomato vegetarian NA NA
3 3 pineapple vegetarian cheese vegetarian ham carnivorous
4 4 cheese vegetarian tomato vegetarian NA NA
5 5 beef carnivorous NA NA NA NA
我们可以结合使用 dplyr、tidyr 和 purrr:
library(dplyr) # 1.0.0
library(tidyr) # 1.1.0
library(purrr) # 0.3.4
# Reshape `widedata` from wide to long: one output row per
# (PizzaNumber, Topping, Category) combination that is actually present.
# NOTE(review): the paste() step below relies on every PizzaNumber group
# holding exactly one row -- confirm PizzaNumber is unique in `widedata`.
widedata %>%
  group_by(PizzaNumber) %>%
  nest() %>%
  mutate(
    Topping_Category = map(data, function(data) {
      # Pair nested columns 1/3/5 with columns 2/4/6 (the Topping_n and
      # Category_n columns once PizzaNumber has been nested away) into
      # strings such as "ham_carnivorous".
      as.list(paste(data[, c(1, 3, 5)], data[, c(2, 4, 6)], sep = "_"))
    })
  ) %>%
  select(-data) %>%
  unnest_longer(Topping_Category, indices_include = FALSE) %>%
  ungroup() %>%
  separate(Topping_Category, c("Topping", "Category"), sep = "_") %>%
  # Missing pairs were pasted as the literal text "NA_NA"; turn the pieces
  # back into real NA values.
  mutate(Topping = na_if(Topping, "NA"), Category = na_if(Category, "NA")) %>%
  # Keep only the slots that hold a topping. The previous condition,
  # `Topping != is.na(Topping)`, compared strings with logicals and dropped
  # NA rows only as a side effect of NA propagation.
  filter(!is.na(Topping))
您可以使用 pivot_longer:
# Stack each Topping_<n> / Category_<n> pair of `df` into its own row.
tidyr::pivot_longer(
  df,
  # Reshape every column except the pizza identifier.
  cols = -PizzaNumber,
  # Capture the part of each column name before the "_<digits>" suffix ...
  names_pattern = "(\\w+)_\\d+",
  # ... and use that captured text as the name of the output column.
  names_to = ".value",
  # Skip the slots where a pizza has no topping recorded.
  values_drop_na = TRUE
)
# PizzaNumber Topping Category
# <chr> <chr> <chr>
# 1 1 cheese vegetarian
# 2 1 ham carnivorous
# 3 1 tomato vegetarian
# 4 2 spinach vegetarian
# 5 2 tomato vegetarian
# 6 3 pineapple vegetarian
# 7 3 cheese vegetarian
# 8 3 ham carnivorous
# 9 4 cheese vegetarian
#10 4 tomato vegetarian
#11 5 beef carnivorous
# Stack each Topping_<n> / Category_<n> pair of `df` into its own row.
# (Function and argument names restored; the machine-translated version
# of this call was not valid R.)
tidyr::pivot_longer(df, cols = -PizzaNumber,
                    names_to = '.value',
                    names_pattern = '(\\w+)_\\d+',
                    values_drop_na = TRUE)
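# PizzaNumber Topping Category
# <chr> <chr> <chr>
# 1 1 cheese vegetarian
# 2 1 ham carnivorous
# 3 1 tomato vegetarian
# 4 2 spinach vegetarian
# 5 2 tomato vegetarian
# 6 3 pineapple vegetarian
# 7 3 cheese vegetarian
# 8 3 ham carnivorous
# 9 4 cheese vegetarian
#10 4 tomato vegetarian
#11 5 beef carnivorous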
library(dplyr) # 1.0.0
library(tidyr) # 1.1.0
library(purrr) # 0.3.4
# Reshape `widedata` from wide to long: one output row per
# (PizzaNumber, Topping, Category) combination that is actually present.
# NOTE(review): the paste() step below relies on every PizzaNumber group
# holding exactly one row -- confirm PizzaNumber is unique in `widedata`.
widedata %>%
  group_by(PizzaNumber) %>%
  nest() %>%
  mutate(
    Topping_Category = map(data, function(data) {
      # Pair nested columns 1/3/5 with columns 2/4/6 (the Topping_n and
      # Category_n columns once PizzaNumber has been nested away) into
      # strings such as "ham_carnivorous".
      as.list(paste(data[, c(1, 3, 5)], data[, c(2, 4, 6)], sep = "_"))
    })
  ) %>%
  select(-data) %>%
  unnest_longer(Topping_Category, indices_include = FALSE) %>%
  ungroup() %>%
  separate(Topping_Category, c("Topping", "Category"), sep = "_") %>%
  # Missing pairs were pasted as the literal text "NA_NA"; turn the pieces
  # back into real NA values.
  mutate(Topping = na_if(Topping, "NA"), Category = na_if(Category, "NA")) %>%
  # Keep only the slots that hold a topping. The previous condition,
  # `Topping != is.na(Topping)`, compared strings with logicals and dropped
  # NA rows only as a side effect of NA propagation.
  filter(!is.na(Topping))
# A tibble: 11 x 3
PizzaNumber Topping Category
<dbl> <chr> <chr>
1 1 cheese vegetarian
2 1 ham carnivorous
3 1 tomato vegetarian
4 2 spinach vegetarian
5 2 tomato vegetarian
6 3 pineapple vegetarian
7 3 cheese vegetarian
8 3 ham carnivorous
9 4 cheese vegetarian
10 4 tomato vegetarian
11 5 beef carnivorous
# Sample data for the reshaping example: five pizzas with up to three
# (Topping_n, Category_n) pairs each; unused slots are NA.
# Bound to `df` because the pivot_longer() call that follows reads `df`;
# left unassigned, that name would resolve to the stats::df function.
df <- structure(
  list(
    PizzaNumber = c("1", "2", "3", "4", "5"),
    Topping_1 = c("cheese", "spinach", "pineapple", "cheese", "beef"),
    Category_1 = c("vegetarian", "vegetarian", "vegetarian", "vegetarian",
                   "carnivorous"),
    Topping_2 = c("ham", "tomato", "cheese", "tomato", NA),
    Category_2 = c("carnivorous", "vegetarian", "vegetarian", "vegetarian",
                   NA),
    Topping_3 = c("tomato", NA, "ham", NA, NA),
    Category_3 = c("vegetarian", NA, "carnivorous", NA, NA)
  ),
  # Compact row-name form for five automatically numbered rows.
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Stack each Topping_<n> / Category_<n> pair of `df` into its own row.
tidyr::pivot_longer(
  df,
  # Reshape every column except the pizza identifier.
  cols = -PizzaNumber,
  # Capture the part of each column name before the "_<digits>" suffix ...
  names_pattern = "(\\w+)_\\d+",
  # ... and use that captured text as the name of the output column.
  names_to = ".value",
  # Skip the slots where a pizza has no topping recorded.
  values_drop_na = TRUE
)
# PizzaNumber Topping Category
# <chr> <chr> <chr>
# 1 1 cheese vegetarian
# 2 1 ham carnivorous
# 3 1 tomato vegetarian
# 4 2 spinach vegetarian
# 5 2 tomato vegetarian
# 6 3 pineapple vegetarian
# 7 3 cheese vegetarian
# 8 3 ham carnivorous
# 9 4 cheese vegetarian
#10 4 tomato vegetarian
#11 5 beef carnivorous