R 如何从包含数据帧的深度嵌套列表中创建数据帧?
在API和R的帮助下,我处理来自TMDb的数据。输出是一个包含n个列表的列表,其中包含数据帧本身。我不能处理嵌套列表,只喜欢一个数据帧 感觉好像我什么都试过了,但都没用 我的丑陋列表的可复制代码(假设此列表本身有超过400.000个列表):R 如何从包含数据帧的深度嵌套列表中创建数据帧?,r,list,dataframe,nested,R,List,Dataframe,Nested,在API和R的帮助下,我处理来自TMDb的数据。输出是一个包含n个列表的列表,其中包含数据帧本身。我不能处理嵌套列表,只喜欢一个数据帧 感觉好像我什么都试过了,但都没用 我的丑陋列表的可复制代码(假设此列表本身有超过400.000个列表): 我希望你能以任何方式帮助我。我希望这是可以理解的。这里有一个非常复杂的方法来做你想做的事情。您想要的df2只是df1,只删除了几列。对于带有性别的df3,您应该能够以类似于这些函数的方式访问它们 df <- data.frame(t(sapply(li
我希望你能以任何方式帮助我。我希望这是可以理解的。
这里有一个相当繁琐的方法来做你想做的事情。您想要的 df2 只是删除了几列的 df1。对于带有性别的 df3,您应该能够以类似于下面这些函数的方式访问它们:
# Flatten the nested list into a data frame: sapply() applies c() to every
# element, t() transposes the simplified result, and data.frame() wraps it.
# NOTE(review): `list` must be the user's nested list object here (it shadows
# base::list) -- confirm the object name against the caller.
# NOTE(review): presumably every element has the same fields so that sapply()
# simplifies to a matrix with one column per element -- verify.
df <- data.frame(t(sapply(list,c)))
# Collapse each row's genre names into a single comma-separated string.
#
# For every row of `df_input`, the `name` field of that row's `genre` entry is
# joined with ", " and stored back in place of the entry. Any number of genres
# is supported; a row without genre names gets NA.
#
# Args:
#   df_input: data frame with a list-column `genre` whose entries expose `$name`.
#
# Returns:
#   The modified data frame, invisibly. For backward compatibility with callers
#   that read the global, the result is also assigned to `df_final` via `<<-`.
f_genre <- function(df_input) {
  # seq_len() skips the loop for a zero-row input (1:0 would iterate over 1, 0).
  for (i in seq_len(nrow(df_input))) {
    genre_names <- df_input$genre[[i]]$name
    df_input$genre[[i]] <- if (length(genre_names) > 0) {
      # paste(collapse =) handles one, two or many names alike.
      paste(genre_names, collapse = ", ")
    } else {
      NA_character_
    }
  }
  # Publish the result once, after all rows are processed.
  df_final <<- df_input
  invisible(df_input)
}
# Add a `Producer` column listing, per row, the crew members whose job is
# "Producer".
#
# For every row, `cast[[i]]$crew` is inspected; the `name` values whose `job`
# equals "Producer" are joined with ", ". Rows with no crew, or with crew but
# no producer, get NA.
#
# Args:
#   df_input: data frame with a list-column `cast`; each entry exposes `$crew`
#     with `job` and `name` fields.
#
# Returns:
#   The data frame with a character column `Producer`, invisibly. For backward
#   compatibility the result is also assigned to the global `df_final`.
f_producer <- function(df_input) {
  producers_of <- function(cast_entry) {
    crew <- cast_entry$crew
    # %in% never yields NA, so missing jobs and empty crews are simply no match.
    matched <- crew$name[crew$job %in% "Producer"]
    if (length(matched) == 0) {
      return(NA_character_)
    }
    paste(matched, collapse = ", ")
  }
  df_input$Producer <- vapply(
    df_input$cast, producers_of, character(1), USE.NAMES = FALSE
  )
  # Publish the result once for callers that read the global.
  df_final <<- df_input
  invisible(df_input)
}
# Add a `Director` column listing, per row, the crew members whose job is
# "Director".
#
# For every row, `cast[[i]]$crew` is inspected; the `name` values whose `job`
# equals "Director" are joined with ", ". Rows with no crew, or with crew but
# no director, get NA.
#
# Args:
#   df_input: data frame with a list-column `cast`; each entry exposes `$crew`
#     with `job` and `name` fields.
#
# Returns:
#   The data frame with a character column `Director`, invisibly. For backward
#   compatibility the result is also assigned to the global `df_final`.
f_director <- function(df_input) {
  directors_of <- function(cast_entry) {
    crew <- cast_entry$crew
    # %in% never yields NA, so missing jobs and empty crews are simply no match.
    matched <- crew$name[crew$job %in% "Director"]
    if (length(matched) == 0) {
      return(NA_character_)
    }
    paste(matched, collapse = ", ")
  }
  df_input$Director <- vapply(
    df_input$cast, directors_of, character(1), USE.NAMES = FALSE
  )
  # Publish the result once for callers that read the global.
  df_final <<- df_input
  invisible(df_input)
}
# Run the three helpers in sequence. Each helper stores its result in the
# global `df_final`, which is why the later calls take `df_final` as input
# instead of a return value.
f_genre(df)
f_producer(df_final)
f_director(df_final)
# df1 is the final frame without its fourth column.
# NOTE(review): presumably column 4 is the raw `cast` list-column -- confirm.
df1 <- df_final[,-4]
df
我认为您的主要问题是获得一个干净的数据帧,因此我们可以使用 map_df 在 lst 中循环并创建一个数据帧。之后,您可以使用 select、dplyr::filter、tidyr::spread 和 separate_rows 来获取 df1、df2、df3
library(purrr)
library(dplyr)
library(tidyr)
# Build one long data frame from `lst`: each element contributes its id,
# revenue, a comma-joined genre string, and one row per entry of its `cast`
# members (tagged in `term` with the name of the member table they came from).
df <- map_df(
  lst,
  function(movie) {
    genre_names <- movie$genre$name
    movie_tbl <- tibble(
      id = movie$id,
      revenue = movie$revenue,
      genre = ifelse(
        length(genre_names) > 1,
        paste(genre_names, collapse = ","),
        genre_names
      ),
      cast = imap(
        movie$cast,
        function(members, role) {
          if (length(members$id) == 0) {
            # Empty member table: substitute placeholder Director/Producer rows
            # with NA fields.
            data.frame(
              id1 = NA, name = NA, term = "crew",
              job = c("Director", "Producer"), gender = NA,
              stringsAsFactors = FALSE
            )
          } else {
            data.frame(members, term = role, stringsAsFactors = FALSE)
          }
        }
      )
    )
    unnest(movie_tbl, cast)
  }
)
# df1: one row per id with the Director and Producer names spread into their
# own columns (several names for the same job are joined with ",").
df1 <- df %>%
  filter(term == "crew", job %in% c("Director", "Producer")) %>%
  group_by(id, job) %>%
  mutate(name = paste(name[!is.na(name)], collapse = ",")) %>%
  slice(1) %>%
  dplyr::select(-id1, -term, -gender) %>%
  spread(key = job, value = name) %>%
  ungroup()
# df2: split the comma-separated Producer names so that every producer gets
# its own row.
df1 %>%
  separate_rows(Producer, sep = ",")
library(purrr)
library(dplyr)
library(tidyr)
map_df(lst,
~ tibble(id = .x$id,
revenue = .x$revenue,
genre = ifelse(length(.x$genre$name)>1, paste(.x$genre$name, collapse = ','), .x$genre$name),
cast = imap(.x$cast,
~ if(length(.x$id)==0) data.frame(id1=NA, name=NA, term='crew', job=c("Director", "Producer"), gender=NA, stringsAsFactors = FALSE) else
data.frame(.x, term=.y, stringsAsFactors = FALSE))
) %>%
unnest(cast)
) -> df
#df1
df1 <- filter(df, term=='crew' & job %in% c("Director", "Producer")) %>%
group_by(id, job) %>%
mutate(name= paste(name[!is.na(name)], collapse = ',')) %>%
slice(1) %>% dplyr::select(-id1, -term, -gender) %>%
spread(key=job, value = name) %>% ungroup()
#df2
separate_rows(df1, Producer, sep = ',')
非常感谢您的回答!它看起来很好,但是输出与我期望的输出不太一样,不过还是非常感谢!真不敢相信。我希望我能把它应用到我原来那个疯狂的列表上。
# Flatten the nested list into a data frame: sapply() applies c() to every
# element, t() transposes the simplified result, and data.frame() wraps it.
# NOTE(review): `list` must be the user's nested list object here (it shadows
# base::list) -- confirm the object name against the caller.
# NOTE(review): presumably every element has the same fields so that sapply()
# simplifies to a matrix with one column per element -- verify.
df <- data.frame(t(sapply(list,c)))
# Collapse each row's genre names into a single comma-separated string.
#
# For every row of `df_input`, the `name` field of that row's `genre` entry is
# joined with ", " and stored back in place of the entry. Any number of genres
# is supported; a row without genre names gets NA.
#
# Args:
#   df_input: data frame with a list-column `genre` whose entries expose `$name`.
#
# Returns:
#   The modified data frame, invisibly. For backward compatibility with callers
#   that read the global, the result is also assigned to `df_final` via `<<-`.
f_genre <- function(df_input) {
  # seq_len() skips the loop for a zero-row input (1:0 would iterate over 1, 0).
  for (i in seq_len(nrow(df_input))) {
    genre_names <- df_input$genre[[i]]$name
    df_input$genre[[i]] <- if (length(genre_names) > 0) {
      # paste(collapse =) handles one, two or many names alike.
      paste(genre_names, collapse = ", ")
    } else {
      NA_character_
    }
  }
  # Publish the result once, after all rows are processed.
  df_final <<- df_input
  invisible(df_input)
}
# Add a `Producer` column listing, per row, the crew members whose job is
# "Producer".
#
# For every row, `cast[[i]]$crew` is inspected; the `name` values whose `job`
# equals "Producer" are joined with ", ". Rows with no crew, or with crew but
# no producer, get NA.
#
# Args:
#   df_input: data frame with a list-column `cast`; each entry exposes `$crew`
#     with `job` and `name` fields.
#
# Returns:
#   The data frame with a character column `Producer`, invisibly. For backward
#   compatibility the result is also assigned to the global `df_final`.
f_producer <- function(df_input) {
  producers_of <- function(cast_entry) {
    crew <- cast_entry$crew
    # %in% never yields NA, so missing jobs and empty crews are simply no match.
    matched <- crew$name[crew$job %in% "Producer"]
    if (length(matched) == 0) {
      return(NA_character_)
    }
    paste(matched, collapse = ", ")
  }
  df_input$Producer <- vapply(
    df_input$cast, producers_of, character(1), USE.NAMES = FALSE
  )
  # Publish the result once for callers that read the global.
  df_final <<- df_input
  invisible(df_input)
}
# Add a `Director` column listing, per row, the crew members whose job is
# "Director".
#
# For every row, `cast[[i]]$crew` is inspected; the `name` values whose `job`
# equals "Director" are joined with ", ". Rows with no crew, or with crew but
# no director, get NA.
#
# Args:
#   df_input: data frame with a list-column `cast`; each entry exposes `$crew`
#     with `job` and `name` fields.
#
# Returns:
#   The data frame with a character column `Director`, invisibly. For backward
#   compatibility the result is also assigned to the global `df_final`.
f_director <- function(df_input) {
  directors_of <- function(cast_entry) {
    crew <- cast_entry$crew
    # %in% never yields NA, so missing jobs and empty crews are simply no match.
    matched <- crew$name[crew$job %in% "Director"]
    if (length(matched) == 0) {
      return(NA_character_)
    }
    paste(matched, collapse = ", ")
  }
  df_input$Director <- vapply(
    df_input$cast, directors_of, character(1), USE.NAMES = FALSE
  )
  # Publish the result once for callers that read the global.
  df_final <<- df_input
  invisible(df_input)
}
# Run the three helpers in sequence. Each helper stores its result in the
# global `df_final`, which is why the later calls take `df_final` as input
# instead of a return value.
f_genre(df)
f_producer(df_final)
f_director(df_final)
# df1 is the final frame without its fourth column.
# NOTE(review): presumably column 4 is the raw `cast` list-column -- confirm.
df1 <- df_final[,-4]
library(purrr)
library(dplyr)
library(tidyr)
# Build one long data frame from `lst`: each element contributes its id,
# revenue, a comma-joined genre string, and one row per entry of its `cast`
# members (tagged in `term` with the name of the member table they came from).
df <- map_df(
  lst,
  function(movie) {
    genre_names <- movie$genre$name
    movie_tbl <- tibble(
      id = movie$id,
      revenue = movie$revenue,
      genre = ifelse(
        length(genre_names) > 1,
        paste(genre_names, collapse = ","),
        genre_names
      ),
      cast = imap(
        movie$cast,
        function(members, role) {
          if (length(members$id) == 0) {
            # Empty member table: substitute placeholder Director/Producer rows
            # with NA fields.
            data.frame(
              id1 = NA, name = NA, term = "crew",
              job = c("Director", "Producer"), gender = NA,
              stringsAsFactors = FALSE
            )
          } else {
            data.frame(members, term = role, stringsAsFactors = FALSE)
          }
        }
      )
    )
    unnest(movie_tbl, cast)
  }
)
# df1: one row per id with the Director and Producer names spread into their
# own columns (several names for the same job are joined with ",").
df1 <- df %>%
  filter(term == "crew", job %in% c("Director", "Producer")) %>%
  group_by(id, job) %>%
  mutate(name = paste(name[!is.na(name)], collapse = ",")) %>%
  slice(1) %>%
  dplyr::select(-id1, -term, -gender) %>%
  spread(key = job, value = name) %>%
  ungroup()
# df2: split the comma-separated Producer names so that every producer gets
# its own row.
df1 %>%
  separate_rows(Producer, sep = ",")