R-根据现有列值创建和命名数据帧

R-根据现有列值创建和命名数据帧,r,dataframe,R,Dataframe,通过dput,我有一个这样结构的数据帧: structure(list(railroad = c("bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "union

通过
dput
,我有一个这样结构的数据帧:

# Sample data reconstructed from dput() output: 16 rows with the columns
# railroad, measure, category, irm and mean. Bound to `df` because the
# snippets further down refer to the data by that name.
df <- structure(list(railroad = c("bnsf railway company", "bnsf railway company", 
"bnsf railway company", "bnsf railway company", "bnsf railway company", 
"bnsf railway company", "bnsf railway company", "bnsf railway company", 
"union pacific railroad", "union pacific railroad", "union pacific railroad", 
"union pacific railroad", "union pacific railroad", "union pacific railroad", 
"union pacific railroad", "union pacific railroad"), measure = 
c("cars.owned.by", 
"cars.owned.by", "cars.type", "cars.type", "cars.type", "train.speed", 
"train.speed", "terminal.dwell", "cars.owned.by", "cars.owned.by", 
"cars.type", "cars.type", "cars.type", "train.speed", "train.speed", 
"terminal.dwell"), category = c("system", "private", "box", "intermodal", 
"total", "intermodal", "all.trains", "entire.railroad", "system", 
"private", "box", "intermodal", "total", "intermodal", "all.trains", 
"entire.railroad"), irm = c(201510L, 201510L, 201510L, 201510L, 
201510L, 201510L, 201510L, 201510L, 201510L, 201510L, 201510L, 
201510L, 201510L, 201510L, 201510L, 201510L), mean = c(66623, 
149937.333, 11395, 16499, 236866, 33.3, 24.5, 25.267, 57618.333, 
195764.667, 22229.333, 14135.333, 293164.333, 31.933, 26.6, 27.6
)), row.names = c(1L, 3L, 6L, 9L, 14L, 15L, 20L, 32L, 127L, 129L, 
132L, 135L, 140L, 141L, 146L, 160L), class = "data.frame")
我想做的是:

# Desired result for the first measure/category pair: a data frame named
# after the pair, whose fifth column (`mean`) carries that same name.
cars.owned.by.system <- df[df$measure == "cars.owned.by" &
                             df$category == "system", ]
# The new column name must be a character string, not a bare symbol.
colnames(cars.owned.by.system)[5] <- "cars.owned.by.system"
  • 为
    measure
    和
    category
    的每个组合创建单独的数据框,通过粘贴
    measure
    和
    category
    来命名,并用
    .
    分隔。因此,第一个数据帧将被称为
    cars.owned.by.system
    等等


  • 将每个数据帧的第五列,
    mean
    重命名为数据帧本身的名称。因此,对于第一个数据帧,它将是
    colnames(df)[5]假设
    df
    是您的数据帧,我认为这样做了

    # Create one data frame per measure/category pair in the current
    # environment. Each is named "<measure>.<category>" and has its fifth
    # column (`mean`) renamed to that same name.
    pieces <- split(df, paste(df$measure, df$category, sep = "."))
    for (newdf in names(pieces)) {
      piece <- pieces[[newdf]]
      colnames(piece)[5] <- newdf  # fifth column is `mean`
      # assign() takes the target name as a string, so no eval(parse()) is needed.
      assign(newdf, piece)
    }
    
    for(唯一类别中的cat(df$类别)){
    
    newdf假设
    df
    是您的数据帧,我认为这样做了

    # Create one data frame per measure/category pair in the current
    # environment. Each is named "<measure>.<category>" and has its fifth
    # column (`mean`) renamed to that same name.
    pieces <- split(df, paste(df$measure, df$category, sep = "."))
    for (newdf in names(pieces)) {
      piece <- pieces[[newdf]]
      colnames(piece)[5] <- newdf  # fifth column is `mean`
      # assign() takes the target name as a string, so no eval(parse()) is needed.
      assign(newdf, piece)
    }
    
    for(唯一类别中的cat(df$类别)){
    
    newdf一个经典的for循环怎么样:

    # Key every row by its measure/category pair. The key doubles as the name
    # of the resulting data frame and of its value column. Note that this adds
    # a helper column `name` to `df` itself.
    df$name <- paste(df$measure, df$category, sep = ".")

    # Named list that collects one data frame per key.
    list_df <- list()

    # Visit each distinct key once, in order of first appearance.
    for (i in unique(df$name)) {
      data <- df[df$name == i, ]                     # rows belonging to this key
      colnames(data)[colnames(data) == "mean"] <- i  # rename the mean column
      data$name <- NULL                              # drop the helper key column
      list_df[[i]] <- data                           # store under the key
    }

    # Printing the list shows every data frame under its name:
    list_df
    #> $cars.owned.by.system
    #>                   railroad       measure category    irm cars.owned.by.system
    #> 1     bnsf railway company cars.owned.by   system 201510             66623.00
    #> 127 union pacific railroad cars.owned.by   system 201510             57618.33
    #>
    #> $cars.owned.by.private
    #>                   railroad       measure category    irm cars.owned.by.private
    #> 3     bnsf railway company cars.owned.by  private 201510              149937.3
    #> 129 union pacific railroad cars.owned.by  private 201510              195764.7
    #>
    #> ... and so on

    # Select a single data frame by its name:
    list_df$cars.type.box
    #>                   railroad   measure category    irm cars.type.box
    #> 6     bnsf railway company cars.type      box 201510      11395.00
    #> 132 union pacific railroad cars.type      box 201510      22229.33

    # Each element is a data frame:
    class(list_df$cars.type.box)
    #> [1] "data.frame"
    
    #首先创建粘贴的名称以迭代循环
    
    df$name经典for循环怎么样:

    # Key every row by its measure/category pair. The key doubles as the name
    # of the resulting data frame and of its value column. Note that this adds
    # a helper column `name` to `df` itself.
    df$name <- paste(df$measure, df$category, sep = ".")

    # Named list that collects one data frame per key.
    list_df <- list()

    # Visit each distinct key once, in order of first appearance.
    for (i in unique(df$name)) {
      data <- df[df$name == i, ]                     # rows belonging to this key
      colnames(data)[colnames(data) == "mean"] <- i  # rename the mean column
      data$name <- NULL                              # drop the helper key column
      list_df[[i]] <- data                           # store under the key
    }

    # Printing the list shows every data frame under its name:
    list_df
    #> $cars.owned.by.system
    #>                   railroad       measure category    irm cars.owned.by.system
    #> 1     bnsf railway company cars.owned.by   system 201510             66623.00
    #> 127 union pacific railroad cars.owned.by   system 201510             57618.33
    #>
    #> $cars.owned.by.private
    #>                   railroad       measure category    irm cars.owned.by.private
    #> 3     bnsf railway company cars.owned.by  private 201510              149937.3
    #> 129 union pacific railroad cars.owned.by  private 201510              195764.7
    #>
    #> ... and so on

    # Select a single data frame by its name:
    list_df$cars.type.box
    #>                   railroad   measure category    irm cars.type.box
    #> 6     bnsf railway company cars.type      box 201510      11395.00
    #> 132 union pacific railroad cars.type      box 201510      22229.33

    # Each element is a data frame:
    class(list_df$cars.type.box)
    #> [1] "data.frame"
    
    #首先创建粘贴的名称以迭代循环
    
    df$name考虑
    split
    按这两个因素对数据帧进行子集划分,然后
    Map
    (wrapper to
    mapply
    )对子集数据帧和列表名称进行元素级迭代

    另外,可以考虑用 setNames() 代替赋值形式的 colnames<-,以便在一次调用中返回重新命名后的对象。

    # Split into a named list with one data frame per measure/category pair.
    # Names take the form "<measure>.<category>". drop = TRUE omits pairs that
    # never occur in the data, which would otherwise become zero-row elements.
    df_list <- split(df, list(df$measure, df$category), drop = TRUE)

    # Rename the fifth column (`mean`) of each element to the element's name.
    # setNames() returns the renamed data frame, so Map() rebuilds the list.
    df_list <- Map(function(sub, nm) setNames(sub, replace(names(sub), 5L, nm)),
                   df_list, names(df_list))

    # Access individual data frames by name
    df_list$cars.owned.by.system

    df_list$cars.type.box

    df_list$terminal.dwell.entire.railroad
    ...
    
    #创建命名列表
    
    df_list考虑
    split
    按这两个因素对数据帧进行子集划分,然后
    Map
    (wrapper to
    mapply
    )对子集数据帧和列表名称进行元素级迭代

    另外,可以考虑用 setNames() 代替赋值形式的 colnames<-,以便在一次调用中返回重新命名后的对象。

    # Split into a named list with one data frame per measure/category pair.
    # Names take the form "<measure>.<category>". drop = TRUE omits pairs that
    # never occur in the data, which would otherwise become zero-row elements.
    df_list <- split(df, list(df$measure, df$category), drop = TRUE)

    # Rename the fifth column (`mean`) of each element to the element's name.
    # setNames() returns the renamed data frame, so Map() rebuilds the list.
    df_list <- Map(function(sub, nm) setNames(sub, replace(names(sub), 5L, nm)),
                   df_list, names(df_list))

    # Access individual data frames by name
    df_list$cars.owned.by.system

    df_list$cars.type.box

    df_list$terminal.dwell.entire.railroad
    ...
    
    #创建命名列表
    
    df_list这将为您提供一个命名的数据帧列表,这几乎肯定比在您的全局环境中单独使用它们要好:

    # Split on the pasted "measure.category" key, then give each piece's fifth
    # column the key as its name; the first four names are kept unchanged.
    lst <- purrr::imap(
      split(df, paste(df$measure, df$category, sep = ".")),
      function(piece, key) setNames(piece, c(names(piece)[1:4], key))
    )
    
    lst%
    
    purrr::imap(~`names这将为您提供一个数据帧的命名列表,这几乎肯定比在您的全局环境中单独使用它们要好:

    # Split on the pasted "measure.category" key, then give each piece's fifth
    # column the key as its name; the first four names are kept unchanged.
    lst <- purrr::imap(
      split(df, paste(df$measure, df$category, sep = ".")),
      function(piece, key) setNames(piece, c(names(piece)[1:4], key))
    )
    
    lst%
    
    purrr::imap(~`names重新考虑单独的数据帧并使用数据帧列表(在全局环境中需要担心的对象更少)。重新考虑单独的数据帧并使用数据帧列表(在全局环境中需要担心的对象更少)。是否有方法通过
    rbind
    来堆叠
    df_list 的所有非空元素?当然,但由于每个 df 中有不同的列,因此使用外部库:
    dplyr::bind_rows(df_list)
    data.table::rbindlist(df_list)
    。否则,您需要填写其他组的空列,以便所有组都具有相同的列,以使用基本R的
    do.call(rbind, df_list)
    。顺便说一句,我是一个铁路大佬。你是如何在铁路上获得这些货运数据的?专有数据还是公共数据?它们只保留最近52周左右的数据,但很容易被删除,因此如果你每周都运行脚本,你就可以获得最新的数据。有没有办法将
    df_list通过
    rbind
    ?当然可以,但是因为每个df中有不同的列,所以使用外部库:
    dplyr::bind_rows(df_list)
    data.table::rbindlist(df_list)
    。否则,需要填充其他组的空列,以便所有组都有相同的列来使用基本R
    do.call(rbind, df_list)
    。顺便说一句,我是一个铁路大佬。你是如何在铁路上获得这些货运数据的?专有数据还是公共数据?它们只保留最近52周左右的数据,但很容易被删除,因此如果你每周运行脚本,就可以获得最新的数据。这非常有效。唯一的问题是它没有重命名第5列。我遗漏了那部分--编辑了我的答案以重命名第5列。这真的很有效。唯一的问题是它没有重命名第5列--我遗漏了那部分--编辑了我的答案以重命名第5列。