R-根据现有列值创建和命名数据帧
通过R-根据现有列值创建和命名数据帧,r,dataframe,R,Dataframe,通过dput,我有一个这样结构的数据帧: structure(list(railroad = c("bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "bnsf railway company", "union
dput
,我有一个这样结构的数据帧:
# dput() output of the example data frame: 16 rows with the columns
# railroad, measure, category, irm and mean. The expression is not assigned
# here; the snippets further down refer to this data as `df`.
structure(list(railroad = c("bnsf railway company", "bnsf railway company",
"bnsf railway company", "bnsf railway company", "bnsf railway company",
"bnsf railway company", "bnsf railway company", "bnsf railway company",
"union pacific railroad", "union pacific railroad", "union pacific railroad",
"union pacific railroad", "union pacific railroad", "union pacific railroad",
"union pacific railroad", "union pacific railroad"), measure =
c("cars.owned.by",
"cars.owned.by", "cars.type", "cars.type", "cars.type", "train.speed",
"train.speed", "terminal.dwell", "cars.owned.by", "cars.owned.by",
"cars.type", "cars.type", "cars.type", "train.speed", "train.speed",
"terminal.dwell"), category = c("system", "private", "box", "intermodal",
"total", "intermodal", "all.trains", "entire.railroad", "system",
"private", "box", "intermodal", "total", "intermodal", "all.trains",
"entire.railroad"), irm = c(201510L, 201510L, 201510L, 201510L,
201510L, 201510L, 201510L, 201510L, 201510L, 201510L, 201510L,
201510L, 201510L, 201510L, 201510L, 201510L), mean = c(66623,
149937.333, 11395, 16499, 236866, 33.3, 24.5, 25.267, 57618.333,
195764.667, 22229.333, 14135.333, 293164.333, 31.933, 26.6, 27.6
)), row.names = c(1L, 3L, 6L, 9L, 14L, 15L, 20L, 32L, 127L, 129L,
132L, 135L, 140L, 141L, 146L, 160L), class = "data.frame")
我想做的是:
# Keep only the rows whose category is "system".
cars.owned.by.system <- df[df$category == "system", ]
# Rename the fifth column (`mean`) to the data frame's own name. The new name
# must be a quoted string; unquoted, it would refer to the data frame itself.
colnames(cars.owned.by.system)[5] <- "cars.owned.by.system"
为 measure 和 category 的每个组合创建单独的数据框,
通过粘贴 measure 和 category 来命名,并用“.”分隔。因此,第一个数据帧将被称为 cars.owned.by.system,依此类推。
将 mean 列重命名为数据帧本身的名称。因此,对于第一个数据帧,它将是 colnames(df)[5] <- "cars.owned.by.system"。
假设 df 是您的数据帧,我认为这样做就可以了:
# Create one data frame per measure/category combination in the current
# environment. Each is named "<measure>.<category>" and has its fifth column
# (`mean`) renamed to that same name.
combo_names <- paste(df$measure, df$category, sep = ".")
for (combo in unique(combo_names)) {
  # Match on the combined key: a category such as "intermodal" occurs under
  # more than one measure, so filtering on category alone would mix them.
  subset_df <- df[combo_names == combo, ]
  # Rename before assigning, so no eval(parse()) round trip is needed.
  colnames(subset_df)[5] <- combo
  assign(combo, subset_df)
}
for(唯一类别中的cat(df$类别)){
newdf假设df
是您的数据帧,我认为这样做了
# Create one data frame per measure/category combination in the current
# environment. Each is named "<measure>.<category>" and has its fifth column
# (`mean`) renamed to that same name.
combo_names <- paste(df$measure, df$category, sep = ".")
for (combo in unique(combo_names)) {
  # Match on the combined key: a category such as "intermodal" occurs under
  # more than one measure, so filtering on category alone would mix them.
  subset_df <- df[combo_names == combo, ]
  # Rename before assigning, so no eval(parse()) round trip is needed.
  colnames(subset_df)[5] <- combo
  assign(combo, subset_df)
}
for(唯一类别中的cat(df$类别)){
newdf一个经典的for循环怎么样:
# Build one data frame per railroad/measure pair and collect them in a named
# list, renaming the `mean` column of each piece to the piece's own name.

# first create the pasted name used as the grouping key
df$name <- paste(df$railroad, df$measure, sep = ".")
# an empty list to hold all the data frames
list_df <- list()
# the loop: visit each distinct key once
for (key in unique(df$name)) {
  data <- df[df$name == key, ]                     # select the rows of this key
  colnames(data)[colnames(data) == "mean"] <- key  # rename the mean column
  data$name <- NULL                                # remove the helper key column
  list_df[[key]] <- data                           # store in list
}
# here you can see all the df in a list
list_df
#> $`bnsf railway company.cars.owned.by`
#>               railroad       measure category    irm bnsf railway company.cars.owned.by
#> 1 bnsf railway company cars.owned.by   system 201510                            66623.0
#> 3 bnsf railway company cars.owned.by  private 201510                           149937.3
#>
#> $`bnsf railway company.cars.type`
#>                railroad   measure   category    irm bnsf railway company.cars.type
#> 6  bnsf railway company cars.type        box 201510                          11395
#> 9  bnsf railway company cars.type intermodal 201510                          16499
#> 14 bnsf railway company cars.type      total 201510                         236866
#>
#> ... and so on
# you can select each df, for example by choosing its name
list_df$`bnsf railway company.cars.type`
#>                railroad   measure   category    irm bnsf railway company.cars.type
#> 6  bnsf railway company cars.type        box 201510                          11395
#> 9  bnsf railway company cars.type intermodal 201510                          16499
#> 14 bnsf railway company cars.type      total 201510                         236866
# and you're sure it's a df
class(list_df$`bnsf railway company.cars.type`)
#> [1] "data.frame"
#首先创建粘贴的名称以迭代循环
df$name经典for循环怎么样:
# Build one data frame per railroad/measure pair and collect them in a named
# list, renaming the `mean` column of each piece to the piece's own name.

# first create the pasted name used as the grouping key
df$name <- paste(df$railroad, df$measure, sep = ".")
# an empty list to hold all the data frames
list_df <- list()
# the loop: visit each distinct key once
for (key in unique(df$name)) {
  data <- df[df$name == key, ]                     # select the rows of this key
  colnames(data)[colnames(data) == "mean"] <- key  # rename the mean column
  data$name <- NULL                                # remove the helper key column
  list_df[[key]] <- data                           # store in list
}
# here you can see all the df in a list
list_df
#> $`bnsf railway company.cars.owned.by`
#>               railroad       measure category    irm bnsf railway company.cars.owned.by
#> 1 bnsf railway company cars.owned.by   system 201510                            66623.0
#> 3 bnsf railway company cars.owned.by  private 201510                           149937.3
#>
#> $`bnsf railway company.cars.type`
#>                railroad   measure   category    irm bnsf railway company.cars.type
#> 6  bnsf railway company cars.type        box 201510                          11395
#> 9  bnsf railway company cars.type intermodal 201510                          16499
#> 14 bnsf railway company cars.type      total 201510                         236866
#>
#> ... and so on
# you can select each df, for example by choosing its name
list_df$`bnsf railway company.cars.type`
#>                railroad   measure   category    irm bnsf railway company.cars.type
#> 6  bnsf railway company cars.type        box 201510                          11395
#> 9  bnsf railway company cars.type intermodal 201510                          16499
#> 14 bnsf railway company cars.type      total 201510                         236866
# and you're sure it's a df
class(list_df$`bnsf railway company.cars.type`)
#> [1] "data.frame"
#首先创建粘贴的名称以迭代循环
df$name考虑split
按这两个因素对数据帧进行子集划分,然后使用 Map(mapply 的包装器)对子集数据帧和列表名称进行逐元素迭代。
也可以考虑使用 setNames() 代替赋值形式的 colnames()(即 colnames(x) <- ...),以便在一次调用中返回重新命名的对象。
# CREATES NAMED LIST, ONE ELEMENT PER measure/category PAIR PRESENT IN THE DATA
# (drop = TRUE discards the empty data frames that splitting on two factors
# would otherwise create for combinations that never occur)
df_list <- split(df, list(df$measure, df$category), drop = TRUE)

# RETURNS SAME LIST WITH FIFTH COLUMN RENAMED TO EACH ELEMENT'S NAME
# (the first four column names are taken from the data, not hard-coded)
df_list <- Map(
  function(sub, nm) setNames(sub, c(names(sub)[1:4], nm)),
  df_list, names(df_list)
)

# OUTPUT DFs
df_list$cars.owned.by.system
df_list$cars.type.box
df_list$terminal.dwell.entire.railroad
# ... and so on
#创建命名列表
df_list考虑split
按这两个因素对数据帧进行子集划分,然后使用 Map(mapply 的包装器)对子集数据帧和列表名称进行逐元素迭代。
也可以考虑使用 setNames() 代替赋值形式的 colnames()(即 colnames(x) <- ...),以便在一次调用中返回重新命名的对象。
# CREATES NAMED LIST, ONE ELEMENT PER measure/category PAIR PRESENT IN THE DATA
# (drop = TRUE discards the empty data frames that splitting on two factors
# would otherwise create for combinations that never occur)
df_list <- split(df, list(df$measure, df$category), drop = TRUE)

# RETURNS SAME LIST WITH FIFTH COLUMN RENAMED TO EACH ELEMENT'S NAME
# (the first four column names are taken from the data, not hard-coded)
df_list <- Map(
  function(sub, nm) setNames(sub, c(names(sub)[1:4], nm)),
  df_list, names(df_list)
)

# OUTPUT DFs
df_list$cars.owned.by.system
df_list$cars.type.box
df_list$terminal.dwell.entire.railroad
# ... and so on
#创建命名列表
df_list这将为您提供一个命名的数据帧列表,这几乎肯定比在您的全局环境中单独使用它们要好:
# Split the rows by the pasted "<measure>.<category>" key, then give each
# piece's fifth column the key it was grouped under.
lst <- purrr::imap(
  split(df, paste(df$measure, df$category, sep = ".")),
  function(piece, key) {
    names(piece) <- c(names(piece)[1:4], key)
    piece
  }
)
lst%
purrr::imap(~`names这将为您提供一个数据帧的命名列表,这几乎肯定比在您的全局环境中单独使用它们要好:
# Split the rows by the pasted "<measure>.<category>" key, then give each
# piece's fifth column the key it was grouped under.
lst <- purrr::imap(
  split(df, paste(df$measure, df$category, sep = ".")),
  function(piece, key) {
    names(piece) <- c(names(piece)[1:4], key)
    piece
  }
)
lst%
purrr::imap(~`names重新考虑单独的数据帧并使用数据帧列表(在全局环境中需要担心的对象更少)。重新考虑单独的数据帧并使用数据帧列表(在全局环境中需要担心的对象更少)。是否有方法通过rbind
来堆叠 df_list 的所有非空元素?当然,但由于每个 df 中有不同的列,因此使用外部库:dplyr::bind_rows(df_list)
或 data.table::rbindlist(df_list)
。否则,您需要为其他组填充空列,使所有组都具有相同的列,才能使用基础 R 的 do.call(rbind, df_list)
。顺便说一句,我是一个铁路大佬。你是如何在铁路上获得这些货运数据的?专有数据还是公共数据?它们只保留最近52周左右的数据,但很容易被删除,因此如果你每周都运行脚本,你就可以获得最新的数据。有没有办法将df_list通过rbind
?当然可以,但是因为每个 df 中有不同的列,所以使用外部库:dplyr::bind_rows(df_list)
或 data.table::rbindlist(df_list)
。否则,需要为其他组填充空列,使所有组都有相同的列,才能使用基础 R 的 do.call(rbind, df_list)
。顺便说一句,我是一个铁路大佬。你是如何在铁路上获得这些货运数据的?专有数据还是公共数据?它们只保留最近52周左右的数据,但很容易被删除,因此如果你每周运行脚本,就可以获得最新的数据。这非常有效。唯一的问题是它没有重命名第5个ccolumnI遗漏了那部分--编辑了我的答案以重命名第5列。这真的很有效。唯一的问题是它没有重命名第5列--我遗漏了那部分--编辑了我的答案以重命名第5列。