R 从类别层次结构填充数据帧
我在分析地理数据。我有各州的数据,我想按分区和地区进行分组,就像人口普查局分组一样。这里有一个层次结构:从大到小的区域、分区和州 我想做的是填写一个新的数据框,对这些信息进行编码。然后我可以将其用作参考,并清理数据。我试过几种方法来解决这个问题,但一直都很困惑。我感谢任何解决办法 以下是部门列表:R 从类别层次结构填充数据帧,r,R,我在分析地理数据。我有各州的数据,我想按分区和地区进行分组,就像人口普查局分组一样。这里有一个层次结构:从大到小的区域、分区和州 我想做的是填写一个新的数据框,对这些信息进行编码。然后我可以将其用作参考,并清理数据。我试过几种方法来解决这个问题,但一直都很困惑。我感谢任何解决办法 以下是部门列表: pacific <- c('WA', 'OR', 'CA', 'AK', 'HI') mountain <- c('MT', 'ID', 'WY', 'NV', 'UT', 'CO', '
pacific <- c('WA', 'OR', 'CA', 'AK', 'HI')
mountain <- c('MT', 'ID', 'WY', 'NV', 'UT', 'CO', 'AZ', 'NM')
w.n.central <- c('ND', 'SD', 'NE', 'KS', 'MN', 'IA', 'MO')
w.s.central <- c('TX', 'OK', 'AR', 'LA')
e.n.central <- c('WI', 'MI', 'IL', 'IN', 'OH')
e.s.central <- c('KY', 'TN', 'MS', 'AL')
mid.atlantic <- c('NY', 'PA', 'NJ')
new.england <- c('VT', 'NH', 'MA', 'CT', 'RI', 'ME')
south.atlantic <- c('WV', 'MD', 'DE', 'DC', 'VA', 'NC', 'SC', 'GA', 'FL')
divisions <- c(pacific, mountain, w.n.central, w.s.central, e.n.central, e.s.central, mid.atlantic, south.atlantic, new.england)
这有帮助吗
st <- state.abb
lst <- mget(regions)
v1 <- unlist(lapply(names(lst), function(x) {
x1 <- lst[[x]]
setNames(rep(x, length(x1)),x1)}))
reg <- unname(v1[st])
divisions1 <- c('pacific', 'mountain', 'w.n.central', 'w.s.central',
'e.n.central', 'e.s.central', 'mid.atlantic', 'south.atlantic',
'new.england')
lst2 <- mget(divisions1)
v2 <- unlist(lapply(names(lst2), function(x) {
x1 <- lst2[[x]]
setNames(rep(x, length(x1)),x1)}))
div <- unname(v2[st])
dat <- data.frame(state=st, division=div, region=reg,
stringsAsFactors=FALSE)
head(dat,3)
# state division region
#1 AL e.s.central south
#2 AK pacific west
#3 AZ mountain west
这有帮助吗
st <- state.abb
lst <- mget(regions)
v1 <- unlist(lapply(names(lst), function(x) {
x1 <- lst[[x]]
setNames(rep(x, length(x1)),x1)}))
reg <- unname(v1[st])
divisions1 <- c('pacific', 'mountain', 'w.n.central', 'w.s.central',
'e.n.central', 'e.s.central', 'mid.atlantic', 'south.atlantic',
'new.england')
lst2 <- mget(divisions1)
v2 <- unlist(lapply(names(lst2), function(x) {
x1 <- lst2[[x]]
setNames(rep(x, length(x1)),x1)}))
div <- unname(v2[st])
dat <- data.frame(state=st, division=div, region=reg,
stringsAsFactors=FALSE)
head(dat,3)
# state division region
#1 AL e.s.central south
#2 AK pacific west
#3 AZ mountain west
使用dplyr:
使用dplyr:
谢谢这很有帮助。您提到state.abb,它属于state数据集,它已经将所有这些信息编码到向量中。@roctus我没有检查state数据集。很高兴知道它已经在那里了…谢谢!这很有帮助。您提到state.abb,它属于state数据集,它已经将所有这些信息编码到向量中。@roctus我没有检查state数据集。很高兴知道它已经在那里了。。
st <- state.abb
lst <- mget(regions)
v1 <- unlist(lapply(names(lst), function(x) {
x1 <- lst[[x]]
setNames(rep(x, length(x1)),x1)}))
reg <- unname(v1[st])
divisions1 <- c('pacific', 'mountain', 'w.n.central', 'w.s.central',
'e.n.central', 'e.s.central', 'mid.atlantic', 'south.atlantic',
'new.england')
lst2 <- mget(divisions1)
v2 <- unlist(lapply(names(lst2), function(x) {
x1 <- lst2[[x]]
setNames(rep(x, length(x1)),x1)}))
div <- unname(v2[st])
dat <- data.frame(state=st, division=div, region=reg,
stringsAsFactors=FALSE)
head(dat,3)
# state division region
#1 AL e.s.central south
#2 AK pacific west
#3 AZ mountain west
library(dplyr)
chardiv <- c("pacific", "mountain", "w.n.central", "w.s.central",
"e.n.central", "e.s.central", "mid.atlantic",
"south.atlantic", "new.england")
dfdiv <- data.frame(state = unlist(mget(regions))) %>%
mutate(regions = gsub("[0-9]*$", "", rownames(.)))
dfstate = data.frame(state = unlist(mget(chardiv))) %>%
mutate(divisions = gsub("[0-9]*$", "", rownames(.)))
left_join(dfdiv, dfstate, by = "state")
#> head(df, 10L)
# state regions divisions
#1 VT northeast new.england
#2 NH northeast new.england
#3 MA northeast new.england
#4 CT northeast new.england
#5 RI northeast new.england
#6 ME northeast new.england
#7 NY northeast mid.atlantic
#8 PA northeast mid.atlantic
#9 NJ northeast mid.atlantic
#10 WI midwest e.n.central