简单加法。我不打算进一步完善这一点。在R中编写代码时,如果没有自己的一些实验,您将不会变得更好 place_to_county <- function(place, state = NULL) { if (is.null(state)) { x <- trimws(strsplit(place, ",", 2)[[1]]) place <- x[1] state <- x[2] } tbl_df(acs::fips.place) %>% filter(grepl(place, PLACENAME, ignore.case=TRUE) & STATE == state) -> xdf if (nrow(xdf) > 0) { separate_rows(xdf, COUNTY, sep=", ") %>% head(1) %>% setNames(tolower(colnames(.))) %>% left_join(tigris::fips_codes, by=c("county", "state")) } else { data_frame( state = state, statefp = NA, placefp = NA, placename = place, type = NA, funcstat = NA, county = NA, state_code = NA, state_name = NA, county_code = NA ) } } place_to_county% 总目(1)%>% 集合名(tolower(colnames(.))%%>% 左连接(底格里斯:fips代码,by=c(“县”、“州”)) }否则{ 数据帧( 状态=状态, statefp=NA, placefp=NA, placename=地点, 类型=NA, funcstat=NA, 郡=NA, 州代码=NA, 州名称=NA, 县代码=NA ) } }
任何其他功能/更改由您决定 你的意思是说上面的所有代码都应该封装在一个函数中,并且应该能够在输入的波特兰或“”上操作吗?如果你查看不同dat输入的输出,你会发现它只能有一个县,这很好,但有时也有多个县,其中一个县与城市名称匹配。因此,我需要这个测试程序来确保它为给定的输入选择最佳的“县”,如“波特兰,或”,“德克萨斯州达拉斯”,“纽约,纽约”。。。因此,理想的函数是一个大致与上面类似的函数,或者至少产生正确的结果。你是说上面的所有代码都应该封装在一个函数中,并且应该能够在输入上操作简单加法。我不打算进一步完善这一点。在R中编写代码时,如果没有自己的一些实验,您将不会变得更好 place_to_county <- function(place, state = NULL) { if (is.null(state)) { x <- trimws(strsplit(place, ",", 2)[[1]]) place <- x[1] state <- x[2] } tbl_df(acs::fips.place) %>% filter(grepl(place, PLACENAME, ignore.case=TRUE) & STATE == state) -> xdf if (nrow(xdf) > 0) { separate_rows(xdf, COUNTY, sep=", ") %>% head(1) %>% setNames(tolower(colnames(.))) %>% left_join(tigris::fips_codes, by=c("county", "state")) } else { data_frame( state = state, statefp = NA, placefp = NA, placename = place, type = NA, funcstat = NA, county = NA, state_code = NA, state_name = NA, county_code = NA ) } } place_to_county% 总目(1)%>% 集合名(tolower(colnames(.))%%>% 左连接(底格里斯:fips代码,by=c(“县”、“州”)) }否则{ 数据帧( 状态=状态, statefp=NA, placefp=NA, placename=地点, 类型=NA, funcstat=NA, 郡=NA, 州代码=NA, 州名称=NA, 县代码=NA ) } },r,function,dplyr,geocoding,R,Function,Dplyr,Geocoding,任何其他功能/更改由您决定 你的意思是说上面的所有代码都应该封装在一个函数中,并且应该能够在输入的波特兰或“”上操作吗?如果你查看不同dat输入的输出,你会发现它只能有一个县,这很好,但有时也有多个县,其中一个县与城市名称匹配。因此,我需要这个测试程序来确保它为给定的输入选择最佳的“县”,如“波特兰,或”,“德克萨斯州达拉斯”,“纽约,纽约”。。。因此,理想的函数是一个大致与上面类似的函数,或者至少产生正确的结果。你是说上面的所有代码都应该封装在一个函数中,并且应该能够在输入上操作“Portla
"Portland, OR"
?如果你查看不同输入下 dat 的输出,你会发现有时只有一个县,这没有问题;但有时会有多个县,其中一个县与城市名称匹配。因此,我需要这段判断逻辑来确保它为给定的输入(如 "Portland, OR"、"Dallas, TX"、"New York, NY"……)选出最合适的"县"。因此,理想的函数应当与上面的函数大致相同,或者至少能产生正确的结果。您能解释一下 `tbl_df(acs::fips.place) %>% ... separate_rows(xdf, COUNTY, sep=", ") %>%` 吗?刚刚发现还缺少一点:如何将该表与县的 FIPS 代码匹配?因为这些数据位于 data(fips_codes)
中。感谢您提供的非常好的代码注释!如果能把最后一点也解决,那就太棒了!"这是一种'笨'办法,因为它会匹配字符串中任意位置出现的地名。有更聪明的方法,但这样应该已经够用了。" -> 如果可能,它是否应该只与 county.name 匹配?将 geo.lookup()
函数的逻辑与此进行比较,并按您的需要修改过滤逻辑。您能解释一下 `tbl_df(acs::fips.place) %>% ... separate_rows(xdf, COUNTY, sep=", ") %>%` 吗?刚刚发现还缺少一点:如何将该表与县的 FIPS 代码匹配?因为这些数据位于 data(fips_codes)
中。感谢您提供了非常好的代码注释!如果能把最后一点也解决,那将非常棒!"这是一种'笨'办法,因为它会匹配字符串中任意位置出现的地名。有更聪明的方法,但这样应该已经够用了。" -> 如果可能,它是否应该只与 county.name 匹配?比较 geo.lookup()
函数的逻辑,并根据需要更改过滤逻辑。
library(acs)
library(tidyverse)
library(tigris)
data(fips_codes)
# Rename the FIPS table's columns to the dotted names (state.name,
# county.name, ...) that the lookup result is later joined on.
colnames(fips_codes) <- c(
  "state.abb",
  "statefips",
  "state.name",
  "countyfips",
  "county.name"
)
# Resolve a "Place, ST" string to one county and attach its FIPS codes.
#
# Args:
#   x: a single string that is split on ", "; the first piece is used as the
#      place name and the second as the state (e.g. "Portland, OR").
#
# Returns:
#   The selected one-row lookup result, left-joined with the global
#   `fips_codes` table on state.name and county.name. The row is printed.
#
# Selection rule:
#   * no candidate lists several counties -> the first candidate row;
#   * otherwise each county list is split into one row per county, and the
#     first county whose name contains the place name is taken
#     (e.g. "New York" -> "New York County"); if no county name contains the
#     place name, the first split row is used.
#
# Relies on geo.lookup() (acs) and on `fips_codes` having the column names
# state.abb / statefips / state.name / countyfips / county.name.
FUN <- function(x) {
  # Split the query once instead of once per component.
  parts <- strsplit(x, ", ")[[1]]
  Place <- parts[1]
  State <- parts[2]

  # Candidate rows; rows with any missing field are discarded.
  dat <- na.omit(geo.lookup(state = State, place = Place))

  # 1 Candidates whose county.name bundles several counties ("A, B, C").
  cvals <- dat %>% filter(str_detect(county.name, ","))

  if (nrow(cvals) == 0) {
    # 2 No bundled counties: take the first candidate.
    # (Testing nrow(cvals[2, ]) would never be 0, because indexing a missing
    # row yields one all-NA row.)
    output <- dat[1, ]
  } else {
    # 3 Unbundle: one row per (place, county) pair.
    unbundle <- dat %>%
      group_by(state.name, place.name) %>%
      mutate(county.name = strsplit(county.name, ", ")) %>%
      unnest(county.name) %>%
      ungroup() %>%
      na.omit()

    # 3.1 Prefer a county whose name contains the place name. The place is
    # matched literally so characters such as "." are not read as regex.
    check <- unbundle %>% filter(str_detect(county.name, fixed(Place)))

    # 3.2 Fall back to the first unbundled row only when nothing matched.
    output <- if (nrow(check) > 0) check[1, ] else unbundle[1, ]
  }

  # Join county data with the FIPS code table.
  output <- output %>%
    left_join(fips_codes, by = c("state.name", "county.name"))
  print(output)
}
# Run FUN() on a few sample "Place, ST" queries; FUN() prints each result.
invisible(lapply(
  c("New York, NY", "Portland, OR", "Manhattan, NY", "Cambridge, MA"),
  FUN
))
library(acs)
library(tidyverse)
# Look up the county of a US place in the acs::fips.place table.
#
# Args:
#   place: place name, matched case-insensitively as a regular expression
#          anywhere in PLACENAME; or a combined "Place, ST" string when
#          `state` is NULL.
#   state: value compared for equality with the STATE column (e.g. "NY");
#          NULL (default) to take it from the text after the comma in `place`.
#
# Returns:
#   A tibble with lower-cased column names holding the first matching row
#   after multi-county places are expanded to one row per county. It has zero
#   rows when nothing matches.
place_to_county <- function(place, state = NULL) {
  if (is.null(state)) {
    # strsplit() has no split-limit argument: a bare third positional value
    # is taken as `fixed`, so spell out fixed = TRUE.
    parts <- trimws(strsplit(place, ",", fixed = TRUE)[[1]])
    place <- parts[1]
    state <- parts[2]
  }

  # as_tibble() replaces the deprecated tbl_df().
  tibble::as_tibble(acs::fips.place) %>%
    filter(grepl(place, PLACENAME, ignore.case = TRUE) & STATE == state) %>%
    separate_rows(COUNTY, sep = ", ") %>%
    head(1) %>%
    setNames(tolower(colnames(.)))
}
# Example calls. Each call is followed by the tibble it printed, kept as `##`
# comments. Every place is queried twice: once with separate place/state
# arguments and once as a single "Place, ST" string; both forms print the
# same row.
# Note: "New York" prints Bronx County, not New York County.
place_to_county("New York", "NY")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 NY 36 51000 New York city Incorporated Place A Bronx County
place_to_county("New York, NY")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 NY 36 51000 New York city Incorporated Place A Bronx County
place_to_county("Queens", "NY")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 NY 36 60323 Queens borough County Subdivision G Queens County
place_to_county("Queens, NY")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 NY 36 60323 Queens borough County Subdivision G Queens County
place_to_county("Berwick", "ME")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 ME 23 4685 Berwick CDP Census Designated Place S York County
place_to_county("Berwick, ME")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 ME 23 4685 Berwick CDP Census Designated Place S York County
place_to_county("Manhattan", "NY")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 NY 36 44919 Manhattan borough County Subdivision G New York County
place_to_county("Manhattan, NY")
## # A tibble: 1 x 7
## state statefp placefp placename type funcstat county
## <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 NY 36 44919 Manhattan borough County Subdivision G New York County
# Look up the county of a US place in the acs::fips.place table, returning
# NULL when the place is not found.
#
# Args:
#   place: place name, matched case-insensitively as a regular expression
#          anywhere in PLACENAME; or a combined "Place, ST" string when
#          `state` is NULL.
#   state: value compared for equality with the STATE column (e.g. "NY");
#          NULL (default) to take it from the text after the comma in `place`.
#
# Returns:
#   NULL when no row matches. Otherwise a one-row tibble with lower-cased
#   column names: the first row after multi-county places are expanded to one
#   row per county.
place_to_county <- function(place, state = NULL) {
  if (is.null(state)) {
    # strsplit() has no split-limit argument: a bare third positional value
    # is taken as `fixed`, so spell out fixed = TRUE.
    parts <- trimws(strsplit(place, ",", fixed = TRUE)[[1]])
    place <- parts[1]
    state <- parts[2]
  }

  # as_tibble() replaces the deprecated tbl_df().
  xdf <- tibble::as_tibble(acs::fips.place) %>%
    filter(grepl(place, PLACENAME, ignore.case = TRUE) & STATE == state)

  if (nrow(xdf) == 0) {
    return(NULL)
  }

  xdf %>%
    separate_rows(COUNTY, sep = ", ") %>%
    head(1) %>%
    setNames(tolower(colnames(.)))
}
# The core lookup pipeline on its own. `place` and `state` must already be
# defined in the calling environment.
#   1. keep rows whose STATE equals `state` and whose PLACENAME matches `place`
#   2. expand comma-separated COUNTY values to one row per county
#   3. keep the first row and lower-case the column names
acs::fips.place %>%
  tbl_df() %>%
  filter(STATE == state & grepl(place, PLACENAME, ignore.case = TRUE)) %>%
  separate_rows(COUNTY, sep = ", ") %>%
  head(n = 1) %>%
  setNames(., tolower(names(.)))
# Look up the county of a US place in acs::fips.place and attach the matching
# record from tigris::fips_codes.
#
# Args:
#   place: place name, matched case-insensitively as a regular expression
#          anywhere in PLACENAME; or a combined "Place, ST" string when
#          `state` is NULL.
#   state: value compared for equality with the STATE column (e.g. "NY");
#          NULL (default) to take it from the text after the comma in `place`.
#
# Returns:
#   NULL when no row matches. Otherwise the first row after multi-county
#   places are expanded to one row per county, with lower-cased column names,
#   left-joined with tigris::fips_codes on county and state.
place_to_county <- function(place, state = NULL) {
  if (is.null(state)) {
    # strsplit() has no split-limit argument: a bare third positional value
    # is taken as `fixed`, so spell out fixed = TRUE.
    parts <- trimws(strsplit(place, ",", fixed = TRUE)[[1]])
    place <- parts[1]
    state <- parts[2]
  }

  # as_tibble() replaces the deprecated tbl_df().
  xdf <- tibble::as_tibble(acs::fips.place) %>%
    filter(grepl(place, PLACENAME, ignore.case = TRUE) & STATE == state)

  if (nrow(xdf) == 0) {
    return(NULL)
  }

  xdf %>%
    separate_rows(COUNTY, sep = ", ") %>%
    head(1) %>%
    setNames(tolower(colnames(.))) %>%
    left_join(tigris::fips_codes, by = c("county", "state"))
}
# Look up the county of a US place in acs::fips.place and attach the matching
# record from tigris::fips_codes. Always returns one row, so results for many
# inputs can be stacked.
#
# Args:
#   place: place name, matched case-insensitively as a regular expression
#          anywhere in PLACENAME; or a combined "Place, ST" string when
#          `state` is NULL.
#   state: value compared for equality with the STATE column (e.g. "NY");
#          NULL (default) to take it from the text after the comma in `place`.
#
# Returns:
#   A one-row tibble. On a match: the first row after multi-county places are
#   expanded to one row per county, with lower-cased column names, left-joined
#   with tigris::fips_codes on county and state. On no match: a placeholder
#   row that echoes `state` and `place` and is NA in every other column.
place_to_county <- function(place, state = NULL) {
  if (is.null(state)) {
    # strsplit() has no split-limit argument: a bare third positional value
    # is taken as `fixed`, so spell out fixed = TRUE.
    parts <- trimws(strsplit(place, ",", fixed = TRUE)[[1]])
    place <- parts[1]
    state <- parts[2]
  }

  # as_tibble() replaces the deprecated tbl_df().
  xdf <- tibble::as_tibble(acs::fips.place) %>%
    filter(grepl(place, PLACENAME, ignore.case = TRUE) & STATE == state)

  if (nrow(xdf) > 0) {
    xdf %>%
      separate_rows(COUNTY, sep = ", ") %>%
      head(1) %>%
      setNames(tolower(colnames(.))) %>%
      left_join(tigris::fips_codes, by = c("county", "state"))
  } else {
    # tibble() replaces the deprecated data_frame().
    tibble::tibble(
      state = state,
      statefp = NA,
      placefp = NA,
      placename = place,
      type = NA,
      funcstat = NA,
      county = NA,
      state_code = NA,
      state_name = NA,
      county_code = NA
    )
  }
}