R 帮助理解为什么我的自定义函数可以应用于字符向量而不是数据帧
我拼凑了一个函数:当提供 NPI ID 的字符向量时,它可以从美国政府的 API 中提取数据。
# Example NPI identifiers, kept as a character vector.
providerIDs <- c(
  "1982812681", "1336201888", "1902121692", "1164496618",
  "1073557641", "1255522488", "1679705214", "1467719260"
)
我知道这对一些人来说是基本的,但是如果能帮助我理解如何传入字符向量而不是数据帧,那将是一个巨大的帮助
以下是我正在使用的函数:
# Load the packages used by getNPI() through pacman.
pacman::p_load(tidyverse,httr,jsonlite,purrr)
# Base URL of the NPI registry API; getNPI() reads this global when it builds
# its request.
path <- "https://npiregistry.cms.hhs.gov/api/?"
# Query the NPPES NPI registry for one NPI number and summarise the provider.
#
# `object` is sent as the `number` query parameter to the URL stored in the
# global `path`. The JSON reply is flattened into a data frame, the address and
# taxonomy list-columns are unnested, and any expected column that is absent is
# filled with "UNKNOWN" so the later column selection does not fail.
#
# Returns the distinct rows whose `primary` flag is TRUE, with the columns
# Provider_NPI, Provider_Name, Provider_Gender, Provider_Credentials,
# Provider_Taxonomy, primary and Provider_Type.
getNPI <- function(object) {
  response <- httr::GET(
    url = path,
    query = list(version = "2.0", number = object)
  )
  # Surface HTTP failures as a warning; processing continues either way.
  warn_for_status(response)

  body_text <- content(response, as = "text", encoding = "UTF-8")
  parsed <- jsonlite::fromJSON(body_text, flatten = TRUE)
  df <- parsed %>%
    data.frame() %>%
    tidyr::unnest(
      c(results.addresses, results.taxonomies),
      names_repair = "unique"
    )

  # Columns the registry may or may not return for a given provider.
  expected_cols <- c(
    "result_count",
    "results.enumeration_type",
    "results.number",
    "results.last_updated_epoch",
    "results.created_epoch",
    "results.other_names",
    "country_code",
    "country_name",
    "address_purpose",
    "address_type",
    "address_1",
    "address_2",
    "city",
    "state",
    "postal_code",
    "telephone_number",
    "code",
    "desc",
    "primary",
    "state1",
    "license",
    "results.identifiers",
    "results.basic.first_name",
    "results.basic.last_name",
    "results.basic.middle_name",
    "results.basic.credential",
    "results.basic.sole_proprietor",
    "results.basic.gender",
    "results.basic.enumeration_date",
    "results.basic.last_updated",
    "results.basic.status",
    "results.basic.name"
  )
  missing_cols <- setdiff(expected_cols, names(df))
  # Iterating over an empty vector is a no-op, so no length guard is needed.
  for (col_name in missing_cols) {
    df[, col_name] <- "UNKNOWN"
  }

  df %>%
    # Keep only the reported fields, renaming them in the same step.
    select(
      Provider_NPI = results.number,
      Provider_Name = results.basic.name,
      results.enumeration_type,
      Provider_Gender = results.basic.gender,
      Provider_Credentials = results.basic.credential,
      Provider_Taxonomy = desc,
      primary
    ) %>%
    mutate(
      Provider_Type = case_when(
        results.enumeration_type == "NPI-1" ~ "Individual Provider",
        results.enumeration_type == "NPI-2" ~ "Organizational Provider"
      )
    ) %>%
    select(-results.enumeration_type) %>%
    # A provider can list several taxonomies; keep only the primary one.
    filter(primary == "TRUE") %>%
    # Unnesting addresses yields one row per address type; collapse duplicates.
    distinct()
}
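pacman::p_load(tidyverse,httr,jsonlite,purrr)
path <- "https://npiregistry.cms.hhs.gov/api/?"
# ……(中间部分在转载时丢失,内容与上文 getNPI 的定义相同)……
mutate(
Provider_Type = case_when(
results.enumeration_type == "NPI-1" ~ 'Individual Provider',
results.enumeration_type == "NPI-2" ~ 'Organizational Provider'
)
) %>%
select(-results.enumeration_type) %>%
# 某些提供者有多个分类(taxonomy),这里只保留主要的那一个
filter(primary == 'TRUE') %>%
# 由于地址类型不同,unnest 之后会产生重复行
distinct()
}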
`lapply` 没有 `MARGIN` 参数,而且您可以直接把 providerIDs 向量传给它。如果要将其应用于 data.frame,请使用 `lapply(providerIDs[,1], FUN = getNPI)`。
`lapply` 没有 `MARGIN` 参数,而且您可以直接把 providerIDs 向量传给它。如果要将其应用于 data.frame 的某一列,请使用 `lapply(providerIDs[,1], FUN = getNPI)`。
@AbdessabourMtk请添加作为答案,我很乐意接受作为答案。非常感谢。
x <- lapply(providerIDs[,1], MARGIN = 2, FUN = getNPI)
Error in providerIDs[, 1] : incorrect number of dimensions
# Load the packages used by getNPI() through pacman.
pacman::p_load(tidyverse,httr,jsonlite,purrr)
# Base URL of the NPI registry API; getNPI() reads this global when it builds
# its request.
path <- "https://npiregistry.cms.hhs.gov/api/?"
# Query the NPPES NPI registry for one NPI number and summarise the provider.
#
# `object` is sent as the `number` query parameter to the URL stored in the
# global `path`. The JSON reply is flattened into a data frame, the address and
# taxonomy list-columns are unnested, and any expected column that is absent is
# filled with "UNKNOWN" so the later column selection does not fail.
#
# Returns the distinct rows whose `primary` flag is TRUE, with the columns
# Provider_NPI, Provider_Name, Provider_Gender, Provider_Credentials,
# Provider_Taxonomy, primary and Provider_Type.
getNPI <- function(object) {
  response <- httr::GET(
    url = path,
    query = list(version = "2.0", number = object)
  )
  # Surface HTTP failures as a warning; processing continues either way.
  warn_for_status(response)

  body_text <- content(response, as = "text", encoding = "UTF-8")
  parsed <- jsonlite::fromJSON(body_text, flatten = TRUE)
  df <- parsed %>%
    data.frame() %>%
    tidyr::unnest(
      c(results.addresses, results.taxonomies),
      names_repair = "unique"
    )

  # Columns the registry may or may not return for a given provider.
  expected_cols <- c(
    "result_count",
    "results.enumeration_type",
    "results.number",
    "results.last_updated_epoch",
    "results.created_epoch",
    "results.other_names",
    "country_code",
    "country_name",
    "address_purpose",
    "address_type",
    "address_1",
    "address_2",
    "city",
    "state",
    "postal_code",
    "telephone_number",
    "code",
    "desc",
    "primary",
    "state1",
    "license",
    "results.identifiers",
    "results.basic.first_name",
    "results.basic.last_name",
    "results.basic.middle_name",
    "results.basic.credential",
    "results.basic.sole_proprietor",
    "results.basic.gender",
    "results.basic.enumeration_date",
    "results.basic.last_updated",
    "results.basic.status",
    "results.basic.name"
  )
  missing_cols <- setdiff(expected_cols, names(df))
  # Iterating over an empty vector is a no-op, so no length guard is needed.
  for (col_name in missing_cols) {
    df[, col_name] <- "UNKNOWN"
  }

  df %>%
    # Keep only the reported fields, renaming them in the same step.
    select(
      Provider_NPI = results.number,
      Provider_Name = results.basic.name,
      results.enumeration_type,
      Provider_Gender = results.basic.gender,
      Provider_Credentials = results.basic.credential,
      Provider_Taxonomy = desc,
      primary
    ) %>%
    mutate(
      Provider_Type = case_when(
        results.enumeration_type == "NPI-1" ~ "Individual Provider",
        results.enumeration_type == "NPI-2" ~ "Organizational Provider"
      )
    ) %>%
    select(-results.enumeration_type) %>%
    # A provider can list several taxonomies; keep only the primary one.
    filter(primary == "TRUE") %>%
    # Unnesting addresses yields one row per address type; collapse duplicates.
    distinct()
}