Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/78.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 帮助理解为什么我的自定义函数可以应用于字符向量而不是数据帧_R_Function - Fatal编程技术网

R 帮助理解为什么我的自定义函数可以应用于字符向量而不是数据帧

R 帮助理解为什么我的自定义函数可以应用于字符向量而不是数据帧,r,function,R,Function,我拼凑了一个函数,当提供NPI ID的字符向量时,它可以从美国政府API中提取数据 providerIDs <- c('1982812681','1336201888','1902121692','1164496618','1073557641','1255522488','1679705214','1467719260') 我知道这对一些人来说是基本的,但是如果能帮助我理解如何传入字符向量而不是数据帧,那将是一个巨大的帮助 以下是我正在使用的函数: pacman::p_load(tid

我拼凑了一个函数,当提供NPI ID的字符向量时,它可以从美国政府API中提取数据

# Example NPI IDs held in a plain character vector (it has no dimensions).
providerIDs <- c('1982812681','1336201888','1902121692','1164496618','1073557641','1255522488','1679705214','1467719260')
我知道这对一些人来说是基本的,但是如果能帮助我理解如何传入字符向量而不是数据帧,那将是一个巨大的帮助

以下是我正在使用的函数:

# Load the packages used below (pipes, HTTP requests, JSON parsing).
pacman::p_load(tidyverse,httr,jsonlite,purrr)

# Base URL of the NPI registry API; getNPI() passes it to httr::GET().
path <- "https://npiregistry.cms.hhs.gov/api/?"

# Query the NPPES NPI registry for one NPI number and return a tidy summary.
#
# Arguments:
#   object  NPI number to look up; it is sent as the `number` query parameter.
#           Call once per ID, e.g. lapply(providerIDs, getNPI).
#
# Returns a data frame of distinct rows whose taxonomy is flagged as primary,
# with columns Provider_NPI, Provider_Name, Provider_Gender,
# Provider_Credentials, Provider_Taxonomy, primary and Provider_Type.
#
# Reads the base URL from the file-level variable `path`.
# Stops with an error when the response contains no results.
getNPI <- function(object) {
  request <- httr::GET(
    url = path,
    query = list(version = "2.0", number = object)
  )

  # A non-success HTTP status only raises a warning; parsing is still attempted.
  httr::warn_for_status(request)

  parsed <- jsonlite::fromJSON(
    httr::content(request, as = "text", encoding = "UTF-8"),
    flatten = TRUE
  )

  # Without any results there is nothing to unnest; fail with a clear message
  # instead of a cryptic error further down the pipeline.
  if (length(parsed[["results"]]) == 0) {
    stop(
      "No NPI registry results returned for number: ",
      paste(object, collapse = ", "),
      call. = FALSE
    )
  }

  # Only the taxonomy list-column is unnested. Unnesting addresses and
  # taxonomies in one call expands them in parallel, which fails when a
  # provider's address and taxonomy counts cannot be recycled to a common
  # size, and no address field is used in the output anyway.
  df <- parsed %>%
    data.frame() %>%
    tidyr::unnest(results.taxonomies, names_repair = "unique")

  expected_cols <- c(
    "result_count",
    "results.enumeration_type",
    "results.number",
    "results.last_updated_epoch",
    "results.created_epoch",
    "results.other_names",
    "country_code",
    "country_name",
    "address_purpose",
    "address_type",
    "address_1",
    "address_2",
    "city",
    "state",
    "postal_code",
    "telephone_number",
    "code",
    "desc",
    "primary",
    "state1",
    "license",
    "results.identifiers",
    "results.basic.first_name",
    "results.basic.last_name",
    "results.basic.middle_name",
    "results.basic.credential",
    "results.basic.sole_proprietor",
    "results.basic.gender",
    "results.basic.enumeration_date",
    "results.basic.last_updated",
    "results.basic.status",
    "results.basic.name"
  )

  # Fields absent from this response are filled with a placeholder so the
  # select() below never fails on a missing column.
  for (col in setdiff(expected_cols, names(df))) {
    df[, col] <- "UNKNOWN"
  }

  df %>%
    dplyr::select(results.number,
                  results.basic.name,
                  results.enumeration_type,
                  results.basic.gender,
                  results.basic.credential,
                  desc,
                  primary) %>%
    dplyr::rename(Provider_NPI = results.number,
                  Provider_Name = results.basic.name,
                  Provider_Gender = results.basic.gender,
                  Provider_Credentials = results.basic.credential,
                  Provider_Taxonomy = desc) %>%
    dplyr::mutate(
      Provider_Type = dplyr::case_when(
        results.enumeration_type == "NPI-1" ~ 'Individual Provider',
        results.enumeration_type == "NPI-2" ~ 'Organizational Provider'
      )
    ) %>%
    dplyr::select(-results.enumeration_type) %>%
    # some providers have more than 1 taxonomy, this keeps the primary value
    dplyr::filter(primary == 'TRUE') %>%
    # drop any rows that are identical after the column selection
    dplyr::distinct()
}
pacman::p_load(tidyverse, httr, jsonlite, purrr)
路径%
mutate(
Provider_Type = case_when(
results.enumeration_type == "NPI-1" ~ 'Individual Provider',
results.enumeration_type == "NPI-2" ~ 'Organizational Provider'
)
) %>%
select(-results.enumeration_type) %>%
# 某些提供者有多个分类(taxonomy),这里只保留主分类
filter(primary == 'TRUE') %>%
# 由于地址类型不同,unnest 展开后会产生重复行
distinct()
}

lapply
没有
MARGIN
参数,可以直接把提供者ID向量传给它。因此,如果要将其应用于
data.frame
的某一列,可以写成 lapply(providerIDs[,1],FUN=getNPI)
lapply
没有
MARGIN
参数,可以直接把提供者ID向量传给它。如果要将其应用于
data.frame
的某一列,可以写成 lapply(providerIDs[,1],FUN=getNPI)
@AbdessabourMtk请添加作为答案,我很乐意接受作为答案。非常感谢。
# NOTE(review): providerIDs is a plain character vector, so `[, 1]` fails with
# the "incorrect number of dimensions" error shown below. lapply() also has no
# MARGIN argument (that belongs to apply()).
x <- lapply(providerIDs[,1], MARGIN = 2, FUN = getNPI)
Error in providerIDs[, 1] : incorrect number of dimensions
# Load the packages used below (pipes, HTTP requests, JSON parsing).
pacman::p_load(tidyverse,httr,jsonlite,purrr)

# Base URL of the NPI registry API; getNPI() passes it to httr::GET().
path <- "https://npiregistry.cms.hhs.gov/api/?"

# Query the NPPES NPI registry for one NPI number and return a tidy summary.
#
# Arguments:
#   object  NPI number to look up; it is sent as the `number` query parameter.
#           Call once per ID, e.g. lapply(providerIDs, getNPI).
#
# Returns a data frame of distinct rows whose taxonomy is flagged as primary,
# with columns Provider_NPI, Provider_Name, Provider_Gender,
# Provider_Credentials, Provider_Taxonomy, primary and Provider_Type.
#
# Reads the base URL from the file-level variable `path`.
# Stops with an error when the response contains no results.
getNPI <- function(object) {
  request <- httr::GET(
    url = path,
    query = list(version = "2.0", number = object)
  )

  # A non-success HTTP status only raises a warning; parsing is still attempted.
  httr::warn_for_status(request)

  parsed <- jsonlite::fromJSON(
    httr::content(request, as = "text", encoding = "UTF-8"),
    flatten = TRUE
  )

  # Without any results there is nothing to unnest; fail with a clear message
  # instead of a cryptic error further down the pipeline.
  if (length(parsed[["results"]]) == 0) {
    stop(
      "No NPI registry results returned for number: ",
      paste(object, collapse = ", "),
      call. = FALSE
    )
  }

  # Only the taxonomy list-column is unnested. Unnesting addresses and
  # taxonomies in one call expands them in parallel, which fails when a
  # provider's address and taxonomy counts cannot be recycled to a common
  # size, and no address field is used in the output anyway.
  df <- parsed %>%
    data.frame() %>%
    tidyr::unnest(results.taxonomies, names_repair = "unique")

  expected_cols <- c(
    "result_count",
    "results.enumeration_type",
    "results.number",
    "results.last_updated_epoch",
    "results.created_epoch",
    "results.other_names",
    "country_code",
    "country_name",
    "address_purpose",
    "address_type",
    "address_1",
    "address_2",
    "city",
    "state",
    "postal_code",
    "telephone_number",
    "code",
    "desc",
    "primary",
    "state1",
    "license",
    "results.identifiers",
    "results.basic.first_name",
    "results.basic.last_name",
    "results.basic.middle_name",
    "results.basic.credential",
    "results.basic.sole_proprietor",
    "results.basic.gender",
    "results.basic.enumeration_date",
    "results.basic.last_updated",
    "results.basic.status",
    "results.basic.name"
  )

  # Fields absent from this response are filled with a placeholder so the
  # select() below never fails on a missing column.
  for (col in setdiff(expected_cols, names(df))) {
    df[, col] <- "UNKNOWN"
  }

  df %>%
    dplyr::select(results.number,
                  results.basic.name,
                  results.enumeration_type,
                  results.basic.gender,
                  results.basic.credential,
                  desc,
                  primary) %>%
    dplyr::rename(Provider_NPI = results.number,
                  Provider_Name = results.basic.name,
                  Provider_Gender = results.basic.gender,
                  Provider_Credentials = results.basic.credential,
                  Provider_Taxonomy = desc) %>%
    dplyr::mutate(
      Provider_Type = dplyr::case_when(
        results.enumeration_type == "NPI-1" ~ 'Individual Provider',
        results.enumeration_type == "NPI-2" ~ 'Organizational Provider'
      )
    ) %>%
    dplyr::select(-results.enumeration_type) %>%
    # some providers have more than 1 taxonomy, this keeps the primary value
    dplyr::filter(primary == 'TRUE') %>%
    # drop any rows that are identical after the column selection
    dplyr::distinct()
}