R中基于多个条件的查找表_R_Lookup_Lookup Tables

R中基于多个条件的查找表

R中基于多个条件的查找表,r,lookup,lookup-tables,R,Lookup,Lookup Tables,谢谢你看我的问题我有以下关于患者在3项任务中表现的（虚拟）数据： patient_df = data.frame(id = seq(1:5), age = c(30,72,46,63,58), education = c(11, 22, 18, 12, 14), task1 = c(21, 28, 20, 24, 22),

谢谢你看我的问题

我有以下关于患者在3项任务中表现的（虚拟）数据：

# Dummy patient data: one row per patient, holding the patient's id, age,
# education, and the score obtained on each of the three tasks.
patient_df <- data.frame(
  id = seq_len(5),
  age = c(30, 72, 46, 63, 58),
  education = c(11, 22, 18, 12, 14),
  task1 = c(21, 28, 20, 24, 22),
  task2 = c(15, 15, 10, 11, 14),
  task3 = c(82, 60, 74, 78, 78)
)

我还有以下基于年龄和教育程度的临界值（虚拟）查找表，用于定义患者在每项任务中的表现是否受损：

# Dummy lookup table of impairment cutoffs. There is one row per combination
# of age (35 to 70 in steps of 5) and education label ("<16" or ">=16"),
# giving the cutoff value for each of the three tasks.
cutoffs <- data.frame(
  age = rep(seq(from = 35, to = 70, by = 5), times = 2),
  education = rep(c("<16", ">=16"), each = 8),
  task1_cutoff = rep(24, 16),
  task2_cutoff = c(
    11, 11, 11, 11, 10, 10, 10, 10, # rows with education "<16"
    9, 13, 13, 13, 13, 12, 12, 11   # rows with education ">=16"
  ),
  task3_cutoff = c(rep(71, 8), 70, rep(74, 2), rep(73, 5))
)

事实上，我的 patient_df 有 600 多名患者，每名患者有 7 项以上的任务，每项任务都有与年龄和教育程度相关的临界值，因此非常希望能有一种“干净”的方法！我现在唯一能想到的办法是写大量的 if-else 语句或 case_when，但这对任何使用我代码的人来说都难以复现 :(

提前谢谢！

我建议您将查找表和

patient_df

数据框都放在长格式中。我认为这可能更容易管理多个任务

您的

education

列是数值列；因此将其转换为字符“<16”或“>=16”将有助于与查找表进行匹配

使用

fuzzy_inner_join

将数据与查找表匹配，其中任务和教育完全匹配

，但如果为每个查找表行指定年龄范围，则

age

将介于

age_low

和

age_high

之间

最后，通过比较两个数据框中同一任务的值来计算是否受损（impaired）

请注意，输出中缺少值为 1 的

id

，因为超出了查找表的年龄范围。您可以向该表添加更多行以解决此问题

library(tidyverse)
library(fuzzyjoin)

# Reshape the cutoff lookup table to long format: the task*_cutoff columns
# become rows, with the task number extracted from the column name into `task`
# and the cutoff stored in `cutoff_value`. Each row is then given an inclusive
# age band [age, age + 4], and the original `age` column is dropped.
cutoffs_long <- cutoffs %>%
  pivot_longer(
    cols = starts_with("task"),
    names_to = "task",
    names_pattern = "task(\\d+)",
    values_to = "cutoff_value"
  ) %>%
  mutate(
    age_low = age,
    age_high = age + 4
  ) %>%
  select(-age)

# Flag impaired performance for every patient/task pair.
patient_df %>%
  # One row per patient and task; `task` holds the digits of the column name.
  pivot_longer(
    cols = starts_with("task"),
    names_to = "task",
    names_pattern = "(\\d+)",
    values_to = "patient_value"
  ) %>%
  # Recode numeric education into the "<16" / ">=16" labels so it can be
  # compared with `cutoffs_long$education` by exact equality.
  mutate(education = ifelse(education < 16, "<16", ">=16")) %>%
  # Keep pairs where age_low <= age <= age_high and both education and task
  # are equal; as an inner join, patients without a matching row are dropped.
  fuzzy_inner_join(
    cutoffs_long,
    by = c("age" = "age_low", "age" = "age_high", "education", "task"),
    match_fun = list(`>=`, `<=`, `==`, `==`)
  ) %>%
  # 1 when the patient's value is strictly below the cutoff, otherwise 0.
  mutate(impaired = as.integer(patient_value < cutoff_value))

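library(tidyverse)
library(fuzzyjoin)
cutoffs_long <- cutoffs %>%
  pivot_longer(cols = starts_with("task"), names_to = "task", values_to = "cutoff_value", names_pattern = "task(\\d+)") %>%
  mutate(age_low = age, 
         age_high = age + 4) %>%
  select(-age)
patient_df %>%
  pivot_longer(cols = starts_with("task"), names_to = "task", values_to = "patient_value", names_pattern = "(\\d+)") %>%
  mutate(education = ifelse(education < 16, "<16", ">=16")) %>%
  fuzzy_inner_join(cutoffs_long, by = c("age" = "age_low", "age" = "age_high", "education", "task"), match_fun = list(`>=`, `<=`, `==`, `==`)) %>%
  mutate(impaired = +(patient_value < cutoff_value))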

输出

# A tibble: 12 x 11
      id   age education.x task.x patient_value education.y task.y cutoff_value age_low age_high impaired
   <int> <dbl> <chr>       <chr>          <dbl> <chr>       <chr>         <dbl>   <dbl>    <dbl>    <int>
 1     2    72 >=16        1                 28 >=16        1                24      70       74        0
 2     2    72 >=16        2                 15 >=16        2                11      70       74        0
 3     2    72 >=16        3                 60 >=16        3                73      70       74        1
 4     3    46 >=16        1                 20 >=16        1                24      45       49        1
 5     3    46 >=16        2                 10 >=16        2                13      45       49        1
 6     3    46 >=16        3                 74 >=16        3                74      45       49        0
 7     4    63 <16         1                 24 <16         1                24      60       64        0
 8     4    63 <16         2                 11 <16         2                10      60       64        0
 9     4    63 <16         3                 78 <16         3                71      60       64        0
10     5    58 <16         1                 22 <16         1                24      55       59        1
11     5    58 <16         2                 14 <16         2                10      55       59        0
12     5    58 <16         3                 78 <16         3                71      55       59        0

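# A tibble: 12 x 11
      id   age education.x task.x patient_value education.y task.y cutoff_value age_low age_high impaired
   <int> <dbl> <chr>       <chr>          <dbl> <chr>       <chr>         <dbl>   <dbl>    <dbl>    <int>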
1     2    72 >=16        1                 28 >=16        1                24      70       74        0
2     2    72 >=16        2                 15 >=16        2                11      70       74        0
3     2    72 >=16        3                 60 >=16        3                73      70       74        1
4     3    46 >=16        1                 20 >=16        1                24      45       49        1
5     3    46 >=16        2                 10 >=16        2                13      45       49        1
6     3    46 >=16        3                 74 >=16        3                74      45       49        0
7     4    63 …

查看 R 中的 tidyverse 包，您可以根据条件筛选和修改您创建的数据。

@zoey107 只是想澄清一下：36 岁的人会使用 35 岁还是 40 岁的临界值？

感谢您提供了这个优雅的解决方案，并让我在 Stack Overflow 上的第一篇帖子成为了一次很棒的体验！这非常有效。
> goal_patient_df
  id age education task1 task2 task3 task1_impaired task2_impaired task3_impaired
1  1  30        11     21     15     82               1               1               0
2  2  72        22     28     15     60               0               0               1
3  3  46        18     20     10     74               1               1               0
4  4  63        12     24     11     78               1               0               0
5  5  58        14     22     14     78               1               0               0

library(tidyverse)
library(fuzzyjoin)

# Reshape the cutoff lookup table to long format: the task*_cutoff columns
# become rows, with the task number extracted from the column name into `task`
# and the cutoff stored in `cutoff_value`. Each row is then given an inclusive
# age band [age, age + 4], and the original `age` column is dropped.
cutoffs_long <- cutoffs %>%
  pivot_longer(
    cols = starts_with("task"),
    names_to = "task",
    names_pattern = "task(\\d+)",
    values_to = "cutoff_value"
  ) %>%
  mutate(
    age_low = age,
    age_high = age + 4
  ) %>%
  select(-age)

# Flag impaired performance for every patient/task pair.
patient_df %>%
  # One row per patient and task; `task` holds the digits of the column name.
  pivot_longer(
    cols = starts_with("task"),
    names_to = "task",
    names_pattern = "(\\d+)",
    values_to = "patient_value"
  ) %>%
  # Recode numeric education into the "<16" / ">=16" labels so it can be
  # compared with `cutoffs_long$education` by exact equality.
  mutate(education = ifelse(education < 16, "<16", ">=16")) %>%
  # Keep pairs where age_low <= age <= age_high and both education and task
  # are equal; as an inner join, patients without a matching row are dropped.
  fuzzy_inner_join(
    cutoffs_long,
    by = c("age" = "age_low", "age" = "age_high", "education", "task"),
    match_fun = list(`>=`, `<=`, `==`, `==`)
  ) %>%
  # 1 when the patient's value is strictly below the cutoff, otherwise 0.
  mutate(impaired = as.integer(patient_value < cutoff_value))

# A tibble: 12 x 11
      id   age education.x task.x patient_value education.y task.y cutoff_value age_low age_high impaired
   <int> <dbl> <chr>       <chr>          <dbl> <chr>       <chr>         <dbl>   <dbl>    <dbl>    <int>
 1     2    72 >=16        1                 28 >=16        1                24      70       74        0
 2     2    72 >=16        2                 15 >=16        2                11      70       74        0
 3     2    72 >=16        3                 60 >=16        3                73      70       74        1
 4     3    46 >=16        1                 20 >=16        1                24      45       49        1
 5     3    46 >=16        2                 10 >=16        2                13      45       49        1
 6     3    46 >=16        3                 74 >=16        3                74      45       49        0
 7     4    63 <16         1                 24 <16         1                24      60       64        0
 8     4    63 <16         2                 11 <16         2                10      60       64        0
 9     4    63 <16         3                 78 <16         3                71      60       64        0
10     5    58 <16         1                 22 <16         1                24      55       59        1
11     5    58 <16         2                 14 <16         2                10      55       59        0
12     5    58 <16         3                 78 <16         3                71      55       59        0