R 基于其他列将两列更改为一列_R

R 基于其他列将两列更改为一列

R 基于其他列将两列更改为一列,r,R,我有两个名为df和df1的数据帧。在df1中，“prob区”分为两列。我想根据性别和地区将df与df1合并，最终在prob地区的列上显示。先谢谢你 df1 <- structure(list(age = c(10, 11, 12, 13, 14, 10, 11, 12, 13, 14), district = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2), gender = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 1)), row.names = c(N

我有两个名为df和df1的数据帧。在df中，各地区的prob被分成了两列（district1和district2）。我想根据性别和地区将df与df1合并，最终只得到一列prob。先谢谢你

# Long-format table: one row per age/district pair, with a gender code.
df1 <- structure(
  list(
    age = rep(c(10, 11, 12, 13, 14), times = 2),
    district = rep(c(1, 2), each = 5),
    gender = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 1)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Wide-format table: one row per age/gender pair, with one value column per
# district (district1, district2).
df <- structure(
  list(
    age = c(10, 11, 12, 13, 14),
    gender = c(1, 2, 1, 2, 1),
    district1 = c(0.0099, 0.0021, 0.0029, 0.0037, 0.005),
    district2 = c(0.0124, 0.002, 0.0021, 0.0042, 0.0076)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

df1

我们可以使用 pivot_longer 将含有 district1、district2 两列的宽格式数据重塑为“long”格式，从列名中提取数字部分，再与另一个数据帧进行右连接（right join）
library(dplyr)
library(tidyr)
library(stringr)

# Reshape the wide table `df` (columns district1, district2) to long form,
# turn the former column names into integer district codes, then right-join
# onto `df1` so that every row of `df1` is kept.
# The wide table must be the one that is pivoted: joining the other way round
# fails because `df` has no `district` column to join on.
# Rows of `df1` with no matching age/district/gender in `df` get prob = NA.
df %>%
  pivot_longer(
    cols = contains("district"),
    names_to = "district",
    values_to = "prob"
  ) %>%
  # "district1" -> 1L, "district2" -> 2L
  mutate(district = as.integer(str_extract(district, "\\d+"))) %>%
  right_join(df1, by = c("age", "district", "gender"))

-输出
# A tibble: 10 x 4
     age gender district    prob
   <int>  <int>    <int>   <dbl>
 1    10      1        1 0.0099 
 2    10      1        2 0.0124 
 3    11      2        1 0.0021 
 4    11      2        2 0.002  
 5    12      1        1 0.00290
 6    12      1        2 0.0021 
 7    13      2        1 0.0037 
 8    13      2        2 0.0042 
 9    14      1        1 0.005  
10    14      1        2 0.0076 

# A tibble: 10 x 4
     age gender district    prob
1    10      1        1 0.0099 
2    10      1        2 0.0124 
3    11      2        1 0.0021 
4    11      2        2 0.002  
5    12      1        1 0.00290
6    12      1        2 0.0021 
7    13      2        1 0.0037 
8    13      2        2 0.0042 
9    14      1        1 0.005  
10    14      1        2 0.0076 

数据
df

我们可以使用 pivot_longer 将含有 district1、district2 两列的宽格式数据重塑为“long”格式，从列名中提取数字部分，再与另一个数据帧进行右连接（right join）
library(dplyr)
library(tidyr)
library(stringr)

# Reshape the wide table `df` (columns district1, district2) to long form,
# turn the former column names into integer district codes, then right-join
# onto `df1` so that every row of `df1` is kept.
# The wide table must be the one that is pivoted: joining the other way round
# fails because `df` has no `district` column to join on.
# Rows of `df1` with no matching age/district/gender in `df` get prob = NA.
df %>%
  pivot_longer(
    cols = contains("district"),
    names_to = "district",
    values_to = "prob"
  ) %>%
  # "district1" -> 1L, "district2" -> 2L
  mutate(district = as.integer(str_extract(district, "\\d+"))) %>%
  right_join(df1, by = c("age", "district", "gender"))

-输出
# A tibble: 10 x 4
     age gender district    prob
   <int>  <int>    <int>   <dbl>
 1    10      1        1 0.0099 
 2    10      1        2 0.0124 
 3    11      2        1 0.0021 
 4    11      2        2 0.002  
 5    12      1        1 0.00290
 6    12      1        2 0.0021 
 7    13      2        1 0.0037 
 8    13      2        2 0.0042 
 9    14      1        1 0.005  
10    14      1        2 0.0076 

# A tibble: 10 x 4
     age gender district    prob
1    10      1        1 0.0099 
2    10      1        2 0.0124 
3    11      2        1 0.0021 
4    11      2        2 0.002  
5    12      1        1 0.00290
6    12      1        2 0.0021 
7    13      2        1 0.0037 
8    13      2        2 0.0042 
9    14      1        1 0.005  
10    14      1        2 0.0076 

数据
df你是这个意思吗
# Reshape the wide table `df` to long form with base R, then left-merge it
# onto `df1` so that every row of `df1` is kept.
merge(
  df1,
  reshape(
    setNames(
      # reshape() is written for plain data frames; drop the tibble class so
      # the call behaves the same whether or not tibble is attached.
      as.data.frame(df),
      # "district1"/"district2" -> "prob.1"/"prob.2", so reshape() can split
      # each name into the value column "prob" and the time values 1 and 2.
      gsub("district", "prob.", names(df))
    ),
    direction = "long",
    idvar = c("age", "gender"),
    varying = -(1:2),  # every column except the two id columns
    timevar = "district"
  ),
  # Spell out the keys instead of relying on the shared-column default.
  by = c("age", "district", "gender"),
  all.x = TRUE  # keep df1 rows without a match; their prob is NA
)

给
   age district gender   prob
1   10        1      1 0.0099
2   10        2      2     NA
3   11        1      2 0.0021
4   11        2      1     NA
5   12        1      1 0.0029
6   12        2      2     NA
7   13        1      2 0.0037
8   13        2      1     NA
9   14        1      1 0.0050
10  14        2      1 0.0076

数据
# Long-format table: one row per age/district pair, with a gender code.
df1 <- structure(
  list(
    age = rep(c(10, 11, 12, 13, 14), times = 2),
    district = rep(c(1, 2), each = 5),
    gender = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 1)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Wide-format table: one row per age/gender pair, with one value column per
# district (district1, district2).
df <- structure(
  list(
    age = c(10, 11, 12, 13, 14),
    gender = c(1, 2, 1, 2, 1),
    district1 = c(0.0099, 0.0021, 0.0029, 0.0037, 0.005),
    district2 = c(0.0124, 0.002, 0.0021, 0.0042, 0.0076)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

df1你是这个意思吗
# Reshape the wide table `df` to long form with base R, then left-merge it
# onto `df1` so that every row of `df1` is kept.
merge(
  df1,
  reshape(
    setNames(
      # reshape() is written for plain data frames; drop the tibble class so
      # the call behaves the same whether or not tibble is attached.
      as.data.frame(df),
      # "district1"/"district2" -> "prob.1"/"prob.2", so reshape() can split
      # each name into the value column "prob" and the time values 1 and 2.
      gsub("district", "prob.", names(df))
    ),
    direction = "long",
    idvar = c("age", "gender"),
    varying = -(1:2),  # every column except the two id columns
    timevar = "district"
  ),
  # Spell out the keys instead of relying on the shared-column default.
  by = c("age", "district", "gender"),
  all.x = TRUE  # keep df1 rows without a match; their prob is NA
)

给
   age district gender   prob
1   10        1      1 0.0099
2   10        2      2     NA
3   11        1      2 0.0021
4   11        2      1     NA
5   12        1      1 0.0029
6   12        2      2     NA
7   13        1      2 0.0037
8   13        2      1     NA
9   14        1      1 0.0050
10  14        2      1 0.0076

数据
# Long-format table: one row per age/district pair, with a gender code.
df1 <- structure(
  list(
    age = rep(c(10, 11, 12, 13, 14), times = 2),
    district = rep(c(1, 2), each = 5),
    gender = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 1)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Wide-format table: one row per age/gender pair, with one value column per
# district (district1, district2).
df <- structure(
  list(
    age = c(10, 11, 12, 13, 14),
    gender = c(1, 2, 1, 2, 1),
    district1 = c(0.0099, 0.0021, 0.0029, 0.0037, 0.005),
    district2 = c(0.0124, 0.002, 0.0021, 0.0042, 0.0076)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

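df1

不，我只想要一列prob。 @mehmo 请参阅我的更新。 谢谢，但输出的行数应等于含有地区和性别的数据帧的行数。 @mehmo 感谢您的反馈。我修正了它。 我得到了这个错误：`Error in guess(varying): failed to guess time-varying variables from their names`（无法从变量名中猜出随时间变化的变量）。 > 不，我只想要一列prob。 @mehmo 请参阅我的更新。 谢谢，但输出的行数应等于含有地区和性别的数据帧的行数。 @mehmo 感谢您的反馈。我修复了它。 我得到了以下错误：`Error in guess(varying): failed to guess time-varying variables from their names`。 > 您现在可以试试吗？ 我得到了此错误：`Join columns must be present in data. x Problem with district`（联接列必须存在于数据中，问题出在 district 列）。
您现在可以试试吗？我遇到了这个错误：`Join columns must be present in data. x Problem with district`（联接列必须存在于数据中，问题出在 district 列）。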