在R中计算函数中两个数据集之间的差异时，如何保持数据集的id_R_Key_Difference

在R中计算函数中两个数据集之间的差异时，如何保持数据集的id

在R中计算函数中两个数据集之间的差异时，如何保持数据集的id,r,key,difference,R,Key,Difference,我有一个函数可以计算两个数据集中的行（基于相同的列）之间的差异。我希望在计算后保留id，因为在与另一个表合并后需要它。我实际上不知道该怎么做这一步。下面是数据和函数 # data frame for recipients IDr= c(seq(1,4)) Blood_type_r=c("A","B","AB","O") data_R=data.frame(IDr,Blood_type_r,A=rep(0,4),B=c

我有一个函数可以计算两个数据集中的行（基于相同的列）之间的差异。我希望在计算后保留id，因为在与另一个表合并后需要它。我实际上不知道该怎么做这一步。下面是数据和函数

# Recipient table: an ID, a blood type and five 0/1 columns (A-E),
# one row per recipient.
IDr <- seq_len(4)
Blood_type_r <- c("A", "B", "AB", "O")
data_R <- data.frame(
  IDr,
  Blood_type_r,
  A = rep(0, 4),
  B = c(0, 0, 0, 1),
  C = c(1, 1, 1, 0),
  D = rep(1, 4),
  E = c(0, 0, 1, 0),
  stringsAsFactors = FALSE
)

  data_R
  IDr Blood_type_r A B C D E
1   1            A 0 0 1 1 0
2   2            B 0 0 1 1 0
3   3           AB 0 0 1 1 1
4   4            O 0 1 0 1 0
# Donor table: an ID, a blood type, five 0/1 columns (A-E) and a
# weight column WD, one row per donor (two donors per blood type).
IDd <- seq_len(8)
Blood_type_d <- rep(c("A", "B", "AB", "O"), each = 2)
WD <- rep(c(0.25, 0.125, 0.125, 0.5), each = 2)
data_D <- data.frame(
  IDd,
  Blood_type_d,
  A = c(0, 0, 0, 0, 0, 0, 1, 1),
  B = c(0, 0, 0, 0, 0, 0, 1, 1),
  C = c(1, 1, 1, 1, 1, 1, 1, 0),
  D = rep(1, 8),
  E = c(0, 0, 0, 0, 0, 0, 1, 0),
  WD,
  stringsAsFactors = FALSE
)
  data_D
  IDd Blood_type_d A B C D E    WD
1   1            A 0 0 1 1 0 0.250
2   2            A 0 0 1 1 0 0.250
3   3            B 0 0 1 1 0 0.125
4   4            B 0 0 1 1 0 0.125
5   5           AB 0 0 1 1 0 0.125
6   6           AB 0 0 1 1 0 0.125
7   7            O 1 1 1 1 1 0.500
8   8            O 1 1 0 1 0 0.500

# function
# Clamped, column-wise difference between every row of D and row i of R.
#
# D, R      : objects coercible to data frames; columns are paired by
#             position
# i         : row of R that is subtracted from each row of D
# threshold : rows are kept only when their mismatch total is <= threshold
#
# Returns the differences (negative values replaced by 0) together with a
# `mismatch` column holding the row sums, limited to the rows that satisfy
# the threshold. Relies on purrr's map2_df being attached.
soustraction.i <- function(D, R, i, threshold) {
  donors <- as.data.frame(D)
  recipient <- as.data.frame(R)[i, ]
  delta <- map2_df(donors, recipient, `-`)
  # A negative difference does not count towards the mismatch total.
  delta[delta < 0] <- 0
  delta$mismatch <- rowSums(delta)
  keep <- which(delta$mismatch <= threshold)
  delta[keep, ]
}

 # Compare every donor (columns 3 to 7, i.e. A-E) with recipient row 1 and
 # keep the rows whose mismatch total is at most 3.
 soustraction.i(data_D[,3:7],data_R[,3:7],1,3)
# A tibble: 8 x 6
      A     B     C     D     E mismatch
  <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
1     0     0     0     0     0        0
2     0     0     0     0     0        0
3     0     0     0     0     0        0
4     0     0     0     0     0        0
5     0     0     0     0     0        0
6     0     0     0     0     0        0
7     1     1     0     0     1        3
8     1     1     0     0     0        2

非常感谢您的帮助。

您可以将ID作为参数传入：

# Recipients: ID, blood type and five 0/1 columns (A-E).
IDr <- seq_len(4)
Blood_type_r <- c("A", "B", "AB", "O")
data_R <- data.frame(
  IDr,
  Blood_type_r,
  A = rep(0, 4),
  B = c(0, 0, 0, 1),
  C = c(1, 1, 1, 0),
  D = rep(1, 4),
  E = c(0, 0, 1, 0),
  stringsAsFactors = FALSE
)

# Donors: ID, blood type, the same five 0/1 columns and a weight WD.
IDd <- seq_len(8)
Blood_type_d <- rep(c("A", "B", "AB", "O"), each = 2)
WD <- rep(c(0.25, 0.125, 0.125, 0.5), each = 2)
data_D <- data.frame(
  IDd,
  Blood_type_d,
  A = c(0, 0, 0, 0, 0, 0, 1, 1),
  B = c(0, 0, 0, 0, 0, 0, 1, 1),
  C = c(1, 1, 1, 1, 1, 1, 1, 0),
  D = rep(1, 8),
  E = c(0, 0, 0, 0, 0, 0, 1, 0),
  WD,
  stringsAsFactors = FALSE
)

# Clamped, column-wise difference between every row of D and row i of R,
# with the caller-supplied identifiers kept alongside the surviving rows.
#
# D, R      : objects coercible to data frames; columns are paired by
#             position
# i         : row of R that is subtracted from each row of D
# threshold : rows are kept only when their mismatch total is <= threshold
# id        : one identifier per row of D
#
# Returns the differences (negative values replaced by 0) plus a `mismatch`
# column with the row sums, limited to rows within the threshold, with the
# matching identifiers as the first column `IDd`.
# Stops if `id` does not have one element per row of D.
# Relies on purrr's map2_df being attached.
soustraction.i <- function(D, R, i, threshold, id) {
  if (nrow(D) != length(id)) stop("Length of id has to be same as number of rows of D\n")
  D <- as.data.frame(D)
  R <- as.data.frame(R)
  dif <- map2_df(D, R[i, ], `-`)
  # A negative difference does not count towards the mismatch total.
  dif[dif < 0] <- 0
  dif$mismatch <- rowSums(dif)
  keep <- which(dif$mismatch <= threshold)
  dif <- dif[keep, ]
  # Subset the ids with the same row mask as the data: attaching the full
  # id vector after filtering fails as soon as the threshold drops a row.
  dif[["IDd"]] <- id[keep]
  dif[c("IDd", setdiff(names(dif), "IDd"))]
}

# Donor columns A-E against recipient row 1, threshold 3, with the donor
# ids passed separately; the ids come back as the first column, `IDd`.
soustraction.i(data_D[,3:7],data_R[,3:7],1,3, id=IDd)
# # A tibble: 8 x 7
#     IDd     A     B     C     D     E mismatch
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
# 1     1     0     0     0     0     0        0
# 2     2     0     0     0     0     0        0
# 3     3     0     0     0     0     0        0
# 4     4     0     0     0     0     0        0
# 5     5     0     0     0     0     0        0
# 6     6     0     0     0     0     0        0
# 7     7     1     1     0     0     1        3
# 8     8     1     1     0     0     0        2

IDr=c（序号（1,4））
血型=c（“A”、“B”、“AB”、“O”）
数据=数据帧（IDr，血型，A=rep（0,4），B=c（rep（0,3），1），c=c（rep（1,3），0），D=rep（1,4），E=c（rep（0,2），rep（1,1），0），stringsAsFactors=FALSE）
IDd=c（序号（1,8））
血型d=c（代表（“A”，各=2），代表（“B”，各=2），代表（“AB”，各=2），代表（“O”，各=2））
WD=c（代表（0.25，各=2）、代表（0.125，各=2）、代表（0.125，各=2）、代表（0.5，各=2））
数据D=数据帧（IDd，血型D，A=c（代表（0,6），1,1），B=c（代表（0,6），1,1），c=c（代表（1,7），0），D=代表（1,8），E=c（代表（0,6），代表（1,1），0），WD，stringsAsFactors=FALSE）
soustraction.i=函数（D，R，i，threshold，id）{
如果（nrow（D）！=长度（id））停止（“id的长度必须与D的行数相同\n”）
D=as.data.frame（D）
R=as.data.frame（R）
dif=map2_-df（D，R[i，]，`-`）
dif[dif

要在输出中包含Id列，您应该首先在输入中把它一并传入。请尝试以下函数：
# Clamped, column-wise difference between every row of D and row i of R,
# keeping D's identifier column in the result.
#
# D         : object coercible to a data frame whose FIRST column is the
#             row identifier; the remaining columns are compared with R
# R         : object coercible to a data frame with one column per
#             compared column of D (paired by position)
# i         : row of R that is subtracted from each row of D
# threshold : rows are kept only when their mismatch total is <= threshold
#
# Returns a data frame with D's identifier column first, then the
# differences (negative values replaced by 0) and a `mismatch` column with
# their row sums, limited to rows within the threshold.
# Stops if D (without its identifier) and R differ in column count.
soustraction.i <- function(D, R, i, threshold) {
  D <- as.data.frame(D)
  R <- as.data.frame(R)
  values <- D[-1]
  if (ncol(values) != ncol(R)) {
    stop("D (without its ID column) and R must have the same number of columns",
         call. = FALSE)
  }
  # drop = FALSE keeps a one-row data frame even when R has a single column.
  target <- R[i, , drop = FALSE]
  dif <- values
  # Pair columns by position; pmax() clamps negative differences to 0 so
  # they do not count towards the mismatch total.
  dif[] <- Map(function(d, r) pmax(d - r, 0), values, target)
  dif$mismatch <- rowSums(dif)
  # D[1] is a one-column data frame, so the identifier keeps its own name.
  dif <- cbind(D[1], dif)
  dif[which(dif$mismatch <= threshold), ]
}

# Pass the donor id (column 1) together with columns 3 to 7 (A-E); with a
# threshold of 3 all eight donors are kept.
soustraction.i(data_D[,c(1, 3:7)],data_R[,3:7],1,3)

#  IDd A B C D E mismatch
#1   1 0 0 0 0 0        0
#2   2 0 0 0 0 0        0
#3   3 0 0 0 0 0        0
#4   4 0 0 0 0 0        0
#5   5 0 0 0 0 0        0
#6   6 0 0 0 0 0        0
#7   7 1 1 0 0 1        3
#8   8 1 1 0 0 0        2

# With a threshold of 2, donor 7 (mismatch 3) is dropped and the remaining
# rows keep their ids.
soustraction.i(data_D[,c(1, 3:7)],data_R[,3:7],1,2)
#  IDd A B C D E mismatch
#1   1 0 0 0 0 0        0
#2   2 0 0 0 0 0        0
#3   3 0 0 0 0 0        0
#4   4 0 0 0 0 0        0
#5   5 0 0 0 0 0        0
#6   6 0 0 0 0 0        0
#8   8 1 1 0 0 0        2

soutraction.i=函数（D，R，i，阈值）{
D=as.data.frame（D）
R=as.data.frame（R）
dif=purrr:：map2_-df（D[-1]，R[i，]，`-`）
dif[dif

感谢您的回复 @Dave，但当我把阈值改为3以外的任何数字时，会得到错误：“新列必须与数据兼容：新列有8行，数据有7行”。我需要它对任意阈值都有效，并且需要保留满足条件 dif[which(dif$mismatch <= threshold),] 的那些行的IDd。

谢谢 @Ronak！这是有效的，您的假设是正确的，我的ID始终是我的数据的第一列。
# Clamped, column-wise difference between every row of D and row i of R,
# keeping D's identifier column in the result.
#
# D         : object coercible to a data frame whose FIRST column is the
#             row identifier; the remaining columns are compared with R
# R         : object coercible to a data frame with one column per
#             compared column of D (paired by position)
# i         : row of R that is subtracted from each row of D
# threshold : rows are kept only when their mismatch total is <= threshold
#
# Returns a data frame with D's identifier column first, then the
# differences (negative values replaced by 0) and a `mismatch` column with
# their row sums, limited to rows within the threshold.
# Stops if D (without its identifier) and R differ in column count.
soustraction.i <- function(D, R, i, threshold) {
  D <- as.data.frame(D)
  R <- as.data.frame(R)
  values <- D[-1]
  if (ncol(values) != ncol(R)) {
    stop("D (without its ID column) and R must have the same number of columns",
         call. = FALSE)
  }
  # drop = FALSE keeps a one-row data frame even when R has a single column.
  target <- R[i, , drop = FALSE]
  dif <- values
  # Pair columns by position; pmax() clamps negative differences to 0 so
  # they do not count towards the mismatch total.
  dif[] <- Map(function(d, r) pmax(d - r, 0), values, target)
  dif$mismatch <- rowSums(dif)
  # D[1] is a one-column data frame, so the identifier keeps its own name.
  dif <- cbind(D[1], dif)
  dif[which(dif$mismatch <= threshold), ]
}

# The donor id (column 1) travels with columns 3 to 7 (A-E); a threshold
# of 3 keeps all eight donors.
soustraction.i(data_D[,c(1, 3:7)],data_R[,3:7],1,3)

#  IDd A B C D E mismatch
#1   1 0 0 0 0 0        0
#2   2 0 0 0 0 0        0
#3   3 0 0 0 0 0        0
#4   4 0 0 0 0 0        0
#5   5 0 0 0 0 0        0
#6   6 0 0 0 0 0        0
#7   7 1 1 0 0 1        3
#8   8 1 1 0 0 0        2

# Lowering the threshold to 2 removes donor 7 (mismatch 3); the other rows
# still carry their ids.
soustraction.i(data_D[,c(1, 3:7)],data_R[,3:7],1,2)
#  IDd A B C D E mismatch
#1   1 0 0 0 0 0        0
#2   2 0 0 0 0 0        0
#3   3 0 0 0 0 0        0
#4   4 0 0 0 0 0        0
#5   5 0 0 0 0 0        0
#6   6 0 0 0 0 0        0
#8   8 1 1 0 0 0        2