R 如何创建一个函数,根据df中其他列的值返回特定列的值?

R 如何创建一个函数,根据df中其他列的值返回特定列的值?,r,function,R,Function,我有一个数据框 df <- data.frame(Lastname = c("Crosby", "Bourque", "Messier","Crosby", "Bourque", "Messier"), Name = c("Sid", "Ray", "Mark","Sid", "Ray", "Mark"), Season = c("2005-06","1997-98","1994-95","2006-07","19

我有一个数据框

  # Sample data: two seasons for each of three players, one row per
  # player-season with goal, assist and point totals.
  df <- data.frame(
    Lastname = rep(c("Crosby", "Bourque", "Messier"), times = 2),
    Name = rep(c("Sid", "Ray", "Mark"), times = 2),
    Season = c("2005-06", "1997-98", "1994-95", "2006-07", "1998-99", "1995-96"),
    Goals = c(20, 30, 40, 30, 50, 60),
    Assists = c(30, 50, 60, 40, 50, 50),
    Points = c(50, 80, 100, 70, 100, 110)
  )
我想创建一个函数,用来获取我指定的某位球员在某个赛季的某项统计数据。我目前的代码是:

 library(plyr)
     # Build a one-sentence summary of statistic `x` for one player and season,
     # looked up in the global data frame `df`.
     #
     # Arguments:
     #   lastname, name  values compared against df$Lastname and df$Name
     #   season          value compared against df$Season
     #   x               name of the statistic column to report
     #
     # Stops with an error when the last name, the first name or the season
     # does not occur anywhere in `df`; otherwise returns the pasted sentence.
     #
     # NOTE(review): `%>%`, `filter`, `select` and `one_of` are dplyr/magrittr
     # functions; only library(plyr) is loaded above — confirm dplyr is attached.
     printstats = function(lastname, name, season, x) {
      # The two logical vectors are added element-wise, so the sum is 0 only when
      # neither the last name nor the first name matches any row.
      if(sum((df$Lastname==lastname)+(df$Name==name))==0 ) stop("There is a spelling error in ", lastname, " and ", name,".")
      # Last name not present in any row.
      if(sum(df$Lastname==lastname)==0) stop("There is a spelling error in ", lastname,".")
      # First name not present in any row.
      if(sum(df$Name==name)==0) stop("There is a spelling error in ", name,".")
      # Season not present in any row.
      if(sum(df$Season==season)==0) stop(season," does not exist.")
      # NOTE(review): the disabled check below cannot work as written: it
      # misspells the column as `Lastame`, compares against the literal strings
      # "lastname"/"name"/"season" instead of the arguments, and hands a data
      # frame to isTRUE(), which is never TRUE — so it would stop on every call.
      #if(isTRUE(df[df$Lastame=="lastname" & df$Name=="name" & df$Season=="season",])==FALSE) stop(name," ", lastname, " did not play in ", season,".")
      # Keep the rows for this player and season, and only the identifying
      # columns plus the requested statistic.
      df.sub <- df %>%
        filter(Lastname == lastname, Name == name, Season == season) %>% 
        select(Lastname, Name, Season, one_of(x)) 
      # When no row matched, df.sub[, x] is empty and the sentence is built
      # without a value — there is no "did not play" handling here.
      paste(name, lastname, " did", df.sub[, x], x, "in", season)
    }
“希德·克罗斯比在2005-06赛季得了50分”

但是,如果我去掉 # 把最后一个 if 语句加进函数,函数就不能正常工作了。我希望当球员在所查询的赛季没有出场时,函数能输出一句提示。例如,当我调用:

 printstats("Crosby", "Sid","1998-99","Points")
我想让函数告诉我Sid Crosby在1998-99赛季没有参加比赛


如何才能做到这一点?

R 的优点之一是可以通过列名(colname)或行名(rowname)访问数据框。您应该利用这一点,这样那些 if 语句就不再需要了。

下面是一个简短的代码来说明

我沿用了您的命名,但请注意,surname 通常被理解为姓(last name 或 family name)。另外,示例数据里的球员我可能做了增删,也可能没有。

# Example data for the answer: two seasons for each of three players.
# `Name` holds the family name and `Surname` the given name, as in the
# original snippet.
df <- data.frame(
  Name = rep(c("Ovechkin", "Bourque", "Messier"), times = 2),
  Surname = rep(c("Alex", "Ray", "Mark"), times = 2),
  Season = c("2005-06", "1999-00", "1994-95", "2008-09", "1992-93", "1991-92"),
  Goals = c(52, 30, 40, 56, 50, 60),
  Assists = c(54, 50, 60, 54, 50, 50),
  Points = c(106, 80, 100, 110, 100, 110)
)

# Return the row(s) of the global data frame `df` for one player and season,
# keeping the three identifying columns plus the column(s) named in `x`.
#
#   name, surname  values compared against df$Name and df$Surname
#   season         value compared against df$Season
#   x              character name(s) of the statistic column(s) to keep
showstats <- function(name, surname, season, x) {
  player_rows <- filter(df, Name == name, Surname == surname, Season == season)
  select(player_rows, Name, Surname, Season, one_of(x))
}
# Example calls: same player and season each time, selecting a different
# statistic column through the fourth argument.
showstats("Ovechkin", "Alex","2008-09","Goals")
showstats("Ovechkin", "Alex","2008-09","Assists")
showstats("Ovechkin", "Alex","2008-09","Points")
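showstats = function(name, surname, season, x) {
    df %>%
    filter(Name == name, Surname == surname, Season == season) %>%
    select(Name, Surname, Season, one_of(x))
}
showstats("Ovechkin", "Alex","2008-09","Goals")
showstats("Ovechkin", "Alex","2008-09","Assists")
showstats("Ovechkin", "Alex","2008-09","Points")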
编辑:回答关于打印输出的追加问题。请注意,x 接收的是一个字符值,它对应于您感兴趣的某个列名。因此,您可以直接用 x 对数据框进行索引。

# Build a sentence reporting statistic `x` for one player and season, looked
# up in the global data frame `df`.
#
#   name, surname  values compared against df$Name and df$Surname
#   season         value compared against df$Season
#   x              character name of the statistic column to report
#
# Returns the pasted sentence; `x` is used both to pick the column and as the
# label inside the sentence.
printstats <- function(name, surname, season, x) {
  player_rows <- filter(df, Name == name, Surname == surname, Season == season)
  stat_rows <- select(player_rows, Name, Surname, Season, one_of(x))
  stat_value <- stat_rows[, x]
  paste(surname, name, "scored", stat_value, x, "in", season)
}
# Example call: build the sentence for one player, season and statistic column.
printstats("Ovechkin", "Alex","2008-09","Goals")
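printstats = function(name, surname, season, x) {
    df.sub <- df %>%
    filter(Name == name, Surname == surname, Season == season) %>%
    select(Name, Surname, Season, one_of(x))
    paste(surname, name, "scored", df.sub[, x], x, "in", season)
}
printstats("Ovechkin", "Alex","2008-09","Goals")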
编辑以包括请求的条件和参数检查:

#' Report one statistic for a player in a given season
#'
#' Looks the player up in the global data frame `df`, which must have the
#' columns `Lastname`, `Name` and `Season` plus one column per statistic.
#' Names are matched exactly (case-sensitive). Each failed lookup step prints
#' a diagnostic instead of raising an error.
#'
#' @param lastname Player's last name.
#' @param name Player's first name.
#' @param season Season as "xxxx-xx"; a 9-character "xxxx-xxxx" value is
#'   shortened to "xxxx-xx" before the lookup.
#' @param statistic Name of the statistic column to report, e.g. "Points".
#' @return On success, a character vector with one sentence per matching row.
#'   Otherwise the last diagnostic that was printed, returned invisibly.
printstats <- function(lastname, name, season, statistic) {
  # check function arguments (`||` is the scalar operator intended for `if`)
  if (missing(lastname) || missing(name) || missing(season) ||
        missing(statistic)) {
    stop("Need to specify player lastname, name, season, statistic")
  }

  # convert season xxxx-xxxx to xxxx-xx
  if (isTRUE(nchar(season) == 9L)) {
    season <- paste0(substr(season, 1, 5), substr(season, 8, 9))
  }

  # Row masks are computed once and reused by every check below. They may
  # contain NA when `df` has missing values, hence na.rm / which() further down.
  in_season <- df$Season == season
  is_lastname <- df$Lastname == lastname
  is_player <- is_lastname & df$Name == name

  if (!any(in_season, na.rm = TRUE)) {
    msg <- paste("The", season, "season is not included in this database.")
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_lastname, na.rm = TRUE)) {
    msg <- paste0("Player of last name ", lastname, " is not in the database. Please check spelling of ", lastname)
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_player, na.rm = TRUE)) {
    print(paste0("Player of last name ", lastname, " found, but no entries for first name ", name, ". Available entries for ", lastname, " include:"))
    known_players <- unique(df[which(is_lastname), c("Lastname", "Name")])
    print(known_players)
    return(invisible(known_players))
  }

  played <- which(is_player & in_season)
  if (length(played) == 0L) {
    print(paste0(name, " ", lastname, " did not play in the ", season, ". Available seasons for this player include:"))
    known_seasons <- unique(df[which(is_player), c("Lastname", "Name", "Season")])
    print(known_seasons)
    return(invisible(known_seasons))
  }

  # An unknown column would otherwise produce a sentence with an empty value.
  if (!all(statistic %in% names(df))) {
    available <- setdiff(names(df), c("Lastname", "Name", "Season"))
    msg <- paste0("Statistic ", statistic, " is not included in this database. Available statistics include: ", paste(available, collapse = ", "))
    print(msg)
    return(invisible(msg))
  }

  # The matching rows are already known, so index them directly; the sentence
  # uses the values stored in `df`, not the arguments as typed.
  player_rows <- df[played, , drop = FALSE]
  paste(player_rows[["Name"]], player_rows[["Lastname"]], "scored", player_rows[[statistic]], statistic, "in", player_rows[["Season"]])
}
# Example calls exercising the different branches of printstats().
# A season written as xxxx-xxxx is shortened to xxxx-xx before the lookup.
printstats("Sedin", "Henrik","2005-2006","Points")
printstats("Sedin", "Daniel","2005-2006","Points")
# A season already written as xxxx-xx is used as given.
printstats("Crosby", "Sidney","2005-06","Points")
# NOTE(review): "Crosbie" looks like a deliberate misspelling meant to reach
# the unknown-last-name message — confirm against the data in `df`.
printstats("Crosbie", "Sidney","2005-06","Points")
printstats("Crosby", "Sid","2001-02","Points")
# `statistic` is omitted here, so the missing-argument check calls stop().
printstats("Bourque", "Ray", "2005-2006")
printstats=函数(姓氏、姓名、季节、统计){
#检查函数参数
如果(缺少(姓氏)|缺少(姓名)|缺少(赛季)|缺少(统计数据))停止(“需要指定球员姓氏、姓名、赛季、统计数据”)
#将季节xxxx转换为xxxx xx
如果(长度(strsplit(季节,split=”“)[[1]])==9){
季节%
dplyr::选择(姓名、姓氏、季节、一个(统计))
粘贴(df.sub[[“Name”]、df.sub[[“Lastname”]、“scored”、df.sub[[统计]、统计、“in”、df.sub[[季节”])
}
}
}
}
}
printstats(“塞丁”、“亨里克”、“2005-2006”、“分数”)
printstats(“塞丁”、“丹尼尔”、“2005-2006”、“分数”)
printstats(“克罗斯比”、“西德尼”、“2005-06”、“积分”)
printstats(“Crosbie”、“Sidney”、“2005-06”、“Points”)
printstats(“克罗斯比”、“Sid”、“2001-02”、“点数”)
printstats(“波克”、“雷”、“2005-2006”)
以下是不区分大小写的版本:

#' Report one statistic for a player in a given season (case-insensitive)
#'
#' Looks the player up in the global data frame `df`, which must have the
#' columns `Lastname`, `Name` and `Season` plus one column per statistic.
#' First and last names are compared after lower-casing both sides; the
#' season is compared as given. Each failed lookup step prints a diagnostic
#' instead of raising an error.
#'
#' @param lastname Player's last name, in any letter case.
#' @param name Player's first name, in any letter case.
#' @param season Season as "xxxx-xx"; a 9-character "xxxx-xxxx" value is
#'   shortened to "xxxx-xx" before the lookup.
#' @param statistic Name of the statistic column to report, e.g. "Points".
#' @return On success, a character vector with one sentence per matching row.
#'   Otherwise the last diagnostic that was printed, returned invisibly.
printstats <- function(lastname, name, season, statistic) {
  # check function arguments (`||` is the scalar operator intended for `if`)
  if (missing(lastname) || missing(name) || missing(season) ||
        missing(statistic)) {
    stop("Need to specify player lastname, name, season, statistic")
  }

  # convert season xxxx-xxxx to xxxx-xx
  if (isTRUE(nchar(season) == 9L)) {
    season <- paste0(substr(season, 1, 5), substr(season, 8, 9))
  }

  # Row masks are computed once and reused by every check below. They may
  # contain NA when `df` has missing values, hence na.rm / which() further down.
  in_season <- df$Season == season
  is_lastname <- tolower(df$Lastname) == tolower(lastname)
  is_player <- is_lastname & tolower(df$Name) == tolower(name)

  if (!any(in_season, na.rm = TRUE)) {
    msg <- paste("The", season, "season is not included in this database.")
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_lastname, na.rm = TRUE)) {
    msg <- paste0("Player of last name ", lastname, " is not in the database. Please check spelling of ", lastname)
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_player, na.rm = TRUE)) {
    print(paste0("Player of last name ", lastname, " found, but no entries for first name ", name, ". Available entries for ", lastname, " include:"))
    known_players <- unique(df[which(is_lastname), c("Lastname", "Name")])
    print(known_players)
    return(invisible(known_players))
  }

  played <- which(is_player & in_season)
  if (length(played) == 0L) {
    print(paste0(name, " ", lastname, " did not play in the ", season, ". Available seasons for this player include:"))
    known_seasons <- unique(df[which(is_player), c("Lastname", "Name", "Season")])
    print(known_seasons)
    return(invisible(known_seasons))
  }

  # An unknown column would otherwise produce a sentence with an empty value.
  if (!all(statistic %in% names(df))) {
    available <- setdiff(names(df), c("Lastname", "Name", "Season"))
    msg <- paste0("Statistic ", statistic, " is not included in this database. Available statistics include: ", paste(available, collapse = ", "))
    print(msg)
    return(invisible(msg))
  }

  # The matching rows are already known, so index them directly; the sentence
  # uses the spelling stored in `df`, not the arguments as typed.
  player_rows <- df[played, , drop = FALSE]
  paste(player_rows[["Name"]], player_rows[["Lastname"]], "scored", player_rows[[statistic]], statistic, "in", player_rows[["Season"]])
}
# Example calls for the case-insensitive version: first and last names are
# lower-cased on both sides before comparison, so letter case is ignored.
printstats("sedin", "Henrik","2005-2006","Points")
printstats("Sedin", "daniel","2005-2006","Points")
printstats("Crosby", "sidney","2005-06","Points")
printstats("CROSBY", "Sidney","2005-06","Points")
printstats=函数(姓氏、姓名、季节、统计){
#检查函数参数
如果(缺少(姓氏)|缺少(姓名)|缺少(赛季)|缺少(统计数据))停止(“需要指定球员姓氏、姓名、赛季、统计数据”)
#将季节xxxx转换为xxxx xx
如果(长度(strsplit(季节,split=”“)[[1]])==9){
季节%
dplyr::选择(姓名、姓氏、季节、一个(统计))
粘贴(df.sub[[“Name”]、df.sub[[“Lastname”]、“scored”、df.sub[[统计]、统计、“in”、df.sub[[季节”])
}
}
}
}
}
printstats(“塞丁”、“亨里克”、“2005-2006”、“分数”)
printstats(“塞丁”、“丹尼尔”、“2005-2006”、“分数”)
printstats(“克罗斯比”、“西德尼”、“2005-06”、“积分”)
printstats(“克罗斯比”、“西德尼”、“2005-06”、“积分”)

(1)“Assists”在您的函数中被拼错了(写成了带 3 个连续“s”的“Asssists”)。(2) 函数中的过滤器(filter)使用的是“nom”、“saison”,而不是“name”、“season”。考虑到这些错误,我不清楚该函数对“Points”是如何运行成功的。此外,多余的
选择(姓名、姓氏、季节、助攻))
中,在“助攻”和“进球”中,代码并不像showstats(“Crosby”、“Sid”、“2005-06”、“积分”)那样运行除非您从源代码中错误地复制了它。为什么
x==“Goals”
是您的
过滤器的一部分,而没有一个名为
x
的列具有类似
“Goals”
的值?对不起,变量名称不是英文的,因此我翻译了它们。我没有看到我在做这件事时犯了拼写错误。谢谢你的帮助,谢谢。它正在工作。我不知道“one_of”的功能。要得到我想要的东西要容易得多。如果我想把答案打印成这样:奥维奇金在2005-06赛季攻入52球,我该怎么做?@Mark see编辑代码。如果您满意,请接受答案,以便将其标记为已回答。请参见
?粘贴
?粘贴0
如何在粘贴中使用答案?我无法将其作为注释中的代码编写,但我有:打印(粘贴0(姓名“,”姓氏“,”得分“,”x,“赛季中的进球”)。我不知道用什么代替x,以获得我的函数中发现的值…你能澄清一下吗?您看到上面编辑的答案了吗?现在有一个新函数
printstats
,它使用粘贴打印您想要的内容。您是否询问如何将
printstats
的输出作为变量返回,以便以后使用?
#' Report one statistic for a player in a given season
#'
#' Looks the player up in the global data frame `df`, which must have the
#' columns `Lastname`, `Name` and `Season` plus one column per statistic.
#' Names are matched exactly (case-sensitive). Each failed lookup step prints
#' a diagnostic instead of raising an error.
#'
#' @param lastname Player's last name.
#' @param name Player's first name.
#' @param season Season as "xxxx-xx"; a 9-character "xxxx-xxxx" value is
#'   shortened to "xxxx-xx" before the lookup.
#' @param statistic Name of the statistic column to report, e.g. "Points".
#' @return On success, a character vector with one sentence per matching row.
#'   Otherwise the last diagnostic that was printed, returned invisibly.
printstats <- function(lastname, name, season, statistic) {
  # check function arguments (`||` is the scalar operator intended for `if`)
  if (missing(lastname) || missing(name) || missing(season) ||
        missing(statistic)) {
    stop("Need to specify player lastname, name, season, statistic")
  }

  # convert season xxxx-xxxx to xxxx-xx
  if (isTRUE(nchar(season) == 9L)) {
    season <- paste0(substr(season, 1, 5), substr(season, 8, 9))
  }

  # Row masks are computed once and reused by every check below. They may
  # contain NA when `df` has missing values, hence na.rm / which() further down.
  in_season <- df$Season == season
  is_lastname <- df$Lastname == lastname
  is_player <- is_lastname & df$Name == name

  if (!any(in_season, na.rm = TRUE)) {
    msg <- paste("The", season, "season is not included in this database.")
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_lastname, na.rm = TRUE)) {
    msg <- paste0("Player of last name ", lastname, " is not in the database. Please check spelling of ", lastname)
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_player, na.rm = TRUE)) {
    print(paste0("Player of last name ", lastname, " found, but no entries for first name ", name, ". Available entries for ", lastname, " include:"))
    known_players <- unique(df[which(is_lastname), c("Lastname", "Name")])
    print(known_players)
    return(invisible(known_players))
  }

  played <- which(is_player & in_season)
  if (length(played) == 0L) {
    print(paste0(name, " ", lastname, " did not play in the ", season, ". Available seasons for this player include:"))
    known_seasons <- unique(df[which(is_player), c("Lastname", "Name", "Season")])
    print(known_seasons)
    return(invisible(known_seasons))
  }

  # An unknown column would otherwise produce a sentence with an empty value.
  if (!all(statistic %in% names(df))) {
    available <- setdiff(names(df), c("Lastname", "Name", "Season"))
    msg <- paste0("Statistic ", statistic, " is not included in this database. Available statistics include: ", paste(available, collapse = ", "))
    print(msg)
    return(invisible(msg))
  }

  # The matching rows are already known, so index them directly; the sentence
  # uses the values stored in `df`, not the arguments as typed.
  player_rows <- df[played, , drop = FALSE]
  paste(player_rows[["Name"]], player_rows[["Lastname"]], "scored", player_rows[[statistic]], statistic, "in", player_rows[["Season"]])
}
# Example calls exercising the different branches of printstats().
# A season written as xxxx-xxxx is shortened to xxxx-xx before the lookup.
printstats("Sedin", "Henrik","2005-2006","Points")
printstats("Sedin", "Daniel","2005-2006","Points")
# A season already written as xxxx-xx is used as given.
printstats("Crosby", "Sidney","2005-06","Points")
# NOTE(review): "Crosbie" looks like a deliberate misspelling meant to reach
# the unknown-last-name message — confirm against the data in `df`.
printstats("Crosbie", "Sidney","2005-06","Points")
printstats("Crosby", "Sid","2001-02","Points")
# `statistic` is omitted here, so the missing-argument check calls stop().
printstats("Bourque", "Ray", "2005-2006")
#' Report one statistic for a player in a given season (case-insensitive)
#'
#' Looks the player up in the global data frame `df`, which must have the
#' columns `Lastname`, `Name` and `Season` plus one column per statistic.
#' First and last names are compared after lower-casing both sides; the
#' season is compared as given. Each failed lookup step prints a diagnostic
#' instead of raising an error.
#'
#' @param lastname Player's last name, in any letter case.
#' @param name Player's first name, in any letter case.
#' @param season Season as "xxxx-xx"; a 9-character "xxxx-xxxx" value is
#'   shortened to "xxxx-xx" before the lookup.
#' @param statistic Name of the statistic column to report, e.g. "Points".
#' @return On success, a character vector with one sentence per matching row.
#'   Otherwise the last diagnostic that was printed, returned invisibly.
printstats <- function(lastname, name, season, statistic) {
  # check function arguments (`||` is the scalar operator intended for `if`)
  if (missing(lastname) || missing(name) || missing(season) ||
        missing(statistic)) {
    stop("Need to specify player lastname, name, season, statistic")
  }

  # convert season xxxx-xxxx to xxxx-xx
  if (isTRUE(nchar(season) == 9L)) {
    season <- paste0(substr(season, 1, 5), substr(season, 8, 9))
  }

  # Row masks are computed once and reused by every check below. They may
  # contain NA when `df` has missing values, hence na.rm / which() further down.
  in_season <- df$Season == season
  is_lastname <- tolower(df$Lastname) == tolower(lastname)
  is_player <- is_lastname & tolower(df$Name) == tolower(name)

  if (!any(in_season, na.rm = TRUE)) {
    msg <- paste("The", season, "season is not included in this database.")
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_lastname, na.rm = TRUE)) {
    msg <- paste0("Player of last name ", lastname, " is not in the database. Please check spelling of ", lastname)
    print(msg)
    return(invisible(msg))
  }

  if (!any(is_player, na.rm = TRUE)) {
    print(paste0("Player of last name ", lastname, " found, but no entries for first name ", name, ". Available entries for ", lastname, " include:"))
    known_players <- unique(df[which(is_lastname), c("Lastname", "Name")])
    print(known_players)
    return(invisible(known_players))
  }

  played <- which(is_player & in_season)
  if (length(played) == 0L) {
    print(paste0(name, " ", lastname, " did not play in the ", season, ". Available seasons for this player include:"))
    known_seasons <- unique(df[which(is_player), c("Lastname", "Name", "Season")])
    print(known_seasons)
    return(invisible(known_seasons))
  }

  # An unknown column would otherwise produce a sentence with an empty value.
  if (!all(statistic %in% names(df))) {
    available <- setdiff(names(df), c("Lastname", "Name", "Season"))
    msg <- paste0("Statistic ", statistic, " is not included in this database. Available statistics include: ", paste(available, collapse = ", "))
    print(msg)
    return(invisible(msg))
  }

  # The matching rows are already known, so index them directly; the sentence
  # uses the spelling stored in `df`, not the arguments as typed.
  player_rows <- df[played, , drop = FALSE]
  paste(player_rows[["Name"]], player_rows[["Lastname"]], "scored", player_rows[[statistic]], statistic, "in", player_rows[["Season"]])
}
# Example calls for the case-insensitive version: first and last names are
# lower-cased on both sides before comparison, so letter case is ignored.
printstats("sedin", "Henrik","2005-2006","Points")
printstats("Sedin", "daniel","2005-2006","Points")
printstats("Crosby", "sidney","2005-06","Points")
printstats("CROSBY", "Sidney","2005-06","Points")