在 R 中按特定键合并数据框 / 从列中删除星号

合并R中特定键上的数据帧/从列中删除星号,r,merge,punctuation,R,Merge,Punctuation,嘿,伙计们,我需要帮助在R中合并,这里是我的代码的一个可复制的样本,我将解释 我遇到的问题是,固定数据df中的某些人是名人堂成员,因此他们的名字中有星号。我想清理一下,这样我就可以把他们加入选秀数据库,抓取他们的选秀年份。当我使用代码执行此操作时,我遇到了一个错误,该错误会给我: 顺序错误(玩家=列表(“Tariq Abdul Wahad”、“Shareef Abdur Rahim”),: “orderVector1”中未实现的类型“list” 你们建议我如何剥离列中的星号,以便正确连接播放器键

嘿,伙计们,我需要帮助在R中合并,这里是我的代码的一个可复制的样本,我将解释

我遇到的问题是,fixedstats 数据框中的某些球员是名人堂成员,因此他们的名字后面带有星号。我想把星号清理掉,这样就可以把他们与选秀数据(draftbank)连接起来,取得他们的选秀年份。当我用下面的代码执行合并时,遇到了如下错误:

Error in order(Player = list("Tariq Abdul Wahad", "Shareef Abdur Rahim", ... : unimplemented type 'list' in 'orderVector1'

你们建议我如何去掉该列中的星号,以便按 Player 键正确连接两个数据框?谢谢!

代码:

library(htmltab)
library(sqldf)
library(plyr)
library(readr)

因此,如果有人对此感到好奇并在将来遇到同样的问题:原因是我使用了 lapply(返回 list)而不是 vapply(返回向量)。

fixedstats$Player
library(htmltab)
library(sqldf)
library(plyr)
library(readr)

# Accumulators for the two scraped tables. Each starts as "" so that the first
# rbind() has something to bind to; this leaves a placeholder first row of
# empty strings, which the script removes again after the SQL selection step.
stats0 <- ""
draftbank0 <- ""

# Download the "advanced" stats table for every season and stack the results.
# The range used to read 20003:2017 (a typo): that sequence counts *down* from
# 20003 and would request nearly 18,000 pages, almost none of which exist.
for (i in 2003:2017){

  url <- paste0("http://www.basketball-reference.com/leagues/NBA_",i,"_advanced.html")
  stats <- htmltab(doc = url, which = 1, header = 1, stringsasfactors = FALSE)
  stats$year <- i  # tag every row with the season it came from

  stats0 <- rbind(stats0,stats)

}

# Drop rows that are NA in every column. This filter used to sit inside the
# loop without an assignment, so its result was computed and thrown away on
# every iteration; it is now applied once, to the finished table.
stats0 <- stats0[rowSums(is.na(stats0)) != ncol(stats0),]


# Give the stacked stats table short, SQL-friendly column names (30 in total).
# "Null" and "Null2" label two columns that are not selected below
# (presumably empty spacer columns in the scraped table).
names(stats0) <- c(
  "Rank", "Player", "Pos", "Age", "Tm", "G", "MP", "PER", "TSp",
  "ThreePAr", "FTr", "ORBp", "DRBp", "TRBp", "ASTp", "STLp", "BLKp",
  "TOVp", "USGp",
  "Null", #comment out null if needed
  "OWS", "DWS", "WS", "WS48",
  "Null2", #comment out null if needed
  "OBPM", "DBPM", "BPM", "VORP", "Year"
)

# Keep every column except the two "Null" ones, and skip rows whose Player
# value is the literal text 'Player' (presumably header rows repeated inside
# the scraped table).
fixedstats <- sqldf("SELECT Rank, Player, Pos, Age, Tm, G, MP, PER, TSp, ThreePAr, FTr, ORBp, DRBp, TRBp, ASTp, STLp, BLKp, TOVp, 
                USGp, OWS, DWS, WS, WS48, OBPM, DBPM, BPM, VORP, Year FROM stats0 WHERE player != 'Player'")

# Remove the first remaining row (presumably the "" placeholder row that
# stats0 was initialised with).
fixedstats <- fixedstats[-1, ]



# Fetch the draft history table for each draft year from 1980 to 2016 and
# append it to the running draftbank0 table.
for (i in 1980:2016) {
  url2 <- paste0("http://www.draftexpress.com/nba-mock-history/", i, "/all/all/")
  draftbank <- htmltab(
    doc = url2,
    which = 1,
    header = 1,
    stringsasfactors = FALSE
  )
  draftbank0 <- rbind(draftbank0, draftbank)
}



# Rename the 17 columns of the stacked draft table. "Null1", "Null2" and
# "Null3" label columns that the query below does not select.
colnames(draftbank0) <- c("Draft_Year", 
                      "Pick",
                      "Null1",
                      "Player",
                      "Null2",
                      "Position",
                      "Age",    
                      "Height",
                      "Weight",
                      "Wingspan",
                      "Points", 
                      "Rebounds",   
                      "Assists",
                      "PER", 
                      "Null3",  
                      "League",
                      "EWA")

# Select the six draft columns of interest into draftbankfinal.
draftbankfinal <- sqldf("SELECT Player, Position, Age, Height, Wingspan, Draft_Year FROM draftbank0")

# NOTE(review): this assignment replaces the sqldf() result above with the
# full draftbank0 table minus its first row (presumably the "" placeholder
# row), so the column selection currently has no effect. The positional
# column indices applied to the merged table later in the script appear to
# rely on all draftbank0 columns being present -- confirm the intent before
# changing this to draftbankfinal[-1,].
draftbankfinal <- draftbank0[-1,]



# Multiple drafts appendix: getting rid of guys with similar names.
# Each (player, draft year) pair below is filtered out of draftbankfinal in
# turn, in the order listed.
draftbankfinal <- Reduce(
  function(bank, dup) {
    bank[!(bank$Player == dup$player & bank$Draft_Year == dup$year), ]
  },
  list(
    list(player = "Corey Brewer", year = 1998),
    list(player = "Patrick Ewing", year = 1985),
    list(player = "Charles Smith", year = 1988),
    list(player = "Ray McCallum", year = 1983),
    list(player = "James Anderson", year = 1985),
    list(player = "Ken Johnson", year = 1985),
    list(player = "Dee Brown", year = 1990)
  ),
  draftbankfinal
)





# Clean the Player key in fixedstats so that it can be matched against the
# draft table.
#
# sub() is vectorised over its input, so it is applied to the whole column at
# once and returns a character vector. The previous lapply(...) form returned
# a list, which turned Player into a list column and made merge() fail with
# "unimplemented type 'list' in 'orderVector1'". as.vector() cannot undo that,
# because a list already is a vector.
fixedstats$Player <- sub(pattern = "[*]", replacement = "", x = fixedstats$Player)
fixedstats$Player <- sub(pattern = "[']", replacement = "", x = fixedstats$Player)

# The step above removes the first apostrophe from every name; put it back
# for the specific players listed here.
fixedstats$Player <- ifelse(fixedstats$Player == 'Jermaine ONeal', 'Jermaine O\'Neal', fixedstats$Player)
fixedstats$Player <- ifelse(fixedstats$Player == 'J.J Obrien', 'J.J O\'Brien', fixedstats$Player)
fixedstats$Player <- ifelse(fixedstats$Player == 'Johnny OBryant', 'Johnny O\'Bryant', fixedstats$Player)
fixedstats$Player <- ifelse(fixedstats$Player == 'Patrick OBryant', 'Patrick O\'Bryant', fixedstats$Player)
fixedstats$Player <- ifelse(fixedstats$Player == 'Shaquille ONeal', 'Shaquille O\'Neal', fixedstats$Player)

# Make sure both join keys are plain vectors (a no-op for character columns).
fixedstats$Player <- as.vector(fixedstats$Player)
draftbankfinal$Player <- as.vector(draftbankfinal$Player)

# Left join: keep every stats row and attach draft information wherever the
# player name matches.
df <- merge(x = fixedstats, y = draftbankfinal, by = "Player", all.x = TRUE)

# Keep a subset of the merged columns by position. These indices depend on
# the column order of both inputs, so revisit them if either table changes.
df2 <- df[c(1,3:5,22,28:29,32:35)]
# Snippets from the answer, kept to contrast the failing and working forms.
#
# Failing form: lapply() returns a list, so Player becomes a list column.
fixedstats$Player <- lapply(fixedstats$Player, sub, pattern = "[*]", replacement = "")
fixedstats$Player <- lapply(fixedstats$Player, sub, pattern = "[']", replacement = "")


# as.vector() does not help here: it leaves a list as a list.
fixedstats$Player <- as.vector(fixedstats$Player)
draftbankfinal$Player <- as.vector(draftbankfinal$Player)
# Working form: vapply() with character(1) as the template value returns a
# character vector, with one string per element.
fixedstats$Player <- vapply(fixedstats$Player, sub, pattern = "[*]", replacement = "",character(1))
fixedstats$Player <- vapply(fixedstats$Player, sub, pattern = "[']", replacement = "",character(1))