R:用数据框中的列名替换值为 1 的单元格
我的数据如下所示:R 用数据帧中的列名替换1s,r,R,我的数据如下所示: df<-data.frame( hhid = c(5668,5595,4724,4756,4856,4730,4757,6320,4758,6319,6311,5477,6322), pid=c(93660,93660,100960,100960,100960,100960,100960,100962,100960,100962,100962,93814,100962), pname=c("AB","HG","DC","DC","DC","DC","DC","
# Sample household data used throughout the examples: identifier columns
# (hhid, pid, pname, vid, vname), four 0/1 indicator columns (me, ls, lg, lo)
# and an amount. In this sample every row has exactly one indicator set to 1.
df <- data.frame(
  hhid = c(
    5668, 5595, 4724, 4756, 4856, 4730, 4757,
    6320, 4758, 6319, 6311, 5477, 6322
  ),
  pid = c(
    93660, 93660, 100960, 100960, 100960, 100960, 100960,
    100962, 100960, 100962, 100962, 93814, 100962
  ),
  pname = c(
    "AB", "HG", "DC", "DC", "DC", "DC", "DC",
    "BA", "DC", "BA", "BA", "BH", "BA"
  ),
  vid = c(
    462962, 608757, 772284, 772284, 772284, 293475, 293475,
    656323, 293475, 656323, 81533, 465612, 656323
  ),
  vname = c(
    "ABCD", "KJUI", "HIND", "HIND", "HIND", "KAJA", "KAJA",
    "ADAR", "KAJA", "ADAR", "SANG", "NARI", "ADAR"
  ),
  # Indicator columns: 1 marks the category that applies to the row.
  me = c(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0),
  ls = c(0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1),
  lg = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0),
  lo = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0),
  amt = c(
    20000, 20000, 14000, 14000, 14000, 14000, 14000,
    27000, 14000, 27000, 27000, 20000, 23000
  )
)
因此,我想要的是新增一列 LOS:对于每一行,在 me、ls、lg、lo 这四列中找到值为 1 的那一列,并把该列的列名作为这一行 LOS 的值。也就是说,用列名替换这些列中的 1,并将结果保存到新列 LOS 中。
您可以将数据转换为更长的格式,并过滤值为1的行。
library(dplyr)
# Stack the four indicator columns into name/value pairs (the column name goes
# to LOS), keep only the pairs whose indicator is set, then drop the 0/1
# column so that LOS alone records which indicator applied to each row.
df %>%
  tidyr::pivot_longer(
    cols = c(me, ls, lg, lo),
    names_to = "LOS",
    values_to = "value"
  ) %>%
  filter(value == 1) %>%
  select(-value)
# hhid pid pname vid vname amt LOS
# <dbl> <dbl> <fct> <dbl> <fct> <dbl> <chr>
# 1 5668 93660 AB 462962 ABCD 20000 me
# 2 5595 93660 HG 608757 KJUI 20000 me
# 3 4724 100960 DC 772284 HIND 14000 ls
# 4 4756 100960 DC 772284 HIND 14000 ls
# 5 4856 100960 DC 772284 HIND 14000 lo
# 6 4730 100960 DC 293475 KAJA 14000 ls
# 7 4757 100960 DC 293475 KAJA 14000 lg
# 8 6320 100962 BA 656323 ADAR 27000 me
# 9 4758 100960 DC 293475 KAJA 14000 ls
#10 6319 100962 BA 656323 ADAR 27000 lg
#11 6311 100962 BA 81533 SANG 27000 lo
#12 5477 93814 BH 465612 NARI 20000 me
#13 6322 100962 BA 656323 ADAR 23000 ls
library(dplyr)
tidyr::pivot_longer(df, cols = c(me, ls, lg, lo), names_to = "LOS") %>%
filter(value == 1) %>%
select(-value)
# hhid pid pname vid vname amt LOS
# <dbl> <dbl> <fct> <dbl> <fct> <dbl> <chr>
# 1 5668 93660 AB 462962 ABCD 20000 me
# 2 5595 93660 HG 608757 KJUI 20000 me
# 3 4724 100960 DC 772284 HIND 14000 ls
# 4 4756 100960 DC 772284 HIND 14000 ls
# 5 4856 100960 DC 772284 HIND 14000 lo
# 6 4730 100960 DC 293475 KAJA 14000 ls
# 7 4757 100960 DC 293475 KAJA 14000 lg
# 8 6320 100962 BA 656323 ADAR 27000 me
# 9 4758 100960 DC 293475 KAJA 14000 ls
#10 6319 100962 BA 656323 ADAR 27000 lg
#11 6311 100962 BA 81533 SANG 27000 lo
#12 5477 93814 BH 465612 NARI 20000 me
#13 6322 100962 BA 656323 ADAR 23000 ls
您可以将数据转换为更长的格式,并过滤值为1的行
library(dplyr)
# Stack the four indicator columns into name/value pairs (the column name goes
# to LOS), keep only the pairs whose indicator is set, then drop the 0/1
# column so that LOS alone records which indicator applied to each row.
df %>%
  tidyr::pivot_longer(
    cols = c(me, ls, lg, lo),
    names_to = "LOS",
    values_to = "value"
  ) %>%
  filter(value == 1) %>%
  select(-value)
# hhid pid pname vid vname amt LOS
# <dbl> <dbl> <fct> <dbl> <fct> <dbl> <chr>
# 1 5668 93660 AB 462962 ABCD 20000 me
# 2 5595 93660 HG 608757 KJUI 20000 me
# 3 4724 100960 DC 772284 HIND 14000 ls
# 4 4756 100960 DC 772284 HIND 14000 ls
# 5 4856 100960 DC 772284 HIND 14000 lo
# 6 4730 100960 DC 293475 KAJA 14000 ls
# 7 4757 100960 DC 293475 KAJA 14000 lg
# 8 6320 100962 BA 656323 ADAR 27000 me
# 9 4758 100960 DC 293475 KAJA 14000 ls
#10 6319 100962 BA 656323 ADAR 27000 lg
#11 6311 100962 BA 81533 SANG 27000 lo
#12 5477 93814 BH 465612 NARI 20000 me
#13 6322 100962 BA 656323 ADAR 23000 ls
library(dplyr)
tidyr::pivot_longer(df, cols = c(me, ls, lg, lo), names_to = "LOS") %>%
filter(value == 1) %>%
select(-value)
# hhid pid pname vid vname amt LOS
# <dbl> <dbl> <fct> <dbl> <fct> <dbl> <chr>
# 1 5668 93660 AB 462962 ABCD 20000 me
# 2 5595 93660 HG 608757 KJUI 20000 me
# 3 4724 100960 DC 772284 HIND 14000 ls
# 4 4756 100960 DC 772284 HIND 14000 ls
# 5 4856 100960 DC 772284 HIND 14000 lo
# 6 4730 100960 DC 293475 KAJA 14000 ls
# 7 4757 100960 DC 293475 KAJA 14000 lg
# 8 6320 100962 BA 656323 ADAR 27000 me
# 9 4758 100960 DC 293475 KAJA 14000 ls
#10 6319 100962 BA 656323 ADAR 27000 lg
#11 6311 100962 BA 81533 SANG 27000 lo
#12 5477 93814 BH 465612 NARI 20000 me
#13 6322 100962 BA 656323 ADAR 23000 ls
您可以执行以下操作:
# Add a LOS column holding, for each row, the name of the indicator column
# (me, ls, lg, lo) whose value is 1.
#
# The previous form indexed with apply(..., 1, which), which only yields a
# plain integer vector when every row has exactly one 1; a row with no 1 (or
# several) turns the result into a list and the subscript fails. Here:
#   * rows with no indicator set get NA,
#   * rows with several indicators set get the first one in column order,
#   * NA indicator values are treated as "not set".
# local() keeps the helper variables out of the calling environment.
df$LOS <- local({
  flag_cols <- c("me", "ls", "lg", "lo")
  is_set <- df[, flag_cols] == 1
  is_set[is.na(is_set)] <- FALSE
  # max.col() always returns some column, even for an all-FALSE row, so rows
  # without any flag are blanked out explicitly afterwards.
  first_hit <- max.col(is_set, ties.method = "first")
  first_hit[rowSums(is_set) == 0] <- NA
  flag_cols[first_hit]
})
# > df
# hhid pid pname vid vname me ls lg lo amt LOS
# 1 5668 93660 AB 462962 ABCD 1 0 0 0 20000 me
# 2 5595 93660 HG 608757 KJUI 1 0 0 0 20000 me
# 3 4724 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 4 4756 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 5 4856 100960 DC 772284 HIND 0 0 0 1 14000 lo
# 6 4730 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 7 4757 100960 DC 293475 KAJA 0 0 1 0 14000 lg
# 8 6320 100962 BA 656323 ADAR 1 0 0 0 27000 me
# 9 4758 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 10 6319 100962 BA 656323 ADAR 0 0 1 0 27000 lg
# 11 6311 100962 BA 81533 SANG 0 0 0 1 27000 lo
# 12 5477 93814 BH 465612 NARI 1 0 0 0 20000 me
# 13 6322 100962 BA 656323 ADAR 0 1 0 0 23000 ls
df$LOS <- c("me", "ls", "lg", "lo")[apply(df[, c("me", "ls", "lg", "lo")]==1, 1, which)]
# > df
# hhid pid pname vid vname me ls lg lo amt LOS
# 1 5668 93660 AB 462962 ABCD 1 0 0 0 20000 me
# 2 5595 93660 HG 608757 KJUI 1 0 0 0 20000 me
# 3 4724 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 4 4756 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 5 4856 100960 DC 772284 HIND 0 0 0 1 14000 lo
# 6 4730 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 7 4757 100960 DC 293475 KAJA 0 0 1 0 14000 lg
# 8 6320 100962 BA 656323 ADAR 1 0 0 0 27000 me
# 9 4758 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 10 6319 100962 BA 656323 ADAR 0 0 1 0 27000 lg
# 11 6311 100962 BA 81533 SANG 0 0 0 1 27000 lo
# 12 5477 93814 BH 465612 NARI 1 0 0 0 20000 me
# 13 6322 100962 BA 656323 ADAR 0 1 0 0 23000 ls
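或者(先定义列名向量;原文此处的代码在提取时被截断,以下按上下文重建):
cols <- c("me", "ls", "lg", "lo")
df$LOS <- cols[apply(df[, cols] == 1, 1, which)]
您可以执行以下操作: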
# Add a LOS column holding, for each row, the name of the indicator column
# (me, ls, lg, lo) whose value is 1.
#
# The previous form indexed with apply(..., 1, which), which only yields a
# plain integer vector when every row has exactly one 1; a row with no 1 (or
# several) turns the result into a list and the subscript fails. Here:
#   * rows with no indicator set get NA,
#   * rows with several indicators set get the first one in column order,
#   * NA indicator values are treated as "not set".
# local() keeps the helper variables out of the calling environment.
df$LOS <- local({
  flag_cols <- c("me", "ls", "lg", "lo")
  is_set <- df[, flag_cols] == 1
  is_set[is.na(is_set)] <- FALSE
  # max.col() always returns some column, even for an all-FALSE row, so rows
  # without any flag are blanked out explicitly afterwards.
  first_hit <- max.col(is_set, ties.method = "first")
  first_hit[rowSums(is_set) == 0] <- NA
  flag_cols[first_hit]
})
# > df
# hhid pid pname vid vname me ls lg lo amt LOS
# 1 5668 93660 AB 462962 ABCD 1 0 0 0 20000 me
# 2 5595 93660 HG 608757 KJUI 1 0 0 0 20000 me
# 3 4724 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 4 4756 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 5 4856 100960 DC 772284 HIND 0 0 0 1 14000 lo
# 6 4730 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 7 4757 100960 DC 293475 KAJA 0 0 1 0 14000 lg
# 8 6320 100962 BA 656323 ADAR 1 0 0 0 27000 me
# 9 4758 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 10 6319 100962 BA 656323 ADAR 0 0 1 0 27000 lg
# 11 6311 100962 BA 81533 SANG 0 0 0 1 27000 lo
# 12 5477 93814 BH 465612 NARI 1 0 0 0 20000 me
# 13 6322 100962 BA 656323 ADAR 0 1 0 0 23000 ls
df$LOS <- c("me", "ls", "lg", "lo")[apply(df[, c("me", "ls", "lg", "lo")]==1, 1, which)]
# > df
# hhid pid pname vid vname me ls lg lo amt LOS
# 1 5668 93660 AB 462962 ABCD 1 0 0 0 20000 me
# 2 5595 93660 HG 608757 KJUI 1 0 0 0 20000 me
# 3 4724 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 4 4756 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 5 4856 100960 DC 772284 HIND 0 0 0 1 14000 lo
# 6 4730 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 7 4757 100960 DC 293475 KAJA 0 0 1 0 14000 lg
# 8 6320 100962 BA 656323 ADAR 1 0 0 0 27000 me
# 9 4758 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 10 6319 100962 BA 656323 ADAR 0 0 1 0 27000 lg
# 11 6311 100962 BA 81533 SANG 0 0 0 1 27000 lo
# 12 5477 93814 BH 465612 NARI 1 0 0 0 20000 me
# 13 6322 100962 BA 656323 ADAR 0 1 0 0 23000 ls
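或者(先定义列名向量;原文此处的代码在提取时被截断,以下按上下文重建):
cols <- c("me", "ls", "lg", "lo")
df$LOS <- cols[apply(df[, cols] == 1, 1, which)]
Base R 解决方案: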
# Replace the four indicator columns (me, ls, lg, lo) with a single column
# `name` that holds the name of the indicator set in each row.
#
# Changes from the previous which.max() form:
#   * the column mask is computed once instead of three times;
#   * a row with no indicator set now gets NA (which.max() silently returned
#     the first column, i.e. "me", for an all-zero row);
#   * NA indicator values are treated as "not set" instead of potentially
#     turning the apply() result into a list;
#   * when several indicators are set, the first in column order is used.
# local() keeps the helper variables out of the calling environment.
df <- local({
  is_flag <- names(df) %in% c("me", "ls", "lg", "lo")
  flags <- as.matrix(df[is_flag])
  is_set <- !is.na(flags) & flags != 0
  # max.col() always returns some column, even for an all-FALSE row, so rows
  # without any flag are blanked out explicitly afterwards.
  first_hit <- max.col(is_set, ties.method = "first")
  first_hit[rowSums(is_set) == 0] <- NA
  transform(df[!is_flag], name = names(df)[is_flag][first_hit])
})
df
Base R 解决方案:
# Replace the four indicator columns (me, ls, lg, lo) with a single column
# `name` that holds the name of the indicator set in each row.
#
# Changes from the previous which.max() form:
#   * the column mask is computed once instead of three times;
#   * a row with no indicator set now gets NA (which.max() silently returned
#     the first column, i.e. "me", for an all-zero row);
#   * NA indicator values are treated as "not set" instead of potentially
#     turning the apply() result into a list;
#   * when several indicators are set, the first in column order is used.
# local() keeps the helper variables out of the calling environment.
df <- local({
  is_flag <- names(df) %in% c("me", "ls", "lg", "lo")
  flags <- as.matrix(df[is_flag])
  is_set <- !is.na(flags) & flags != 0
  # max.col() always returns some column, even for an all-FALSE row, so rows
  # without any flag are blanked out explicitly afterwards.
  first_hit <- max.col(is_set, ties.method = "first")
  first_hit[rowSums(is_set) == 0] <- NA
  transform(df[!is_flag], name = names(df)[is_flag][first_hit])
})
# Print the reshaped data frame.
df