R 生成一个字符变量作为ID变量

R 生成一个字符变量作为ID变量,r,string,R,String,我有以下结构的数据框: structure(list(DateTime = structure(c(1578009600, 1578096000, 1578182400, 1578268800, 1578268800, 1578441600, 1578528000, 1578700800, 1578873600, 1578960000, 1579046400, 1579219200, 1579305600, 1579651200, 1579737600, 1579910400, 15799

我有以下结构的数据框:

# dput() output of the example data: a 20-row tibble with columns
# DateTime (POSIXct, UTC), Y, Var1, Var2, Var3 and a character column ID.
structure(list(DateTime = structure(c(1578009600, 1578096000, 
1578182400, 1578268800, 1578268800, 1578441600, 1578528000, 1578700800, 
1578873600, 1578960000, 1579046400, 1579219200, 1579305600, 1579651200, 
1579737600, 1579910400, 1579996800, 1580083200, 1580256000, 1580342400
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Y = c(0.398128487169134, 
0.943205112163668, 0.28252751156162, 0.249960153693576, 0.795387767024049, 
0.944143005087556, 0.158092709898581, 0.0939977195370584, 0.363535430584373, 
0.44838029451066, 0.693205040632161, 0.552676175195101, 0.985732436206492, 
0.496368199684004, 0.534787647236829, 0.327063363194893, 0.790654871992939, 
0.568471157855241, 0.837558574291766, 0.643802685572038), Var1 = c(0, 
0, 0, 0, 0, 0, 0.94290146, 0, 0, 0, 0, 0, 0, 0, 0.684747396078389, 
0, 0, 0, 0, 0), Var2 = c(0, 0.554797180422304, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Var3 = c(0, 0, 0, 0.815909158957364, 
0, 0, 0, 0, 0, 0.648069655007399, 0, 0.501910457604968, 0, 0, 
0, 0, 0, 0.356728763364177, 0, 0), ID = c("0", "Var2", "0", "Var3", 
"0", "0", "0", "0", "0", "Var3", "0", "Var3", "0", "0", "0", 
"0", "0", "Var3", "0", "0")), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))
我希望在现有数据框中添加两个变量:一个字符(字符串)变量 ID,其取值为该行中取非零值的变量的名称(Var1、Var2 或 Var3);以及一个数值型变量 ID2,根据取非零值的变量为该行分配一个“数字ID”。有谁知道如何高效地计算 ID 和 ID2 这两个变量吗?

#1  DateTime    Y   Var1    Var2    Var3    
#2  3/1/20  0.67    0.00    0.00    0.00    
#3  4/1/20  0.31    0.00    0.11    0.00    
#4  5/1/20  0.96    0.00    0.00    0.00    
#5  6/1/20  0.28    0.00    0.00    0.40    
#6  6/1/20  0.28    0.00    0.00    0.00    
#7  8/1/20  0.48    0.00    0.00    0.00    
#8  9/1/20  0.07    0.94    0.00    0.00    
#9  11/1/20 0.94    0.00    0.00    0.00    
#10 13/1/20 0.05    0.00    0.00    0.00    
#11 14/1/20 0.04    0.00    0.00    0.53    
#12 15/1/20 0.93    0.00    0.00    0.00    
#13 17/1/20 0.30    0.00    0.00    0.32    
#14 18/1/20 0.75    0.00    0.00    0.00    
#15 22/1/20 0.99    0.00    0.00    0.00    
#16 23/1/20 0.32    0.15    0.00    0.00    
#17 25/1/20 0.51    0.00    0.00    0.00    
#18 26/1/20 0.15    0.00    0.00    0.00    
#19 27/1/20 0.78    0.00    0.00    0.15    
#20 29/1/20 0.83    0.00    0.00    0.00    
#21 30/1/20 0.10    0.00    0.00    0.00
预期输出(即添加到数据框的
ID
ID2
变量)

获取var1、var2、var3不为零的索引,然后根据该索引分配匹配的列名:

# Rebuild ID (name of the non-zero variable) and ID2 (its numeric code) on `d`.
# Rows where every candidate column is zero get NA in both columns.

# Candidate columns, selected by name so the code does not depend on their
# position in the data frame.
id_cols <- c("Var1", "Var2", "Var3")

d$ID <- NULL # drop the existing column, as it is re-created below

# (row, column) index pairs of every non-zero entry among the candidate columns
nonzero <- which(as.matrix(d[, id_cols]) != 0, arr.ind = TRUE)

d$ID <- NA_character_
d$ID[nonzero[, 1]] <- id_cols[nonzero[, 2]]

# Code = position of the name in id_cols (Var1 -> 1, Var2 -> 2, Var3 -> 3).
# Unlike as.numeric(as.factor(ID)), the codes do not shift when one of the
# variables never takes a non-zero value.
d$ID2 <- as.numeric(match(d$ID, id_cols))

d
##  A tibble: 20 x 7
#    DateTime                 Y  Var1  Var2  Var3 ID      ID2
#    <dttm>               <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#  1 2020-01-03 00:00:00 0.398  0     0     0     NA       NA
#  2 2020-01-04 00:00:00 0.943  0     0.555 0     Var2      2
#  3 2020-01-05 00:00:00 0.283  0     0     0     NA       NA
#  4 2020-01-06 00:00:00 0.250  0     0     0.816 Var3      3
#  5 2020-01-06 00:00:00 0.795  0     0     0     NA       NA
#  6 2020-01-08 00:00:00 0.944  0     0     0     NA       NA
#  7 2020-01-09 00:00:00 0.158  0.943 0     0     Var1      1
#  8 2020-01-11 00:00:00 0.0940 0     0     0     NA       NA
#  9 2020-01-13 00:00:00 0.364  0     0     0     NA       NA
# 10 2020-01-14 00:00:00 0.448  0     0     0.648 Var3      3
# 11 2020-01-15 00:00:00 0.693  0     0     0     NA       NA
# 12 2020-01-17 00:00:00 0.553  0     0     0.502 Var3      3
# 13 2020-01-18 00:00:00 0.986  0     0     0     NA       NA
# 14 2020-01-22 00:00:00 0.496  0     0     0     NA       NA
# 15 2020-01-23 00:00:00 0.535  0.685 0     0     Var1      1
# 16 2020-01-25 00:00:00 0.327  0     0     0     NA       NA
# 17 2020-01-26 00:00:00 0.791  0     0     0     NA       NA
# 18 2020-01-27 00:00:00 0.568  0     0     0.357 Var3      3
# 19 2020-01-29 00:00:00 0.838  0     0     0     NA       NA
# 20 2020-01-30 00:00:00 0.644  0     0     0     NA       NA
d$ID
# Requires dplyr (mutate, select, %>%).
# ID  : name of the Var1:Var3 column holding the non-zero value, or "0" when
#       every candidate column in the row is zero.
# ID2 : code for ID in order of first appearance, with "0" pinned to code 0.
df %>%
  mutate(
    ID = select(., Var1:Var3) %>%
      {
        # Work on magnitudes so negative or mutually cancelling values are
        # still recognised as non-zero.
        magnitude <- abs(as.matrix(.))
        ifelse(
          rowSums(magnitude) > 0,
          # ties.method = "first" avoids max.col()'s default random tie-break
          names(.)[max.col(magnitude, ties.method = "first")],
          "0"
        )
      },
    # Listing "0" first keeps it mapped to 0 even when the first row already
    # has a non-zero variable.
    ID2 = as.integer(ordered(ID, unique(c("0", ID)))) - 1
  )
   DateTime                 Y  Var1  Var2  Var3 ID      ID2
 1 2020-01-03 00:00:00 0.398  0     0     0     0         0
 2 2020-01-04 00:00:00 0.943  0     0.555 0     Var2      1
 3 2020-01-05 00:00:00 0.283  0     0     0     0         0
 4 2020-01-06 00:00:00 0.250  0     0     0.816 Var3      2
 5 2020-01-06 00:00:00 0.795  0     0     0     0         0
 6 2020-01-08 00:00:00 0.944  0     0     0     0         0
 7 2020-01-09 00:00:00 0.158  0.943 0     0     Var1      3
 8 2020-01-11 00:00:00 0.0940 0     0     0     0         0
 9 2020-01-13 00:00:00 0.364  0     0     0     0         0
10 2020-01-14 00:00:00 0.448  0     0     0.648 Var3      2
11 2020-01-15 00:00:00 0.693  0     0     0     0         0
12 2020-01-17 00:00:00 0.553  0     0     0.502 Var3      2
13 2020-01-18 00:00:00 0.986  0     0     0     0         0
14 2020-01-22 00:00:00 0.496  0     0     0     0         0
15 2020-01-23 00:00:00 0.535  0.685 0     0     Var1      3
16 2020-01-25 00:00:00 0.327  0     0     0     0         0
17 2020-01-26 00:00:00 0.791  0     0     0     0         0
18 2020-01-27 00:00:00 0.568  0     0     0.357 Var3      2
19 2020-01-29 00:00:00 0.838  0     0     0     0         0
20 2020-01-30 00:00:00 0.644  0     0     0     0         0

为免生疑问,您能否给出前几个案例的预期输出?另外,能否以可直接使用的格式提供数据,即作为数据框对象,或使用
dput(…您的数据…) 的输出?
嗨,Peter。谢谢你指出这一点。我一直想知道如何制作一个可重现的示例——您提到的
dput(…您的数据…)
函数帮我解决了这个问题。我现在已经编辑了我的问题,希望这次编辑足以让您提取数据集。我也认为提供一个预期输出的示例会很有帮助。我认为您的
dput()
缺少变量ID2?您希望新变量看起来如何-ID和ID2部分之间有一个空格?感谢您提出答案,Onyanbu。将代码应用于数据帧时,我遇到以下错误:
“select(., Var1:Var3) 中的错误:未使用的参数 (Var1:Var3)”
@CecSK Var1:Var3 表示从 Var1 到 Var3 的所有变量,即 Var1、Var2、Var3
# Rebuild ID (name of the non-zero variable) and ID2 (its numeric code) on `d`.
# Rows where every candidate column is zero get NA in both columns.

# Candidate columns, selected by name so the code does not depend on their
# position in the data frame.
id_cols <- c("Var1", "Var2", "Var3")

d$ID <- NULL # drop the existing column, as it is re-created below

# (row, column) index pairs of every non-zero entry among the candidate columns
nonzero <- which(as.matrix(d[, id_cols]) != 0, arr.ind = TRUE)

d$ID <- NA_character_
d$ID[nonzero[, 1]] <- id_cols[nonzero[, 2]]

# Code = position of the name in id_cols (Var1 -> 1, Var2 -> 2, Var3 -> 3).
# Unlike as.numeric(as.factor(ID)), the codes do not shift when one of the
# variables never takes a non-zero value.
d$ID2 <- as.numeric(match(d$ID, id_cols))

d
##  A tibble: 20 x 7
#    DateTime                 Y  Var1  Var2  Var3 ID      ID2
#    <dttm>               <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#  1 2020-01-03 00:00:00 0.398  0     0     0     NA       NA
#  2 2020-01-04 00:00:00 0.943  0     0.555 0     Var2      2
#  3 2020-01-05 00:00:00 0.283  0     0     0     NA       NA
#  4 2020-01-06 00:00:00 0.250  0     0     0.816 Var3      3
#  5 2020-01-06 00:00:00 0.795  0     0     0     NA       NA
#  6 2020-01-08 00:00:00 0.944  0     0     0     NA       NA
#  7 2020-01-09 00:00:00 0.158  0.943 0     0     Var1      1
#  8 2020-01-11 00:00:00 0.0940 0     0     0     NA       NA
#  9 2020-01-13 00:00:00 0.364  0     0     0     NA       NA
# 10 2020-01-14 00:00:00 0.448  0     0     0.648 Var3      3
# 11 2020-01-15 00:00:00 0.693  0     0     0     NA       NA
# 12 2020-01-17 00:00:00 0.553  0     0     0.502 Var3      3
# 13 2020-01-18 00:00:00 0.986  0     0     0     NA       NA
# 14 2020-01-22 00:00:00 0.496  0     0     0     NA       NA
# 15 2020-01-23 00:00:00 0.535  0.685 0     0     Var1      1
# 16 2020-01-25 00:00:00 0.327  0     0     0     NA       NA
# 17 2020-01-26 00:00:00 0.791  0     0     0     NA       NA
# 18 2020-01-27 00:00:00 0.568  0     0     0.357 Var3      3
# 19 2020-01-29 00:00:00 0.838  0     0     0     NA       NA
# 20 2020-01-30 00:00:00 0.644  0     0     0     NA       NA
# Requires dplyr (mutate, select, %>%).
# ID  : name of the Var1:Var3 column holding the non-zero value, or "0" when
#       every candidate column in the row is zero.
# ID2 : code for ID in order of first appearance, with "0" pinned to code 0.
df %>%
  mutate(
    ID = select(., Var1:Var3) %>%
      {
        # Work on magnitudes so negative or mutually cancelling values are
        # still recognised as non-zero.
        magnitude <- abs(as.matrix(.))
        ifelse(
          rowSums(magnitude) > 0,
          # ties.method = "first" avoids max.col()'s default random tie-break
          names(.)[max.col(magnitude, ties.method = "first")],
          "0"
        )
      },
    # Listing "0" first keeps it mapped to 0 even when the first row already
    # has a non-zero variable.
    ID2 = as.integer(ordered(ID, unique(c("0", ID)))) - 1
  )


   DateTime                 Y  Var1  Var2  Var3 ID      ID2
       <dttm>               <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
 1 2020-01-03 00:00:00 0.398  0     0     0     0         0
 2 2020-01-04 00:00:00 0.943  0     0.555 0     Var2      1
 3 2020-01-05 00:00:00 0.283  0     0     0     0         0
 4 2020-01-06 00:00:00 0.250  0     0     0.816 Var3      2
 5 2020-01-06 00:00:00 0.795  0     0     0     0         0
 6 2020-01-08 00:00:00 0.944  0     0     0     0         0
 7 2020-01-09 00:00:00 0.158  0.943 0     0     Var1      3
 8 2020-01-11 00:00:00 0.0940 0     0     0     0         0
 9 2020-01-13 00:00:00 0.364  0     0     0     0         0
10 2020-01-14 00:00:00 0.448  0     0     0.648 Var3      2
11 2020-01-15 00:00:00 0.693  0     0     0     0         0
12 2020-01-17 00:00:00 0.553  0     0     0.502 Var3      2
13 2020-01-18 00:00:00 0.986  0     0     0     0         0
14 2020-01-22 00:00:00 0.496  0     0     0     0         0
15 2020-01-23 00:00:00 0.535  0.685 0     0     Var1      3
16 2020-01-25 00:00:00 0.327  0     0     0     0         0
17 2020-01-26 00:00:00 0.791  0     0     0     0         0
18 2020-01-27 00:00:00 0.568  0     0     0.357 Var3      2
19 2020-01-29 00:00:00 0.838  0     0     0     0         0
20 2020-01-30 00:00:00 0.644  0     0     0     0         0