Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/71.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R中分位数为()的假人_R_Data.table - Fatal编程技术网

在 R 中根据 quantile() 分位数生成虚拟变量(dummy)

R中分位数为()的假人,r,data.table,R,Data.table,我试图根据分位数为数据项分配一个虚拟项。所以我得到了3个分位数1/32/33,如果杠杆率在q1中,它应该在九月列中加1,如果q2在另一列中加1(其他列保持0) 这是我的数据示例: k <- c("gvkey1" , "gvkey1" , "gvkey1" , "gvkey1", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey3", "gvkey3", "gvkey1" , "gvkey1" , "gvkey1" , "gvk

我想根据分位数为每条数据生成虚拟变量(dummy)。我把数据按 1/3、2/3、3/3 三个分位点分成三个区间:如果杠杆率(Leverage)落在第一个区间 q1,就在一个单独的列中记为 1;如果落在 q2,则在另一列中记为 1(其余列保持为 0)。

这是我的数据示例:

# Example panel: 66 observations made of one 11-row cycle (dates plus the firm
# key observed on each date) repeated six times, with Leverage running 1..66.
#
# All three columns are generated from the same cycle count so they always
# have the same length. A gvkey vector with only four cycles (44 values)
# against 66 dates leaves rows 45-66 without a key, which prints as NA.
n_cycles <- 6L

# Firm key for each of the 11 rows in one cycle: 4 x gvkey1, 5 x gvkey2,
# 2 x gvkey3.
k <- rep(
  rep(c("gvkey1", "gvkey2", "gvkey3"), times = c(4L, 5L, 2L)),
  times = n_cycles
)

# Observation dates for one cycle, month/day/year. The first date appears
# twice and 12/2/2000 is absent, as in the original sample.
l <- rep(
  c(
    "12/1/2000", "12/1/2000", "12/3/2000", "12/4/2000", "12/5/2000",
    "12/6/2000", "12/7/2000", "12/8/2000", "12/9/2000", "12/10/2000",
    "12/11/2000"
  ),
  times = n_cycles
)

# Leverage is simply the row index (integer 1..66).
m <- seq_along(l)

# Fail loudly if the columns ever get out of step again.
stopifnot(length(k) == length(l), length(l) == length(m))

y <- data.frame(Date = l, gvkey = k, Leverage = m, stringsAsFactors = FALSE)

# Parse the month/day/year strings into Date objects.
y$Date <- as.Date(y$Date, format = "%m/%d/%Y")

# Build a data.table named `test` from the example data frame `y`.
# Requires the data.table package to be attached.
test <- data.table(y)


这种方法怎么样?

# Tag every row with a one-hot style code for its Leverage tercile and print
# all rows:
#   "100"  Leverage <= d1
#   "010"  d1 < Leverage <= d2
#   "001"  Leverage > d2
# d1 and d2 are expected to be columns of `test` holding the per-row lower and
# upper cut points.
#
# The code is picked by counting how many cut points the value exceeds. A
# row-wise cut() with breaks c(0, d1, d2, Leverage) is avoided: cut() stops
# with "'breaks' are not unique" as soon as Leverage equals d1, d2 or 0, or
# when d1 == d2. Intervals stay right-closed, as with cut()'s default.
# Values at or below zero now fall in the bottom group "100".
test %>%
  as_tibble() %>%
  mutate(dquant = factor(
    c("100", "010", "001")[1L + (Leverage > d1) + (Leverage > d2)],
    levels = c("100", "010", "001")
  )) %>%
  print(n = Inf)
# A tibble: 66 x 6
   Date       gvkey  Leverage    d1    d2 dquant     
   <date>     <chr>     <int> <dbl> <dbl> <fct>
 1 2000-12-01 gvkey1        1  19.7  38.3 100  
 2 2000-12-01 gvkey1        2  19.7  38.3 100  
 3 2000-12-03 gvkey1        3  21.3  39.7 100  
 4 2000-12-04 gvkey1        4  22.3  40.7 100  
 5 2000-12-05 gvkey2        5  23.3  41.7 100  
 6 2000-12-06 gvkey2        6  24.3  42.7 100  
 7 2000-12-07 gvkey2        7  25.3  43.7 100  
 8 2000-12-08 gvkey2        8  26.3  44.7 100  
 9 2000-12-09 gvkey2        9  27.3  45.7 100  
10 2000-12-10 gvkey3       10  28.3  46.7 100  
11 2000-12-11 gvkey3       11  29.3  47.7 100  
12 2000-12-01 gvkey1       12  19.7  38.3 100  
13 2000-12-01 gvkey1       13  19.7  38.3 100  
14 2000-12-03 gvkey1       14  21.3  39.7 100  
15 2000-12-04 gvkey1       15  22.3  40.7 100  
16 2000-12-05 gvkey2       16  23.3  41.7 100  
17 2000-12-06 gvkey2       17  24.3  42.7 100  
18 2000-12-07 gvkey2       18  25.3  43.7 100  
19 2000-12-08 gvkey2       19  26.3  44.7 100  
20 2000-12-09 gvkey2       20  27.3  45.7 100  
21 2000-12-10 gvkey3       21  28.3  46.7 100  
22 2000-12-11 gvkey3       22  29.3  47.7 100  
23 2000-12-01 gvkey1       23  19.7  38.3 010  
24 2000-12-01 gvkey1       24  19.7  38.3 010  
25 2000-12-03 gvkey1       25  21.3  39.7 010  
26 2000-12-04 gvkey1       26  22.3  40.7 010  
27 2000-12-05 gvkey2       27  23.3  41.7 010  
28 2000-12-06 gvkey2       28  24.3  42.7 010  
29 2000-12-07 gvkey2       29  25.3  43.7 010  
30 2000-12-08 gvkey2       30  26.3  44.7 010  
31 2000-12-09 gvkey2       31  27.3  45.7 010  
32 2000-12-10 gvkey3       32  28.3  46.7 010  
33 2000-12-11 gvkey3       33  29.3  47.7 010  
34 2000-12-01 gvkey1       34  19.7  38.3 010  
35 2000-12-01 gvkey1       35  19.7  38.3 010  
36 2000-12-03 gvkey1       36  21.3  39.7 010  
37 2000-12-04 gvkey1       37  22.3  40.7 010  
38 2000-12-05 gvkey2       38  23.3  41.7 010  
39 2000-12-06 gvkey2       39  24.3  42.7 010  
40 2000-12-07 gvkey2       40  25.3  43.7 010  
41 2000-12-08 gvkey2       41  26.3  44.7 010  
42 2000-12-09 gvkey2       42  27.3  45.7 010  
43 2000-12-10 gvkey3       43  28.3  46.7 010  
44 2000-12-11 gvkey3       44  29.3  47.7 010  
45 2000-12-01 NA           45  19.7  38.3 001  
46 2000-12-01 NA           46  19.7  38.3 001  
47 2000-12-03 NA           47  21.3  39.7 001  
48 2000-12-04 NA           48  22.3  40.7 001  
49 2000-12-05 NA           49  23.3  41.7 001  
50 2000-12-06 NA           50  24.3  42.7 001  
51 2000-12-07 NA           51  25.3  43.7 001  
52 2000-12-08 NA           52  26.3  44.7 001  
53 2000-12-09 NA           53  27.3  45.7 001  
54 2000-12-10 NA           54  28.3  46.7 001  
55 2000-12-11 NA           55  29.3  47.7 001  
56 2000-12-01 NA           56  19.7  38.3 001  
57 2000-12-01 NA           57  19.7  38.3 001  
58 2000-12-03 NA           58  21.3  39.7 001  
59 2000-12-04 NA           59  22.3  40.7 001  
60 2000-12-05 NA           60  23.3  41.7 001  
61 2000-12-06 NA           61  24.3  42.7 001  
62 2000-12-07 NA           62  25.3  43.7 001  
63 2000-12-08 NA           63  26.3  44.7 001  
64 2000-12-09 NA           64  27.3  45.7 001  
65 2000-12-10 NA           65  28.3  46.7 001  
66 2000-12-11 NA           66  29.3  47.7 001


&
应该是
&
# Tag every row with a one-hot style code for its Leverage tercile and print
# all rows:
#   "100"  Leverage <= d1
#   "010"  d1 < Leverage <= d2
#   "001"  Leverage > d2
# d1 and d2 are expected to be columns of `test` holding the per-row lower and
# upper cut points.
#
# The code is picked by counting how many cut points the value exceeds. A
# row-wise cut() with breaks c(0, d1, d2, Leverage) is avoided: cut() stops
# with "'breaks' are not unique" as soon as Leverage equals d1, d2 or 0, or
# when d1 == d2. Intervals stay right-closed, as with cut()'s default.
# Values at or below zero now fall in the bottom group "100".
test %>%
  as_tibble() %>%
  mutate(dquant = factor(
    c("100", "010", "001")[1L + (Leverage > d1) + (Leverage > d2)],
    levels = c("100", "010", "001")
  )) %>%
  print(n = Inf)
# A tibble: 66 x 6
   Date       gvkey  Leverage    d1    d2 dquant     
   <date>     <chr>     <int> <dbl> <dbl> <fct>
 1 2000-12-01 gvkey1        1  19.7  38.3 100  
 2 2000-12-01 gvkey1        2  19.7  38.3 100  
 3 2000-12-03 gvkey1        3  21.3  39.7 100  
 4 2000-12-04 gvkey1        4  22.3  40.7 100  
 5 2000-12-05 gvkey2        5  23.3  41.7 100  
 6 2000-12-06 gvkey2        6  24.3  42.7 100  
 7 2000-12-07 gvkey2        7  25.3  43.7 100  
 8 2000-12-08 gvkey2        8  26.3  44.7 100  
 9 2000-12-09 gvkey2        9  27.3  45.7 100  
10 2000-12-10 gvkey3       10  28.3  46.7 100  
11 2000-12-11 gvkey3       11  29.3  47.7 100  
12 2000-12-01 gvkey1       12  19.7  38.3 100  
13 2000-12-01 gvkey1       13  19.7  38.3 100  
14 2000-12-03 gvkey1       14  21.3  39.7 100  
15 2000-12-04 gvkey1       15  22.3  40.7 100  
16 2000-12-05 gvkey2       16  23.3  41.7 100  
17 2000-12-06 gvkey2       17  24.3  42.7 100  
18 2000-12-07 gvkey2       18  25.3  43.7 100  
19 2000-12-08 gvkey2       19  26.3  44.7 100  
20 2000-12-09 gvkey2       20  27.3  45.7 100  
21 2000-12-10 gvkey3       21  28.3  46.7 100  
22 2000-12-11 gvkey3       22  29.3  47.7 100  
23 2000-12-01 gvkey1       23  19.7  38.3 010  
24 2000-12-01 gvkey1       24  19.7  38.3 010  
25 2000-12-03 gvkey1       25  21.3  39.7 010  
26 2000-12-04 gvkey1       26  22.3  40.7 010  
27 2000-12-05 gvkey2       27  23.3  41.7 010  
28 2000-12-06 gvkey2       28  24.3  42.7 010  
29 2000-12-07 gvkey2       29  25.3  43.7 010  
30 2000-12-08 gvkey2       30  26.3  44.7 010  
31 2000-12-09 gvkey2       31  27.3  45.7 010  
32 2000-12-10 gvkey3       32  28.3  46.7 010  
33 2000-12-11 gvkey3       33  29.3  47.7 010  
34 2000-12-01 gvkey1       34  19.7  38.3 010  
35 2000-12-01 gvkey1       35  19.7  38.3 010  
36 2000-12-03 gvkey1       36  21.3  39.7 010  
37 2000-12-04 gvkey1       37  22.3  40.7 010  
38 2000-12-05 gvkey2       38  23.3  41.7 010  
39 2000-12-06 gvkey2       39  24.3  42.7 010  
40 2000-12-07 gvkey2       40  25.3  43.7 010  
41 2000-12-08 gvkey2       41  26.3  44.7 010  
42 2000-12-09 gvkey2       42  27.3  45.7 010  
43 2000-12-10 gvkey3       43  28.3  46.7 010  
44 2000-12-11 gvkey3       44  29.3  47.7 010  
45 2000-12-01 NA           45  19.7  38.3 001  
46 2000-12-01 NA           46  19.7  38.3 001  
47 2000-12-03 NA           47  21.3  39.7 001  
48 2000-12-04 NA           48  22.3  40.7 001  
49 2000-12-05 NA           49  23.3  41.7 001  
50 2000-12-06 NA           50  24.3  42.7 001  
51 2000-12-07 NA           51  25.3  43.7 001  
52 2000-12-08 NA           52  26.3  44.7 001  
53 2000-12-09 NA           53  27.3  45.7 001  
54 2000-12-10 NA           54  28.3  46.7 001  
55 2000-12-11 NA           55  29.3  47.7 001  
56 2000-12-01 NA           56  19.7  38.3 001  
57 2000-12-01 NA           57  19.7  38.3 001  
58 2000-12-03 NA           58  21.3  39.7 001  
59 2000-12-04 NA           59  22.3  40.7 001  
60 2000-12-05 NA           60  23.3  41.7 001  
61 2000-12-06 NA           61  24.3  42.7 001  
62 2000-12-07 NA           62  25.3  43.7 001  
63 2000-12-08 NA           63  26.3  44.7 001  
64 2000-12-09 NA           64  27.3  45.7 001  
65 2000-12-10 NA           65  28.3  46.7 001  
66 2000-12-11 NA           66  29.3  47.7 001

# Lower cut point: the 1/3 quantile of Leverage within each Date, stored in
# the column whose name is held in `d1`.
d1 <- "d1"
test <- test[, (d1) := quantile(Leverage, probs = 1 / 3), by = Date]

# Upper cut point: the 2/3 quantile of Leverage within each Date, stored in
# the column whose name is held in `d2`.
d2 <- "d2"
test <- test[, (d2) := quantile(Leverage, probs = 2 / 3), by = Date]

# Store the tercile as a '|'-separated code in column `s`, so that it can be
# split on '|' into three 0/1 dummy columns:
#   "1|0|0"  Leverage <= d1
#   "0|1|0"  d1 < Leverage <= d2
#   "0|0|1"  Leverage > d2
# d1 and d2 are expected to be columns of `test` holding the per-row cut
# points.
#
# The code is picked by counting how many cut points the value exceeds. A
# row-wise cut() with breaks c(0, d1, d2, Leverage) is avoided: cut() stops
# with "'breaks' are not unique" as soon as Leverage equals d1, d2 or 0, or
# when d1 == d2. Intervals stay right-closed, as with cut()'s default.
# Values at or below zero now fall in the bottom group "1|0|0".
test <- test %>%
  as_tibble() %>%
  mutate(s = factor(
    c("1|0|0", "0|1|0", "0|0|1")[1L + (Leverage > d1) + (Leverage > d2)],
    levels = c("1|0|0", "0|1|0", "0|0|1")
  ))
> test
 # A tibble: 66 x 6
   Date       gvkey  Leverage    d1    d2 dquant
   <date>     <chr>     <int> <dbl> <dbl> <fct> 
 1 2000-12-01 gvkey1        1  19.7  38.3 1|0|0 
 2 2000-12-01 gvkey1        2  19.7  38.3 1|0|0 
# Split each '|'-separated code in test$s into its three pieces and stack the
# pieces row by row, giving one data-frame column per dummy.
code_parts <- strsplit(as.character(test$s), "|", fixed = TRUE)
dummy <- data.frame(do.call("rbind", code_parts))
> dummy
   X1 X2 X3
1   1  0  0
2   1  0  0
3   1  0  0
4   1  0  0
5   1  0  0
6   1  0  0
....

# Append the dummy columns to the right of the existing columns of `test`.
test <- cbind(test, dummy)

> test
         Date  gvkey Leverage       d1       d2 dquant X1 X2 X3
1  2000-12-01 gvkey1        1 19.66667 38.33333  1|0|0  1  0  0
2  2000-12-01 gvkey1        2 19.66667 38.33333  1|0|0  1  0  0
3  2000-12-03 gvkey1        3 21.33333 39.66667  1|0|0  1  0  0
4  2000-12-04 gvkey1        4 22.33333 40.66667  1|0|0  1  0  0
5  2000-12-05 gvkey2        5 23.33333 41.66667  1|0|0  1  0  0
6  2000-12-06 gvkey2        6 24.33333 42.66667  1|0|0  1  0  0
7  2000-12-07 gvkey2        7 25.33333 43.66667  1|0|0  1  0  0
8  2000-12-08 gvkey2        8 26.33333 44.66667  1|0|0  1  0  0
9  2000-12-09 gvkey2        9 27.33333 45.66667  1|0|0  1  0  0
10 2000-12-10 gvkey3       10 28.33333 46.66667  1|0|0  1  0  0
11 2000-12-11 gvkey3       11 29.33333 47.66667  1|0|0  1  0  0
...