R 创建以其他列中的值为条件的索引;随时间变化的差异

R 创建以其他列中的值为条件的索引;随时间变化的差异,r,plyr,seq,R,Plyr,Seq,我正在努力解决以下问题: 下面的dataframe包含各种ID随时间变化的值的开发。我试图得到的是这些值的增加/减少,基于事件发生年份的值。一个id内可能发生多个事件,因此新事件将成为该id的新基准年。 为了让事情更清楚,我也在下面添加了我想要的结果 我拥有的 id value year event a 100 1950 NA a 101 1951 NA a 102 1952 NA a 103 1953 NA a

我正在努力解决以下问题: 下面的dataframe包含各个ID的值随时间的变化情况。我想得到的是这些值相对于事件发生年份的值的增加/减少。一个id内可能发生多个事件,因此每个新事件都会成为该id的新基准年。 为了让事情更清楚,我也在下面添加了我想要的结果

我拥有的

id  value   year    event
a   100     1950    NA
a   101     1951    NA
a   102     1952    NA
a   103     1953    NA
a   104     1954    NA
a   105     1955    X
a   106     1956    NA
a   107     1957    NA
a   108     1958    NA
a   107     1959    Y
a   106     1960    NA
a   105     1961    NA
a   104.8   1962    NA
a   104.2   1963    NA
b   70      1970    NA
b   75      1971    NA
b   80      1972    NA
b   85      1973    NA
b   90      1974    NA
b   60      1975    Z
b   59      1976    NA
b   58      1977    NA
b   57      1978    NA
b   56      1979    NA
b   55      1980    W
b   54      1981    NA
b   53      1982    NA
b   52      1983    NA
b   51      1984    NA
我在寻找什么

id  value   year    event   index   growth
a   100     1950    NA        0 
a   101     1951    NA        0 
a   102     1952    NA        0 
a   103     1953    NA        0 
a   104     1954    NA        0 
a   105     1955    X         1      1
a   106     1956    NA        2      1.00952381
a   107     1957    NA        3      1.019047619
a   108     1958    NA        4      1.028571429
a   107     1959    Y         1      1                  #new baseline year
a   106     1960    NA        2      0.990654206
a   105     1961    NA        3      0.981308411
a   104.8   1962    NA        4      0.979439252
a   104.2   1963    NA        5      0.973831776
b   70      1970    NA        6 
b   75      1971    NA        7 
b   80      1972    NA        8 
b   85      1973    NA        9 
b   90      1974    NA       10 
b   60      1975    Z         1      1
b   59      1976    NA        2      0.983333333
b   58      1977    NA        3      0.966666667
b   57      1978    NA        4      0.95
b   56      1979    NA        5      0.933333333
b   55      1980    W         1      1                #new baseline year
b   54      1981    NA        2      0.981818182
b   53      1982    NA        3      0.963636364
b   52      1983    NA        4      0.945454545
b   51      1984    NA        5      0.927272727
我尝试的

有一篇帖子非常有用,我成功地计算出了相对于基准年份的差异;但是,当出现新事件时,我无法重置基准年(索引)。此外,我怀疑我的方法是否真的是最有效/最优雅的方法,对我来说它似乎有点笨拙。

# Attempt (plyr): index the years from each id's first event onwards and
# express `value` relative to the value in that first event year.
x <- ddply(x, .(id), transform, year.min=min(year[!is.na(event)]))  # year.min = earliest year with a non-NA event, per id
x1 <- ddply(x[x$year>=x$year.min,], .(id), transform, index=seq_along(id)) # running counter over the rows from the first event year on; earlier years are dropped here
x1 <- x1[order(x1$id, x1$year),] # sort by id, then year
x1 <- ddply(x1, .(id), transform, growth=100*(value/value[1])) # growth relative to the first kept row of each id only; wrong, because later events never reset the baseline

# Then merge back the years before the first event, which were removed above.
# NOTE(review): interaction() is a base R function; it is unclear what this
# package is needed for -- confirm.
library(Interact)
# Key every row by its id/year combination.
x$id.year <- interaction(x$id,x$year)
x1$id.year <- interaction(x1$id,x1$year)
# Give x empty index/growth columns so it carries the columns that x1 has.
x$index <- x$growth <- NA
# Stack the pre-event rows of x with the indexed rows, then restore id/year order.
y <- rbind(x[x$year<x$year.min,],x1)
y <- y[order(y$id,y$year),]
x <- ...  # (此行在提取过程中被截断)

试试:

# Walk the rows of `ddf` in order and fill two new columns:
#   index  - 1 on an event row, then previous index + 1 on each following row;
#            0 for rows that come before the very first event.
#   growth - value divided by the value on the most recent event row;
#            0 for rows that come before the very first event.
# Expects `ddf` to have the columns `value` and `event` (NA = no event).
#
# NOTE: `start` and `baseline` are never reset when `id` changes, so the
# leading non-event rows of a later id keep counting from, and are divided
# by, the previous id's last event.
ddf$index <- 0
ddf$growth <- 0
baseline <- 0
start <- FALSE  # becomes TRUE once the first event row has been seen

for (r in seq_len(nrow(ddf))) {
  if (is.na(ddf$event[r])) {
    if (start) {
      # Ordinary row after an event: continue the counter, compare to baseline.
      ddf$index[r] <- ddf$index[r - 1] + 1
      ddf$growth[r] <- ddf$value[r] / baseline
    } else {
      ddf$index[r] <- 0
    }
  } else {
    # Event row: it becomes the new baseline and restarts the counter at 1.
    start <- TRUE
    ddf$index[r] <- 1
    ddf$growth[r] <- 1
    baseline <- ddf$value[r]
  }
}

ddf
   id value year event index    growth
1   a 100.0 1950    NA     0 0.0000000
2   a 101.0 1951    NA     0 0.0000000
3   a 102.0 1952    NA     0 0.0000000
4   a 103.0 1953    NA     0 0.0000000
5   a 104.0 1954    NA     0 0.0000000
6   a 105.0 1955     X     1 1.0000000
7   a 106.0 1956    NA     2 1.0095238
8   a 107.0 1957    NA     3 1.0190476
9   a 108.0 1958    NA     4 1.0285714
10  a 107.0 1959     Y     1 1.0000000
11  a 106.0 1960    NA     2 0.9906542
12  a 105.0 1961    NA     3 0.9813084
13  a 104.8 1962    NA     4 0.9794393
14  a 104.2 1963    NA     5 0.9738318
15  b  70.0 1970    NA     6 0.6542056
16  b  75.0 1971    NA     7 0.7009346
17  b  80.0 1972    NA     8 0.7476636
18  b  85.0 1973    NA     9 0.7943925
19  b  90.0 1974    NA    10 0.8411215
20  b  60.0 1975     Z     1 1.0000000
21  b  59.0 1976    NA     2 0.9833333
22  b  58.0 1977    NA     3 0.9666667
23  b  57.0 1978    NA     4 0.9500000
24  b  56.0 1979    NA     5 0.9333333
25  b  55.0 1980     W     1 1.0000000
26  b  54.0 1981    NA     2 0.9818182
27  b  53.0 1982    NA     3 0.9636364
28  b  52.0 1983    NA     4 0.9454545
29  b  51.0 1984    NA     5 0.9272727
#创建一个标记,通过id或
#当id更改时

dat$tag <- ...  # (代码在此处被截断)

这是一个使用dplyr的解决方案

# Build `ana` from `mydf`: per id, carry each event label forward so that
# every row from an event onwards belongs to that event's segment, then
# compute growth and a 1-based index within each (id, event) segment.
# Requires dplyr and zoo (for na.locf) to be attached.
ana <- mydf %>%
  group_by(id) %>%
  # Fill only `event`; na.rm = FALSE keeps the leading rows (before the first
  # event) as NA instead of dropping them.
  mutate(event = na.locf(event, na.rm = FALSE)) %>%
  # Kept from the original pipeline; a no-op when `value` is already numeric.
  mutate(value = as.numeric(value)) %>%
  group_by(id, event) %>%
  mutate(
    growth = value / value[1],
    # Argument-free row_number() counts rows 1..n in every group, including
    # the group whose event is NA.
    index = row_number()
  ) %>%
  ungroup()

# Rows before an id's first event have no baseline: report growth as 0.
ana$growth[is.na(ana$event)] <- 0

   id value year event    growth index
1   a 100.0 1950    NA 0.0000000     1
2   a 101.0 1951    NA 0.0000000     2
3   a 102.0 1952    NA 0.0000000     3
4   a 103.0 1953    NA 0.0000000     4
5   a 104.0 1954    NA 0.0000000     5
6   a 105.0 1955     X 1.0000000     1
7   a 106.0 1956     X 1.0095238     2
8   a 107.0 1957     X 1.0190476     3
9   a 108.0 1958     X 1.0285714     4
10  a 107.0 1959     Y 1.0000000     1
11  a 106.0 1960     Y 0.9906542     2
12  a 105.0 1961     Y 0.9813084     3
13  a 104.8 1962     Y 0.9794393     4
14  a 104.2 1963     Y 0.9738318     5
15  b  70.0 1970    NA 0.0000000     1
16  b  75.0 1971    NA 0.0000000     2
17  b  80.0 1972    NA 0.0000000     3
18  b  85.0 1973    NA 0.0000000     4
19  b  90.0 1974    NA 0.0000000     5
20  b  60.0 1975     Z 1.0000000     1
21  b  59.0 1976     Z 0.9833333     2
22  b  58.0 1977     Z 0.9666667     3
23  b  57.0 1978     Z 0.9500000     4
24  b  56.0 1979     Z 0.9333333     5
25  b  55.0 1980     W 1.0000000     1
26  b  54.0 1981     W 0.9818182     2
27  b  53.0 1982     W 0.9636364     3
28  b  52.0 1983     W 0.9454545     4
# Build `ana` from `mydf`: per id, carry each event label forward, then
# compute growth and a 1-based index within each (id, event) segment.
# Requires dplyr and zoo (for na.locf) to be attached.
ana <- mydf %>%
  group_by(id) %>%
  # na.rm = FALSE keeps the rows before the first event, with event = NA.
  mutate(event = na.locf(event, na.rm = FALSE)) %>%
  # Kept from the original pipeline; a no-op when `value` is already numeric.
  mutate(value = as.numeric(value)) %>%
  group_by(id, event) %>%
  mutate(
    growth = value / value[1],
    # Argument-free row_number() counts rows 1..n in every group.
    index = row_number()
  ) %>%
  ungroup()
ana$growth[is.na(ana$event)] <- 0

很棒。我只加了dat
# Tag each row with the running count of events seen so far within its id.
# The count goes up by one on every row whose event is not NA, so rows
# before an id's first event get tag 0. ave() is given a character vector,
# so the resulting tag column is character as well.
dat$tag <- ave(
  as.character(dat$event),
  as.character(dat$id),
  FUN = function(ev) cumsum(!is.na(ev))
)

# Within every (tag, id) segment, divide each value by the segment's first
# value. This also yields numbers for the tag-0 rows of each id.
dat$growth <- ave(
  dat$value,
  dat$tag,
  dat$id,
  FUN = function(v) v / v[1]
)

# No event has occurred yet where tag is 0, so blank out growth there.
dat$growth <- replace(dat$growth, dat$tag == 0, NA)
# Build `ana` from `mydf`: per id, carry each event label forward so that
# every row from an event onwards belongs to that event's segment, then
# compute growth and a 1-based index within each (id, event) segment.
# Requires dplyr and zoo (for na.locf) to be attached.
ana <- mydf %>%
  group_by(id) %>%
  # Fill only `event`; na.rm = FALSE keeps the leading rows (before the first
  # event) as NA instead of dropping them.
  mutate(event = na.locf(event, na.rm = FALSE)) %>%
  # Kept from the original pipeline; a no-op when `value` is already numeric.
  mutate(value = as.numeric(value)) %>%
  group_by(id, event) %>%
  mutate(
    growth = value / value[1],
    # Argument-free row_number() counts rows 1..n in every group, including
    # the group whose event is NA.
    index = row_number()
  ) %>%
  ungroup()

# Rows before an id's first event have no baseline: report growth as 0.
ana$growth[is.na(ana$event)] <- 0

   id value year event    growth index
1   a 100.0 1950    NA 0.0000000     1
2   a 101.0 1951    NA 0.0000000     2
3   a 102.0 1952    NA 0.0000000     3
4   a 103.0 1953    NA 0.0000000     4
5   a 104.0 1954    NA 0.0000000     5
6   a 105.0 1955     X 1.0000000     1
7   a 106.0 1956     X 1.0095238     2
8   a 107.0 1957     X 1.0190476     3
9   a 108.0 1958     X 1.0285714     4
10  a 107.0 1959     Y 1.0000000     1
11  a 106.0 1960     Y 0.9906542     2
12  a 105.0 1961     Y 0.9813084     3
13  a 104.8 1962     Y 0.9794393     4
14  a 104.2 1963     Y 0.9738318     5
15  b  70.0 1970    NA 0.0000000     1
16  b  75.0 1971    NA 0.0000000     2
17  b  80.0 1972    NA 0.0000000     3
18  b  85.0 1973    NA 0.0000000     4
19  b  90.0 1974    NA 0.0000000     5
20  b  60.0 1975     Z 1.0000000     1
21  b  59.0 1976     Z 0.9833333     2
22  b  58.0 1977     Z 0.9666667     3
23  b  57.0 1978     Z 0.9500000     4
24  b  56.0 1979     Z 0.9333333     5
25  b  55.0 1980     W 1.0000000     1
26  b  54.0 1981     W 0.9818182     2
27  b  53.0 1982     W 0.9636364     3
28  b  52.0 1983     W 0.9454545     4