R 按小组对意见进行编号_R - Fatal编程技术网

R 按组对观测值进行编号

R 按小组对意见进行编号,r,R,我试图对小组内的观察结果进行编号，然后确定每个小组内的第一个和最后一个观察结果。我知道如何做到这一点，甚至在这里发布了解决方案，以回应过去的一个问题然而，我现在偶然发现了一种情况，在这种情况下，我的解决方案不起作用，我不知道为什么。感谢您就如何使下面的第二个示例起作用提供的任何建议。第一个例子确实有效。对不起，如果我忽略了一个愚蠢的打字错误我更喜欢用base R #################################################################

我试图对小组内的观察结果进行编号，然后确定每个小组内的第一个和最后一个观察结果。我知道如何做到这一点，甚至在这里发布了解决方案，以回应过去的一个问题

然而，我现在偶然遇到了一种情况，我的解决方案在其中不起作用，而我不知道原因。如果能就如何让下面的第二个示例正常工作提供任何建议，我将不胜感激。第一个示例确实可以正常工作。如果我忽略了某个低级的打字错误，请见谅。

我更喜欢用base R

####################################################################

# Example 1 (works): number the rows inside each run of consecutive equal
# `city` values, counting up (`first`) and counting down (`last`), then keep
# only the rows that start or end a run.

my.df = read.table(text = '
   state   county   city   miles
       1        1      1       3
       1        1      1       4
       1        1      1       4
       1        1      1       5
       1        1      2       4
       1        1      2       3
       1        2      1       4
       1        2      2       2
       1        2      2       4
       1        2      2       3
       1        2      3       3
       1        2      3       2
', header = TRUE)

my.df
str(my.df)

# One row per run of identical consecutive `city` values; the single column
# holds the run length. Only `city` is inspected here -- `state` and `county`
# do not take part in the grouping.
my.seq <- data.frame(rle(my.df$city)$lengths)
my.seq

# For every run length x build 1..x (`first`) and x..1 (`last`).
# The runs in this data have different lengths (4, 2, 1, 3, 2), so apply()
# cannot simplify its result and returns a list; unlist() then flattens that
# list into one plain vector with one value per row of my.df.
my.df$first <- unlist(apply(my.seq, 1, function(x) seq(1,x)))
my.df$last  <- unlist(apply(my.seq, 1, function(x) seq(x,1,-1)))
my.df

# Keep the first and the last observation of every run.
my.df2 <- my.df[my.df$first==1 | my.df$last == 1,]
my.df2

####################################################################

# This does not work.  Only the data set has changed.
#
# NOTE(review): here every run of `city` has the same length (2 and 2).
# When each call of FUN returns a vector of the same length n > 1, apply()
# simplifies the result to a matrix (2 x 2 for this data) instead of a list.
# unlist() returns a non-list argument unchanged, so the `first` / `last`
# assignments below receive a 2-row matrix rather than a plain vector of
# length 4 -- presumably this row-count mismatch is the failure the author
# observed.

my.df <- read.table(text = '
   state   county    city    miles
      40        8       1       12
      40        8       1        4
      40        8       2       13
      40        8       2        3
', header = TRUE)

my.df
str(my.df)

# One row per run of identical consecutive `city` values (run lengths).
my.seq <- data.frame(rle(my.df$city)$lengths)
my.seq

# Same statements as in the first example; see the note at the top of this
# section for why the result is not a plain vector with this data.
my.df$first <- unlist(apply(my.seq, 1, function(x) seq(1,x)))
my.df$last  <- unlist(apply(my.seq, 1, function(x) seq(x,1,-1)))
my.df

# Keep the first and the last observation of every run.
my.df2 <- my.df[my.df$first==1 | my.df$last == 1,]
my.df2

# The expected result with the second example is:

desired.result <- read.table(text = '
   state   county    city    miles   first   last
      40        8       1       12       1      2
      40        8       1        4       2      1
      40        8       2       13       1      2
      40        8       2        3       2      1
', header = TRUE)

####################################################################

####################################################################
#这很有效
my.df = read.table(text = '
state   county   city   miles
1        1      1       3
1        1      1       4
1        1      1       4
1        1      1       5
1        1      2       4
1        1      2       3
1        2      1       4
1        2      2       2
1        2      2       4
1        2      2       3
1        2      3       3
1        2      3       2
', header = TRUE)
my.df
str(my.df)
my.seq

我很难理解你想做什么。
我想你会因为独特城市的特殊情况而出错
以下是我将如何做到这一点：
这里的困难在于创建分组变量：
# Run-length encode `city`, then label every row with the index of the run it
# belongs to (1 for the first run, 2 for the second, and so on).
xx <- rle(my.df[["city"]])
my.df[["group"]] <- rep(seq_along(xx[["values"]]), times = xx[["lengths"]])

最后，使用unique
删除重复的元素：
# NOTE(review): `res` is not defined anywhere in the visible text; presumably
# it is the per-group result of the ddply() call mentioned below -- confirm.
# unique() is used here to drop the duplicated entries of that result.
unique(res)

 state county city miles group first last
1    40      8    1    12     1     1    2
2    40      8    1     4     1     2    1
3    40      8    2    13     2     1    2
4    40      8    2     3     2     2    1

编辑：如果想要 base R 的解决方案，只需将 ddply 替换为 tapply：
# Base R version: label every run of consecutive equal `city` values, then
# build one small data frame per run that holds the run's first and last row
# together with their `first` / `last` counters.
group <- rep(seq_along(xx$values), xx$lengths)

# tapply() is applied to the row indices (a vector with one element per row,
# matching `group`) rather than to the data frame itself, and its result is
# stored so that it can be combined afterwards.
# simplify = FALSE guarantees a list of data frames.
res <- tapply(seq_len(nrow(my.df)), group, function(idx) {
  x <- my.df[idx, , drop = FALSE]
  y <- rbind(head(x, 1), tail(x, 1))
  cbind(y, data.frame(first = c(1, nrow(x)),
                      last = c(nrow(x), 1)))
}, simplify = FALSE)

# Stack the per-run pieces into one data frame. A run of length 1 contributes
# the same row twice (head and tail coincide), so unique() drops the copy.
res <- unique(do.call(rbind, res))
rownames(res) <- NULL
res

我想出了如何修改我的代码，使这两个示例数据集都能得到所需的结果：只需在两条 unlist(apply()) 语句外面加上 as.vector()。下面是第二个示例的代码：
# Second example again, now with the apply() results passed through
# as.vector() so that `first` and `last` are always plain vectors.
my.df <- read.table(header = TRUE, text = '
   state   county    city    miles
      40        8       1       12
      40        8       1        4
      40        8       2       13
      40        8       2        3
')

my.df
str(my.df)

# One row per run of identical consecutive `city` values; the single column
# holds the length of that run.
my.seq <- data.frame(rle(my.df$city)$lengths)
my.seq

# Count up (1..len) and down (len..1) inside every run. as.vector() drops the
# dimensions that apply() adds when all runs have the same length.
my.df[["first"]] <- as.vector(
  unlist(apply(my.seq, MARGIN = 1, FUN = function(len) seq(from = 1, to = len)))
)
my.df[["last"]] <- as.vector(
  unlist(apply(my.seq, MARGIN = 1,
               FUN = function(len) seq(from = len, to = 1, by = -1)))
)
my.df

# Rows that open or close a run.
my.df2 <- my.df[with(my.df, first == 1 | last == 1), ]
my.df2

谢谢，+1。不过我应该指出，我更喜欢基于 base R 的解决方案。
# Number the rows inside each run of consecutive equal `city` values
# (`first` counts up, `last` counts down) and keep each run's first and last
# row.
my.df <- read.table(text = '
   state   county    city    miles
      40        8       1       12
      40        8       1        4
      40        8       2       13
      40        8       2        3
', header = TRUE)

my.df
str(my.df)

# One row per run of identical consecutive `city` values; the single column
# holds the length of that run.
my.seq <- data.frame(rle(my.df$city)$lengths)
my.seq

# sequence() concatenates 1..n for every run length n and always returns a
# plain vector. apply() over the data frame returns a list, a matrix or a
# vector depending on whether the run lengths happen to be equal, which is
# what made the original version fail on this data.
my.df$first <- sequence(my.seq[[1]])
# Counting down is the run length (repeated once per row of the run) minus
# the position counted up, plus one.
my.df$last  <- rep(my.seq[[1]], times = my.seq[[1]]) - my.df$first + 1L
my.df

# Keep the first and the last observation of every run.
my.df2 <- my.df[my.df$first == 1 | my.df$last == 1, ]
my.df2

  state county city miles first last
1    40      8    1    12     1    2
2    40      8    1     4     2    1
3    40      8    2    13     1    2
4    40      8    2     3     2    1