data.table 解决方案确实帮了大忙。这是很棒的东西，对 data.table 解决方案的解释帮了大忙。 df <- data.frame(id=1:7,link=I(list(c(2,3),c(1,3),c(1,2),NA,NA,7,6))) id

data.table 解决方案确实帮了大忙。这是很棒的东西，对 data.table 解决方案的解释帮了大忙。 df <- data.frame(id=1:7,link=I(list(c(2,3),c(1,3),c(1,2),NA,NA,7,6))) id ,r,dataframe,duplicates,R,Dataframe,Duplicates,data.table 解决方案确实帮了大忙。这是很棒的东西，对 data.table 解决方案的解释帮了大忙。 df <- data.frame(id=1:7,link=I(list(c(2,3),c(1,3),c(1,2),NA,NA,7,6))) id link 1 1 2, 3 2 2 1, 3 3 3 1, 2 4 4 NA 5 5 NA 6 6 7 7 7 6 id link 1 1 2, 3 2 4 NA 3 5 NA 4 6


data.table 解决方案确实帮了大忙。这是很棒的东西，
对 data.table 解决方案的解释
帮了大忙。
# Example data: `link` is a list column (kept as-is via I()) holding the ids
# each row is linked to; NA means the row has no links.
df <- data.frame(
  id = 1:7,
  link = I(list(c(2, 3), c(1, 3), c(1, 2), NA, NA, 7, 6))
)

  id link
1  1 2, 3
2  2 1, 3
3  3 1, 2
4  4   NA
5  5   NA
6  6    7
7  7    6
  id link
1  1 2, 3
2  4   NA
3  5   NA
4  6    7
# Collect the ids of rows to drop: every id linked from a row that is itself
# kept. The first row of each linked group therefore survives.
# `to_remove` stays a character vector (as before); `%in%` coerces the
# numeric ids for the comparison.
to_remove <- character(0)
for (n in seq_len(nrow(df))) {
  # A row already marked for removal must not mark its own links; otherwise
  # the kept representative of its group (e.g. id 1) would be removed too.
  if (df$id[n] %in% to_remove) next
  links <- df$link[[n]]
  if (all(is.na(links))) next # skip if no links available
  # Only add links that are present (non-NA) and not yet recorded.
  add <- links[!is.na(links) & !(links %in% to_remove)]
  if (length(add) > 0) to_remove <- c(to_remove, add)
}
# Example data: `link` is a list column holding the ids each row is linked to
# (NA means the row has no links).
df <- data.frame(
  id = 1:7,
  link = I(list(c(2, 3), c(1, 3), c(1, 2), NA, NA, 7, 6))
)

# Keep a row unless some row with a smaller id links to it.
keep <- rep(TRUE, nrow(df))
for (i in seq_len(nrow(df))) {
  links <- df$link[[i]]
  # which() discards NA comparison results, so rows without links (NA)
  # contribute nothing instead of relying on NA subscripts being ignored.
  larger <- links[which(links > df$id[i])]
  # Translate linked ids to row positions so ids need not equal row numbers;
  # ids that do not occur in `df` are ignored rather than growing `keep`.
  rows <- match(larger, df$id)
  keep[rows[!is.na(rows)]] <- FALSE
}
df2 <- df[keep, ]
library(data.table)
# Long format: one row per (id, link) pair; ids without links carry NA.
DT <- data.table(
  id = rep(df$id, lengths(df$link)),
  link = unlist(df$link)
)
# Inner query: row numbers (.I) of every id that either has no link or never
# links to a smaller id. Outer query: collapse the surviving links of each id
# into one comma-separated string.
DT[
  DT[, .I[is.na(link) | !any(id > link)], by = id]$V1
][
  , .(link = toString(link)), by = id
]
   id link
1:  1 2, 3
2:  4   NA
3:  5   NA
4:  6    7
library(dplyr)
library(tidyr)
# Expand the list column to one row per (id, link) pair, keep only the ids
# whose links all point to ids at least as large as themselves (rows without
# a link are always kept), then collapse each surviving id's links into a
# single comma-separated string.
df %>%
  unnest(link) %>%
  group_by(id) %>%
  filter(is.na(link) | all(id <= link)) %>%
  summarise(link = toString(link))
# A tibble: 4 × 2
     id  link
  <int> <chr>
1     1  2, 3
2     4    NA
3     5    NA
4     6     7
# Long format: one row per (id, link) pair; ids without links carry NA.
dfn <- data.frame(
  id = rep(df$id, lengths(df$link)),
  link = unlist(df$link)
)
# Drop every id that links back to a smaller id. which() skips the NA
# comparisons produced by rows without a link.
dfn <- dfn[!(dfn$id %in% dfn$id[which(dfn$id > dfn$link)]), ]
# Collapse the links per id again; na.pass keeps ids whose only link is NA.
aggregate(link ~ id, data = dfn, FUN = toString, na.action = na.pass)
library(dplyr)
library(tidyr)
# Expand the list column, keep rows without a link plus every forward link
# (link > id) the first time that link value appears, then re-nest the
# remaining links per id.
# NOTE(review): this nest(link, .key = ...) form looks like the pre-1.0 tidyr
# interface — confirm it still runs without warnings on the installed tidyr.
result <- df %>%
  unnest(link) %>%
  filter(is.na(link) | (link > id & !duplicated(link))) %>%
  group_by(id) %>%
  nest(link, .key = link)
print(result)
### A tibble: 4 x 3
##     id             link
##  <int>           <list>
##1     1 <tibble [2 x 1]>
##2     4 <tibble [1 x 1]>
##3     5 <tibble [1 x 1]>
##4     6 <tibble [1 x 1]>
# Show the nested link tibbles themselves.
print(result$link)
##[[1]]
### A tibble: 2 x 1
##   link
##  <dbl>
##1     2
##2     3
##
##[[2]]
### A tibble: 1 x 1
##   link
##  <dbl>
##1    NA
##
##[[3]]
### A tibble: 1 x 1
##   link
##  <dbl>
##1    NA
##
##[[4]]
### A tibble: 1 x 1
##   link
##  <dbl>
##1     7