R 为列指定一个名称,该名称已使用循环在另一个df/列表中指定

R 为列指定一个名称,该名称已使用循环在另一个df/列表中指定,r,R,我有一个数据看起来像这样 sl.no = c(1,2,3,4) vehicle = c('car, bike','car, bicycle, lorry', 'bike, lorry, bicycle', 'bike, bicycle') df = cbind(sl.no, vehicle) df sl.no vehicle ------------------------------ 1 'car, bike'

我有一个数据看起来像这样

    sl.no = c(1,2,3,4)
    vehicle = c('car, bike','car, bicycle, lorry', 'bike, lorry, bicycle', 'bike, bicycle')
    df = cbind(sl.no, vehicle)

    df

    sl.no       vehicle
    ------------------------------
     1      'car, bike'
    ------------------------------
     2      'car, bicycle, lorry'
    ------------------------------
     3      'bike, lorry, bicycle'
    ------------------------------
     4      'bike, bicycle'
    ------------------------------
    veh

     unique vehicle's
     ---------------
      car
     ---------------
      bike
     ---------------
      bicycle
     ---------------
      lorry
     ---------------
    df1

     sl.no      car     bike    bicycle     lorry
     1          1       1       0           0
     2          1       0       1           1
     3          0       1       1           1
     4          0       1       1           0
我已经创建了一个表“veh”包含唯一的车辆。看起来像这样

    sl.no = c(1,2,3,4)
    vehicle = c('car, bike','car, bicycle, lorry', 'bike, lorry, bicycle', 'bike, bicycle')
    df = cbind(sl.no, vehicle)

    df

    sl.no       vehicle
    ------------------------------
     1      'car, bike'
    ------------------------------
     2      'car, bicycle, lorry'
    ------------------------------
     3      'bike, lorry, bicycle'
    ------------------------------
     4      'bike, bicycle'
    ------------------------------
    veh

     unique vehicle's
     ---------------
      car
     ---------------
      bike
     ---------------
      bicycle
     ---------------
      lorry
     ---------------
    df1

     sl.no      car     bike    bicycle     lorry
     1          1       1       0           0
     2          1       0       1           1
     3          0       1       1           1
     4          0       1       1           0
使用上面两个表,我创建的“df1”如下所示

    sl.no = c(1,2,3,4)
    vehicle = c('car, bike','car, bicycle, lorry', 'bike, lorry, bicycle', 'bike, bicycle')
    df = cbind(sl.no, vehicle)

    df

    sl.no       vehicle
    ------------------------------
     1      'car, bike'
    ------------------------------
     2      'car, bicycle, lorry'
    ------------------------------
     3      'bike, lorry, bicycle'
    ------------------------------
     4      'bike, bicycle'
    ------------------------------
    veh

     unique vehicle's
     ---------------
      car
     ---------------
      bike
     ---------------
      bicycle
     ---------------
      lorry
     ---------------
    df1

     sl.no      car     bike    bicycle     lorry
     1          1       1       0           0
     2          1       0       1           1
     3          0       1       1           1
     4          0       1       1           0
我可以用这个来做上面的事情

     df1$car = str_count(df$vehicle, "\\bcar\\b")
     df1$bike = str_count(df$vehicle, "\\bbike\\b")
     df1$bicycle = str_count(df$vehicle, "\\bbicycle\\b")
     df1$lorry = str_count(df$vehicle, "\\blorry\\b")
比如说,如果有新车“拖拉机”,那么我必须再次写下如下内容

    df1$tractor = str_count(df$vehicle, "\\btractor\\b")
这一个创建了一列“veh”,其中veh[[1]]:car

    df1$veh = str_count(df$vehicle, veh[[1]])
我想循环它们,这样我就不必指定列名并给出详细信息

那么,如何循环它,以便如果添加了新的车辆,我不需要添加

    df1$tractor = str_count(df$vehicle, "\\btractor\\b")
而且我不必给出列名(df1$bicycle)


如果答案已经存在。请给我提供链接。谢谢

我们可以使用
sapply
循环遍历“veh”的第一列,并获得
stru计数

cbind(slNo = seq_len(nrow(veh)), sapply(veh[[1]], str_count, string = df$vehicle))
#      slNo car bike bicycle lorry
#[1,]    1   1    1       0     0
#[2,]    2   1    0       1     1
#[3,]    3   0    1       1     1
#[4,]    4   0    1       1     0
数据
veh或者您可以概括和简化解决方案:

library(tidyverse)

data_frame(
  sl.no = c(1,2,3,4,5),
  vehicle = c(
    'car, bike',
    'car, bicycle, lorry',
    'bike, lorry, bicycle',
    'bike, bicycle',
    'bike, car, tractor'
  )
) -> xdf

xdf
## # A tibble: 5 x 2
##   sl.no              vehicle
##   <dbl>                <chr>
## 1     1            car, bike
## 2     2  car, bicycle, lorry
## 3     3 bike, lorry, bicycle
## 4     4        bike, bicycle
## 5     5   bike, car, tractor

separate_rows(xdf, vehicle) %>%
  mutate(x=1) %>%
  spread(vehicle, x) %>%
  mutate_at(vars(-sl.no), ~replace(., is.na(.), 0))
## # A tibble: 5 x 6
##   sl.no bicycle  bike   car lorry tractor
##   <dbl>   <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1     1       0     1     1     0       0
## 2     2       1     0     1     1       0
## 3     3       1     1     0     1       0
## 4     4       1     1     0     0       0
## 5     5       0     1     1     0       1
库(tidyverse)
数据帧(
sl.no=c(1,2,3,4,5),
车辆=c(
“汽车,自行车”,
“汽车、自行车、卡车”,
“自行车,卡车,自行车”,
“自行车,自行车”,
“自行车、汽车、拖拉机”
)
)->xdf
xdf
###tibble:5 x 2
##sl.无车辆
##                   
##1辆车,自行车
##小汽车、自行车、卡车
##自行车、卡车、自行车
##自行车
##自行车、汽车、拖拉机
单独的_行(xdf,车辆)%>%
突变(x=1)%>%
排列(车辆,x)%>%
在(变量(-sl.no),~replace(,is.na(,0))处变异
###tibble:5 x 6
##sl.无自行车、自行车、汽车、卡车、拖拉机
##            
## 1     1       0     1     1     0       0
## 2     2       1     0     1     1       0
## 3     3       1     1     0     1       0
## 4     4       1     1     0     0       0
## 5     5       0     1     1     0       1

注意:如果不可能总是
1
,则需要修改(轻微)。从你的问题很难看出这一点。另外,试着使用
dput()
vs可爱的手工数据表。这将在将来为你节省大量的时间。