在R中将一列拆分为两列

在R中将列拆分为2,r,dataframe,split,R,Dataframe,Split,我试过了 CC.Number Date Time Accident.Type alt lng 1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642 2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312 3 12L005388 07/02/2012 1229 PD 39.00058

我试过了

  CC.Number       Date Time Accident.Type      alt       lng
1 12T008826 07/01/2012 1630            PD  39.26699    -76.560642
2 12L005385 07/02/2012 1229            PD  39.000549   -76.399312
3 12L005388 07/02/2012 1229            PD  39.00058    -76.399267
# Take the raw Location.1 strings and peel off the first and the last
# character of each one, then break what is left at every comma.
location <- md$Location.1
location1 <- substring(text = location, first = 2)
location2 <- substr(x = location1, start = 1, stop = nchar(location1) - 1)
# strsplit() returns a list holding one character vector per input string.
location3 <- strsplit(x = location2, split = ",")

location我们可以使用
tidyr
中的
extract
,将仅由数字和小数点组成的元素捕获为两组,并丢弃“Location.1”中的其余内容

# Wrap location2 in a one-column data frame so that separate() has a column
# to work on. separate() comes from tidyr, which must already be attached.
locdf <- data.frame(location2)
colnames(locdf)[1] <- "x"
# Operate on the data frame (not on the raw `location` vector) and split on
# the comma plus any whitespace after it, so the second column does not keep
# a leading space.
df <- separate(locdf, col = x, into = c("lat", "log"), sep = ",\\s*")

我们可以从
tidyr
中使用
extract
,方法是将带点的数字元素捕获为两组,并将其余元素丢弃在“Location.1”中

# Wrap location2 in a one-column data frame so that separate() has a column
# to work on. separate() comes from tidyr, which must already be attached.
locdf <- data.frame(location2)
colnames(locdf)[1] <- "x"
# Operate on the data frame (not on the raw `location` vector) and split on
# the comma plus any whitespace after it, so the second column does not keep
# a leading space.
df <- separate(locdf, col = x, into = c("lat", "log"), sep = ",\\s*")

您也可以这样做,假设
dat1
是您的原始数据集名称,我们可以使用strsplit和gsub。首先,我们使用gsub将逗号和括号替换为空字符串(即将其删除),然后使用strsplit将值按空格分割:

library(tidyr)
# Pull both coordinates out of Location.1 with a single regex: the first
# capture group is the run of digits/dots right after "(", the second is the
# run of digits/dots (with any leading minus signs) after the comma and
# whitespace. The captured groups become the columns alt and lng.
extract(
  df1,
  col = Location.1,
  into = c("alt", "lng"),
  regex = "\\(([0-9.]+),\\s+(-*[0-9.]+)."
)
# CC.Number       Date Time Accident.Type       alt        lng
#1 12T008826 07/01/2012 1630            PD  39.26699 -76.560642
#2 12L005385 07/02/2012 1229            PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229            PD  39.00058 -76.399267
#4 12T008851 07/02/2012  445            PI  39.26367  -76.56648
#5 12T008858 07/02/2012  802            PD 39.240862 -76.599017
#6 12T008860 07/02/2012  832            PD  39.27022  -76.63926

df1您也可以这样做,假设
dat1
是您的原始数据集名称,我们可以使用strsplit和gsub。首先,我们使用gsub将逗号和括号替换为空字符串(即将其删除),然后使用strsplit将值按空格分割:

library(tidyr)
# Pull both coordinates out of Location.1 with a single regex: the first
# capture group is the run of digits/dots right after "(", the second is the
# run of digits/dots (with any leading minus signs) after the comma and
# whitespace. The captured groups become the columns alt and lng.
extract(
  df1,
  col = Location.1,
  into = c("alt", "lng"),
  regex = "\\(([0-9.]+),\\s+(-*[0-9.]+)."
)
# CC.Number       Date Time Accident.Type       alt        lng
#1 12T008826 07/01/2012 1630            PD  39.26699 -76.560642
#2 12L005385 07/02/2012 1229            PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229            PD  39.00058 -76.399267
#4 12T008851 07/02/2012  445            PI  39.26367  -76.56648
#5 12T008858 07/02/2012  802            PD 39.240862 -76.599017
#6 12T008860 07/02/2012  832            PD  39.27022  -76.63926

df1
使用
tidyr
中的
separate
进行拆分

# Delete "(", ")" and "," from Location.1, split what remains on a single
# space, and bind the pieces row-wise into a two-column data frame named
# Lat / Long. The values are not converted to numeric here.
df1 <- setNames(
  data.frame(
    do.call(
      "rbind",
      strsplit(gsub("\\(|\\)|,", "", dat1$Location.1), split = " ")
    )
  ),
  c("Lat", "Long")
)
# Keep every column except the last one (presumably Location.1 -- confirm it
# is the final column of dat1) and append the new coordinate columns.
# seq_len() with drop = FALSE stays correct when dat1 has only one or two
# columns, where 1:(n - 1) would count backwards or collapse the selection
# to an unnamed vector.
df2 <- data.frame(cbind(dat1[, seq_len(ncol(dat1) - 1L), drop = FALSE], df1))

# CC.Number     Date Time Accident.Type       Lat       Long
# 1 12T008826 07/01/12 1630            PD  39.26699 -76.560642
# 2 12L005385 07/02/12 1229            PD 39.000549 -76.399312
# 3 12L005388 07/02/12 1229            PD  39.00058 -76.399267
# 4 12T008851 07/02/12  445            PI  39.26367  -76.56648
# 5 12T008858 07/02/12  802            PD 39.240862 -76.599017
# 6 12T008860 07/02/12  832            PD  39.27022  -76.63926
library(tidyr)
#把括号再分出来

df$Location.1
使用
tidyr
中的
separate
进行拆分

# Delete "(", ")" and "," from Location.1, split what remains on a single
# space, and bind the pieces row-wise into a two-column data frame named
# Lat / Long. The values are not converted to numeric here.
df1 <- setNames(
  data.frame(
    do.call(
      "rbind",
      strsplit(gsub("\\(|\\)|,", "", dat1$Location.1), split = " ")
    )
  ),
  c("Lat", "Long")
)
# Keep every column except the last one (presumably Location.1 -- confirm it
# is the final column of dat1) and append the new coordinate columns.
# seq_len() with drop = FALSE stays correct when dat1 has only one or two
# columns, where 1:(n - 1) would count backwards or collapse the selection
# to an unnamed vector.
df2 <- data.frame(cbind(dat1[, seq_len(ncol(dat1) - 1L), drop = FALSE], df1))

# CC.Number     Date Time Accident.Type       Lat       Long
# 1 12T008826 07/01/12 1630            PD  39.26699 -76.560642
# 2 12L005385 07/02/12 1229            PD 39.000549 -76.399312
# 3 12L005388 07/02/12 1229            PD  39.00058 -76.399267
# 4 12T008851 07/02/12  445            PI  39.26367  -76.56648
# 5 12T008858 07/02/12  802            PD 39.240862 -76.599017
# 6 12T008860 07/02/12  832            PD  39.27022  -76.63926
library(tidyr)
#把括号再分出来
df$Location.1在base中,您可以使用
trimws
删除
()
并使用
read.table
在逗号处拆分

library(tidyr)
# Remove every "(" and ")" from Location.1 so only the comma-separated
# coordinate text is left.
df$Location.1 <- gsub(pattern = "[()]", replacement = "", x = df$Location.1)

# Split Location.1 at the comma into the columns lat and long. Only the comma
# itself is consumed, so any space that follows it stays at the start of long
# (visible as the wider long column in the output below).
separate(
  data = df,
  col = Location.1,
  into = c("lat", "long"),
  sep = ","
)
#  CC.Number       Date Time Accident.Type       lat        long
#1 12T008826 07/01/2012 1630            PD  39.26699  -76.560642
#2 12L005385 07/02/2012 1229            PD 39.000549  -76.399312
#3 12L005388 07/02/2012 1229            PD  39.00058  -76.399267
#4 12T008851 07/02/2012  445            PI  39.26367   -76.56648
#5 12T008858 07/02/2012  802            PD 39.240862  -76.599017
#6 12T008860 07/02/2012  832            PD  39.27022   -76.63926
数据:

md在base中,您可以使用
trimws
删除
()
并使用
read.table
在逗号处拆分

library(tidyr)
# Remove every "(" and ")" from Location.1 so only the comma-separated
# coordinate text is left.
df$Location.1 <- gsub(pattern = "[()]", replacement = "", x = df$Location.1)

# Split Location.1 at the comma into the columns lat and long. Only the comma
# itself is consumed, so any space that follows it stays at the start of long
# (visible as the wider long column in the output below).
separate(
  data = df,
  col = Location.1,
  into = c("lat", "long"),
  sep = ","
)
#  CC.Number       Date Time Accident.Type       lat        long
#1 12T008826 07/01/2012 1630            PD  39.26699  -76.560642
#2 12L005385 07/02/2012 1229            PD 39.000549  -76.399312
#3 12L005388 07/02/2012 1229            PD  39.00058  -76.399267
#4 12T008851 07/02/2012  445            PI  39.26367   -76.56648
#5 12T008858 07/02/2012  802            PD 39.240862  -76.599017
#6 12T008860 07/02/2012  832            PD  39.27022   -76.63926
数据:

md