Apache Spark:将变量从 'dttm' 转换为 'POSIXct'

Apache Spark:将变量从 'dttm' 转换为 'POSIXct',apache-spark,r,dplyr,Apache Spark,R,Dplyr,使用时,我将这些变量以一种称为dttm的奇怪格式显示如下: tpep_pickup_datetime tpep_dropoff_datetime <dttm> <dttm> 2015-01-15 18:05:39 2015-01-15 18:23:42 2015-01-10 19:33:38 2015-01-10 1

使用时,我将这些变量以一种称为
dttm
的奇怪格式显示如下:

 tpep_pickup_datetime  tpep_dropoff_datetime 
               <dttm>                 <dttm>               
  2015-01-15 18:05:39    2015-01-15 18:23:42                    
  2015-01-10 19:33:38    2015-01-10 19:53:28              
  2015-01-10 19:33:38    2015-01-10 19:43:41              
  2015-01-10 19:33:39    2015-01-10 19:35:31               
我得到这个错误:

与此相同,我们可以使用
dplyr
lubridate::seconds_to_period
base::difftime
来获得输出

# Compute the trip duration as a lubridate Period.
# df1 is the sample tibble built in the "data" snippet of this answer.
library(dplyr)
library(lubridate)
df1 %>%
  # difftime() yields the elapsed time in seconds; seconds_to_period()
  # turns that number into a human-readable Period (e.g. "18M 3S").
  mutate(dur = seconds_to_period(as.numeric(difftime(tpep_dropoff_datetime,
                                                     tpep_pickup_datetime,
                                                     units = "secs"))))
#> # A tibble: 4 x 3
#>   tpep_pickup_datetime tpep_dropoff_datetime dur
#>   <dttm>               <dttm>                <Period>
#> 1 2015-01-15 18:05:39  2015-01-15 18:23:42   18M 3S
#> 2 2015-01-10 19:33:38  2015-01-10 19:53:28   19M 50S
#> 3 2015-01-10 19:33:38  2015-01-10 19:43:41   10M 3S
#> 4 2015-01-10 19:33:39  2015-01-10 19:35:31   1M 52S
如果您希望以秒数(数值)的形式输出,而不是以 Period 的形式输出,则下面的代码可以工作

# Compute the trip duration as a plain number of seconds.
# df1 is the sample tibble built in the "data" snippet of this answer.
library(dplyr)
df1 %>%
  # units = "secs" pins the unit; without it difftime() picks one
  # automatically and the numeric value would be ambiguous.
  mutate(dur = as.numeric(difftime(tpep_dropoff_datetime,
                                   tpep_pickup_datetime,
                                   units = "secs")))
#> # A tibble: 4 x 3
#>   tpep_pickup_datetime tpep_dropoff_datetime   dur
#>   <dttm>               <dttm>                <dbl>
#> 1 2015-01-15 18:05:39  2015-01-15 18:23:42    1083
#> 2 2015-01-10 19:33:38  2015-01-10 19:53:28    1190
#> 3 2015-01-10 19:33:38  2015-01-10 19:43:41     603
#> 4 2015-01-10 19:33:39  2015-01-10 19:35:31     112
数据:
# Build the sample data. Date and time are joined with "_" in the text so
# that read.table() keeps each timestamp in a single whitespace-delimited
# field.
df1 <- read.table(text = "tpep_pickup_datetime tpep_dropoff_datetime
2015-01-15_18:05:39    2015-01-15_18:23:42
2015-01-10_19:33:38    2015-01-10_19:53:28
2015-01-10_19:33:38    2015-01-10_19:43:41
2015-01-10_19:33:39    2015-01-10_19:35:31", stringsAsFactors = FALSE, header = TRUE)
# Parse both character columns into POSIXct. Minutes are "%M" (upper case);
# "%m" is the month and would mis-parse the minute field.
df1 <- df1 %>%
  mutate_at(vars("tpep_pickup_datetime", "tpep_dropoff_datetime"),
            list(~ as.POSIXct(., format = "%Y-%m-%d_%H:%M:%S",
                              tz = Sys.timezone()))) %>%
  as_tibble()
df1 # just to show the data
#>   tpep_pickup_datetime tpep_dropoff_datetime
#>   <dttm>               <dttm>
#> 1 2015-01-15 18:05:39  2015-01-15 18:23:42  
#> 2 2015-01-10 19:33:38  2015-01-10 19:53:28  
#> 3 2015-01-10 19:33:38  2015-01-10 19:43:41  
#> 4 2015-01-10 19:33:39  2015-01-10 19:35:31  
与此相同,我们可以使用
dplyr
lubridate::seconds_to_period
base::difftime
获得输出

# Compute the trip duration as a lubridate Period.
# df1 is the sample tibble built in the "data" snippet of this answer.
library(dplyr)
library(lubridate)
df1 %>%
  # difftime() yields the elapsed time in seconds; seconds_to_period()
  # turns that number into a human-readable Period (e.g. "18M 3S").
  mutate(dur = seconds_to_period(as.numeric(difftime(tpep_dropoff_datetime,
                                                     tpep_pickup_datetime,
                                                     units = "secs"))))
#> # A tibble: 4 x 3
#>   tpep_pickup_datetime tpep_dropoff_datetime dur
#>   <dttm>               <dttm>                <Period>
#> 1 2015-01-15 18:05:39  2015-01-15 18:23:42   18M 3S
#> 2 2015-01-10 19:33:38  2015-01-10 19:53:28   19M 50S
#> 3 2015-01-10 19:33:38  2015-01-10 19:43:41   10M 3S
#> 4 2015-01-10 19:33:39  2015-01-10 19:35:31   1M 52S
如果您希望以秒数(数值)的形式输出,而不是以 Period 的形式输出,则下面的代码可以工作

# Compute the trip duration as a plain number of seconds.
# df1 is the sample tibble built in the "data" snippet of this answer.
library(dplyr)
df1 %>%
  # units = "secs" pins the unit; without it difftime() picks one
  # automatically and the numeric value would be ambiguous.
  mutate(dur = as.numeric(difftime(tpep_dropoff_datetime,
                                   tpep_pickup_datetime,
                                   units = "secs")))
#> # A tibble: 4 x 3
#>   tpep_pickup_datetime tpep_dropoff_datetime   dur
#>   <dttm>               <dttm>                <dbl>
#> 1 2015-01-15 18:05:39  2015-01-15 18:23:42    1083
#> 2 2015-01-10 19:33:38  2015-01-10 19:53:28    1190
#> 3 2015-01-10 19:33:38  2015-01-10 19:43:41     603
#> 4 2015-01-10 19:33:39  2015-01-10 19:35:31     112
数据:
# Build the sample data. Date and time are joined with "_" in the text so
# that read.table() keeps each timestamp in a single whitespace-delimited
# field.
df1 <- read.table(text = "tpep_pickup_datetime tpep_dropoff_datetime
2015-01-15_18:05:39    2015-01-15_18:23:42
2015-01-10_19:33:38    2015-01-10_19:53:28
2015-01-10_19:33:38    2015-01-10_19:43:41
2015-01-10_19:33:39    2015-01-10_19:35:31", stringsAsFactors = FALSE, header = TRUE)
# Parse both character columns into POSIXct. Minutes are "%M" (upper case);
# "%m" is the month and would mis-parse the minute field.
df1 <- df1 %>%
  mutate_at(vars("tpep_pickup_datetime", "tpep_dropoff_datetime"),
            list(~ as.POSIXct(., format = "%Y-%m-%d_%H:%M:%S",
                              tz = Sys.timezone()))) %>%
  as_tibble()
df1 # just to show the data
#>   tpep_pickup_datetime tpep_dropoff_datetime
#>   <dttm>               <dttm>
#> 1 2015-01-15 18:05:39  2015-01-15 18:23:42  
#> 2 2015-01-10 19:33:38  2015-01-10 19:53:28  
#> 3 2015-01-10 19:33:38  2015-01-10 19:43:41  
#> 4 2015-01-10 19:33:39  2015-01-10 19:35:31  
试试这个:

  # Derive date, clock-time label and duration columns for each trip.
  # Requires dplyr; hour()/minute()/second() are presumably lubridate's.
  # all_data must provide tpep_pickup_datetime and tpep_dropoff_datetime.
  all_data <- all_data %>%
    mutate(
      new_pickup = as.POSIXct(tpep_pickup_datetime),
      day_pickup = as.Date(new_pickup),
      # "h-m-s" label built from the individual clock fields
      time_pickup = paste(hour(new_pickup), minute(new_pickup),
                          second(new_pickup), sep = "-"),
      new_dropoff = as.POSIXct(tpep_dropoff_datetime),
      day_dropoff = as.Date(new_dropoff),
      time_dropoff = paste(hour(new_dropoff), minute(new_dropoff),
                           second(new_dropoff), sep = "-"),
      # Elapsed seconds from the full timestamps (seconds since the epoch).
      # Combining hour/minute/second differences ignores the date, so a trip
      # crossing midnight (23:50 -> 00:10) came out as -85200 instead of 1200.
      # NOTE(review): if all_data is a remote table (e.g. Spark), confirm
      # that as.numeric() on a timestamp translates to epoch seconds there.
      trip_duration = as.numeric(new_dropoff) - as.numeric(new_pickup)
    )
# Derive date, clock-time label and duration columns for each trip.
# Requires dplyr; hour()/minute()/second() are presumably lubridate's.
# all_data must provide tpep_pickup_datetime and tpep_dropoff_datetime.
all_data <- all_data %>%
  mutate(
    new_pickup = as.POSIXct(tpep_pickup_datetime),
    day_pickup = as.Date(new_pickup),
    # "h-m-s" label built from the individual clock fields
    time_pickup = paste(hour(new_pickup), minute(new_pickup),
                        second(new_pickup), sep = "-"),
    new_dropoff = as.POSIXct(tpep_dropoff_datetime),
    day_dropoff = as.Date(new_dropoff),
    time_dropoff = paste(hour(new_dropoff), minute(new_dropoff),
                         second(new_dropoff), sep = "-"),
    # Elapsed seconds from the full timestamps (seconds since the epoch).
    # Combining hour/minute/second differences ignores the date, so a trip
    # crossing midnight (23:50 -> 00:10) came out as -85200 instead of 1200.
    # NOTE(review): if all_data is a remote table (e.g. Spark), confirm
    # that as.numeric() on a timestamp translates to epoch seconds there.
    trip_duration = as.numeric(new_dropoff) - as.numeric(new_pickup)
  )
试试这个:

  # Derive date, clock-time label and duration columns for each trip.
  # Requires dplyr; hour()/minute()/second() are presumably lubridate's.
  # all_data must provide tpep_pickup_datetime and tpep_dropoff_datetime.
  all_data <- all_data %>%
    mutate(
      new_pickup = as.POSIXct(tpep_pickup_datetime),
      day_pickup = as.Date(new_pickup),
      # "h-m-s" label built from the individual clock fields
      time_pickup = paste(hour(new_pickup), minute(new_pickup),
                          second(new_pickup), sep = "-"),
      new_dropoff = as.POSIXct(tpep_dropoff_datetime),
      day_dropoff = as.Date(new_dropoff),
      time_dropoff = paste(hour(new_dropoff), minute(new_dropoff),
                           second(new_dropoff), sep = "-"),
      # Elapsed seconds from the full timestamps (seconds since the epoch).
      # Combining hour/minute/second differences ignores the date, so a trip
      # crossing midnight (23:50 -> 00:10) came out as -85200 instead of 1200.
      # NOTE(review): if all_data is a remote table (e.g. Spark), confirm
      # that as.numeric() on a timestamp translates to epoch seconds there.
      trip_duration = as.numeric(new_dropoff) - as.numeric(new_pickup)
    )
# Derive date, clock-time label and duration columns for each trip.
# Requires dplyr; hour()/minute()/second() are presumably lubridate's.
# all_data must provide tpep_pickup_datetime and tpep_dropoff_datetime.
all_data <- all_data %>%
  mutate(
    new_pickup = as.POSIXct(tpep_pickup_datetime),
    day_pickup = as.Date(new_pickup),
    # "h-m-s" label built from the individual clock fields
    time_pickup = paste(hour(new_pickup), minute(new_pickup),
                        second(new_pickup), sep = "-"),
    new_dropoff = as.POSIXct(tpep_dropoff_datetime),
    day_dropoff = as.Date(new_dropoff),
    time_dropoff = paste(hour(new_dropoff), minute(new_dropoff),
                         second(new_dropoff), sep = "-"),
    # Elapsed seconds from the full timestamps (seconds since the epoch).
    # Combining hour/minute/second differences ignores the date, so a trip
    # crossing midnight (23:50 -> 00:10) came out as -85200 instead of 1200.
    # NOTE(review): if all_data is a remote table (e.g. Spark), confirm
    # that as.numeric() on a timestamp translates to epoch seconds there.
    trip_duration = as.numeric(new_dropoff) - as.numeric(new_pickup)
  )
  # Derive date, clock-time label and duration columns for each trip.
  # Requires dplyr; hour()/minute()/second() are presumably lubridate's.
  # all_data must provide tpep_pickup_datetime and tpep_dropoff_datetime.
  all_data <- all_data %>%
    mutate(
      new_pickup = as.POSIXct(tpep_pickup_datetime),
      day_pickup = as.Date(new_pickup),
      # "h-m-s" label built from the individual clock fields
      time_pickup = paste(hour(new_pickup), minute(new_pickup),
                          second(new_pickup), sep = "-"),
      new_dropoff = as.POSIXct(tpep_dropoff_datetime),
      day_dropoff = as.Date(new_dropoff),
      time_dropoff = paste(hour(new_dropoff), minute(new_dropoff),
                           second(new_dropoff), sep = "-"),
      # Elapsed seconds from the full timestamps (seconds since the epoch).
      # Combining hour/minute/second differences ignores the date, so a trip
      # crossing midnight (23:50 -> 00:10) came out as -85200 instead of 1200.
      # NOTE(review): if all_data is a remote table (e.g. Spark), confirm
      # that as.numeric() on a timestamp translates to epoch seconds there.
      trip_duration = as.numeric(new_dropoff) - as.numeric(new_pickup)
    )