Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/81.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 转换列类/类型时保留日期/POSIXct列_R_Lubridate_Posixct - Fatal编程技术网

R 转换列类/类型时保留日期/POSIXct列

R 转换列类/类型时维护日期/POSIXct列,r,lubridate,posixct,R,Lubridate,Posixct,我有一个400列的数据框,其中有多个日期列。 在下面的代表性示例中,我希望实现以下目标: 将因子转换为数字、字符或POSIXct 在合适的情况下,将字符转换为数字/整数 将包含日期的任何列转换为POSIXct,无论它是因子、字符还是日期 set.seed(123) df1 <- data.frame( A = as.numeric(1:10), B = sample(seq(as.POSIXct('2000/01/01'), as.POSIXct('2018/01/01'), by="da

我有一个400列的数据框,其中有多个日期列。 在下面的代表性示例中,我希望实现以下目标:

  • 将因子转换为数字、字符或POSIXct
  • 在合适的情况下,将字符转换为数字/整数
  • 将包含日期的任何列转换为POSIXct,无论它是因子、字符还是日期

    # Reproducible example: one column for each type the conversion must handle
    # (numeric, POSIXct, character, factor, and date-times stored as text).
    # The sample() calls stay in their original order so the seeded draws match.
    set.seed(123)
    df1 <- data.frame(
      A = as.numeric(1:10),
      B = sample(
        seq(as.POSIXct("2000/01/01"), as.POSIXct("2018/01/01"), by = "day"),
        size = 10
      ),
      C = as.numeric(sample(20:90, size = 10)),
      D = sample(c("yes", "no"), size = 10, replace = TRUE),
      E = as.factor(sample(1000:2000, size = 10)),
      F = c(
        "test", "test2", "test3", "test4", "test5",
        "test6", "test7", "test8", "test9", "test10"
      ),
      G = as.factor(c(
        "test", "test2", "test3", "test4", "test5",
        "test6", "test7", "test8", "test9", "test10"
      )),
      H = as.character(sample(
        seq(as.POSIXct("2000/01/01"), as.POSIXct("2018/01/01"), by = "day"),
        size = 10
      )),
      stringsAsFactors = FALSE
    )
    df1
    A                   B  C   D    E      F      G                   H
    1   1 2005-03-06 00:00:00 87  no 1963   test   test 2002-07-27 23:00:00
    2   2 2014-03-11 00:00:00 51  no 1902  test2  test2 2007-06-17 23:00:00
    3   3 2007-05-11 23:00:00 66  no 1690  test3  test3 2007-06-11 23:00:00
    4   4 2015-11-22 00:00:00 58  no 1793  test4  test4 2006-08-20 23:00:00
    5   5 2016-12-02 00:00:00 26  no 1024  test5  test5 2002-09-27 23:00:00
    6   6 2000-10-26 00:00:00 79  no 1475  test6  test6 2002-06-30 23:00:00
    7   7 2009-06-30 23:00:00 35  no 1754  test7  test7 2004-03-11 00:00:00
    8   8 2016-01-19 00:00:00 22  no 1215  test8  test8 2008-05-17 23:00:00
    9   9 2009-11-30 00:00:00 40 yes 1315  test9  test9 2004-10-12 00:00:00
    10 10 2008-03-17 00:00:00 85 yes 1229 test10 test10 2015-06-03 23:00:00
    
    unlist(lapply(df1, class))
      A          B1          B2           C           D           E           F           G           H 
      "numeric"   "POSIXct"    "POSIXt"   "numeric" "character"    "factor" "character"    "factor" "character" 
    
    set.seed(123)
    
    df1

这可能不是最优雅的方式,但对我来说似乎很管用:

    #install.packages("tidyverse")
    #install.packages("dataCompareR")
    library("tidyverse")
    library("dataCompareR")
    
    
    
    # Build the reproducible example data frame. Every date-time is created in
    # UTC; the sample() calls stay in their original order so the seeded draws
    # are unchanged.
    set.seed(123)
    df1 <- data.frame(
      A = as.numeric(1:10),
      B = sample(
        seq(
          as.POSIXct("2000/01/01", tz = "UTC"),
          as.POSIXct("2018/01/01", tz = "UTC"),
          by = "day"
        ),
        size = 10
      ),
      C = as.numeric(sample(20:90, size = 10)),
      D = sample(c("yes", "no"), size = 10, replace = TRUE),
      E = as.factor(sample(1000:2000, size = 10)),
      F = c(
        "test", "test2", "test3", "test4", "test5",
        "test6", "test7", "test8", "test9", "test10"
      ),
      G = as.factor(c(
        "test", "test2", "test3", "test4", "test5",
        "test6", "test7", "test8", "test9", "test10"
      )),
      H = as.character(sample(
        seq(
          as.POSIXct("2000/01/01", tz = "UTC"),
          as.POSIXct("2018/01/01", tz = "UTC"),
          by = "day"
        ),
        size = 10
      )),
      stringsAsFactors = FALSE
    )
    df1 # print the data frame
    
    unlist(lapply(df1, class)) # print the class(es) of every column
    
    
    # Re-infer every column's type from its character representation.
    # across() replaces mutate_all()/funs(); funs() is deprecated since
    # dplyr 0.8.0 and warns on every call.
    df1_clean <- df1 %>%
      mutate(across(
        everything(),
        ~ type.convert(as.character(.x), as.is = TRUE)
      ))
    unlist(lapply(df1_clean, class)) # look at df classes now
    
    # Flag the columns that hold dates, based on
    # https://stackoverflow.com/questions/18178451/is-there-a-way-to-check-if-a-column-is-a-date-in-r
    # A column qualifies only when it has at least one non-missing value and
    # EVERY non-missing value parses as %Y-%m-%d. Requiring just one parsable
    # value would flag mixed text columns, and as.POSIXct() then stops with
    # "character string is not in a standard unambiguous format".
    # vapply() guarantees a logical vector even for a zero-column data frame.
    is_date_col <- vapply(
      df1_clean,
      function(column) {
        values <- as.character(column)
        present <- !is.na(values)
        parsed <- as.Date(values, format = "%Y-%m-%d")
        any(present) && !anyNA(parsed[present])
      },
      logical(1)
    )
    
    # Convert the flagged columns to POSIXct in UTC. Iterating over which()
    # is a no-op when nothing is flagged, unlike 1:ncol() on an empty frame.
    for (j in which(is_date_col)) {
      df1_clean[[j]] <- as.POSIXct(as.character(df1_clean[[j]]), tz = "UTC")
    }
    
    df1_clean # look at df
    unlist(lapply(df1_clean, class)) # look at df classes
    
    
    # Compare the data frame before and after using the dataCompareR package.
    comp <- rCompare(df1, df1_clean)
    summary(comp) # check summary
    
    #install.packages("tidyverse")
    #install.packages("dataCompareR")
    library("tidyverse")
    library("dataCompareR")
    # create reproducible df
    set.seed(123)
    
    df1我不明白:您显式地将所有列转换为
    字符
    ,然后当它们丢失其
    POSIXct
    属性时,您会感到惊讶吗?在R中(不考虑自动类型强制转换),类通常是互斥的:
    整数不能是
    字符
    ,而
    字符
    不能是
    POSIXct
    ,因此当你说
    as.character(.)
    ,你是自愿且明确地放弃它的日期时间属性,将它转换成字符串。您是否试图在不预先知道哪个是哪个的情况下动态执行此操作,或者您是否可以使用
    mutate_at(vars(A, C, ...), funs(as.character(.)))
    ?谢谢!所有列都变成了字符类型,我并不感到奇怪。type.convert
    只是我遇到的唯一一种在R中动态重新分配类的方法。我不知道如何将其编写为代码,但一种可行的方法是:I)将字符日期(例如H列)改为
    POSIXct
    ,ii)创建一个非
    POSIXct
    的变量列表,最后iii)使用
    type.convert 转换所有非
    POSIXct
    的变量。请注意,
    type.convert
    的帮助中没有提到
    POSIXct
    ,因此如果您使用该函数,您只能依靠自己。您是否提前知道哪些列应该是哪种类型?或者,您是否正在尝试提出一个通用函数,该函数将愉快/巧妙地为您实现这一点,而不管您如何使用它?如果您知道比 type.convert 更好的替代方案,我很乐意使用。是的,我想要的是一个通用函数,不管我怎么做,它都会分配正确的类。执行“自动类型确定”的函数通常是为了将数据读入R,而不是处理已经在R环境中的对象。如果这是来自CSV或类似文件,我是否可以建议使用
    readr
    软件包?它首先为您检查前1000行(可配置)和自动类型。
    #install.packages("tidyverse")
    #install.packages("dataCompareR")
    library("tidyverse")
    library("dataCompareR")
    
    
    
    # Build the reproducible example data frame. Every date-time is created in
    # UTC; the sample() calls stay in their original order so the seeded draws
    # are unchanged.
    set.seed(123)
    df1 <- data.frame(
      A = as.numeric(1:10),
      B = sample(
        seq(
          as.POSIXct("2000/01/01", tz = "UTC"),
          as.POSIXct("2018/01/01", tz = "UTC"),
          by = "day"
        ),
        size = 10
      ),
      C = as.numeric(sample(20:90, size = 10)),
      D = sample(c("yes", "no"), size = 10, replace = TRUE),
      E = as.factor(sample(1000:2000, size = 10)),
      F = c(
        "test", "test2", "test3", "test4", "test5",
        "test6", "test7", "test8", "test9", "test10"
      ),
      G = as.factor(c(
        "test", "test2", "test3", "test4", "test5",
        "test6", "test7", "test8", "test9", "test10"
      )),
      H = as.character(sample(
        seq(
          as.POSIXct("2000/01/01", tz = "UTC"),
          as.POSIXct("2018/01/01", tz = "UTC"),
          by = "day"
        ),
        size = 10
      )),
      stringsAsFactors = FALSE
    )
    df1 # print the data frame
    
    unlist(lapply(df1, class)) # print the class(es) of every column
    
    
    # Re-infer every column's type from its character representation.
    # across() replaces mutate_all()/funs(); funs() is deprecated since
    # dplyr 0.8.0 and warns on every call.
    df1_clean <- df1 %>%
      mutate(across(
        everything(),
        ~ type.convert(as.character(.x), as.is = TRUE)
      ))
    unlist(lapply(df1_clean, class)) # look at df classes now
    
    # Flag the columns that hold dates, based on
    # https://stackoverflow.com/questions/18178451/is-there-a-way-to-check-if-a-column-is-a-date-in-r
    # A column qualifies only when it has at least one non-missing value and
    # EVERY non-missing value parses as %Y-%m-%d. Requiring just one parsable
    # value would flag mixed text columns, and as.POSIXct() then stops with
    # "character string is not in a standard unambiguous format".
    # vapply() guarantees a logical vector even for a zero-column data frame.
    is_date_col <- vapply(
      df1_clean,
      function(column) {
        values <- as.character(column)
        present <- !is.na(values)
        parsed <- as.Date(values, format = "%Y-%m-%d")
        any(present) && !anyNA(parsed[present])
      },
      logical(1)
    )
    
    # Convert the flagged columns to POSIXct in UTC. Iterating over which()
    # is a no-op when nothing is flagged, unlike 1:ncol() on an empty frame.
    for (j in which(is_date_col)) {
      df1_clean[[j]] <- as.POSIXct(as.character(df1_clean[[j]]), tz = "UTC")
    }
    
    df1_clean # look at df
    unlist(lapply(df1_clean, class)) # look at df classes
    
    
    # Compare the data frame before and after using the dataCompareR package.
    comp <- rCompare(df1, df1_clean)
    summary(comp) # check summary