Julia:DataFramesMeta转换

Julia:DataFramesMeta转换,dataframe,julia,Dataframe,Julia,我试图在Julia中重现以下R代码 library(dplyr) women_new <- rbind(women, c(NA, 1), c(NA, NA)) women_new %>% filter(height %>% complete.cases) %>% mutate(sector = character(n()), sector = replace(sector, height >= 0 & height <=

我试图在Julia中重现以下R代码

library(dplyr)

women_new <- rbind(women, c(NA, 1), c(NA, NA))
women_new %>% 
  filter(height %>% complete.cases) %>%
  mutate(sector = character(n()),
         sector = replace(sector, height >= 0 & height <= 60, "1"),
         sector = replace(sector, height >= 61 & height <= 67, "2"), 
         sector = replace(sector, height >= 68 & height <= 72, "3"))
我的第一个问题是:有没有办法像在R中那样,直接在 `vcat([[NA 1];[NA NA]])` 中写入数值 `1`?如果我这样做,会返回以下错误:

MethodError: Cannot `convert` an object of type DataArrays.NAtype to an object of type Int64
This may have arisen from a call to the constructor Int64(...),
since type constructors fall back to convert methods.
 in macro expansion at multidimensional.jl:431 [inlined]
 in macro expansion at cartesian.jl:64 [inlined]
 in macro expansion at multidimensional.jl:429 [inlined]
 in _unsafe_batchsetindex!(::Array{Int64,2}, ::Base.Repeated{DataArrays.NAtype}, ::UnitRange{Int64}, ::UnitRange{Int64}) at multidimensional.jl:421
 in setindex!(::Array{Int64,2}, ::DataArrays.NAtype, ::UnitRange{Int64}, ::UnitRange{Int64}) at abstractarray.jl:832
 in cat_t(::Int64, ::Type{T}, ::DataArrays.NAtype, ::Vararg{Any,N}) at abstractarray.jl:1098
 in hcat(::DataArrays.NAtype, ::Int64) at abstractarray.jl:1180
 in include_string(::String, ::String) at loading.jl:441
 in include_string(::String, ::String, ::Int64) at eval.jl:30
 in include_string(::Module, ::String, ::String, ::Int64, ::Vararg{Int64,N}) at eval.jl:34
 in (::Atom.##53#56{String,Int64,String})() at eval.jl:50
 in withpath(::Atom.##53#56{String,Int64,String}, ::String) at utils.jl:30
 in withpath(::Function, ::String) at eval.jl:38
 in macro expansion at eval.jl:49 [inlined]
 in (::Atom.##52#55{Dict{String,Any}})() at task.jl:60
我的第二个问题是:有没有办法将 `DataArray` 转换为 `DataFrame`?在这种情况下,由于 `DataArray` 没有列名,列名会变成 `X1`、`X2`……或 `DataFrame` 中的其他默认名称。我认为这比键入以下内容更简洁:

women_new = DataFrame(Height = women[:, 1], Weight = women[:, 2]);
我希望可以简单地执行 `convert(DataFrame, women)`,然后再重命名各列。但这种转换并不奏效。下面是我重现R中 transform / mutate 操作的尝试:

@> begin
  women_new
  @where !isna(:Height)
  @transform(
    Sector = NA,
    Sector = ifelse(:Height .>=  0 & :Height .<= 60, 1,
             ifelse(:Height .>= 61 & :Height .<= 67, 2,
             ifelse(:Height .>= 68 & :Height .<= 72, 3, NA)))
    )
end
这并不等同于R,我还尝试了以下方法:

using DataFrames
using DataFramesMeta
using Lazy
using RDatasets

women = @> begin
  "datasets" 
  dataset("women")
  DataArray()
  vcat([[NA NA]; [NA NA]])
end

women_new = DataFrame(Height = women[:, 1], Weight = women[:, 2]);
women_new[16, 2] = 1;
@> begin
  women_new
  @where !isna(:Height)
  @transform(
    Sector = NA,
    Sector = :Height .>=  0 & :Height .<= 60 ? 1 :
             :Height .>= 61 & :Height .<= 67 ? 2 :
             :Height .>= 68 & :Height .<= 72 ? 3 :
            NA;
    )
end
@> begin
  women_new
  @transform(
    Height_New = NA,
    Height_New = ifelse(isna(:Height), -1, :Height))
  @transform(
    Class = NA,
    Class = ifelse(:Height_New == -1, NA,
              ifelse((:Height_New .>=  0) & (:Height_New .<= 60), 1,
              ifelse((:Height_New .>= 61) & (:Height_New .<= 67), 2,
              ifelse((:Height_New .>= 68) & (:Height_New .<= 72), 3, NA))))
  )
  delete!(:Height_New)
end
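@> begin
  women_new
  @where !isna(:Height)
  @transform(
    Sector = NA,
    Sector = :Height .>=  0 & :Height .<= 60 ? 1 :
             :Height .>= 61 & :Height .<= 67 ? 2 :
             :Height .>= 68 & :Height .<= 72 ? 3 :
            NA;
    )
end

我明白了。这是运算符优先级造成的问题——我原以为不需要括号,但实际上需要: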

using DataFrames
using DataFramesMeta
using Lazy
using RDatasets

women = dataset("datasets", "women");
women_new = vcat(
              women,
              DataFrame(Height = [NA; NA], Weight = @data [1; NA])
            )

@> begin
  women_new
  @where !isna(:Height)
  @transform(
    Class = NA,
    Class = ifelse((:Height .>=  0) & (:Height .<= 60), 1,
            ifelse((:Height .>= 61) & (:Height .<= 67), 2,
            ifelse((:Height .>= 68) & (:Height .<= 72), 3, NA)))
            )
end
如果我们不想过滤掉NA,而是要处理完整的数据,那么我能想到的最好做法是:

using DataFrames
using DataFramesMeta
using Lazy
using RDatasets

women = @> begin
  "datasets" 
  dataset("women")
  DataArray()
  vcat([[NA NA]; [NA NA]])
end

women_new = DataFrame(Height = women[:, 1], Weight = women[:, 2]);
women_new[16, 2] = 1;
@> begin
  women_new
  @where !isna(:Height)
  @transform(
    Sector = NA,
    Sector = :Height .>=  0 & :Height .<= 60 ? 1 :
             :Height .>= 61 & :Height .<= 67 ? 2 :
             :Height .>= 68 & :Height .<= 72 ? 3 :
            NA;
    )
end
@> begin
  women_new
  @transform(
    Height_New = NA,
    Height_New = ifelse(isna(:Height), -1, :Height))
  @transform(
    Class = NA,
    Class = ifelse(:Height_New == -1, NA,
              ifelse((:Height_New .>=  0) & (:Height_New .<= 60), 1,
              ifelse((:Height_New .>= 61) & (:Height_New .<= 67), 2,
              ifelse((:Height_New .>= 68) & (:Height_New .<= 72), 3, NA))))
  )
  delete!(:Height_New)
end

在这种情况下,代码变得凌乱,因为目前还没有办法处理 `ifelse` 第一个参数中的NA。

如果你在这里没有得到答复,也可以到Julia社区的其他渠道提问。我知道很多做数据分析的人经常在那里交流。但请务必注明这是一篇交叉发帖(cross-post)。
15×3 DataFrames.DataFrame
│ Row │ Height │ Weight │ Class │
├─────┼────────┼────────┼───────┤
│ 1   │ 58     │ 115    │ 1     │
│ 2   │ 59     │ 117    │ 1     │
│ 3   │ 60     │ 120    │ 1     │
│ 4   │ 61     │ 123    │ 2     │
│ 5   │ 62     │ 126    │ 2     │
│ 6   │ 63     │ 129    │ 2     │
│ 7   │ 64     │ 132    │ 2     │
│ 8   │ 65     │ 135    │ 2     │
│ 9   │ 66     │ 139    │ 2     │
│ 10  │ 67     │ 142    │ 2     │
│ 11  │ 68     │ 146    │ 3     │
│ 12  │ 69     │ 150    │ 3     │
│ 13  │ 70     │ 154    │ 3     │
│ 14  │ 71     │ 159    │ 3     │
│ 15  │ 72     │ 164    │ 3     │
@> begin
  women_new
  @transform(
    Height_New = NA,
    Height_New = ifelse(isna(:Height), -1, :Height))
  @transform(
    Class = NA,
    Class = ifelse(:Height_New == -1, NA,
              ifelse((:Height_New .>=  0) & (:Height_New .<= 60), 1,
              ifelse((:Height_New .>= 61) & (:Height_New .<= 67), 2,
              ifelse((:Height_New .>= 68) & (:Height_New .<= 72), 3, NA))))
  )
  delete!(:Height_New)
end
@> begin
    women_new
    @transform(
        Class = @> begin
            function (x)
                isna(x)       ? NA :
                 0 <= x <= 60 ?  1 :
                61 <= x <= 67 ?  2 :
                68 <= x <= 72 ?  3 :
                NA
            end
            map(:Height)
        end
    )
end
@from i in women_new begin
    @select {
        i.Height, i.Weight,
        class = 0 <= i.Height <= 60 ?  1 :
               61 <= i.Height <= 67 ?  2 :
               68 <= i.Height <= 72 ?  3 :
                0
    }
    @collect DataFrame
end
17×3 DataFrames.DataFrame
│ Row │ Height │ Weight │ Class │
├─────┼────────┼────────┼───────┤
│ 1   │ 58     │ 115    │ 1     │
│ 2   │ 59     │ 117    │ 1     │
│ 3   │ 60     │ 120    │ 1     │
│ 4   │ 61     │ 123    │ 2     │
│ 5   │ 62     │ 126    │ 2     │
│ 6   │ 63     │ 129    │ 2     │
│ 7   │ 64     │ 132    │ 2     │
│ 8   │ 65     │ 135    │ 2     │
│ 9   │ 66     │ 139    │ 2     │
│ 10  │ 67     │ 142    │ 2     │
│ 11  │ 68     │ 146    │ 3     │
│ 12  │ 69     │ 150    │ 3     │
│ 13  │ 70     │ 154    │ 3     │
│ 14  │ 71     │ 159    │ 3     │
│ 15  │ 72     │ 164    │ 3     │
│ 16  │ NA     │ 1      │ NA    │
│ 17  │ NA     │ NA     │ NA    │