Julia:DataFramesMeta转换
标签:dataframe、julia
我试图在Julia中重现以下R代码:
# Load dplyr for the %>% pipe, filter() and mutate().
library(dplyr)
# Append two extra rows to `women`: (NA, 1) and (NA, NA).
women_new <- rbind(women, c(NA, 1), c(NA, NA))
# Pipeline: drop rows whose height is NA, then add a `sector` column.
# `sector` starts as a character vector with one entry per row; each
# replace() then overwrites the entries whose height lies in the given
# inclusive range with the label "1", "2" or "3".
women_new %>%
filter(height %>% complete.cases) %>%
mutate(sector = character(n()),
sector = replace(sector, height >= 0 & height <= 60, "1"),
sector = replace(sector, height >= 61 & height <= 67, "2"),
sector = replace(sector, height >= 68 & height <= 72, "3"))
我的第一个问题是:有没有办法像在R中那样,直接在 vcat([[NA 1]; [NA NA]]) 中填入 1?如果我这样做,会返回以下错误:
MethodError: Cannot `convert` an object of type DataArrays.NAtype to an object of type Int64
This may have arisen from a call to the constructor Int64(...),
since type constructors fall back to convert methods.
in macro expansion at multidimensional.jl:431 [inlined]
in macro expansion at cartesian.jl:64 [inlined]
in macro expansion at multidimensional.jl:429 [inlined]
in _unsafe_batchsetindex!(::Array{Int64,2}, ::Base.Repeated{DataArrays.NAtype}, ::UnitRange{Int64}, ::UnitRange{Int64}) at multidimensional.jl:421
in setindex!(::Array{Int64,2}, ::DataArrays.NAtype, ::UnitRange{Int64}, ::UnitRange{Int64}) at abstractarray.jl:832
in cat_t(::Int64, ::Type{T}, ::DataArrays.NAtype, ::Vararg{Any,N}) at abstractarray.jl:1098
in hcat(::DataArrays.NAtype, ::Int64) at abstractarray.jl:1180
in include_string(::String, ::String) at loading.jl:441
in include_string(::String, ::String, ::Int64) at eval.jl:30
in include_string(::Module, ::String, ::String, ::Int64, ::Vararg{Int64,N}) at eval.jl:34
in (::Atom.##53#56{String,Int64,String})() at eval.jl:50
in withpath(::Atom.##53#56{String,Int64,String}, ::String) at utils.jl:30
in withpath(::Function, ::String) at eval.jl:38
in macro expansion at eval.jl:49 [inlined]
in (::Atom.##52#55{Dict{String,Any}})() at task.jl:60
我的第二个问题是:有没有办法将 DataArray 转换为 DataFrame?在这种情况下,列名会变成 X1、X2、…… 或 DataFrame 的任何默认名称,因为 DataArray 没有列名。我认为这比键入以下内容更简洁:
# Build the DataFrame by hand, naming the first two columns of `women` explicitly.
women_new = DataFrame(Height = women[:, 1], Weight = women[:, 2]);
我希望可以简单地执行 convert(DataFrame, women),然后再重命名列名。但这种转换并不奏效。下面是我对R中 transform / mutate 的尝试:
# First attempt at porting the R pipeline: drop rows with NA Height, then add
# a Sector column via nested `ifelse` calls.
# NOTE(review): in Julia `&` binds tighter than the comparison operators, so
# `:Height .>= 0 & :Height .<= 60` is parsed as the chained comparison
# `:Height .>= (0 & :Height) .<= 60`. Each comparison needs its own
# parentheses to express the intended range test.
@> begin
women_new
@where !isna(:Height)
@transform(
Sector = NA,
Sector = ifelse(:Height .>= 0 & :Height .<= 60, 1,
ifelse(:Height .>= 61 & :Height .<= 67, 2,
ifelse(:Height .>= 68 & :Height .<= 72, 3, NA)))
)
end
这并不等同于R,我还尝试了以下方法:
using DataFrames
using DataFramesMeta
using Lazy
using RDatasets
# Load the `women` dataset, convert it to a DataArray and append two all-NA rows.
# `@>` threads each expression into the next call as its first argument, so
# the order of the lines inside the block matters.
women = @> begin
"datasets"
dataset("women")
DataArray()
vcat([[NA NA]; [NA NA]])
end
# Wrap the two columns back into a DataFrame with explicit column names.
women_new = DataFrame(Height = women[:, 1], Weight = women[:, 2]);
# Set the Weight of row 16 to 1 (the output table in this document shows
# row 16 as the first appended row).
women_new[16, 2] = 1;
# Second attempt: same pipeline, using a chained ternary instead of nested `ifelse`.
# NOTE(review): `cond ? a : b` requires a single Bool condition, whereas the
# `.>=` / `.<=` comparisons here are element-wise over the column. The
# unparenthesized `&` also binds tighter than those comparisons.
@> begin
women_new
@where !isna(:Height)
@transform(
Sector = NA,
Sector = :Height .>= 0 & :Height .<= 60 ? 1 :
:Height .>= 61 & :Height .<= 67 ? 2 :
:Height .>= 68 & :Height .<= 72 ? 3 :
NA;
)
end
# Variant that keeps the NA rows: copy Height into Height_New with NA replaced
# by the sentinel -1, classify Height_New into Class, then drop the helper column.
@> begin
women_new
@transform(
Height_New = NA,
# -1 stands in for a missing Height in the comparisons below.
Height_New = ifelse(isna(:Height), -1, :Height))
@transform(
Class = NA,
# NOTE(review): `==` here is not the element-wise `.==`, unlike the dotted
# comparisons on the following lines — confirm this is intended.
Class = ifelse(:Height_New == -1, NA,
ifelse((:Height_New .>= 0) & (:Height_New .<= 60), 1,
ifelse((:Height_New .>= 61) & (:Height_New .<= 67), 2,
ifelse((:Height_New .>= 68) & (:Height_New .<= 72), 3, NA))))
)
# Under `@>` the frame produced above is threaded in as the first argument.
delete!(:Height_New)
end
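@> begin
women_new
@where !isna(:Height)
@transform(
Sector = NA,
Sector = :Height .>= 0 & :Height .<= 60 ? 1 :
:Height .>= 61 & :Height .<= 67 ? 2 :
:Height .>= 68 & :Height .<= 72 ? 3 :
NA;
)
end
我明白了。这与运算符优先级有关,我原以为不需要括号: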
using DataFrames
using DataFramesMeta
using Lazy
using RDatasets
# Load the `women` dataset from the "datasets" collection.
women = dataset("datasets", "women");
# Append two rows: (Height = NA, Weight = 1) and (Height = NA, Weight = NA).
women_new = vcat(
women,
DataFrame(Height = [NA; NA], Weight = @data [1; NA])
)
# Drop rows with NA Height, then bucket Height into Class 1/2/3 by inclusive
# range. Values outside every range fall through to NA.
@> begin
women_new
@where !isna(:Height)
@transform(
Class = NA,
# Each comparison is parenthesized because `&` binds tighter than `.>=` / `.<=`.
Class = ifelse((:Height .>= 0) & (:Height .<= 60), 1,
ifelse((:Height .>= 61) & (:Height .<= 67), 2,
ifelse((:Height .>= 68) & (:Height .<= 72), 3, NA)))
)
end
如果我们不想筛选NAs并处理完整的数据,那么我能做的最好的事情是:
using DataFrames
using DataFramesMeta
using Lazy
using RDatasets
# Load the `women` dataset, convert it to a DataArray and append two all-NA rows.
# `@>` threads each expression into the next call as its first argument, so
# the order of the lines inside the block matters.
women = @> begin
"datasets"
dataset("women")
DataArray()
vcat([[NA NA]; [NA NA]])
end
# Wrap the two columns back into a DataFrame with explicit column names.
women_new = DataFrame(Height = women[:, 1], Weight = women[:, 2]);
# Set the Weight of row 16 to 1 (the output table in this document shows
# row 16 as the first appended row).
women_new[16, 2] = 1;
# Same pipeline written with a chained ternary instead of nested `ifelse`.
# NOTE(review): `cond ? a : b` requires a single Bool condition, whereas the
# `.>=` / `.<=` comparisons here are element-wise over the column. The
# unparenthesized `&` also binds tighter than those comparisons.
@> begin
women_new
@where !isna(:Height)
@transform(
Sector = NA,
Sector = :Height .>= 0 & :Height .<= 60 ? 1 :
:Height .>= 61 & :Height .<= 67 ? 2 :
:Height .>= 68 & :Height .<= 72 ? 3 :
NA;
)
end
# Variant that keeps the NA rows: copy Height into Height_New with NA replaced
# by the sentinel -1, classify Height_New into Class, then drop the helper column.
@> begin
women_new
@transform(
Height_New = NA,
# -1 stands in for a missing Height in the comparisons below.
Height_New = ifelse(isna(:Height), -1, :Height))
@transform(
Class = NA,
# NOTE(review): `==` here is not the element-wise `.==`, unlike the dotted
# comparisons on the following lines — confirm this is intended.
Class = ifelse(:Height_New == -1, NA,
ifelse((:Height_New .>= 0) & (:Height_New .<= 60), 1,
ifelse((:Height_New .>= 61) & (:Height_New .<= 67), 2,
ifelse((:Height_New .>= 68) & (:Height_New .<= 72), 3, NA))))
)
# Under `@>` the frame produced above is threaded in as the first argument.
delete!(:Height_New)
end
在这种情况下,代码变得凌乱,因为目前还没有办法在 ifelse 的第一个参数中处理 NA。
如果你在这里没有得到答复,可以考虑到其他地方提问。我知道很多做数据的人经常这样做。但请务必说明这是一篇交叉发布(cross-post)的帖子。
15×3 DataFrames.DataFrame
│ Row │ Height │ Weight │ Class │
├─────┼────────┼────────┼───────┤
│ 1 │ 58 │ 115 │ 1 │
│ 2 │ 59 │ 117 │ 1 │
│ 3 │ 60 │ 120 │ 1 │
│ 4 │ 61 │ 123 │ 2 │
│ 5 │ 62 │ 126 │ 2 │
│ 6 │ 63 │ 129 │ 2 │
│ 7 │ 64 │ 132 │ 2 │
│ 8 │ 65 │ 135 │ 2 │
│ 9 │ 66 │ 139 │ 2 │
│ 10 │ 67 │ 142 │ 2 │
│ 11 │ 68 │ 146 │ 3 │
│ 12 │ 69 │ 150 │ 3 │
│ 13 │ 70 │ 154 │ 3 │
│ 14 │ 71 │ 159 │ 3 │
│ 15 │ 72 │ 164 │ 3 │
# Variant that keeps the NA rows: copy Height into Height_New with NA replaced
# by the sentinel -1, classify Height_New into Class, then drop the helper column.
@> begin
women_new
@transform(
Height_New = NA,
# -1 stands in for a missing Height in the comparisons below.
Height_New = ifelse(isna(:Height), -1, :Height))
@transform(
Class = NA,
# NOTE(review): `==` here is not the element-wise `.==`, unlike the dotted
# comparisons on the following lines — confirm this is intended.
Class = ifelse(:Height_New == -1, NA,
ifelse((:Height_New .>= 0) & (:Height_New .<= 60), 1,
ifelse((:Height_New .>= 61) & (:Height_New .<= 67), 2,
ifelse((:Height_New .>= 68) & (:Height_New .<= 72), 3, NA))))
)
# Under `@>` the frame produced above is threaded in as the first argument.
delete!(:Height_New)
end
# Add a Class column by classifying every Height value one element at a time.
# An NA height stays NA; heights in 0-60, 61-67 and 68-72 (inclusive) map to
# 1, 2 and 3; anything else becomes NA.
@> begin
    women_new
    @transform(
        Class = map(:Height) do h
            if isna(h)
                NA
            elseif 0 <= h <= 60
                1
            elseif 61 <= h <= 67
                2
            elseif 68 <= h <= 72
                3
            else
                NA
            end
        end
    )
end
# Query-style version: for every row `i` of `women_new`, keep Height and Weight
# and compute a `class` value from the inclusive Height ranges, collecting the
# result into a DataFrame.
# NOTE(review): rows matching none of the ranges get 0 here rather than NA, and
# the column is named `class` (lower-case) while the output table in this
# document shows `Class` — confirm which is intended.
# NOTE(review): `@from` / `@select` / `@collect` are presumably Query.jl macros;
# no matching `using` line is visible in this document.
@from i in women_new begin
@select {
i.Height, i.Weight,
class = 0 <= i.Height <= 60 ? 1 :
61 <= i.Height <= 67 ? 2 :
68 <= i.Height <= 72 ? 3 :
0
}
@collect DataFrame
end
17×3 DataFrames.DataFrame
│ Row │ Height │ Weight │ Class │
├─────┼────────┼────────┼───────┤
│ 1 │ 58 │ 115 │ 1 │
│ 2 │ 59 │ 117 │ 1 │
│ 3 │ 60 │ 120 │ 1 │
│ 4 │ 61 │ 123 │ 2 │
│ 5 │ 62 │ 126 │ 2 │
│ 6 │ 63 │ 129 │ 2 │
│ 7 │ 64 │ 132 │ 2 │
│ 8 │ 65 │ 135 │ 2 │
│ 9 │ 66 │ 139 │ 2 │
│ 10 │ 67 │ 142 │ 2 │
│ 11 │ 68 │ 146 │ 3 │
│ 12 │ 69 │ 150 │ 3 │
│ 13 │ 70 │ 154 │ 3 │
│ 14 │ 71 │ 159 │ 3 │
│ 15 │ 72 │ 164 │ 3 │
│ 16 │ NA │ 1 │ NA │
│ 17 │ NA │ NA │ NA │