Xml Rapidminer数据转置相当于R中的melt
我有一个 RapidMiner 流程,它从 Web API 读取数据,使用 Read XML 操作符处理响应,并通过 XPath 捕获 XML 中的某个元素。该元素的数量不固定,得到的属性是各个元素 text() 串联而成的一个字符串。由于结果是串联字符串,我必须把它拆分成多列。(标签:xml, xpath, transpose, rapidminer)拆分后的数据如下所示:
ID Col1 Col2 Col3 Col4 Col5 Col6
A 1 5 7 8
B 2
C 4
D 3 9 10 11 12 13
我的最终目标是将其转换为以下格式:
ID NewCol
A 1
A 5
A 7
A 8
B 2
C 4
D 3
D 9
D 10
D 11
D 12
D 13
两个问题:1.是否可以将Read XML运算符配置为将数据读入多行而不是长串接字符串?
2.如果问题 1 的答案是否定的,是否有任何操作符可以执行上述“转置”任务(类似于 R 中的 melt 函数)?
Read XML 操作符会在样本(example)中创建新的属性。也就是说,它生成的是新列而不是新行。
没有任何单个操作符可以直接完成您需要的事情,但您可以创建一个流程来实现。我附上了一个示例流程。它相对复杂,如果有更多时间,我或许可以让它更高效。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 6.0.008) that reshapes a wide table
  (ID, Col1..Col6) into a long table (NewCol, ID), one row per non-missing cell.

  Outline:
    1. "make data" builds a four-row sample table and declares the nominal
       value "missing" as a missing value.
    2. "Loop Examples" handles one row per iteration: it isolates the row,
       transposes it, and emits an example set with the attributes NewCol and ID.
    3. "Append (2)" merges the per-row results into one example set.

  Expression parameters are string literals, so each value carries its own
  double quotes. Inside an XML attribute these must be written as &quot;
  otherwise the document is not well-formed.
-->
<process version="6.0.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.008" expanded="true" name="Process">
    <process expanded="true">

      <!-- Step 1: sample data. Replace this subprocess with the real Read XML output. -->
      <operator activated="true" class="subprocess" compatibility="6.0.008" expanded="true" height="76" name="make data" width="90" x="112" y="75">
        <process expanded="true">
          <!-- Row A: 1 5 7 8 -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="313" y="345">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;A&quot;"/>
              <parameter key="Col1" value="&quot;1&quot;"/>
              <parameter key="Col2" value="&quot;5&quot;"/>
              <parameter key="Col3" value="&quot;7&quot;"/>
              <parameter key="Col4" value="&quot;8&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Row B: 2 -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="313" y="435">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;B&quot;"/>
              <parameter key="Col1" value="&quot;2&quot;"/>
              <parameter key="Col2" value="&quot;missing&quot;"/>
              <parameter key="Col3" value="&quot;missing&quot;"/>
              <parameter key="Col4" value="&quot;missing&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Row C: 4 -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="313" y="525">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;C&quot;"/>
              <parameter key="Col1" value="&quot;4&quot;"/>
              <parameter key="Col2" value="&quot;missing&quot;"/>
              <parameter key="Col3" value="&quot;missing&quot;"/>
              <parameter key="Col4" value="&quot;missing&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Row D: 3 9 10 11 12 13 -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="313" y="615">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;D&quot;"/>
              <parameter key="Col1" value="&quot;3&quot;"/>
              <parameter key="Col2" value="&quot;9&quot;"/>
              <parameter key="Col3" value="&quot;10&quot;"/>
              <parameter key="Col4" value="&quot;11&quot;"/>
              <parameter key="Col5" value="&quot;12&quot;"/>
              <parameter key="Col6" value="&quot;13&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Stack the four single-row sets into one table. -->
          <operator activated="true" class="append" compatibility="6.0.008" expanded="true" height="130" name="Append" width="90" x="581" y="345"/>
          <!-- Turn the placeholder string "missing" into a real missing value so it can be filtered out later. -->
          <operator activated="true" class="declare_missing_value" compatibility="6.0.008" expanded="true" height="76" name="Declare Missing Value" width="90" x="782" y="345">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="missing"/>
          </operator>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 2: reshape one input row per iteration. -->
      <operator activated="true" class="loop_examples" compatibility="6.0.008" expanded="true" height="94" name="Loop Examples" width="90" x="246" y="75">
        <process expanded="true">
          <!-- Keep only the row whose index equals the macro %{example}. -->
          <operator activated="true" class="filter_example_range" compatibility="6.0.008" expanded="true" height="76" name="Filter Example Range" width="90" x="112" y="255">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <!-- Transpose the single row so its cells become rows of one value column. -->
          <operator activated="true" class="transpose" compatibility="6.0.008" expanded="true" height="76" name="Transpose" width="90" x="246" y="30"/>
          <!-- Store the value of att_1 in example 1 in the macro "id"; with ID as the first column this is the row's ID. -->
          <operator activated="true" class="extract_macro" compatibility="6.0.008" expanded="true" height="60" name="Extract Macro" width="90" x="246" y="120">
            <parameter key="macro" value="id"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="att_1"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Use the values of an example as attribute names (default parameters). -->
          <operator activated="true" class="rename_by_example_values" compatibility="6.0.008" expanded="true" height="76" name="Rename by Example Values" width="90" x="246" y="210"/>
          <!-- Inverted selection: drop the attribute named ID, special attributes included. -->
          <operator activated="true" class="select_attributes" compatibility="6.0.008" expanded="true" height="76" name="Select Attributes" width="90" x="380" y="30">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="ID"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Add a new ID attribute holding the value of the macro %{id} as a string. -->
          <operator activated="true" class="generate_attributes" compatibility="6.0.008" expanded="true" height="76" name="Generate Attributes" width="90" x="380" y="120">
            <list key="function_descriptions">
              <parameter key="ID" value="&quot;%{id}&quot;"/>
            </list>
          </operator>
          <!-- Rename the attribute whose name is the value of %{id} to NewCol. -->
          <operator activated="true" class="rename" compatibility="6.0.008" expanded="true" height="76" name="Rename" width="90" x="380" y="210">
            <parameter key="old_name" value="%{id}"/>
            <parameter key="new_name" value="NewCol"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Discard rows that came from empty (missing) cells. -->
          <operator activated="true" class="filter_examples" compatibility="6.0.008" expanded="true" height="94" name="Filter Examples" width="90" x="514" y="30">
            <parameter key="condition_class" value="no_missing_attributes"/>
            <list key="filters_list"/>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="original" to_port="example set"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Rename by Example Values" to_port="example set input"/>
          <connect from_op="Rename by Example Values" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: merge the per-row results delivered on "output 1" into one table. -->
      <operator activated="true" class="append" compatibility="6.0.008" expanded="true" height="76" name="Append (2)" width="90" x="380" y="120"/>
      <connect from_op="make data" from_port="out 1" to_op="Loop Examples" to_port="example set"/>
      <!-- result 1: the original wide table; result 2: the reshaped long table. -->
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append (2)" to_port="example set 1"/>
      <connect from_op="Append (2)" from_port="merged set" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
希望您可以将其用作起点。
另外,您也可以在 RapidMiner 中使用 De-Pivot(逆透视)操作符来完成这种转换,具体用法请查看该操作符的文档。