Xml Rapidminer数据转置相当于R中的melt

Xml Rapidminer数据转置相当于R中的melt,xml,xpath,transpose,rapidminer,Xml,Xpath,Transpose,Rapidminer,我有一个Rapidminer进程,它从web API读取,使用读取XML处理响应,使用XPATH捕获XML中的一个元素。元素可以是任意数量的,结果属性是元素/text()的串联字符串 作为连接字符串的结果,我必须将字符串拆分为多个列,如下所示: ID Col1 Col2 Col3 Col4 Col5 Col6 A 1 5 7 8 B 2 C 4 D 3 9 10 11 12 13 我的最终目标是将其转换为以下格式: ID Ne

我有一个 RapidMiner 流程,它从 Web API 读取数据,使用 Read XML 操作符处理响应,并使用 XPath 捕获 XML 中的某个元素。该元素的数量不固定,得到的属性是由 元素/text() 的各个值拼接而成的字符串。

作为连接字符串的结果,我必须将字符串拆分为多个列,如下所示:

ID  Col1  Col2 Col3 Col4 Col5 Col6
A   1     5    7    8
B   2
C   4
D   3     9    10   11   12   13
我的最终目标是将其转换为以下格式:

ID  NewCol
A   1
A   5
A   7
A   8
B   2
C   4
D   3
D   9
D   10
D   11
D   12
D   13
两个问题:
1.是否可以将Read XML运算符配置为将数据读入多行而不是长串接字符串?

2.如果对1的回答为否定,是否有任何操作符可以执行上述“转置”任务(类似于R中的melt函数)

Read XML 操作符会在示例中创建新属性。也就是说,它生成的是新列,而不是新行。

没有单个操作符可以直接完成您需要的事情,但您可以创建一个流程来实现。我附上了一个示例流程。它相对复杂;如果有更多时间,我也许可以让它更高效。

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="6.0.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.008" expanded="true" name="Process">
    <process expanded="true">
      <!-- "make data": builds the wide sample table (attributes ID, Col1 .. Col6) that the
           rest of the process melts into long format. Four Generate Data by User
           Specification operators (one per ID value A, B, C, D) are appended, and the
           placeholder string "missing" is then declared as a missing value. -->
      <operator activated="true" class="subprocess" compatibility="6.0.008" expanded="true" height="76" name="make data" width="90" x="112" y="75">
        <process expanded="true">
          <!-- Sample data for ID "A": Col1..Col4 carry values, Col5/Col6 hold the "missing" placeholder. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="313" y="345">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;A&quot;"/>
              <parameter key="Col1" value="&quot;1&quot;"/>
              <parameter key="Col2" value="&quot;5&quot;"/>
              <parameter key="Col3" value="&quot;7&quot;"/>
              <parameter key="Col4" value="&quot;8&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Sample data for ID "B": only Col1 carries a value. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="313" y="435">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;B&quot;"/>
              <parameter key="Col1" value="&quot;2&quot;"/>
              <parameter key="Col2" value="&quot;missing&quot;"/>
              <parameter key="Col3" value="&quot;missing&quot;"/>
              <parameter key="Col4" value="&quot;missing&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Sample data for ID "C": only Col1 carries a value. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="313" y="525">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;C&quot;"/>
              <parameter key="Col1" value="&quot;4&quot;"/>
              <parameter key="Col2" value="&quot;missing&quot;"/>
              <parameter key="Col3" value="&quot;missing&quot;"/>
              <parameter key="Col4" value="&quot;missing&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Sample data for ID "D": all six columns carry values. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="313" y="615">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;D&quot;"/>
              <parameter key="Col1" value="&quot;3&quot;"/>
              <parameter key="Col2" value="&quot;9&quot;"/>
              <parameter key="Col3" value="&quot;10&quot;"/>
              <parameter key="Col4" value="&quot;11&quot;"/>
              <parameter key="Col5" value="&quot;12&quot;"/>
              <parameter key="Col6" value="&quot;13&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Combine the four generated example sets into one table. -->
          <operator activated="true" class="append" compatibility="6.0.008" expanded="true" height="130" name="Append" width="90" x="581" y="345"/>
          <!-- Turn the nominal placeholder "missing" into a real missing value, so that the
               no_missing_attributes filter inside Loop Examples can drop the empty cells. -->
          <operator activated="true" class="declare_missing_value" compatibility="6.0.008" expanded="true" height="76" name="Declare Missing Value" width="90" x="782" y="345">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="missing"/>
          </operator>
          <!-- Wiring: the four generators feed Append, then Declare Missing Value, then subprocess port "out 1". -->
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- "Loop Examples": runs the inner chain for the examples of the incoming set. Each pass
           isolates one row, transposes it, and delivers a long-format fragment on "output 1".
           The fragment presumably holds the attributes NewCol and ID; confirm by running.
           The macro %{example} is presumably the current iteration index set by this
           operator; verify against the Loop Examples documentation. -->
      <operator activated="true" class="loop_examples" compatibility="6.0.008" expanded="true" height="94" name="Loop Examples" width="90" x="246" y="75">
        <process expanded="true">
          <!-- Keep only the example at index %{example} (first_example equals last_example). -->
          <operator activated="true" class="filter_example_range" compatibility="6.0.008" expanded="true" height="76" name="Filter Example Range" width="90" x="112" y="255">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <!-- Transpose the single-row set so the former columns become rows. -->
          <operator activated="true" class="transpose" compatibility="6.0.008" expanded="true" height="76" name="Transpose" width="90" x="246" y="30"/>
          <!-- Store the data value of attribute att_1 at example index 1 in the macro "id".
               Presumably this is the ID value of the current row (the transposed ID cell);
               confirm against the Transpose output. -->
          <operator activated="true" class="extract_macro" compatibility="6.0.008" expanded="true" height="60" name="Extract Macro" width="90" x="246" y="120">
            <parameter key="macro" value="id"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="att_1"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- No parameters are set here, so operator defaults apply. Presumably the values of
               the first example become the attribute names (e.g. "ID" and the current ID
               value); TODO confirm the default row number. -->
          <operator activated="true" class="rename_by_example_values" compatibility="6.0.008" expanded="true" height="76" name="Rename by Example Values" width="90" x="246" y="210"/>
          <!-- Inverted subset selection: drop the attribute named "ID", keep everything else. -->
          <operator activated="true" class="select_attributes" compatibility="6.0.008" expanded="true" height="76" name="Select Attributes" width="90" x="380" y="30">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="ID"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Add a new attribute "ID" whose value is the quoted macro %{id} on every example. -->
          <operator activated="true" class="generate_attributes" compatibility="6.0.008" expanded="true" height="76" name="Generate Attributes" width="90" x="380" y="120">
            <list key="function_descriptions">
              <parameter key="ID" value="&quot;%{id}&quot;"/>
            </list>
          </operator>
          <!-- Rename the attribute whose name equals the value of macro "id" to "NewCol". -->
          <operator activated="true" class="rename" compatibility="6.0.008" expanded="true" height="76" name="Rename" width="90" x="380" y="210">
            <parameter key="old_name" value="%{id}"/>
            <parameter key="new_name" value="NewCol"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Apply the no_missing_attributes condition; this is intended to remove the rows
               that came from empty (missing) cells of the wide table. -->
          <operator activated="true" class="filter_examples" compatibility="6.0.008" expanded="true" height="94" name="Filter Examples" width="90" x="514" y="30">
            <parameter key="condition_class" value="no_missing_attributes"/>
            <list key="filters_list"/>
          </operator>
          <!-- Wiring: the "original" port of Filter Example Range is routed back to the loop's
               "example set" port; the filtered row runs through the chain to "output 1". -->
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="original" to_port="example set"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Rename by Example Values" to_port="example set input"/>
          <connect from_op="Rename by Example Values" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- "Append (2)": merges what Loop Examples delivers on "output 1" into one example set. -->
      <operator activated="true" class="append" compatibility="6.0.008" expanded="true" height="76" name="Append (2)" width="90" x="380" y="120"/>
      <!-- Top-level wiring: "make data" feeds Loop Examples. Result 1 is the "example set"
           output of Loop Examples; result 2 is the merged set from Append (2), i.e. the
           long-format table this process is built to produce. -->
      <connect from_op="make data" from_port="out 1" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append (2)" to_port="example set 1"/>
      <connect from_op="Append (2)" from_port="merged set" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>


希望您可以将其用作起点。

您可以在 RapidMiner 中使用 De-Pivot(反透视)操作符。详情请查看该操作符的文档。