Process RapidMiner:存储(模型)操作符在此流程中的连接位置/方式

Process RapidMiner:存储(模型)操作符在此流程中的连接位置/方式,process,rapidminer,Process,Rapidminer,我在RapidMiner中创建了一个流程流,它利用了一些循环。我不确定我的Store Model操作符应该连接到哪里,以便将通过此流程导出的模型参数保存到新流程中 随附的示例用一些示例数据替换了我的数据,但是过程的其余部分是我的实际数据集 <?xml version="1.0" encoding="UTF-8" standalone="no"?> <process version="5.3.012"> <context> <input/>

我在RapidMiner中创建了一个流程,其中用到了一些循环(嵌套)操作符。我不确定Store Model操作符应该连接到哪里,才能把此流程得到的模型参数保存下来,供新的流程使用

随附的示例用一些示例数据替换了我的数据,但流程的其余部分与我在实际数据集上使用的完全相同

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.3.012).
  Main chain: Retrieve Sonar, Numerical to Binominal, Set Role, Normalize,
  Nominal to Numerical (2), Replace Missing Values, ICA, Forward Selection.
  Forward Selection wraps a cross-validation that trains and scores Naive Bayes.
  No operator in this process stores a model; the process results are the
  five ports wired to "result 1" .. "result 5" near the end of the file.
-->
<process version="5.3.012">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.012" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads the example set from the repository entry //Samples/data/Sonar. -->
      <operator activated="true" class="retrieve" compatibility="5.3.012" expanded="true" height="60" name="Retrieve Sonar" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <!-- Restricted to the single attribute "20_OV_COVER".
           NOTE(review): this name looks like a leftover from the author's own data;
           confirm that the Sonar sample actually contains such an attribute. -->
      <operator activated="true" class="numerical_to_binominal" compatibility="5.3.012" expanded="true" height="76" name="Numerical to Binominal" width="90" x="179" y="30">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="20_OV_COVER"/>
      </operator>
      <!-- Gives the attribute named "class" the role "label". -->
      <operator activated="true" class="set_role" compatibility="5.3.012" expanded="true" height="76" name="Set Role" width="90" x="45" y="120">
        <parameter key="attribute_name" value="class"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Normalize, Nominal to Numerical (2) and Replace Missing Values set no explicit
           parameters here (only empty lists), so they run with the operator defaults. -->
      <operator activated="true" class="normalize" compatibility="5.3.012" expanded="true" height="94" name="Normalize" width="90" x="179" y="120"/>
      <operator activated="true" class="nominal_to_numerical" compatibility="5.3.012" expanded="true" height="94" name="Nominal to Numerical (2)" width="90" x="45" y="210">
        <list key="comparison_groups"/>
      </operator>
      <operator activated="true" class="replace_missing_values" compatibility="5.3.012" expanded="true" height="94" name="Replace Missing Values" width="90" x="179" y="210">
        <list key="columns"/>
      </operator>
      <!-- Independent component analysis with number_of_components = 700.
           NOTE(review): presumably sized for the author's real data; verify the value
           against the number of attributes of the example set fed in here. -->
      <operator activated="true" class="independent_component_analysis" compatibility="5.3.012" expanded="true" height="94" name="ICA" width="90" x="313" y="210">
        <parameter key="number_of_components" value="700"/>
      </operator>
      <!-- Forward attribute selection: at most 100 attributes, 10 speculative rounds.
           The nested process below supplies the performance value for each candidate. -->
      <operator activated="true" class="optimize_selection_forward" compatibility="5.3.012" expanded="true" height="94" name="Forward Selection" width="90" x="514" y="75">
        <parameter key="maximal_number_of_attributes" value="100"/>
        <parameter key="speculative_rounds" value="10"/>
        <process expanded="true">
          <!-- Cross-validation with number_of_validations = 5. -->
          <operator activated="true" class="x_validation" compatibility="5.3.012" expanded="true" height="112" name="Validation" width="90" x="112" y="30">
            <parameter key="number_of_validations" value="5"/>
            <!-- First sub-process: Naive Bayes is trained on the "training" port and its
                 model is handed to the "model" sink. -->
            <process expanded="true">
              <operator activated="true" class="naive_bayes" compatibility="5.3.012" expanded="true" height="76" name="Naive Bayes" width="90" x="112" y="30"/>
              <connect from_port="training" to_op="Naive Bayes" to_port="training set"/>
              <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Second sub-process: the model is applied to the "test set" port and the
                 labelled result is scored by Performance, which feeds "averagable 1". -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="5.3.012" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance" compatibility="5.3.012" expanded="true" height="76" name="Performance" width="90" x="276" y="30"/>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!-- Only the Validation "averagable 1" output leaves this sub-process (as
               "performance"); no model port of Validation is connected here. -->
          <connect from_port="example set" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
        </process>
      </operator>
      <!-- Wiring of the main preprocessing chain, in the order the operators are applied. -->
      <connect from_op="Retrieve Sonar" from_port="output" to_op="Numerical to Binominal" to_port="example set input"/>
      <connect from_op="Numerical to Binominal" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Nominal to Numerical (2)" to_port="example set input"/>
      <connect from_op="Nominal to Numerical (2)" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_op="ICA" to_port="example set input"/>
      <connect from_op="ICA" from_port="example set output" to_op="Forward Selection" to_port="example set"/>
      <!-- Process results: 1 = ICA original data, 2 = ICA preprocessing model,
           3 = Forward Selection example set, 4 = attribute weights, 5 = performance. -->
      <connect from_op="ICA" from_port="original" to_port="result 1"/>
      <connect from_op="ICA" from_port="preprocessing model" to_port="result 2"/>
      <connect from_op="Forward Selection" from_port="example set" to_port="result 3"/>
      <connect from_op="Forward Selection" from_port="attribute weights" to_port="result 4"/>
      <connect from_op="Forward Selection" from_port="performance" to_port="result 5"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="18"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
    </process>
  </operator>
</process>

正向选择(Forward Selection)操作符会输出一组属性权重,利用这些权重可以选出该操作符找到的、性能最佳的那些属性。因此,首先要做的是把这些权重交给“按权重选择”(Select by Weights)操作符,从而得到用于构建模型的示例集

在此基础上,您可以直接使用这个示例集,在正向选择操作符之外重新构建模型。如果您还想得到模型在未见过的数据上的性能估计,可以对全部数据使用一个验证块;如果不需要,则只需使用建模操作符(例如 Naive Bayes)即可创建所需的模型

当我这样尝试时,得到的性能估计与正向选择操作符给出的性能不同,因为随机数种子不同,验证块中的数据划分也就不同。同样,正向选择内部的验证块给出的是基于10个数据分区构建的10个模型的平均性能。这10个模型可能各不相同,因此并不存在唯一的一个“真正”模型可供保存

希望有帮助

问候


Andrew

如果你在RapidMiner社区论坛上问这个问题,会更合适,因为这本质上是一个应用程序使用问题,而不是编程问题。我注意到这个问题也在StatExchange上交叉发布了。也许原提问者可以帮忙做一些整理工作。