Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/296.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 如何在Accord.Net中同步训练和测试码本_C#_Tree_Random Forest_Accord.net - Fatal编程技术网

C# 如何在Accord.Net中同步训练和测试码本

C# 如何在Accord.Net中同步训练和测试码本,c#,tree,random-forest,accord.net,C#,Tree,Random Forest,Accord.net,问题:是否有一个随机林示例将火车和测试集分开?我在Accord Net ML测试项目中发现的当前示例使用非常相同的数据进行培训和测试 显然,我遇到的问题是在测试集和列车集之间同步生成的标签(INT)。我正在生成列车标签,如下所示: int[] trainOutputs = trainCodebook.Translate("Output", trainLabels); And the test labels similarly: int[] testOutputs = testCodebook

问题:是否有将训练集和测试集分开的随机森林示例?我在 Accord.NET 机器学习测试项目中找到的现有示例使用完全相同的数据进行训练和测试。

显然,我遇到的问题是如何让测试集和训练集生成的标签(int)保持同步。我按如下方式生成训练标签:

int[] trainOutputs = trainCodebook.Translate("Output", trainLabels);

And the test labels similarly:

int[] testOutputs = testCodebook.Translate("Output", testLabels);

Finally I train with the train data and test with the test data:

var forest = teacher.Learn(trainVectors, trainOutputs);

int[] predicted = forest.Decide(testVectors);
除非训练集和测试集的前三行相同,否则两者生成的标签编码不一致,从而导致非常高的错误率。

我尝试用三个类别字符串手动创建码本:

new Codification("-1","0","1");
不幸的是,这会产生一个运行时错误,提示给定的键不在字典中。我确信有办法让两个独立的码本以相同的方式生成键。如果我把包含全部三个键的三行训练数据添加到测试数据的顶部,下面的代码就可以正常工作,但这不是我想要的解决方案 ;=)

以下是我正在运行的整个测试:

 /// <summary>
 ///   Trains a random forest on the training set and checks that its
 ///   zero-one error on a separate test set stays below 20%.
 /// </summary>
 /// <remarks>
 ///   The test labels are encoded with the codebook that was built from the
 ///   training labels. Building a second codebook from the test labels can map
 ///   the same category to a different integer (the codes depend on the data the
 ///   codebook was created from), which makes the expected outputs and the
 ///   forest's predictions incomparable and inflates the error rate.
 /// </remarks>
 [Test]
 public void test_learn()
 {
     // Fixed seed so the forest (and therefore the error rate) is reproducible.
     Accord.Math.Random.Generator.Seed = 1;

     /////////// TRAINING SET ///////////
     // Load the training set into a jagged array of text cells (one row per line).
     string[][] trainText = Resources.train.Split(new[] { "\r\n" },
         StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

     // Every column except the first one is an input feature.
     int[] featureColumns = new int[trainText[0].Length - 1];
     for (int i = 0; i < featureColumns.Length; i++)
     {
         featureColumns[i] = i + 1;
     }

     double[][] trainVectors = trainText.GetColumns(featureColumns).To<double[][]>();

     // The first column contains the expected ternary category (i.e. -1, 0, or 1).
     string[] trainLabels = trainText.GetColumn(0);

     // Single codebook shared by the training set, the test set and the
     // reverse translation of the predictions.
     var codebook = new Codification("Output", trainLabels);
     int[] trainOutputs = codebook.Translate("Output", trainLabels);

     ////////// TEST SET ////////////
     string[][] testText = Resources.test.Split(new[] { "\r\n" },
         StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

     double[][] testVectors = testText.GetColumns(featureColumns).To<double[][]>();
     string[] testLabels = testText.GetColumn(0);

     // Reuse the TRAINING codebook so each category keeps the integer code the
     // forest was trained on.
     // NOTE(review): a test label that never occurs in the training data is
     // presumably rejected here (key not found) - confirm that is acceptable.
     int[] testOutputs = codebook.Translate("Output", testLabels);

     var teacher = new RandomForestLearning()
     {
         NumberOfTrees = 10,
     };

     var forest = teacher.Learn(trainVectors, trainOutputs);
     int[] predicted = forest.Decide(testVectors);

     for (int i = 0; i < predicted.Length; i++)
     {
         // Translate the integer prediction back to its original label text.
         Console.WriteLine("Prediction " + (i + 1) + ": "
             + codebook.Translate("Output", predicted[i]));
     }

     // Error rate on the held-out test vectors; the predictions computed above
     // are reused instead of running the forest a second time.
     double error = new ZeroOneLoss(testOutputs).Loss(predicted);

     Console.WriteLine("Error term is " + error);

     Assert.IsTrue(error < 0.20); // humble expectations ;-)
 }
[测试]
公共无效测试_learn()
{
Accord.Math.Random.Generator.Seed=1;
///////////训练集///////////
//首先,让我们将训练集加载到可以处理的文本数组中
字符串[][]text=Resources.train.Split(新[]{“\r\n”},
StringSplitOptions.RemoveEmptyEntries.Apply(x=>x.Split(',');
int length=文本[0]。长度;
列表列=新列表();
for(int i=1;ix.Split(',');
double[]testVectors=text.GetColumns(columns.ToArray()).To();
string[]testLabels=text.GetColumn(0);
var testCodebook=新编码(“输出”,testLabels);
int[]testOutputs=testCodebook.Translate(“输出”,testLabels);
var teacher=new RandomForestLearning()
{
NumberOfTrees=10,
};
var forest=教师学习(培训向量、培训输出);
int[]predicted=forest.decise(testVectors);
int lineNum=1;
foreach(预测中的整数预测)
{
Console.WriteLine(“预测”+lineNum+”:“
+trainCodebook.Translate(“输出”,预测));
lineNum++;
}
//我用测试向量来计算错误率
双重错误=新的ZeroOneLoss(testOutputs).Loss(forest.Decise(testVectors));
Console.WriteLine(“错误项为”+错误);
Assert.IsTrue(错误<0.20);//谦逊的期望;-)
}

好吧,我想出来了。请参阅下面的代码:

好吧,我想我可以修复它。问题在于 DecisionTree 中序列化的实现有误。幸运的是,我们有源代码——修复后的代码见下文:

namespace Accord.MachineLearning.DecisionTrees
{
  using System;
  using System.Collections.Generic;
  using System.Linq;
  using System.Text;
  using System.Threading.Tasks;
  using System.Data;
  using System.Runtime.Serialization;
  using System.Runtime.Serialization.Formatters.Binary;
  using System.IO;
  using Accord.Statistics.Filters;
  using Accord.Math;
  using AForge;
  using Accord.Statistics;
  using System.Threading;


/// <summary>
///   Random Forest.
/// </summary>
/// 
/// <remarks>
/// <para>
///   Represents a random forest of <see cref="DecisionTree"/>s. For 
///   sample usage and example of learning, please see the documentation
///   page for <see cref="RandomForestLearning"/>.</para>
/// <para>
///   The parallelization options are kept in a field marked
///   <see cref="NonSerializedAttribute"/> and are re-created by a
///   deserialization callback, so they are not written to the serialized
///   stream.</para>
/// </remarks>
/// 
/// <seealso cref="DecisionTree"/>
/// <seealso cref="RandomForestLearning"/>
/// 
[Serializable]
public class RandomForest : MulticlassClassifierBase, IParallel
{
    private DecisionTree[] trees;

    // Excluded from serialization; restored in OnDeserializingMethod.
    [NonSerialized]
    private ParallelOptions parallelOptions;


    /// <summary>
    ///   Gets the trees in the random forest.
    /// </summary>
    /// 
    public DecisionTree[] Trees
    {
        get { return trees; }
    }

    /// <summary>
    ///   Gets the number of classes that can be recognized
    ///   by this random forest.
    /// </summary>
    /// 
    [Obsolete("Please use NumberOfOutputs instead.")]
    public int Classes { get { return NumberOfOutputs; } }

    /// <summary>
    ///   Gets or sets the parallelization options for this algorithm.
    /// </summary>
    ///
    public ParallelOptions ParallelOptions
    {
        get { return parallelOptions; }
        set { parallelOptions = value; }
    }

    /// <summary>
    /// Gets or sets a cancellation token that can be used
    /// to cancel the algorithm while it is running.
    /// </summary>
    /// 
    public CancellationToken Token
    {
        get { return ParallelOptions.CancellationToken; }
        set { ParallelOptions.CancellationToken = value; }
    }

    /// <summary>
    ///   Creates a new random forest.
    /// </summary>
    /// 
    /// <param name="trees">The number of trees in the forest.</param>
    /// <param name="classes">The number of classes in the classification problem.</param>
    /// 
    public RandomForest(int trees, int classes)
    {
        this.trees = new DecisionTree[trees];
        this.NumberOfOutputs = classes;
        this.ParallelOptions = new ParallelOptions();
    }

    /// <summary>
    ///   Computes the decision output for a given input vector.
    /// </summary>
    /// 
    /// <param name="data">The input vector.</param>
    /// 
    /// <returns>The forest decision for the given vector.</returns>
    /// 
    [Obsolete("Please use Decide() instead.")]
    public int Compute(double[] data)
    {
        return Decide(data);
    }


    /// <summary>
    /// Computes a class-label decision for a given <paramref name="input" />.
    /// </summary>
    /// <param name="input">The input vector that should be classified into
    /// one of the <see cref="ITransform.NumberOfOutputs" /> possible classes.</param>
    /// <returns>A class-label that best described <paramref name="input" /> according
    /// to this classifier.</returns>
    public override int Decide(double[] input)
    {
        // One vote counter per class; each tree adds a vote for the class it picks.
        int[] responses = new int[NumberOfOutputs];
        Parallel.For(0, trees.Length, ParallelOptions, i =>
        {
            int j = trees[i].Decide(input);

            // Trees are evaluated concurrently, so the counter is updated atomically.
            Interlocked.Increment(ref responses[j]);
        });

        // The class with the most votes wins.
        return responses.ArgMax();
    }

    /// <summary>
    ///   Deserialization callback that re-creates the non-serialized
    ///   <see cref="ParallelOptions"/> with default settings, so that
    ///   <see cref="Decide(double[])"/> and <see cref="Token"/> have a
    ///   non-null instance to work with on a deserialized forest.
    /// </summary>
    /// 
    /// <param name="context">The streaming context supplied by the serializer (unused).</param>
    /// 
    [OnDeserializing()]
    internal void OnDeserializingMethod(StreamingContext context)
    {
        this.ParallelOptions = new ParallelOptions();
    }
}
}
namespace Accord.MachineLearning.DecisionTrees
{
使用制度;
使用System.Collections.Generic;
使用System.Linq;
使用系统文本;
使用System.Threading.Tasks;
使用系统数据;
使用System.Runtime.Serialization;
使用System.Runtime.Serialization.Formatters.Binary;
使用System.IO;
使用Accord.Statistics.Filters;
使用Accord.Math;
使用冲锋枪;
采用一致性统计;
使用系统线程;
/// 
///随机森林。
/// 
/// 
/// 
/// 
///表示s.的随机林
///示例用法和学习示例,请参阅文档
///第页,共页。
/// 
/// 
/// 
/// 
/// 
[可序列化]
公共类RandomForest:多类分类数据库,IParallel
{
私有决策树[]树;
**[非串行化]
私人平行期权**
/// 
///获取随机林中的树。
/// 
/// 
公共决策树
{
获取{返回树;}
}
/// 
///获取可以识别的类的数目
///在这片随机的森林旁。
/// 
/// 
[过时(“请改用NumberOfOutputs。”)]
公共int类{get{returnnumberofoutputs;}}
/// 
///获取或设置此算法的并行化选项。
/// 
///
**公共ParallelOptions ParallelOptions{get{return ParallelOptions;}set{ParallelOptions=value;}}**
/// 
///获取或设置可使用的取消令牌
///在算法运行时取消该算法。
/// 
/// 
公共取消令牌
{
获取{return ParallelOptions.CancellationToken;}
设置{ParallelOptions.CancellationToken=value;}
}
/// 
///创建一个新的随机林。
/// 
/// 
///森林中树木的数量。
///分类问题中的类数。
/// 
公共林(int树,int类)
{
this.trees=新决策树[树];
this.NumberOfOutputs=类;
this.ParallelOptions=新的ParallelOptions();
}
/// 
///计算给定输入向量的决策输出。
/// 
/// 
///输入向量。
/// 
///给定向量的森林决策。
/// 
[过时(“请改用decise())]
公共整数计算(双[]数据)
{
返回决定(数据);
}
/// 
///计算给定对象的类标签决策。
/// 
///应分类为的输入向量
///一个可能的类。
///最好根据以下内容描述的类标签:
///这个分类器。
公共覆盖整数决定(双[]输入)
{
int[]responses=新的int[NumberOfOutputs];
Parallel.For(0,trees.Length,ParallelOptions,i)