Machine learning id3决策树程序

Machine learning id3决策树程序,machine-learning,id3,decision-tree,Machine Learning,Id3,Decision Tree,你能不能检查一下这个程序在netbeans中运行时没有任何错误。这个程序的输出是什么。我正在尝试运行这个程序,但是我得到的输出只是读取我给定的输入文件中的数据。arff文件。我得到的输出是这样的:读取0数据 最后一行是:@attribute残疾婴儿{String,n,y} 需要2个属性 import java.io.*; import java.util.*; public class ID3 { int numAttributes; String []attributeNames;

能否帮我检查一下这个程序?它在 NetBeans 中运行时没有报任何错误。这个程序的输出应该是什么?我正在尝试运行它,但得到的输出只是它从我提供的输入文件(.arff 文件)中读到的内容。我得到的输出如下:Read 0 data;Last line read: @attribute 残疾婴儿 {String,n,y};Expecting 2 attributes

import java.io.*;

import java.util.*;

/**
 * ID3 decision-tree learner for symbolic (categorical) data.
 *
 * <p>Input is a plain-text file of whitespace-separated tokens. The first line
 * that is neither blank nor a {@code //} comment supplies the attribute names;
 * every following content line is one data row with exactly that many tokens.
 * The last attribute is treated as the output (class) attribute. The parser only
 * splits lines into tokens, so files in other layouts (for example ARFF headers)
 * are rejected by the token-count check rather than interpreted.
 *
 * <p>The resulting tree is printed to standard output as nested
 * {@code if / else} pseudo-code.
 */
public class ID3 {

    /** Number of attributes per data row, including the output attribute. */
    int numAttributes;

    /** Attribute names read from the header line of the data file. */
    String[] attributeNames;

    /**
     * For each attribute, the distinct symbols seen so far. A symbol's position
     * in its vector is the integer code stored in {@link DataPoint#attributes}.
     */
    Vector<String>[] domains;

    /** A data point: one integer symbol code per attribute. */
    class DataPoint {

        /** Symbol codes, indexed by attribute number. */
        public int[] attributes;

        public DataPoint(int numattributes) {
            attributes = new int[numattributes];
        }
    }

    /** A node in the decomposition (decision) tree. */
    class TreeNode {

        /** Entropy of the output attribute over {@link #data}, set by decomposeNode. */
        public double entropy;

        /** Data points that reached this node; set to null once the node has been split. */
        public Vector<DataPoint> data;

        /** Attribute this node was split on; meaningful only when children is non-null. */
        public int decompositionAttribute;

        /** Value code of the parent's split attribute that leads to this node. */
        public int decompositionValue;

        /** One child per value of the split attribute, or null for a leaf. */
        public TreeNode[] children;

        /** Parent node, or null for the root. */
        public TreeNode parent;

        public TreeNode() {
            data = new Vector<>();
        }
    }

    /** Root of the tree; readData appends every parsed data point to its data vector. */
    TreeNode root = new TreeNode();

    /**
     * Returns the integer code of {@code symbol} within the domain of
     * {@code attribute}, registering the symbol first if it is new.
     *
     * @param attribute attribute index
     * @param symbol    token read from the data file
     * @return index of the symbol in {@code domains[attribute]}
     */
    public int getSymbolValue(int attribute, String symbol) {
        Vector<String> domain = domains[attribute];
        int index = domain.indexOf(symbol);
        if (index < 0) {
            domain.addElement(symbol);
            return domain.size() - 1;
        }
        return index;
    }

    /**
     * Returns the distinct value codes that {@code attribute} takes in
     * {@code data}, in order of first appearance.
     *
     * @param data      data points to scan
     * @param attribute attribute index
     * @return distinct value codes; empty when {@code data} is empty
     */
    public int[] getAllValues(Vector<DataPoint> data, int attribute) {
        int domainSize = domains[attribute].size();
        boolean[] seen = new boolean[domainSize];
        int[] distinct = new int[domainSize];
        int found = 0;
        for (DataPoint point : data) {
            int value = point.attributes[attribute];
            if (!seen[value]) {
                seen[value] = true;
                distinct[found++] = value;
            }
        }
        return Arrays.copyOf(distinct, found);
    }

    /**
     * Returns the data points whose {@code attribute} has the code {@code value}.
     *
     * @param data      data points to filter
     * @param attribute attribute index
     * @param value     value code to match
     * @return a new vector holding the matching points, in their original order
     */
    public Vector<DataPoint> getSubset(Vector<DataPoint> data, int attribute, int value) {
        Vector<DataPoint> subset = new Vector<>();
        for (DataPoint point : data) {
            if (point.attributes[attribute] == value) {
                subset.addElement(point);
            }
        }
        return subset;
    }

    /**
     * Computes the entropy of the output attribute (the last attribute) over
     * {@code data}, using the natural logarithm.
     *
     * @param data data points to measure
     * @return the entropy, or 0 when {@code data} is empty
     */
    public double calculateEntropy(Vector<DataPoint> data) {
        int numdata = data.size();
        if (numdata == 0) {
            return 0;
        }
        int outputAttribute = numAttributes - 1;
        int[] counts = new int[domains[outputAttribute].size()];
        // Single pass over the data instead of one pass per output value.
        for (DataPoint point : data) {
            int value = point.attributes[outputAttribute];
            if (value >= 0 && value < counts.length) {
                counts[value]++;
            }
        }
        double sum = 0;
        for (int count : counts) {
            if (count > 0) {
                double probability = 1. * count / numdata;
                sum += -probability * Math.log(probability);
            }
        }
        return sum;
    }

    /**
     * Reports whether {@code attribute} was already used as the split attribute
     * by {@code node} or by any of its ancestors.
     *
     * @param node      node at which to start walking up the tree
     * @param attribute attribute index to look for
     * @return true if some node on the path to the root was split on it
     */
    public boolean alreadyUsedToDecompose(TreeNode node, int attribute) {
        for (TreeNode current = node; current != null; current = current.parent) {
            // decompositionAttribute is only meaningful once the node has children.
            if (current.children != null && current.decompositionAttribute == attribute) {
                return true;
            }
        }
        return false;
    }

    /**
     * Recursively splits {@code node} on the unused input attribute that gives
     * the lowest weighted average entropy of the resulting subsets. A node is
     * left as a leaf when its entropy is 0 or no unused attribute remains.
     * After a split the node's own data vector is released.
     *
     * @param node node to decompose; its data must be non-null
     */
    public void decomposeNode(TreeNode node) {
        int numdata = node.data.size();
        int numinputattributes = numAttributes - 1;

        node.entropy = calculateEntropy(node.data);
        if (node.entropy == 0) {
            return;
        }

        boolean selected = false;
        int selectedAttribute = 0;
        double bestEntropy = 0;

        for (int i = 0; i < numinputattributes; i++) {
            if (alreadyUsedToDecompose(node, i)) {
                continue;
            }
            int numvalues = domains[i].size();
            double averageentropy = 0;
            for (int j = 0; j < numvalues; j++) {
                Vector<DataPoint> subset = getSubset(node.data, i, j);
                if (subset.size() == 0) {
                    continue;
                }
                averageentropy += calculateEntropy(subset) * subset.size();
            }
            // Taking the weighted average
            averageentropy = averageentropy / numdata;

            // The first candidate always wins; later ones must be strictly better.
            if (!selected || averageentropy < bestEntropy) {
                selected = true;
                bestEntropy = averageentropy;
                selectedAttribute = i;
            }
        }

        if (!selected) {
            return;
        }

        int numvalues = domains[selectedAttribute].size();
        node.decompositionAttribute = selectedAttribute;
        node.children = new TreeNode[numvalues];
        for (int j = 0; j < numvalues; j++) {
            TreeNode child = new TreeNode();
            child.parent = node;
            child.data = getSubset(node.data, selectedAttribute, j);
            child.decompositionValue = j;
            node.children[j] = child;
        }
        for (int j = 0; j < numvalues; j++) {
            decomposeNode(node.children[j]);
        }

        // The points now live in the children; drop this node's reference.
        node.data = null;
    }

    /**
     * Reads the data file, filling {@code numAttributes}, {@code attributeNames},
     * {@code domains} and the root node's data vector.
     *
     * <p>Problems with the file's content are reported on standard error and
     * signalled by the return value; the reader is closed on every path.
     *
     * @param filename path of the data file
     * @return 1 on success, 0 if the file could not be opened or is malformed
     * @throws Exception if an I/O error occurs while reading
     */
    public int readData(String filename) throws Exception {
        FileInputStream in;
        try {
            in = new FileInputStream(new File(filename));
        } catch (Exception e) {
            System.err.println("Unable to open data file: " + filename + "\n" + e);
            return 0;
        }

        try (BufferedReader bin = new BufferedReader(new InputStreamReader(in))) {
            String input = nextContentLine(bin);
            if (input == null) {
                System.err.println("No data found in the data file: " + filename + "\n");
                return 0;
            }

            StringTokenizer tokenizer = new StringTokenizer(input);
            numAttributes = tokenizer.countTokens();
            if (numAttributes <= 1) {
                System.err.println("Read line: " + input);
                System.err.println("Could not obtain the names of attributes in the line");
                System.err.println(
                        "Expecting at least one input attribute and one output attribute");
                return 0;
            }

            // Java cannot create a generic array directly, hence the raw creation.
            @SuppressWarnings({"unchecked", "rawtypes"})
            Vector<String>[] symbolTables = new Vector[numAttributes];
            for (int i = 0; i < numAttributes; i++) {
                symbolTables[i] = new Vector<>();
            }
            domains = symbolTables;

            attributeNames = new String[numAttributes];
            for (int i = 0; i < numAttributes; i++) {
                attributeNames[i] = tokenizer.nextToken();
            }

            while ((input = nextContentLine(bin)) != null) {
                tokenizer = new StringTokenizer(input);
                if (tokenizer.countTokens() != numAttributes) {
                    System.err.println("Read " + root.data.size() + " data");
                    System.err.println("Last line read: " + input);
                    System.err.println("Expecting " + numAttributes + " attributes");
                    return 0;
                }
                DataPoint point = new DataPoint(numAttributes);
                for (int i = 0; i < numAttributes; i++) {
                    point.attributes[i] = getSymbolValue(i, tokenizer.nextToken());
                }
                root.data.addElement(point);
            }
            return 1;
        }
    }

    /**
     * Returns the next line that is neither a {@code //} comment nor blank
     * (empty or whitespace-only), or null at end of input.
     */
    private static String nextContentLine(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith("//") || line.trim().isEmpty()) {
                continue;
            }
            return line;
        }
        return null;
    }

    /**
     * Prints the subtree rooted at {@code node} to standard output as nested
     * {@code if / else} pseudo-code. A leaf prints the output value, or the set
     * of output values when its data points disagree.
     *
     * @param node subtree root
     * @param tab  indentation prefix for this nesting level
     */
    public void printTree(TreeNode node, String tab) {
        int outputattr = numAttributes - 1;

        if (node.children == null) {
            int[] values = getAllValues(node.data, outputattr);
            if (values.length == 1) {
                System.out.println(tab + "\t" + attributeNames[outputattr] + " = \""
                        + domains[outputattr].elementAt(values[0]) + "\";");
                return;
            }
            System.out.print(tab + "\t" + attributeNames[outputattr] + " = {");
            for (int i = 0; i < values.length; i++) {
                System.out.print("\"" + domains[outputattr].elementAt(values[i]) + "\" ");
                if (i != values.length - 1) {
                    System.out.print(" , ");
                }
            }
            System.out.println(" };");
            return;
        }

        int numvalues = node.children.length;
        for (int i = 0; i < numvalues; i++) {
            System.out.println(tab + "if( "
                    + attributeNames[node.decompositionAttribute] + " == \""
                    + domains[node.decompositionAttribute].elementAt(i)
                    + "\") {");
            printTree(node.children[i], tab + "\t");
            if (i != numvalues - 1) {
                System.out.print(tab + "} else ");
            } else {
                System.out.println(tab + "}");
            }
        }
    }

    /** Builds the decision tree from the data already read and prints it. */
    public void createDecisionTree() {
        decomposeNode(root);
        printTree(root, "");
    }

    /**
     * Entry point. Reads the data file named by the first command-line argument,
     * or {@code c:\in.txt} when no argument is given, then builds and prints the
     * decision tree.
     *
     * @param args optional: path of the data file
     * @throws Exception if an I/O error occurs while reading the data file
     */
    public static void main(String[] args) throws Exception {
        String filename = (args != null && args.length > 0) ? args[0] : "c:\\in.txt";
        ID3 me = new ID3();
        int status = me.readData(filename);
        if (status <= 0) {
            return;
        }
        me.createDecisionTree();
    }
}
import java.io.*;
导入java.util.*;
公共类ID3
{
int numAttributes;
字符串[]属性名称;
向量[]域;
/*表示由NuMattAttribute值组成的数据点的类
属性*/
类数据点{
公共int[]属性;
公共数据点(int numattributes){
属性=新的int[numattributes];
}
};
/*表示分解树中的节点的类。
*/
三烯类{
公共双熵;
公共矢量数据;
公共属性;
公共价值;
公共TreeNode[]儿童;
公共树节点父节点;
公共树节点(){
数据=新向量();
}
};
TreeNode根=新的TreeNode();
公共int getSymbolValue(int属性,字符串符号){
int index=domains[attribute].indexOf(符号);
如果(指数<0){
域[属性]。添加元素(符号);
返回域[attribute].size()-1;
}
收益指数;
}
公共int[]getAllValues(向量数据,int属性){
向量值=新向量();
int num=data.size();
for(int i=0;i0)和+=-概率*数学日志(概率);
}
回报金额;
}
公共布尔值alreadyUsedToDecompose(TreeNode节点,int属性){
if(node.children!=null){
if(node.decompositionAttribute==属性)
返回true;
}
如果(node.parent==null)返回false;
返回alreadyUsedToDecompose(node.parent,属性);
}
公共void decomposeNode(TreeNode节点){
双倍熵=0;
所选布尔值=false;
int selectedAttribute=0;
int numdata=node.data.size();
int numinputtributes=numAttributes-1;
node.entropy=计算属性(node.data);
如果(node.entropy==0)返回;
对于(int i=0;iif(numAttributes输出将是一个树。但是,您需要向其传递一个至少具有2个属性的文件。

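程序的输出是一棵决策树。但是,您需要向它传入一个至少包含 2 个属性的数据文件:第一行(非注释、非空行)是以空白分隔的属性名,其后每一行是一条数据,且词数必须与属性数相同。该程序只按空白切分每一行,并不解析 .arff 格式,所以您的文件在第一条不符合该格式的行处就被拒绝了。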