Java 找不到符号:变量TestUtils
标签:java
我已经编写了一个Java程序,可以分析基因表达(gene expression)的.soft文件数据并将其写入txt文件,但编译时出现“找不到符号:变量TestUtils”的错误。完整代码如下:
package il.ac.tau.cs.sw1.bioinformatics;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.math3.stat.inference.TestUtils;
/**
*
* Gene Expression Analyzer
*
* Command line arguments:
* args[0] - GeoDatasetName: Gene expression dataset name (expects a corresponding
input file in SOFT format to exist in the local directory).
* args[1] - Label1: Label of the first sample subset
* args[2] - Label2: Label of the second sample subset
* args[3] - Alpha: T-test significance level: only genes with a p-value below this
threshold will be written to the output file
*
* Execution example: GeneExpressionAnalyzer GDS4085 "estrogen receptor-negative" "estrogen receptor-positive" 0.01
*
* @author software1-2014
*
*/
public class GeneExpressionAnalyzer {

    /** Line terminator used in both output files. */
    private static final String LINE_END = "\r\n";

    /**
     * Program entry point: loads the SOFT file, writes the tabular dump and the list of
     * differentially expressed genes, and reports progress on standard output.
     *
     * @param args dataset name, first label, second label and alpha (see the class comment)
     * @throws IOException if the input file cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: GeneExpressionAnalyzer <GeoDatasetName> <Label1> <Label2> <Alpha>");
            return;
        }

        // Reads the dataset from a SOFT input file
        String inputSoftFileName = args[0] + ".soft";
        GeneExpressionDataset geneExpressionDataset = parseGeneExpressionFile(inputSoftFileName);
        System.out.printf("Gene expression dataset loaded from file %s. %n", inputSoftFileName);
        System.out.printf("Dataset contains %d samples and %d gene probes.%n%n",
                geneExpressionDataset.samplesNumber, geneExpressionDataset.genesNumber);

        // Writes the dataset to a tabular format
        String tabularFileName = args[0] + "-Tabular.txt";
        writeDatasetToTabularFile(geneExpressionDataset, tabularFileName);
        System.out.printf("Dataset saved to tabular file - %s.%n%n", tabularFileName);

        // Identifies differentially expressed genes between two sample groups and writes the
        // results to a text file
        String label1 = args[1];
        String label2 = args[2];
        double alpha = Double.parseDouble(args[3]);
        String diffGenesFileName = args[0] + "-DiffGenes.txt";
        int numOfDiffGenes = writeTopDifferentiallyExpressedGenesToFile(
                diffGenesFileName, geneExpressionDataset, alpha, label1, label2);
        System.out.printf("%d differentially expressed genes identified using alpha of %f when comparing the two sample groups [%s] and [%s].%n",
                numOfDiffGenes, alpha, label1, label2);
        System.out.printf("Results saved to file %s.%n", diffGenesFileName);
    }

    /** Parses every token as a float; a non-numeric token raises NumberFormatException. */
    private static float[] parseFloats(String[] tokens) {
        float[] values = new float[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            values[i] = Float.parseFloat(tokens[i]);
        }
        return values;
    }

    /** Renders a data row as {@code [v1, v2, ...]} with two decimals per value. */
    private static String formatRow(float[] row) {
        String[] cells = new String[row.length];
        for (int i = 0; i < row.length; i++) {
            cells[i] = String.format("%.2f", row[i]);
        }
        return Arrays.toString(cells);
    }

    /** Returns the trimmed text after the first '=' of a SOFT line, or "" when there is none. */
    private static String softValue(String line) {
        int separator = line.indexOf('=');
        return separator < 0 ? "" : line.substring(separator + 1).trim();
    }

    /** Reads the non-negative integer value of a SOFT header line such as a sample count. */
    private static int parseCount(String line) throws IOException {
        try {
            int count = Integer.parseInt(softValue(line));
            if (count < 0) {
                throw new IOException("Negative count in SOFT header line: " + line);
            }
            return count;
        } catch (NumberFormatException e) {
            throw new IOException("Malformed count in SOFT header line: " + line, e);
        }
    }

    /** Splits a tab-separated table line and checks that the two leading id columns exist. */
    private static String[] splitTableLine(String line) throws IOException {
        String[] columns = line.split("\t");
        if (columns.length < 2) {
            throw new IOException("Table line has fewer than two columns: " + line);
        }
        return columns;
    }

    /**
     * Maps each sample id to the description of the first subset that lists it.
     * A sample listed by no subset gets a {@code null} label.
     */
    private static String[] labelsFor(String[] sampleIds, List<String> subsetDescriptions,
            List<String[]> subsetSampleIds) {
        Map<String, String> labelBySample = new HashMap<String, String>();
        for (int s = 0; s < subsetSampleIds.size(); s++) {
            for (String sampleId : subsetSampleIds.get(s)) {
                String key = sampleId.trim();
                // Keep the first subset only: each sample carries exactly one label.
                if (!labelBySample.containsKey(key)) {
                    labelBySample.put(key, subsetDescriptions.get(s));
                }
            }
        }
        String[] labels = new String[sampleIds.length];
        for (int c = 0; c < sampleIds.length; c++) {
            labels[c] = labelBySample.get(sampleIds[c].trim());
        }
        return labels;
    }

    /**
     * Parses the given SOFT file.
     *
     * <p>The following lines are interpreted: {@code !dataset_sample_count},
     * {@code !dataset_feature_count}, {@code ^SUBSET}, {@code !subset_description},
     * {@code !subset_sample_id}, the {@code ID_REF} table header, the table rows that follow
     * it and {@code !dataset_table_end}. All other lines are ignored.
     *
     * @param filename A gene expression file in SOFT format
     * @return a GeneExpressionDataset object storing all data parsed from the input file
     * @throws IOException if the file cannot be read, has no data table, or its table does
     *         not match the declared feature count
     */
    public static GeneExpressionDataset parseGeneExpressionFile(String filename) throws IOException {
        GeneExpressionDataset dataset = new GeneExpressionDataset();
        // Parallel lists: entry s describes the s-th "^SUBSET" section of the file.
        List<String> subsetDescriptions = new ArrayList<String>();
        List<String[]> subsetSampleIds = new ArrayList<String[]>();
        boolean inTable = false;
        int row = 0;

        BufferedReader reader = new BufferedReader(new FileReader(filename));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (inTable) {
                    if (line.startsWith("!dataset_table_end")) {
                        inTable = false;
                    } else if (line.length() > 0) {
                        if (row >= dataset.genesNumber) {
                            throw new IOException("Data table of " + filename
                                    + " has more rows than the declared feature count "
                                    + dataset.genesNumber);
                        }
                        String[] columns = splitTableLine(line);
                        dataset.geneIds[row] = columns[0];
                        dataset.geneSymbols[row] = columns[1];
                        dataset.dataMatrix[row] =
                                parseFloats(Arrays.copyOfRange(columns, 2, columns.length));
                        row++;
                    }
                } else if (line.startsWith("!dataset_sample_count")) {
                    dataset.samplesNumber = parseCount(line);
                } else if (line.startsWith("!dataset_feature_count")) {
                    dataset.genesNumber = parseCount(line);
                } else if (line.startsWith("^SUBSET")) {
                    subsetDescriptions.add(null);
                    subsetSampleIds.add(new String[0]);
                } else if (line.startsWith("!subset_description")) {
                    if (!subsetDescriptions.isEmpty()) {
                        subsetDescriptions.set(subsetDescriptions.size() - 1, softValue(line));
                    }
                } else if (line.startsWith("!subset_sample_id")) {
                    if (!subsetSampleIds.isEmpty()) {
                        subsetSampleIds.set(subsetSampleIds.size() - 1, softValue(line).split(","));
                    }
                } else if (line.startsWith("ID_REF")) {
                    // Table header: two id columns followed by one column per sample.
                    String[] header = splitTableLine(line);
                    dataset.sampleIds = Arrays.copyOfRange(header, 2, header.length);
                    dataset.geneIds = new String[dataset.genesNumber];
                    dataset.geneSymbols = new String[dataset.genesNumber];
                    dataset.dataMatrix = new float[dataset.genesNumber][dataset.samplesNumber];
                    inTable = true;
                }
            }
        } finally {
            reader.close();
        }

        if (dataset.sampleIds == null) {
            throw new IOException("No data table (ID_REF header) found in " + filename);
        }
        if (row != dataset.genesNumber) {
            throw new IOException("Data table of " + filename + " has " + row
                    + " rows but the declared feature count is " + dataset.genesNumber);
        }
        dataset.labels = labelsFor(dataset.sampleIds, subsetDescriptions, subsetSampleIds);
        return dataset;
    }

    /**
     * Writes the dataset to a text file: one line with the sample labels, one line with the
     * sample ids, then one line per gene holding its id, its symbol and its values formatted
     * with two decimals.
     *
     * @param geneExpressionDataset the dataset to write
     * @param outputFilename path of the file to create or overwrite
     * @throws IOException if the file cannot be written
     */
    public static void writeDatasetToTabularFile(GeneExpressionDataset geneExpressionDataset,
            String outputFilename) throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename));
        try {
            String leadingTabs = "\t" + "\t" + "\t" + "\t";
            writer.write(leadingTabs + Arrays.toString(geneExpressionDataset.labels) + LINE_END
                    + leadingTabs + Arrays.toString(geneExpressionDataset.sampleIds) + LINE_END);
            for (int g = 0; g < geneExpressionDataset.genesNumber; g++) {
                writer.write(geneExpressionDataset.geneIds[g] + "\t"
                        + geneExpressionDataset.geneSymbols[g] + "\t"
                        + formatRow(geneExpressionDataset.dataMatrix[g]) + LINE_END);
            }
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            writer.close();
        }
    }

    /**
     * Runs a t-test per gene between the two labelled sample groups and writes the genes whose
     * p-value is below {@code alpha}, ordered by ascending p-value. Each output line holds a
     * zero-based rank, the p-value with six decimals, the gene id and the gene symbol,
     * separated by tabs.
     *
     * @param outputFilename path of the file to create or overwrite
     * @param geneExpressionDataset the dataset to analyse
     * @param alpha p-value threshold; only genes strictly below it are reported
     * @param label1 label of the first sample group
     * @param label2 label of the second sample group
     * @return numOfDiffGenes The number of differentially expressed genes detected, having
     *         p-value lower than alpha
     * @throws IOException if the file cannot be written
     */
    public static int writeTopDifferentiallyExpressedGenesToFile(String outputFilename,
            GeneExpressionDataset geneExpressionDataset, double alpha,
            String label1, String label2) throws IOException {
        int genes = geneExpressionDataset.genesNumber;
        final double[] pValues = new double[genes];
        Integer[] candidates = new Integer[genes];
        int counter = 0;
        for (int g = 0; g < genes; g++) {
            pValues[g] = calcTtest(geneExpressionDataset, g, label1, label2);
            // A NaN p-value fails this comparison and is therefore never reported.
            if (pValues[g] < alpha) {
                candidates[counter++] = g;
            }
        }

        // Only significant genes are ranked. The sort is stable, so genes with equal
        // p-values keep their original (ascending index) order.
        Integer[] ranked = Arrays.copyOf(candidates, counter);
        Arrays.sort(ranked, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Double.compare(pValues[left], pValues[right]);
            }
        });

        BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename));
        try {
            for (int rank = 0; rank < ranked.length; rank++) {
                int gene = ranked[rank];
                writer.write(String.valueOf(rank) + "\t" + String.format("%.6f", pValues[gene])
                        + "\t" + geneExpressionDataset.geneIds[gene]
                        + "\t" + geneExpressionDataset.geneSymbols[gene] + LINE_END);
            }
        } finally {
            writer.close();
        }
        return counter;
    }

    /**
     * Returns the entries in the 'data' array for which the corresponding entries in the
     * 'labels' array equals 'label'.
     *
     * @param data values of one gene, one entry per sample
     * @param labels label of each sample, parallel to {@code data}; entries may be null
     * @param label the label to select
     * @return the selected values in their original order; its length is exactly the number
     *         of matching samples (possibly zero)
     */
    public static double[] getDataEntriesForLabel(float[] data, String[] labels, String label) {
        double[] selected = new double[data.length];
        int counter = 0;
        for (int i = 0; i < data.length; i++) {
            // Unlabelled samples (null) never match instead of raising a NullPointerException.
            if (labels[i] != null && labels[i].equals(label)) {
                selected[counter++] = data[i];
            }
        }
        // Trim to the matches only; any padding element would enter the t-test as a 0.0 value.
        return Arrays.copyOf(selected, counter);
    }

    /**
     * calcTtest - returns a pValue for the t-Test
     *
     * Returns the p-value, associated with a two-sample, two-tailed t-test comparing the means
     * of the input arrays
     *
     * //http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/stat/inference/TTest.html#tTest(double[], double[])
     *
     * NOTE(review): the linked documentation requires at least two observations per group, so
     * a label that matches fewer than two samples presumably makes the library throw - confirm.
     *
     * @param geneExpressionDataset the dataset holding the data matrix and sample labels
     * @param geneIndex row of the gene in the data matrix
     * @param label1 label of the first sample group
     * @param label2 label of the second sample group
     * @return the p-value computed by {@code TestUtils.tTest}
     */
    private static double calcTtest(GeneExpressionDataset geneExpressionDataset, int geneIndex,
            String label1, String label2) {
        float[] geneRow = geneExpressionDataset.dataMatrix[geneIndex];
        double[] sample1 = getDataEntriesForLabel(geneRow, geneExpressionDataset.labels, label1);
        double[] sample2 = getDataEntriesForLabel(geneRow, geneExpressionDataset.labels, label2);
        return TestUtils.tTest(sample1, sample2);
    }

    /**
     *
     * GeneExpressionDataset
     * A class representing a gene expression dataset
     *
     * @author software1-2014
     *
     */
    public static class GeneExpressionDataset {
        public int samplesNumber; //number of dataset samples
        public int genesNumber; // number of dataset gene probes
        public String[] sampleIds; //sample ids
        public String[] geneIds; //gene probe ids
        public String[] geneSymbols; //gene symbols
        public float[][] dataMatrix; //expression data matrix
        public String[] labels; //sample labels
    }
}
包il.ac.tau.cs.sw1.生物信息学;
导入org.apache.commons.math3.stat.inference.TestUtils;
导入java.io.*;
导入java.util.array;
/**
*
*基因表达分析仪
*
*命令行参数:
*args[0]-地理数据集名称:基因表达式数据集名称(需要相应的
以软格式输入文件以存在于本地目录中)。
*args[1]-Label1:第一个样本子集的标签
*args[2]-Label2:第二个样本子集的标签
*args[3]-α:T检验置信水平:仅pValue低于此值的基因
阈值将打印到输出文件中
*
*执行示例:基因表达分析仪GDS4085“雌激素受体阴性”“雌激素受体阳性”0.01
*
*@author software1-2014
*
*/
公共类基因表达分析器{
公共静态void main(字符串args[])引发IOException{
//从软输入文件读取数据集
字符串inputSoftFileName=args[0]+“.soft”;
GeneExpressionDataset GeneExpressionDataset=parseGeneExpressionFile(inputSoftFileName);
System.out.printf(“从文件%s.%n加载的基因表达式数据集”,inputSoftFileName);
System.out.printf(“数据集包含%d个样本和%d个基因探针。%n%n”,geneExpressionDataset.samplesNumber,geneExpressionDataset.genesNumber);
//将数据集写入表格格式
字符串tablerfilename=args[0]+“-Tabular.txt”;
writeDatasetToTabularFile(geneExpressionDataset,tablerFileName);
System.out.printf(“数据集保存到表格文件-%s.%n%n”,表格文件名);
//识别两个样本组之间差异表达的基因,并将结果写入文本文件
字符串label1=args[1];
字符串label2=args[2];
double alpha=double.parseDouble(args[3]);
字符串diffGenesFileName=args[0]+“-DiffGenes.txt”;
int numOfDiffGenes=writetopdifferentiallyexpressednestofile(diffGenesFileName,geneExpressionDataset,alpha,label1,label2);
System.out.printf(“%d个差异表达基因,在比较两个样本组[%s]和[%s]时使用%f的α进行识别。%n”,numOfDiffGenes,α,label1,label2);
System.out.printf(“结果保存到文件%s.%n”,diffGenesFileName);
}
私有静态浮点[]StringtoFloat(字符串[]临时){
float[]数组=新的float[temp.length];
对于(int i=0;i
import org.apache.commons.math3.stat.inference.TestUtils;
import java.io.*;
import java.util.Arrays;
/** Minimal program that prints a fixed marker string to standard output. */
public class Test {

    /**
     * Writes the marker text without a trailing newline.
     *
     * @param args command line arguments (not used)
     * @throws IOException declared by the signature; nothing in the body raises it
     */
    public static void main(String[] args) throws IOException {
        final String marker = "test...";
        System.out.printf(marker);
    }
}
C:\temp\test>dir
Répertoire de C:\temp\test
24/04/2014 14:41 <REP> .
24/04/2014 14:41 <REP> ..
24/04/2014 14:38 1 692 782 commons-math3-3.2.jar
24/04/2014 14:41 230 Test.java
2 fichier(s) 1 693 012 octets
2 Rép(s) 23 170 342 912 octets libres
C:\temp\test>javac Test.java
Test.java:1: package org.apache.commons.math3.stat.inference does not exist
import org.apache.commons.math3.stat.inference.TestUtils;
^
1 error
C:\temp\test>javac -cp commons-math3-3.2.jar Test.java
C:\temp\test>dir
Répertoire de C:\temp\test
24/04/2014 14:41 <REP> .
24/04/2014 14:41 <REP> ..
24/04/2014 14:38 1 692 782 commons-math3-3.2.jar
24/04/2014 14:41 500 Test.class
24/04/2014 14:41 230 Test.java