2014-04-24 59 views
0

我寫了一個Java程序,用來分析基因表達數據的.soft文件,並把結果寫成TXT文件,但編譯時報錯「找不到符號:變量TestUtils」:

package il.ac.tau.cs.sw1.bioinformatics; 
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.math3.stat.inference.TestUtils;
/**
 * Gene Expression Analyzer.
 *
 * <p>Loads a gene expression dataset from a SOFT file, dumps it to a text file and reports the
 * gene probes whose expression differs between two labelled groups of samples.
 *
 * <p>Command line arguments:
 * <ul>
 * <li>args[0] - GeoDatasetName: gene expression dataset name (a corresponding input file in
 *     SOFT format, named {@code <GeoDatasetName>.soft}, must exist in the local directory).</li>
 * <li>args[1] - Label1: label of the first sample subset.</li>
 * <li>args[2] - Label2: label of the second sample subset.</li>
 * <li>args[3] - Alpha: t-test threshold; only genes with a p-value below it are written to the
 *     output file.</li>
 * </ul>
 *
 * <p>Execution example:
 * {@code GeneExpressionAnalyzer GDS4085 "estrogen receptor-negative" "estrogen receptor-positive" 0.01}
 *
 * @author software1-2014
 */
public class GeneExpressionAnalyzer {

    /** Shown when the command line is incomplete or malformed. */
    private static final String USAGE =
            "Usage: GeneExpressionAnalyzer <GeoDatasetName> <Label1> <Label2> <Alpha>";

    // Line prefixes of the SOFT entries this parser reacts to.
    private static final String SAMPLE_COUNT_KEY = "!dataset_sample_count";
    private static final String FEATURE_COUNT_KEY = "!dataset_feature_count";
    private static final String SUBSET_START_KEY = "^SUBSET";
    private static final String SUBSET_DESCRIPTION_KEY = "!subset_description";
    private static final String SUBSET_SAMPLE_IDS_KEY = "!subset_sample_id";
    private static final String TABLE_BEGIN_KEY = "!dataset_table_begin";
    private static final String TABLE_END_KEY = "!dataset_table_end";
    private static final String TABLE_HEADER_KEY = "ID_REF";

    /** Number of leading table columns (probe id, gene symbol) that precede the sample values. */
    private static final int ID_COLUMNS = 2;

    /**
     * Program entry point; see the class description for the expected arguments.
     *
     * @param args command line arguments
     * @throws IOException if the input file cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4) {
            System.err.println(USAGE);
            System.exit(1);
            return;
        }
        // Validate alpha before doing any work so a typo does not surface after the long parse.
        double alpha;
        try {
            alpha = Double.parseDouble(args[3]);
        } catch (NumberFormatException e) {
            System.err.println("Alpha must be a number, got: " + args[3]);
            System.err.println(USAGE);
            System.exit(1);
            return;
        }

        // Reads the dataset from a SOFT input file
        String inputSoftFileName = args[0] + ".soft";
        GeneExpressionDataset geneExpressionDataset = parseGeneExpressionFile(inputSoftFileName);
        System.out.printf("Gene expression dataset loaded from file %s. %n", inputSoftFileName);
        System.out.printf("Dataset contains %d samples and %d gene probes.%n%n",
                geneExpressionDataset.samplesNumber, geneExpressionDataset.genesNumber);

        // Writes the dataset to a tabular format
        String tabularFileName = args[0] + "-Tabular.txt";
        writeDatasetToTabularFile(geneExpressionDataset, tabularFileName);
        System.out.printf("Dataset saved to tabular file - %s.%n%n", tabularFileName);

        // Identifies differentially expressed genes between two sample groups and writes the
        // results to a text file
        String label1 = args[1];
        String label2 = args[2];
        String diffGenesFileName = args[0] + "-DiffGenes.txt";
        int numOfDiffGenes = writeTopDifferentiallyExpressedGenesToFile(
                diffGenesFileName, geneExpressionDataset, alpha, label1, label2);
        System.out.printf("%d differentially expressed genes identified using alpha of %f when comparing the two sample groups [%s] and [%s].%n",
                numOfDiffGenes, alpha, label1, label2);
        System.out.printf("Results saved to file %s.%n", diffGenesFileName);
    }

    /**
     * Parses the given SOFT file.
     *
     * <p>The parser picks up the declared sample and gene counts, every subset (its description
     * and the sample ids it lists) and the data table found between the table-begin and
     * table-end markers. Each sample is labelled with the description of the first subset that
     * lists it; a sample that no subset lists gets a {@code null} label.
     *
     * @param filename a gene expression file in SOFT format
     * @return a GeneExpressionDataset object storing all data parsed from the input file
     * @throws IOException if the file cannot be read, has no data table header, contains a
     *         malformed count, row or value, or its table disagrees with the declared counts
     */
    public static GeneExpressionDataset parseGeneExpressionFile(String filename) throws IOException {
        int declaredSamples = -1; // -1 means "not declared in the file"
        int declaredGenes = -1;
        String[] sampleIds = null;
        List<String> geneIds = new ArrayList<String>();
        List<String> geneSymbols = new ArrayList<String>();
        List<float[]> rows = new ArrayList<float[]>();
        Map<String, String> labelBySample = new HashMap<String, String>();

        // A subset's description and sample list arrive on separate lines; both are held here
        // until the subset is complete so that their relative order in the file does not matter.
        String pendingDescription = null;
        String[] pendingSampleIds = null;
        boolean inTable = false;

        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (inTable) {
                    if (line.startsWith(TABLE_END_KEY)) {
                        inTable = false;
                    } else if (sampleIds == null) {
                        // Anything before the header row is ignored.
                        if (line.startsWith(TABLE_HEADER_KEY)) {
                            sampleIds = parseHeader(line, lineNumber);
                        }
                    } else if (!line.trim().isEmpty()) {
                        // limit -1 keeps trailing empty cells so the column count stays honest
                        String[] cells = line.split("\t", -1);
                        if (cells.length != sampleIds.length + ID_COLUMNS) {
                            throw new IOException("Line " + lineNumber + ": expected "
                                    + (sampleIds.length + ID_COLUMNS) + " columns but found "
                                    + cells.length);
                        }
                        geneIds.add(cells[0]);
                        geneSymbols.add(cells[1]);
                        rows.add(parseFloats(cells, ID_COLUMNS, lineNumber));
                    }
                } else if (line.startsWith(SAMPLE_COUNT_KEY)) {
                    declaredSamples = parseCount(line, lineNumber);
                } else if (line.startsWith(FEATURE_COUNT_KEY)) {
                    declaredGenes = parseCount(line, lineNumber);
                } else if (line.startsWith(SUBSET_START_KEY)) {
                    // A new subset begins: commit whatever was collected for the previous one.
                    recordSubset(labelBySample, pendingDescription, pendingSampleIds);
                    pendingDescription = null;
                    pendingSampleIds = null;
                } else if (line.startsWith(SUBSET_DESCRIPTION_KEY)) {
                    pendingDescription = valueAfterEquals(line);
                } else if (line.startsWith(SUBSET_SAMPLE_IDS_KEY)) {
                    pendingSampleIds = valueAfterEquals(line).split(",");
                } else if (line.startsWith(TABLE_BEGIN_KEY)) {
                    inTable = true;
                }
            }
        }
        recordSubset(labelBySample, pendingDescription, pendingSampleIds);

        if (sampleIds == null) {
            throw new IOException("No data table header (" + TABLE_HEADER_KEY + " ...) found in "
                    + filename);
        }
        if (declaredSamples >= 0 && declaredSamples != sampleIds.length) {
            throw new IOException(filename + " declares " + declaredSamples
                    + " samples but its table has " + sampleIds.length + " sample columns");
        }
        if (declaredGenes >= 0 && declaredGenes != rows.size()) {
            throw new IOException(filename + " declares " + declaredGenes
                    + " gene probes but its table has " + rows.size() + " rows");
        }

        String[] labels = new String[sampleIds.length];
        for (int i = 0; i < sampleIds.length; i++) {
            labels[i] = labelBySample.get(sampleIds[i].trim());
        }

        // Sizes come from the parsed table, so every array below agrees with the two counters.
        GeneExpressionDataset dataset = new GeneExpressionDataset();
        dataset.samplesNumber = sampleIds.length;
        dataset.genesNumber = rows.size();
        dataset.sampleIds = sampleIds;
        dataset.geneIds = geneIds.toArray(new String[geneIds.size()]);
        dataset.geneSymbols = geneSymbols.toArray(new String[geneSymbols.size()]);
        dataset.dataMatrix = rows.toArray(new float[rows.size()][]);
        dataset.labels = labels;
        return dataset;
    }

    /**
     * Writes the dataset to a text file: one line with the sample labels, one line with the
     * sample ids, then one line per gene probe (id, symbol, values with two decimals).
     *
     * <p>NOTE(review): the label, sample-id and value lists are emitted in
     * {@link Arrays#toString(Object[])} form ("[a, b, c]") rather than as tab-separated cells;
     * the format is kept unchanged here - confirm it is the intended one.
     *
     * @param geneExpressionDataset the dataset to write
     * @param outputFilename path of the file to create or overwrite
     * @throws IOException if the file cannot be written
     */
    public static void writeDatasetToTabularFile(GeneExpressionDataset geneExpressionDataset,
            String outputFilename) throws IOException {
        String headerIndent = "\t" + "\t" + "\t" + "\t";
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
            writer.write(headerIndent + Arrays.toString(geneExpressionDataset.labels) + "\r\n"
                    + headerIndent + Arrays.toString(geneExpressionDataset.sampleIds) + "\r\n");
            for (int gene = 0; gene < geneExpressionDataset.genesNumber; gene++) {
                writer.write(geneExpressionDataset.geneIds[gene] + "\t"
                        + geneExpressionDataset.geneSymbols[gene] + "\t"
                        + formatRow(geneExpressionDataset.dataMatrix[gene]) + "\r\n");
            }
        }
    }

    /**
     * Runs a t-test per gene between the samples labelled {@code label1} and those labelled
     * {@code label2}, and writes the genes whose p-value is below {@code alpha} to the output
     * file in ascending p-value order (ties keep gene order). Each line holds a 0-based rank,
     * the p-value with six decimals, the gene id and the gene symbol, separated by tabs.
     *
     * @param outputFilename path of the file to create or overwrite
     * @param geneExpressionDataset the dataset to analyse
     * @param alpha p-value threshold (exclusive)
     * @param label1 label of the first sample group
     * @param label2 label of the second sample group
     * @return numOfDiffGenes The number of differentially expressed genes detected, having
     *         p-value lower than alpha
     * @throws IOException if the file cannot be written
     * @throws IllegalArgumentException if the dataset has genes but either label matches fewer
     *         than two samples
     */
    public static int writeTopDifferentiallyExpressedGenesToFile(String outputFilename,
            GeneExpressionDataset geneExpressionDataset, double alpha,
            String label1, String label2) throws IOException {
        if (geneExpressionDataset.genesNumber > 0) {
            // Fail with a readable message (and before creating the output file) when a label
            // is mistyped; the t-test itself rejects groups with fewer than two observations.
            requireTestableGroup(geneExpressionDataset.labels, label1);
            requireTestableGroup(geneExpressionDataset.labels, label2);
        }

        RankedGene[] significant = new RankedGene[geneExpressionDataset.genesNumber];
        int numOfDiffGenes = 0;
        for (int gene = 0; gene < geneExpressionDataset.genesNumber; gene++) {
            double pValue = calcTtest(geneExpressionDataset, gene, label1, label2);
            if (pValue < alpha) {
                significant[numOfDiffGenes++] = new RankedGene(gene, pValue);
            }
        }
        // Object sorting is stable, so equal p-values stay in gene order.
        Arrays.sort(significant, 0, numOfDiffGenes);

        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
            for (int rank = 0; rank < numOfDiffGenes; rank++) {
                RankedGene hit = significant[rank];
                writer.write(String.valueOf(rank) + "\t"
                        + String.format("%.6f", hit.pValue) + "\t"
                        + geneExpressionDataset.geneIds[hit.index] + "\t"
                        + geneExpressionDataset.geneSymbols[hit.index] + "\r\n");
            }
        }
        return numOfDiffGenes;
    }

    /**
     * Returns the entries in the 'data' array for which the corresponding entries in the
     * 'labels' array equals 'label'.
     *
     * @param data values of one gene, one per sample
     * @param labels sample labels, parallel to {@code data}; entries may be {@code null}
     * @param label the label to select
     * @return the selected values, in their original order, in an array of exactly that many
     *         elements (empty when nothing matches)
     */
    public static double[] getDataEntriesForLabel(float[] data, String[] labels, String label) {
        double[] selected = new double[data.length];
        int count = 0;
        for (int i = 0; i < data.length; i++) {
            // label.equals(...) rather than labels[i].equals(...): unlabelled samples are null.
            if (label != null && label.equals(labels[i])) {
                selected[count++] = data[i];
            }
        }
        return Arrays.copyOf(selected, count);
    }

    /**
     * calcTtest - returns a pValue for the t-Test.
     *
     * <p>Returns the p-value associated with a two-sample, two-tailed t-test comparing the means
     * of the values of one gene in the two labelled groups.
     *
     * <p>See http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/stat/inference/TTest.html
     *
     * @param geneExpressionDataset the dataset holding the values and the sample labels
     * @param geneIndex row of the gene in the data matrix
     * @param label1 label of the first sample group
     * @param label2 label of the second sample group
     * @return the p-value reported by {@code TestUtils.tTest}
     */
    private static double calcTtest(GeneExpressionDataset geneExpressionDataset, int geneIndex,
            String label1, String label2) {
        float[] geneValues = geneExpressionDataset.dataMatrix[geneIndex];
        double[] sample1 = getDataEntriesForLabel(geneValues, geneExpressionDataset.labels, label1);
        double[] sample2 = getDataEntriesForLabel(geneValues, geneExpressionDataset.labels, label2);
        return TestUtils.tTest(sample1, sample2);
    }

    /** Throws unless at least two entries of {@code labels} equal {@code label}. */
    private static void requireTestableGroup(String[] labels, String label) {
        int members = 0;
        for (String current : labels) {
            if (label != null && label.equals(current)) {
                members++;
            }
        }
        if (members < 2) {
            throw new IllegalArgumentException("Label [" + label + "] matches " + members
                    + " sample(s); the t-test needs at least 2 samples per group.");
        }
    }

    /** Returns the trimmed text after the first '=' of a SOFT entry, or "" if there is none. */
    private static String valueAfterEquals(String line) {
        int separator = line.indexOf('=');
        return separator < 0 ? "" : line.substring(separator + 1).trim();
    }

    /** Parses the integer value of a "key = value" entry, reporting the line on failure. */
    private static int parseCount(String line, int lineNumber) throws IOException {
        String value = valueAfterEquals(line);
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new IOException("Line " + lineNumber + ": expected an integer count but found \""
                    + value + "\"", e);
        }
    }

    /** Extracts the sample ids (every column after the two id columns) from the header row. */
    private static String[] parseHeader(String line, int lineNumber) throws IOException {
        String[] header = line.split("\t", -1);
        if (header.length < ID_COLUMNS) {
            throw new IOException("Line " + lineNumber + ": table header has " + header.length
                    + " column(s); at least " + ID_COLUMNS + " are required");
        }
        return Arrays.copyOfRange(header, ID_COLUMNS, header.length);
    }

    /** Converts {@code cells[from..]} to floats, reporting the line of any non-numeric cell. */
    private static float[] parseFloats(String[] cells, int from, int lineNumber) throws IOException {
        float[] values = new float[cells.length - from];
        for (int i = 0; i < values.length; i++) {
            try {
                values[i] = Float.parseFloat(cells[from + i]);
            } catch (NumberFormatException e) {
                throw new IOException("Line " + lineNumber + ": \"" + cells[from + i]
                        + "\" is not a numeric expression value", e);
            }
        }
        return values;
    }

    /**
     * Labels every listed sample with the subset description, unless an earlier subset already
     * labelled it. Does nothing while the subset is incomplete (either argument is null).
     */
    private static void recordSubset(Map<String, String> labelBySample, String description,
            String[] sampleIds) {
        if (description == null || sampleIds == null) {
            return;
        }
        for (String rawId : sampleIds) {
            String id = rawId.trim();
            if (!id.isEmpty() && !labelBySample.containsKey(id)) {
                labelBySample.put(id, description);
            }
        }
    }

    /** Formats the values with two decimals and joins them in Arrays.toString form. */
    private static String formatRow(float[] values) {
        String[] cells = new String[values.length];
        for (int i = 0; i < values.length; i++) {
            cells[i] = String.format("%.2f", values[i]);
        }
        return Arrays.toString(cells);
    }

    /** A gene that passed the threshold, ordered by ascending p-value. */
    private static final class RankedGene implements Comparable<RankedGene> {
        final int index;
        final double pValue;

        RankedGene(int index, double pValue) {
            this.index = index;
            this.pValue = pValue;
        }

        @Override
        public int compareTo(RankedGene other) {
            return Double.compare(pValue, other.pValue);
        }
    }

    /**
     * GeneExpressionDataset - a class representing a gene expression dataset.
     *
     * <p>{@code sampleIds} and {@code labels} are parallel arrays with one entry per sample;
     * {@code geneIds}, {@code geneSymbols} and the rows of {@code dataMatrix} are parallel
     * arrays with one entry per gene probe.
     *
     * @author software1-2014
     */
    public static class GeneExpressionDataset {

        public int samplesNumber; // number of dataset samples
        public int genesNumber; // number of dataset gene probes

        public String[] sampleIds; // sample ids
        public String[] geneIds; // gene probe ids
        public String[] geneSymbols; // gene symbols
        public float[][] dataMatrix; // expression data matrix, indexed [gene][sample]

        public String[] labels; // sample labels; null for a sample no subset lists
    }
}

現在它無法編譯,錯誤消息如下: 「GeneExpressionAnalyzer.java:2:錯誤:包org.apache.commons.math3.stat.inference不存在

import org.apache.commons.math3.stat.inference.TestUtils;

GeneExpressionAnalyzer.java:277:錯誤:找不到符號 return TestUtils.tTest; 符號:變量TestUtils 位置:類GeneExpressionAnalyzer 2個錯誤」

我不明白哪裏出了錯,明明我已經把包含TestUtils的.jar文件添加到了路徑中(下載地址:http://apache.spd.co.il//commons/math/binaries/commons-math3-3.2-bin.zip)。

任何見解?

+2

您是否將zip文件添加到您的類路徑中,或者只是提取後包含在其中的jar commons-math3-3.2.jar? – StephaneM

+0

只是jar commons-math3-3.2.jar – Rotemk55

回答

1

如果您正在使用Eclipse工作,

請先手動從here(原文爲下載鏈接)下載jar文件。

下載後,在Eclipse中打開package explorer - >右鍵點擊你的項目 - >
Build Path - >Configure Build Path,此時會打開一個窗口。

在Libraries選項卡下 - >單擊Add External JARs,選擇您下載的jar文件,然後單擊確定。

就是這樣。現在問題應該解決了。

+0

這正是我從一開始就用commons-math3-3.2.jar所做的。 我用3.1.jar重試了一遍,但是,正如所料,它沒有幫助。 – Rotemk55

0

它是否從命令行工作?

我把你的類精簡成了下面這樣:

import org.apache.commons.math3.stat.inference.TestUtils; 
import java.io.*; 
import java.util.Arrays; 

/**
 * Minimal reproduction class: its body uses nothing from the imported packages, so whether
 * this file compiles depends only on the import statements above it being resolvable.
 */
public class Test {

    /**
     * Prints a fixed marker to standard output.
     *
     * @param args ignored
     * @throws IOException declared by the signature; nothing in this body raises it
     */
    public static void main(String[] args) throws IOException {
        final String marker = "test...";
        System.out.print(marker);
    }
}

我把Test.java文件和commons-math3-3.2.jar複製到同一目錄,以下是我在命令行中的輸出:

C:\temp\test>dir 

Répertoire de C:\temp\test 

24/04/2014 14:41 <REP>   . 
24/04/2014 14:41 <REP>   .. 
24/04/2014 14:38   1 692 782 commons-math3-3.2.jar 
24/04/2014 14:41    230 Test.java 
       2 fichier(s)  1 693 012 octets 
       2 Rép(s) 23 170 342 912 octets libres 

C:\temp\test>javac Test.java 
Test.java:1: package org.apache.commons.math3.stat.inference does not exist 
import org.apache.commons.math3.stat.inference.TestUtils; 
              ^
1 error 

C:\temp\test>javac -cp commons-math3-3.2.jar Test.java 

C:\temp\test>dir 

Répertoire de C:\temp\test 

24/04/2014 14:41 <REP>   . 
24/04/2014 14:41 <REP>   .. 
24/04/2014 14:38   1 692 782 commons-math3-3.2.jar 
24/04/2014 14:41    500 Test.class 
24/04/2014 14:41    230 Test.java 
+0

我也得到了這個輸出。 – Rotemk55

+0

也許這一點會有幫助:當我在Eclipse中展開jar文件的分支並打開「TestUtils.class」時,看到的是: 類文件編輯器 / 未找到源代碼 / 該JAR文件沒有源代碼附件。 有什麼辦法可以解決這個問題嗎? – Rotemk55