Skip to content

Commit

Permalink
Bug correction in GTF Handling
Browse files Browse the repository at this point in the history
  • Loading branch information
DeplanckeLab committed Nov 27, 2019
1 parent d91ae47 commit f45d98e
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 126,946 deletions.
31,731 changes: 0 additions & 31,731 deletions examples/output.dge.reads.detailed.txt

This file was deleted.

31,726 changes: 0 additions & 31,726 deletions examples/output.dge.reads.txt

This file was deleted.

31,731 changes: 0 additions & 31,731 deletions examples/output.dge.umis.detailed.txt

This file was deleted.

31,726 changes: 0 additions & 31,726 deletions examples/output.dge.umis.txt

This file was deleted.

Binary file added releases/BRBseqTools.1.4.jar
Binary file not shown.
14 changes: 7 additions & 7 deletions src/DGEMatrixManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -181,24 +181,24 @@ public static void createOutputDGE() throws Exception
int nbUMIs = 0;
for(String gene:sortedKeys)
{
String mappedGene = Parameters.mappingGeneIdGeneName.get(gene);
if(mappedGene == null) mappedGene = "";
if(!mappedGene.equals("")) {bw_reads.write(gene); if(Parameters.UMILength != -1) bw_umis.write(gene); }
bw_reads_detailed.write(gene + "\t" + mappedGene);
if(Parameters.UMILength != -1) bw_umis_detailed.write(gene + "\t" + mappedGene);
HashSet<String> mappedGene = Parameters.mappingGeneIdGeneName.get(gene);
if(mappedGene != null) mappedGene.remove(gene);
if(mappedGene != null) {bw_reads.write(gene); if(Parameters.UMILength != -1) bw_umis.write(gene); }
bw_reads_detailed.write(gene + "\t" + Utils.toString(mappedGene));
if(Parameters.UMILength != -1) bw_umis_detailed.write(gene + "\t" + Utils.toString(mappedGene));
for(String barcode:Parameters.barcodeIndex.keySet())
{
String mappedBarcode = Parameters.mappingBarcodeName.get(barcode);
nbUMIs += umis[Parameters.barcodeIndex.get(barcode)][Parameters.geneIndex.get(gene)].getCorrectedSize();
if(!mappedGene.equals("") && mappedBarcode != null)
if(mappedGene != null && mappedBarcode != null)
{
bw_reads.write("\t" + counts[Parameters.barcodeIndex.get(barcode)][Parameters.geneIndex.get(gene)]);
if(Parameters.UMILength != -1) bw_umis.write("\t" + umis[Parameters.barcodeIndex.get(barcode)][Parameters.geneIndex.get(gene)].getCorrectedSize());
}
bw_reads_detailed.write("\t" + counts[Parameters.barcodeIndex.get(barcode)][Parameters.geneIndex.get(gene)]);
if(Parameters.UMILength != -1) bw_umis_detailed.write("\t" + umis[Parameters.barcodeIndex.get(barcode)][Parameters.geneIndex.get(gene)].getCorrectedSize());
}
if(!mappedGene.equals("")) { bw_reads.write("\n"); if(Parameters.UMILength != -1) bw_umis.write("\n"); }
if(mappedGene != null) { bw_reads.write("\n"); if(Parameters.UMILength != -1) bw_umis.write("\n"); }
bw_reads_detailed.write("\n"); if(Parameters.UMILength != -1) bw_umis_detailed.write("\n");
}
System.out.println(nbUMIs + " UMI counts were written (" + (nbReadsWritten - nbUMIs) + " duplicates = "+ Parameters.pcFormatter.format(((nbReadsWritten - nbUMIs) / (float)nbReadsWritten)*100) + "%)");
Expand Down
43 changes: 19 additions & 24 deletions src/model/GTF.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package model;

import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
Expand All @@ -18,16 +17,16 @@ public static void readGTF() throws Exception
{
System.out.println("\nReading GTF file provided: " + Parameters.inputGTFFile.getAbsolutePath());
Parameters.geneIndex = new HashMap<String, Integer>(); // Fill this as we go
Parameters.mappingGeneIdGeneName = new HashMap<String, String>(); // For filling final matrices
Parameters.mappingGeneIdGeneName = new HashMap<String, HashSet<String>>(); // For filling final matrices
BufferedReader br = Utils.readGTF(Parameters.inputGTFFile);
String line = br.readLine();
int line_number = 0;
forest = new HashMap<>();
int nbExons = 0;
int nbGenes = 0;
HashSet<String> uniqueGeneId = new HashSet<String>();
ArrayList<String> uniqueGeneName = new ArrayList<String>(); // It's actually not unique and should not be since two geneID can have the same gene Name => ArrayList
while(line != null)
{
line_number++;
if(!line.startsWith("#") && !line.trim().equals(""))
{
String[] tokens = line.split("\t");
Expand All @@ -51,47 +50,43 @@ public static void readGTF() throws Exception
}
}
if(gene_name == null) gene_name = gene_id;
if(gene_id != null)
if(gene_id == null) System.err.println("[WARNING] l." + line_number + "\t: No gene_id. This entry is ignored.");
else
{
HashSet<String> names = Parameters.mappingGeneIdGeneName.get(gene_id);
if(names == null) // First time we see this gene_id
{
names = new HashSet<String>();
Parameters.geneIndex.put(gene_id, nbGenes);
nbGenes++;
}
names.add(gene_name);
Parameters.mappingGeneIdGeneName.put(gene_id, names);
// Which type is it?
if(type.equals("exon"))
{
nbExons++;
IntervalTree tree = forest.get(chr);
if(tree == null) tree = new IntervalTree();
if(uniqueGeneId.add(gene_id)) uniqueGeneName.add(gene_name);
tree.insert(new IntervalLabelled((int)start, (int)end, gene_id, strand));
forest.put(chr, tree);
}
else if(type.equals("gene"))
{
Parameters.geneIndex.put(gene_id, nbGenes);
Parameters.mappingGeneIdGeneName.put(gene_id, gene_name);
nbGenes++;
}
}
}
line = br.readLine();
}
br.close();

if(nbGenes == 0) {
System.out.println("No Genes were detected in the GTF file. Probably the \"gene\" annotation is missing from the GTF file 3rd column?");
System.out.println("Trying to \"save the day\" by collapsing exons to their annotated gene_id");
for(String gene_id:uniqueGeneId) {
Parameters.geneIndex.put(gene_id, nbGenes);
Parameters.mappingGeneIdGeneName.put(gene_id, uniqueGeneName.get(nbGenes));
nbGenes++;
}
}

System.out.println(nbExons + " 'exons' are annotating " + uniqueGeneId.size() + " unique genes in the provided GTF file. In total " + nbGenes + " 'gene' annotations are found in the GTF file.");

if(nbGenes == 0) {
System.err.println("We couldn't parse the GTF file. Please report this problem if the GTF is in standard format. Or use another GTF from another source.");
System.exit(-1);
}

System.out.println(nbExons + " 'exons' are annotating " + nbGenes + " unique gene_ids in the provided GTF file.");

System.out.println(Utils.toString(Parameters.mappingGeneIdGeneName.get("MSTRG.26637")));
System.out.println(Utils.toString(Parameters.mappingGeneIdGeneName.get("MSTRG.3370")));

Parameters.geneIndex.put("__alignment_not_unique", Parameters.geneIndex.size());
Parameters.geneIndex.put("__no_feature", Parameters.geneIndex.size());
Parameters.geneIndex.put("__ambiguous", Parameters.geneIndex.size());
Expand Down
2 changes: 1 addition & 1 deletion src/model/Parameters.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public class Parameters
public static ArrayList<String> BC1;
public static int lengthBarcode = 0;
public static HashMap<String, String> mappingBarcodeName = null;
public static HashMap<String, String> mappingGeneIdGeneName = null;
public static HashMap<String, HashSet<String>> mappingGeneIdGeneName = null;
public static int l1 = -1;
public static HashMap<String, Integer> geneIndex = null;
public static HashMap<String, Integer> barcodeIndex = null;
Expand Down
13 changes: 13 additions & 0 deletions src/tools/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,19 @@ public static String[] sortKeys(Map<String, Integer> map)
return keys;
}

/**
 * Joins the elements of a set into a single comma-separated string.
 *
 * Elements are emitted in the iteration order of the given set, so for a
 * plain {@code HashSet} the order of the names in the result is unspecified.
 * No separator is written before the first or after the last element.
 *
 * @param str the set of strings to join; may be {@code null}
 * @return the elements separated by {@code ","}, or the empty string when
 *         {@code str} is {@code null} or contains no elements
 */
public static String toString(HashSet<String> str)
{
    // A missing set is rendered as an empty field rather than the text "null".
    if(str == null) return "";
    // StringBuilder rather than StringBuffer: the accumulator never leaves this
    // method, so the synchronization of StringBuffer is pure overhead.
    StringBuilder joined = new StringBuilder();
    String separator = "";
    for(String element:str)
    {
        joined.append(separator).append(element);
        separator = ","; // every element after the first is preceded by a comma
    }
    return joined.toString();
}

public static String toReadableTime(long ms)
{
if(ms < 1000) return ""+ms+" ms";
Expand Down

0 comments on commit f45d98e

Please sign in to comment.