diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java index b597e5325b..1c90526113 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java @@ -191,6 +191,10 @@ public class VariantAnnotationCommandOptions { arity = 0) public boolean checkAminoAcidChange; + @Parameter(names = {"--filter"}, description = "string indicating the FILTER label that variants must have to be annotated. " + + "Only variants with this label will be written in the output.", required = false, arity = 1) + public String filter = null; + @DynamicParameter(names = "-D", description = "Dynamic parameters. Available parameters: " + "{population-frequencies=for internal purposes mainly. Full path to a json file containing Variant " + "documents that include lists of population frequencies objects. 
Will allow annotating the input file " diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VariantAnnotatorTask.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VariantAnnotatorTask.java index 30149cc31f..3d750c1585 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VariantAnnotatorTask.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VariantAnnotatorTask.java @@ -17,14 +17,16 @@ package org.opencb.cellbase.app.cli.main.annotation; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.FileEntry; +import org.opencb.biodata.models.variant.avro.StudyEntry; import org.opencb.biodata.models.variant.avro.VariantType; import org.opencb.cellbase.lib.variant.annotation.VariantAnnotator; +import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.run.ParallelTaskRunner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * Created by fjlopez on 11/02/16. 
@@ -34,9 +36,16 @@ public class VariantAnnotatorTask implements private final Logger logger = LoggerFactory.getLogger(this.getClass()); private List variantAnnotatorList; + private QueryOptions serverQueryOptions; + private static final String FILTER_PARAM = "filter"; public VariantAnnotatorTask(List variantAnnotatorList) { + this(variantAnnotatorList, new QueryOptions()); + } + + public VariantAnnotatorTask(List variantAnnotatorList, QueryOptions serverQueryOptions) { this.variantAnnotatorList = variantAnnotatorList; + this.serverQueryOptions = serverQueryOptions; } public void pre() { @@ -46,18 +55,42 @@ public void pre() { } public List apply(List batch) throws Exception { - List variantListToAnnotate = filterReferenceBlocksOut(batch); + List variantListToAnnotate = filter(batch); for (VariantAnnotator variantAnnotator : variantAnnotatorList) { variantAnnotator.run(variantListToAnnotate); } return variantListToAnnotate; } - private List filterReferenceBlocksOut(List variantList) { + private List filter(List variantList) { List filteredVariantList = new ArrayList<>(variantList.size()); + String queryOptionsFilterValue = null; + if (serverQueryOptions != null && serverQueryOptions.containsKey(FILTER_PARAM)) { + queryOptionsFilterValue = (String) serverQueryOptions.get(FILTER_PARAM); + } for (Variant variant : variantList) { + // set to true once a file entry's FILTER matches, so the variant is added only once and the study loop stops.
+ boolean variantFound = false; + // filter out reference blocks if (!VariantType.NO_VARIATION.equals(variant.getType())) { - filteredVariantList.add(variant); + // if the filter option is set, keep the variant only when some file entry's "FILTER" attribute equals it, ignoring case. NOTE(review): this is a whole-string comparison and getStudies() is dereferenced without a null check; confirm multi-label FILTER values and variants without studies cannot occur here. + if (queryOptionsFilterValue != null) { + Iterator studyIterator = variant.getImpl().getStudies().iterator(); + while (studyIterator.hasNext() && !variantFound) { + for (FileEntry fileEntry : studyIterator.next().getFiles()) { + Map attributes = fileEntry.getData(); + String vcfFilterValue = attributes.get("FILTER"); + if (vcfFilterValue != null && vcfFilterValue.equalsIgnoreCase(queryOptionsFilterValue)) { + // match: add the variant once, then stop scanning its remaining files and studies. + filteredVariantList.add(variant); + variantFound = true; + break; + } + } + } + } else { + filteredVariantList.add(variant); + } } } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java index 771b7919ca..dadb5444ee 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java @@ -394,7 +394,7 @@ private List> for (int i = 0; i < numThreads; i++) { List variantAnnotatorList = createAnnotators(); - variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList)); + variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList, serverQueryOptions)); } return variantAnnotatorTaskList; } @@ -576,6 +576,7 @@ private void checkParameters() throws IOException, CellBaseException { leftAlign = !variantAnnotationCommandOptions.skipLeftAlign; // Update serverQueryOptions serverQueryOptions.put("checkAminoAcidChange", variantAnnotationCommandOptions.checkAminoAcidChange); + serverQueryOptions.put("filter",
variantAnnotationCommandOptions.filter); // output file if (variantAnnotationCommandOptions.output != null) { diff --git a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java index 9da5c90793..7793500e82 100644 --- a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java +++ b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java @@ -965,4 +965,50 @@ private void cleanUp() throws IOException { return variantAnnotationCommandOptions; } + @Test + public void testFilter() throws Exception { + cleanUp(); + + // Set up annotation CLI options: NOTE checkAminoAcidChange is NOT enabled + CellBaseCliOptionsParser.VariantAnnotationCommandOptions variantAnnotationCommandOptions + = new CellBaseCliOptionsParser().getVariantAnnotationCommandOptions(); + variantAnnotationCommandOptions.assembly = "GRCh37"; + + variantAnnotationCommandOptions.commonOptions.conf = resourcesFolder.resolve("commandExecutor").toString(); + variantAnnotationCommandOptions.input + = resourcesFolder.resolve("commandExecutor/proteinChangeMatch/proband.duprem.atomic.left.split.vcf.gz").toString(); + variantAnnotationCommandOptions.output = OUTPUT_FILENAME; + variantAnnotationCommandOptions.local = true; + variantAnnotationCommandOptions.species = "hsapiens"; + variantAnnotationCommandOptions.filter = "PASS"; + // Annotate + VariantAnnotationCommandExecutor variantAnnotationCommandExecutor + = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions); + variantAnnotationCommandExecutor.loadCellBaseConfiguration(); + + variantAnnotationCommandExecutor.execute(); + // Load annotated variants + List variantList = loadResult(); + + // with filter=PASS exactly one annotated variant is expected in the output + assertEquals(1, variantList.size()); + + variantAnnotationCommandOptions.filter = "BAD FILTER"; +
variantAnnotationCommandExecutor = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions); + variantAnnotationCommandExecutor.loadCellBaseConfiguration(); + variantAnnotationCommandExecutor.execute(); + variantList = loadResult(); + + // no variant is expected to carry the "BAD FILTER" label, so the output must be empty + assertEquals(0, variantList.size()); + + variantAnnotationCommandOptions.filter = null; + variantAnnotationCommandExecutor = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions); + variantAnnotationCommandExecutor.loadCellBaseConfiguration(); + variantAnnotationCommandExecutor.execute(); + variantList = loadResult(); + + // with no filter set, one annotated variant is expected again + assertEquals(1, variantList.size()); + } } \ No newline at end of file