From c97f2bfdfc33a2d121291e4006524b6649fce834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 20 Mar 2020 15:07:38 +0000 Subject: [PATCH] models: Replace StudyEntry.samples map with list #179 --- biodata-models/src/main/avro/variant.avdl | 7 +- .../biodata/models/variant/StudyEntry.java | 74 +++++++++---------- .../models/variant/stats/VariantStats.java | 29 +++++--- .../main/proto/protobuf/opencb/variant.proto | 7 +- .../VariantAvroToVariantContextConverter.java | 31 ++++---- ...ntStatsToPopulationFrequencyConverter.java | 4 + .../avro/VariantStatsToTsvConverter.java | 4 +- .../VariantAvroToVariantProtoConverter.java | 5 +- ...VariantContextToVariantProtoConverter.java | 4 +- ...VariantProtoToVariantContextConverter.java | 14 ++-- .../VariantAggregatedEVSStatsCalculator.java | 14 ++-- .../VariantAggregatedExacStatsCalculator.java | 21 +++--- .../VariantAggregatedStatsCalculator.java | 8 +- .../variant/stats/VariantStatsCalculator.java | 3 +- ...riantStatsPopulationFrequencyExporter.java | 6 +- .../HardyWeinbergScoreCalculatorTaskTest.java | 3 +- .../stats/VariantStatsCalculatorTest.java | 6 +- .../writer/VariantStatsTsvExporterTest.java | 6 +- 18 files changed, 130 insertions(+), 116 deletions(-) diff --git a/biodata-models/src/main/avro/variant.avdl b/biodata-models/src/main/avro/variant.avdl index 0a7864964..70da8e574 100644 --- a/biodata-models/src/main/avro/variant.avdl +++ b/biodata-models/src/main/avro/variant.avdl @@ -35,6 +35,11 @@ protocol Variants { } record VariantStats { + /** + * Cohort identifier + **/ + string cohortId; + /** * Total number of alleles in called genotypeCounters. Does not include missing alleles **/ @@ -244,7 +249,7 @@ protocol Variants { * Statistics of the genomic variation, such as its alleles/genotypeCounters count * or its minimum allele frequency, grouped by cohort name. */ - map stats; + array stats; array scores = []; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/variant/StudyEntry.java b/biodata-models/src/main/java/org/opencb/biodata/models/variant/StudyEntry.java index e64da7f5f..0b4ace29b 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/variant/StudyEntry.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/variant/StudyEntry.java @@ -42,7 +42,8 @@ public class StudyEntry implements Serializable { private volatile LinkedHashMap samplesPosition = null; private final AtomicReference> sampleDataKeysPosition = new AtomicReference<>(); - private volatile Map cohortStats = null; +// private volatile Map cohortStats = null; + private volatile List stats = null; private final org.opencb.biodata.models.variant.avro.StudyEntry impl; public static final String DEFAULT_COHORT = "ALL"; @@ -73,7 +74,7 @@ public StudyEntry(String fileId, String studyId) { public StudyEntry(String studyId, List secondaryAlternates, List format) { this.impl = new org.opencb.biodata.models.variant.avro.StudyEntry(studyId, - new ArrayList<>(), null, format, new ArrayList<>(), new ArrayList<>(), new LinkedHashMap<>(), new ArrayList<>()); + new ArrayList<>(), null, format, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), new ArrayList<>()); setSecondaryAlternates(secondaryAlternates); } @@ -430,53 +431,46 @@ public StudyEntry setIssues(List issues) { return this; } - public Map getStats() { - resetStatsMap(); - return Collections.unmodifiableMap(cohortStats); + public List getStats() { + resetStatsList(); + return Collections.unmodifiableList(stats); } - private void resetStatsMap() { - if (cohortStats == null) { - cohortStats = new HashMap<>(); - impl.getStats().forEach((k, v) -> cohortStats.put(k, new VariantStats(v))); - } - } - - public void setStats(Map stats) { - this.cohortStats = stats; - impl.setStats(new HashMap<>(stats.size())); - stats.forEach((k, v) -> impl.getStats().put(k, v.getImpl())); - } - - public void setStats(String cohortName, VariantStats stats) { - resetStatsMap(); - cohortStats.put(cohortName, stats); - impl.getStats().put(cohortName, stats.getImpl()); + public void setStats(List stats) { + impl.setStats(new ArrayList<>(stats.size())); + stats.forEach((v) -> impl.getStats().add(v.getImpl())); + this.stats = stats; } - public VariantStats getStats(String cohortName) { - resetStatsMap(); - return cohortStats.get(cohortName); - } + public void addStats(VariantStats stats) { + resetStatsList(); + impl.getStats().add(stats.getImpl()); + this.stats.add(stats); - @Deprecated - public VariantStats getCohortStats(String cohortName) { - return getStats(cohortName); - } - - @Deprecated - public void setCohortStats(String cohortName, VariantStats stats) { - setStats(cohortName, stats); } - @Deprecated - public Map getCohortStats() { - return getStats(); + public VariantStats getStats(String cohortId) { + resetStatsList(); + for (VariantStats stats : stats) { + if (stats.getCohortId().equals(cohortId)) { + return stats; + } + } + return null; } - @Deprecated - public void setCohortStats(Map cohortStats) { - setStats(cohortStats); + private void resetStatsList() { + if (stats == null) { + if (impl.getStats() == null) { + impl.setStats(new ArrayList<>()); + stats = new ArrayList<>(); + } else { + stats = new ArrayList<>(impl.getStats().size()); + for (org.opencb.biodata.models.variant.avro.VariantStats v : impl.getStats()) { + stats.add(new VariantStats(v)); + } + } + } } public void addFileData(String fileId, String key, String value) { diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/variant/stats/VariantStats.java b/biodata-models/src/main/java/org/opencb/biodata/models/variant/stats/VariantStats.java index a82867f1f..dc13423e0 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/variant/stats/VariantStats.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/variant/stats/VariantStats.java @@ -21,8 +21,6 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import org.opencb.biodata.models.feature.Genotype; -import org.opencb.biodata.models.variant.Variant; -import org.opencb.biodata.models.variant.avro.VariantType; import java.util.Arrays; import java.util.HashMap; @@ -38,6 +36,11 @@ public class VariantStats { private final org.opencb.biodata.models.variant.avro.VariantStats impl; + public VariantStats(String cohortId) { + this(); + impl.setCohortId(cohortId); + } + public VariantStats() { this(-1f, -1f, null, null, -1, -1); } @@ -48,7 +51,7 @@ public VariantStats(org.opencb.biodata.models.variant.avro.VariantStats other) { public VariantStats(float maf, float mgf, String mafAllele, String mgfGenotype, int missingAlleleCount, int missingGenotypeCount) { - impl = new org.opencb.biodata.models.variant.avro.VariantStats(-1, -1, -1, -1F, -1F, + impl = new org.opencb.biodata.models.variant.avro.VariantStats("", -1, -1, -1, -1F, -1F, missingAlleleCount, missingGenotypeCount, new HashMap<>(), new HashMap<>(), new HashMap<>(), new HashMap<>(), -1F, @@ -59,6 +62,14 @@ public org.opencb.biodata.models.variant.avro.VariantStats getImpl() { return impl; } + public VariantStats setCohortId(String cohortId) { + impl.setCohortId(cohortId); + return this; + } + + public String getCohortId() { + return impl.getCohortId(); + } public Integer getAlleleCount() { return impl.getAlleleCount(); @@ -182,27 +193,27 @@ public VariantStats addGenotype(Genotype g, int addedCount, boolean normalize) { return this; } - public java.util.Map getFilterCount() { + public Map getFilterCount() { return impl.getFilterCount(); } - public void setFilterCount(java.util.Map value) { + public void setFilterCount(Map value) { this.impl.setFilterCount(value); } - public java.util.Map getFilterFreq() { + public Map getFilterFreq() { return impl.getFilterFreq(); } - public void setFilterFreq(java.util.Map value) { + public void setFilterFreq(Map value) { this.impl.setFilterFreq(value); } - public java.lang.Float getQualityAvg() { + public Float getQualityAvg() { return impl.getQualityAvg(); } - public void setQualityAvg(java.lang.Float value) { + public void setQualityAvg(Float value) { this.impl.setQualityAvg(value); } diff --git a/biodata-models/src/main/proto/protobuf/opencb/variant.proto b/biodata-models/src/main/proto/protobuf/opencb/variant.proto index 69332363d..d82aebbe3 100644 --- a/biodata-models/src/main/proto/protobuf/opencb/variant.proto +++ b/biodata-models/src/main/proto/protobuf/opencb/variant.proto @@ -43,6 +43,11 @@ enum VariantType { } message VariantStats { + /** + * Cohort identifier + **/ + string cohortId = 17; + /** * Total number of alleles in called genotypeCounters. Does not include missing alleles **/ @@ -164,7 +169,7 @@ message StudyEntry { repeated AlternateCoordinate secondaryAlternates = 3; repeated string sampleDataKeys = 4; repeated SampleEntry samples = 5; - map stats = 6; + repeated VariantStats stats = 6; } /** diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantAvroToVariantContextConverter.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantAvroToVariantContextConverter.java index d9f6368ac..05e85851b 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantAvroToVariantContextConverter.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantAvroToVariantContextConverter.java @@ -190,23 +190,22 @@ private void addCohortStatsMultiInfoField(StudyEntry studyEntry, Map entry : studyEntry.getStats().entrySet()) { - String cohortName = entry.getKey(); - VariantStats stats = entry.getValue(); + for (VariantStats stats : studyEntry.getStats()) { + String cohortId = stats.getCohortId(); - if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) { - cohortName = ""; + if (cohortId.equals(StudyEntry.DEFAULT_COHORT)) { + cohortId = ""; int an = stats.getAltAlleleCount(); if (an >= 0) { - attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an)); + attributes.put(cohortId + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an)); } if (stats.getAltAlleleCount() >= 0) { - attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount())); + attributes.put(cohortId + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount())); } } else { - cohortName = cohortName + "_"; + cohortId = cohortId + "_"; } - attributes.put(cohortName + VCFConstants.ALLELE_FREQUENCY_KEY, DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq())); + attributes.put(cohortId + VCFConstants.ALLELE_FREQUENCY_KEY, DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq())); } } @@ -217,20 +216,18 @@ private void addCohortStatsSingleInfoField(StudyEntry studyEntry, Map statsList = new ArrayList<>(); - for (Map.Entry entry : studyEntry.getStats().entrySet()) { - String cohortName = entry.getKey(); - VariantStats stats = entry.getValue(); - -// if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) { + for (VariantStats stats : studyEntry.getStats()) { + String cohortId = stats.getCohortId(); +// if (cohortId.equals(StudyEntry.DEFAULT_COHORT)) { // int an = stats.getAltAlleleCount() + stats.getRefAlleleCount(); // if (an >= 0) { -// attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an)); +// attributes.put(cohortId + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an)); // } // if (stats.getAltAlleleCount() >= 0) { -// attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount())); +// attributes.put(cohortId + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount())); // } // } - statsList.add(cohortName + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq())); + statsList.add(cohortId + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq())); } // set cohort stats attributes attributes.put(STATS_INFO_KEY, String.join(FIELD_SEPARATOR, statsList)); diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToPopulationFrequencyConverter.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToPopulationFrequencyConverter.java index 35892e331..4a2423e04 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToPopulationFrequencyConverter.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToPopulationFrequencyConverter.java @@ -32,6 +32,10 @@ */ public class VariantStatsToPopulationFrequencyConverter { + public PopulationFrequency convert(String study, VariantStats stats, String reference, String alternate) { + return convert(study, stats.getCohortId(), stats, reference, alternate); + } + public PopulationFrequency convert(String study, String population, VariantStats stats, String reference, String alternate) { Float refHomGenotypeFreq = 0F; Float hetGenotypeFreq = 0F; diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToTsvConverter.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToTsvConverter.java index 850be9645..4ae9899bb 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToTsvConverter.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/avro/VariantStatsToTsvConverter.java @@ -116,7 +116,7 @@ public String createHeader() { return sb.toString(); } - public String convert(Variant variant, Map statsMap, VariantAnnotation annotation) { + public String convert(Variant variant, List statsList, VariantAnnotation annotation) { sb.setLength(0); sb.append(variant.getChromosome()); @@ -163,7 +163,7 @@ public String convert(Variant variant, Map statsMap, Varia } for (Iterator cohortIterator = cohorts.iterator(); cohortIterator.hasNext(); ) { String cohort = cohortIterator.next(); - VariantStats stats = statsMap.get(cohort); + VariantStats stats = statsList.stream().filter(s -> s.getCohortId().equals(cohort)).findFirst().orElse(null); if (stats == null) { for (int i = 0; i < STATS_COLUMNS.size() - 1; i++) { sb.append(".\t"); diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantAvroToVariantProtoConverter.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantAvroToVariantProtoConverter.java index 0bed50253..3419a1f4f 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantAvroToVariantProtoConverter.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantAvroToVariantProtoConverter.java @@ -84,10 +84,9 @@ private VariantProto.StudyEntry.Builder toProto(StudyEntry study) { studyBuilder.addSamples(VariantProto.SampleEntry.newBuilder().addAllData(sampleEntry.getData())); } - for (Map.Entry entry : study.getStats().entrySet()) { - VariantStats stats = entry.getValue(); + for (VariantStats stats : study.getStats()) { VariantProto.VariantStats.Builder variantStats = toProto(stats); - studyBuilder.putStats(entry.getKey(), variantStats.build()); + studyBuilder.addStats(variantStats.build()); } for (FileEntry fileEntry : study.getFiles()) { VariantProto.FileEntry.Builder fileBuilder = toProto(fileEntry); diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantContextToVariantProtoConverter.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantContextToVariantProtoConverter.java index 20b5afcbc..c41fb6848 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantContextToVariantProtoConverter.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantContextToVariantProtoConverter.java @@ -196,14 +196,14 @@ public VariantProto.Variant convert(VariantContext variantContext, VariantProto. * being as these value will not be getting from HTSJDK * currently. */ - Map stats = new HashMap<>(); + List stats = new ArrayList<>(); //TODO: Call to the Variant Aggregated Stats Parser // stats.put( // "2", // setVariantStatsParams( // setVariantHardyWeinbergStatsParams(), // variantContext)); - variantSourceEntry.putAllStats(stats); + variantSourceEntry.addAllStats(stats); studies.add(variantSourceEntry.build()); variant.addAllStudies(studies); diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantProtoToVariantContextConverter.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantProtoToVariantContextConverter.java index 43e34f18f..a5f149a05 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantProtoToVariantContextConverter.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/converters/proto/VariantProtoToVariantContextConverter.java @@ -165,20 +165,18 @@ private void addCohortStats(VariantProto.StudyEntry studyEntry, Map statsList = new ArrayList<>(); - for (Map.Entry entry : studyEntry.getStats().entrySet()) { - String cohortName = entry.getKey(); - VariantProto.VariantStats stats = entry.getValue(); - -// if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) { + for (VariantProto.VariantStats stats : studyEntry.getStatsList()) { + String cohortId = stats.getCohortId(); +// if (cohortId.equals(StudyEntry.DEFAULT_COHORT)) { // int an = stats.getAlleleCount(); // if (an >= 0) { -// attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an)); +// attributes.put(cohortId + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an)); // } // if (stats.getAltAlleleCount() >= 0) { -// attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount())); +// attributes.put(cohortId + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount())); // } // } - statsList.add(cohortName + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq())); + statsList.add(cohortId + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq())); } // set cohort stats attributes attributes.put(STATS_INFO_KEY, String.join(FIELD_SEPARATOR, statsList)); diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedEVSStatsCalculator.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedEVSStatsCalculator.java index 434808a19..d7d0eb261 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedEVSStatsCalculator.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedEVSStatsCalculator.java @@ -70,7 +70,7 @@ protected void parseStats(Variant variant, StudyEntry study, int numAllele, Stri alternateAlleles = ori[2].split(","); reference = ori[1]; } - VariantStats stats = new VariantStats(); + VariantStats stats = new VariantStats(StudyEntry.DEFAULT_COHORT); if (info.containsKey("MAF")) { String splitsMAF[] = info.get("MAF").split(","); if (splitsMAF.length == 3) { @@ -85,7 +85,7 @@ protected void parseStats(Variant variant, StudyEntry study, int numAllele, Stri } calculateFilterQualStats(fileEntry.getData(), stats); - study.setStats(StudyEntry.DEFAULT_COHORT, stats); + study.addStats(stats); } @Override @@ -109,8 +109,8 @@ protected void parseMappedStats(Variant variant, StudyEntry studyEntry, String cohort = opencgaTagSplit[0]; VariantStats cohortStats = studyEntry.getStats(cohort); if (cohortStats == null) { - cohortStats = new VariantStats(); - studyEntry.setStats(cohort, cohortStats); + cohortStats = new VariantStats(cohort); + studyEntry.addStats(cohortStats); } switch (opencgaTagSplit[1]) { case "AC": @@ -141,8 +141,8 @@ protected void parseMappedStats(Variant variant, StudyEntry studyEntry, float maf = Float.parseFloat(values[i]) / 100; // from [0, 100] (%) to [0, 1] VariantStats cohortStats = studyEntry.getStats(populations[i]); if (cohortStats == null) { - cohortStats = new VariantStats(); - studyEntry.setStats(populations[i], cohortStats); + cohortStats = new VariantStats(populations[i]); + studyEntry.addStats(cohortStats); } cohortStats.setMaf(maf); } @@ -152,7 +152,7 @@ protected void parseMappedStats(Variant variant, StudyEntry studyEntry, } // TODO reprocess stats to complete inferable values. A StatsHolder may be needed to keep values not storables in VariantStats } - for (VariantStats stats : studyEntry.getStats().values()) { + for (VariantStats stats : studyEntry.getStats()) { calculateFilterQualStats(info, stats); } } diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedExacStatsCalculator.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedExacStatsCalculator.java index f3e07a387..6aa94e3af 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedExacStatsCalculator.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedExacStatsCalculator.java @@ -67,7 +67,7 @@ public VariantAggregatedExacStatsCalculator(Properties tagMap) { @Override protected void parseStats(Variant variant, StudyEntry studyEntry, int numAllele, String reference, String[] alternateAlleles, Map info) { - VariantStats stats = new VariantStats(); + VariantStats stats = new VariantStats(StudyEntry.DEFAULT_COHORT); if (info.containsKey(AC_HET)) { // heterozygous genotype count // Het count is a non standard field that can not be rearranged when decomposing multi-allelic variants. @@ -116,7 +116,7 @@ protected void parseStats(Variant variant, StudyEntry studyEntry, int numAllele, } calculateFilterQualStats(info, stats); - studyEntry.setStats(StudyEntry.DEFAULT_COHORT, stats); + studyEntry.addStats(stats); } @Override @@ -135,8 +135,8 @@ protected void parseMappedStats(Variant variant, StudyEntry studyEntry, int numA String cohortName = opencgaTagSplit[0]; VariantStats cohortStats = studyEntry.getStats(cohortName); if (cohortStats == null) { - cohortStats = new VariantStats(); - studyEntry.setStats(cohortName, cohortStats); + cohortStats = new VariantStats(cohortName); + studyEntry.addStats(cohortStats); } switch (opencgaTagSplit[1]) { case "AC": @@ -168,14 +168,15 @@ protected void parseMappedStats(Variant variant, StudyEntry studyEntry, int numA } } } - for (String cohortName : studyEntry.getStats().keySet()) { - if (ans.containsKey(cohortName)) { - VariantStats cohortStats = studyEntry.getStats(cohortName); + for (VariantStats variantStats : studyEntry.getStats()) { + String cohortId = variantStats.getCohortId(); + if (ans.containsKey(cohortId)) { + VariantStats cohortStats = studyEntry.getStats(cohortId); calculateFilterQualStats(info, cohortStats); - Integer alleleNumber = ans.get(cohortName); + Integer alleleNumber = ans.get(cohortId); addReferenceGenotype(variant, cohortStats, alleleNumber); - setRefAlleleCount(cohortStats, alleleNumber, acs.get(cohortName)); - setMaf(alleleNumber, acs.get(cohortName), variant.getReference(), alternateAlleles, cohortStats); + setRefAlleleCount(cohortStats, alleleNumber, acs.get(cohortId)); + setMaf(alleleNumber, acs.get(cohortId), variant.getReference(), alternateAlleles, cohortStats); } } } diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedStatsCalculator.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedStatsCalculator.java index 6f265036f..29b4353b8 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedStatsCalculator.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantAggregatedStatsCalculator.java @@ -131,7 +131,7 @@ public void calculate(Variant variant, StudyEntry study) { * @param info */ protected void parseStats(Variant variant, StudyEntry file, int numAllele, String reference, String[] alternateAlleles, Map info) { - VariantStats vs = new VariantStats(); + VariantStats vs = new VariantStats(StudyEntry.DEFAULT_COHORT); Map stats = new LinkedHashMap<>(); for (Map.Entry entry : info.entrySet()) { @@ -145,7 +145,7 @@ protected void parseStats(Variant variant, StudyEntry file, int numAllele, Strin calculate(variant, file, numAllele, reference, alternateAlleles, stats, vs); - file.setStats(StudyEntry.DEFAULT_COHORT, vs); + file.addStats(vs); } /** @@ -172,9 +172,9 @@ protected void parseMappedStats(Variant variant, StudyEntry file, int numAllele, } for (String cohortName : cohortStats.keySet()) { - VariantStats vs = new VariantStats(); + VariantStats vs = new VariantStats(cohortName); calculate(variant, file, numAllele, reference, alternateAlleles, cohortStats.get(cohortName), vs); - file.setStats(cohortName, vs); + file.addStats(vs); } } diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculator.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculator.java index 7b1a3bd6a..3fabd47a5 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculator.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculator.java @@ -257,7 +257,8 @@ public static void calculateStatsForVariantsList(List variants, Pedigre for (Variant variant : variants) { for (StudyEntry entry : variant.getStudies()) { VariantStats stats = calculate(variant, entry); - entry.setStats(StudyEntry.DEFAULT_COHORT, stats); + stats.setCohortId(StudyEntry.DEFAULT_COHORT); + entry.addStats(stats); } } } diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsPopulationFrequencyExporter.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsPopulationFrequencyExporter.java index ebfe124ae..93b8f2764 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsPopulationFrequencyExporter.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsPopulationFrequencyExporter.java @@ -37,7 +37,6 @@ import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; /** * Exports the given variant stats into a Json format. @@ -91,12 +90,11 @@ public boolean write(List batch) { public boolean write(Variant variant) { ArrayList frequencies = new ArrayList<>(); for (StudyEntry studyEntry : variant.getStudies()) { - for (Map.Entry cohortEntry : studyEntry.getStats().entrySet()) { + for (VariantStats variantStats : studyEntry.getStats()) { String studyId = studyEntry.getStudyId(); studyId = studyId.substring(studyId.lastIndexOf(":") + 1); PopulationFrequency populationFrequency = converter.convert(studyId, - cohortEntry.getKey(), - cohortEntry.getValue(), variant.getReference(), variant.getAlternate()); + variantStats, variant.getReference(), variant.getAlternate()); // Write only frequencies non zero if (populationFrequency.getAltAlleleFreq() > 0 && !populationFrequency.getAltAlleleFreq().isNaN()) { frequencies.add(populationFrequency); diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/scores/HardyWeinbergScoreCalculatorTaskTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/scores/HardyWeinbergScoreCalculatorTaskTest.java index cbc52c29e..71176994c 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/scores/HardyWeinbergScoreCalculatorTaskTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/scores/HardyWeinbergScoreCalculatorTaskTest.java @@ -30,7 +30,8 @@ public void testHW() throws Exception { .setStudyId(STUDY) .build(); - variant.getStudy(STUDY).setStats(Collections.singletonMap("ALL", new VariantStats() + variant.getStudy(STUDY).setStats(Collections.singletonList(new VariantStats() + .setCohortId("ALL") .addGenotype(new Genotype("0/0"), 50) .addGenotype(new Genotype("0/1"), 20) .addGenotype(new Genotype("1/1"), 5) diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculatorTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculatorTest.java index 1124d1eb6..986a14504 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculatorTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/VariantStatsCalculatorTest.java @@ -278,7 +278,7 @@ public void testCreate_1000g_38_liftover() throws Exception { freqs.put("EUR", 0.4056); freqs.put("SAS", 0.4949); - freqs.forEach((coh, freq) -> assertEquals(freq, s.getStats().get(coh).getAltAlleleFreq(), 0.0001)); + freqs.forEach((coh, freq) -> assertEquals(freq, s.getStats(coh).getAltAlleleFreq(), 0.0001)); // System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(s.getStats())); } @@ -319,7 +319,7 @@ public void testCreate_1000g_38_liftover_wrong() throws Exception { freqs.put("EUR", -1.0); freqs.put("SAS", -1.0); - freqs.forEach((coh, freq) -> assertEquals(freq, s.getStats().get(coh).getAltAlleleFreq(), 0.0001)); + freqs.forEach((coh, freq) -> assertEquals(freq, s.getStats(coh).getAltAlleleFreq(), 0.0001)); // System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(s.getStats())); } @@ -360,7 +360,7 @@ public void testCreate_1000g_38_liftover_indel() throws Exception { freqs.put("EUR", 0.0); freqs.put("SAS", 0.0); - freqs.forEach((coh, freq) -> assertEquals(freq, s.getStats().get(coh).getAltAlleleFreq(), 0.0001)); + freqs.forEach((coh, freq) -> assertEquals(freq, s.getStats(coh).getAltAlleleFreq(), 0.0001)); // System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(s.getStats())); } diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsTsvExporterTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsTsvExporterTest.java index 9822fa440..f0bf199db 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsTsvExporterTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/stats/writer/VariantStatsTsvExporterTest.java @@ -41,9 +41,9 @@ public void testExport() { private Variant getVariant(String s, String gt1, String gt2, String gt3) { Variant variant = VariantTestUtils.generateVariant(s, "s1", gt1, "s2", gt2, "s3", gt3); StudyEntry study = variant.getStudy(""); - study.setStats("ALL", VariantStatsCalculator.calculate(variant, study)); - study.setStats("C1", VariantStatsCalculator.calculate(variant, study, Arrays.asList("s1", "s2"))); - study.setStats("C2", VariantStatsCalculator.calculate(variant, study, Arrays.asList("s2", "s3"))); + study.addStats(VariantStatsCalculator.calculate(variant, study).setCohortId("ALL")); + study.addStats(VariantStatsCalculator.calculate(variant, study, Arrays.asList("s1", "s2")).setCohortId("C1")); + study.addStats(VariantStatsCalculator.calculate(variant, study, Arrays.asList("s2", "s3")).setCohortId("C2")); VariantAnnotation annotation = new VariantAnnotation(); annotation.setId("rs" + RandomUtils.nextInt());