Skip to content

Commit

Permalink
models: Replace StudyEntry.samples map with list #179
Browse files Browse the repository at this point in the history
  • Loading branch information
j-coll committed Mar 20, 2020
1 parent 0a56f33 commit c97f2bf
Show file tree
Hide file tree
Showing 18 changed files with 130 additions and 116 deletions.
7 changes: 6 additions & 1 deletion biodata-models/src/main/avro/variant.avdl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ protocol Variants {
}

record VariantStats {
/**
* Cohort identifier
**/
string cohortId;

/**
* Total number of alleles in called genotypeCounters. Does not include missing alleles
**/
Expand Down Expand Up @@ -244,7 +249,7 @@ protocol Variants {
* Statistics of the genomic variation, such as its alleles/genotypeCounters count
* or its minor allele frequency, grouped by cohort name.
*/
map<VariantStats> stats;
array<VariantStats> stats;

array<VariantScore> scores = [];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ public class StudyEntry implements Serializable {

private volatile LinkedHashMap<String, Integer> samplesPosition = null;
private final AtomicReference<Map<String, Integer>> sampleDataKeysPosition = new AtomicReference<>();
private volatile Map<String, VariantStats> cohortStats = null;
// private volatile Map<String, VariantStats> cohortStats = null;
private volatile List<VariantStats> stats = null;
private final org.opencb.biodata.models.variant.avro.StudyEntry impl;

public static final String DEFAULT_COHORT = "ALL";
Expand Down Expand Up @@ -73,7 +74,7 @@ public StudyEntry(String fileId, String studyId) {

public StudyEntry(String studyId, List<AlternateCoordinate> secondaryAlternates, List<String> format) {
this.impl = new org.opencb.biodata.models.variant.avro.StudyEntry(studyId,
new ArrayList<>(), null, format, new ArrayList<>(), new ArrayList<>(), new LinkedHashMap<>(), new ArrayList<>());
new ArrayList<>(), null, format, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), new ArrayList<>());
setSecondaryAlternates(secondaryAlternates);
}

Expand Down Expand Up @@ -430,53 +431,46 @@ public StudyEntry setIssues(List<IssueEntry> issues) {
return this;
}

public Map<String, VariantStats> getStats() {
resetStatsMap();
return Collections.unmodifiableMap(cohortStats);
public List<VariantStats> getStats() {
resetStatsList();
return Collections.unmodifiableList(stats);
}

private void resetStatsMap() {
if (cohortStats == null) {
cohortStats = new HashMap<>();
impl.getStats().forEach((k, v) -> cohortStats.put(k, new VariantStats(v)));
}
}

public void setStats(Map<String, VariantStats> stats) {
this.cohortStats = stats;
impl.setStats(new HashMap<>(stats.size()));
stats.forEach((k, v) -> impl.getStats().put(k, v.getImpl()));
}

public void setStats(String cohortName, VariantStats stats) {
resetStatsMap();
cohortStats.put(cohortName, stats);
impl.getStats().put(cohortName, stats.getImpl());
public void setStats(List<VariantStats> stats) {
impl.setStats(new ArrayList<>(stats.size()));
stats.forEach((v) -> impl.getStats().add(v.getImpl()));
this.stats = stats;
}

public VariantStats getStats(String cohortName) {
resetStatsMap();
return cohortStats.get(cohortName);
}
public void addStats(VariantStats stats) {
resetStatsList();
impl.getStats().add(stats.getImpl());
this.stats.add(stats);

@Deprecated
public VariantStats getCohortStats(String cohortName) {
return getStats(cohortName);
}

@Deprecated
public void setCohortStats(String cohortName, VariantStats stats) {
setStats(cohortName, stats);
}

@Deprecated
public Map<String, VariantStats> getCohortStats() {
return getStats();
/**
 * Looks up the statistics of a single cohort.
 *
 * The stats list is scanned in order and the first entry whose cohort id
 * equals the requested one is returned.
 *
 * @param cohortId identifier of the cohort to look for
 * @return the matching stats entry, or {@code null} when no entry matches
 */
public VariantStats getStats(String cohortId) {
    resetStatsList();
    // The loop variable deliberately has a different name than the "stats" field it iterates over.
    for (VariantStats candidate : stats) {
        // Entries without a cohort id can never match; skip them instead of failing with an NPE.
        String candidateId = candidate.getCohortId();
        if (candidateId != null && candidateId.equals(cohortId)) {
            return candidate;
        }
    }
    return null;
}

@Deprecated
public void setCohortStats(Map<String, VariantStats> cohortStats) {
setStats(cohortStats);
/**
 * Lazily initialises the {@code stats} field from the underlying Avro model.
 *
 * Does nothing when {@code stats} is already set. Otherwise, an absent list in
 * the Avro model is first replaced by an empty one, and every Avro stats entry
 * is wrapped in a {@link VariantStats}.
 *
 * The wrapper list is built in a local variable and only assigned to the
 * volatile field once complete, so a reader that observes a non-null
 * {@code stats} never sees a half-populated list. The method is not
 * synchronized: concurrent first calls may each build their own list.
 */
private void resetStatsList() {
    if (stats != null) {
        return;
    }
    if (impl.getStats() == null) {
        impl.setStats(new ArrayList<>());
    }
    List<VariantStats> wrapped = new ArrayList<>(impl.getStats().size());
    for (org.opencb.biodata.models.variant.avro.VariantStats v : impl.getStats()) {
        wrapped.add(new VariantStats(v));
    }
    // Publish only after the list has been fully populated.
    stats = wrapped;
}

public void addFileData(String fileId, String key, String value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import org.opencb.biodata.models.feature.Genotype;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.VariantType;

import java.util.Arrays;
import java.util.HashMap;
Expand All @@ -38,6 +36,11 @@ public class VariantStats {

private final org.opencb.biodata.models.variant.avro.VariantStats impl;

/**
 * Creates a stats object with the default values of the no-argument
 * constructor and then stores the given cohort identifier in the
 * underlying Avro model.
 *
 * @param cohortId cohort identifier
 */
public VariantStats(String cohortId) {
this();
impl.setCohortId(cohortId);
}

/**
 * Creates a stats object by delegating to the six-argument constructor with
 * -1 for maf, mgf, missingAlleleCount and missingGenotypeCount, and
 * {@code null} for mafAllele and mgfGenotype.
 */
public VariantStats() {
this(-1f, -1f, null, null, -1, -1);
}
Expand All @@ -48,7 +51,7 @@ public VariantStats(org.opencb.biodata.models.variant.avro.VariantStats other) {

public VariantStats(float maf, float mgf, String mafAllele, String mgfGenotype,
int missingAlleleCount, int missingGenotypeCount) {
impl = new org.opencb.biodata.models.variant.avro.VariantStats(-1, -1, -1, -1F, -1F,
impl = new org.opencb.biodata.models.variant.avro.VariantStats("", -1, -1, -1, -1F, -1F,
missingAlleleCount, missingGenotypeCount,
new HashMap<>(), new HashMap<>(),
new HashMap<>(), new HashMap<>(), -1F,
Expand All @@ -59,6 +62,14 @@ public org.opencb.biodata.models.variant.avro.VariantStats getImpl() {
return impl;
}

/**
 * Stores the cohort identifier in the underlying Avro model.
 *
 * @param cohortId cohort identifier
 * @return this instance, so that calls can be chained
 */
public VariantStats setCohortId(String cohortId) {
impl.setCohortId(cohortId);
return this;
}

/**
 * @return the cohort identifier held by the underlying Avro model
 */
public String getCohortId() {
return impl.getCohortId();
}

public Integer getAlleleCount() {
return impl.getAlleleCount();
Expand Down Expand Up @@ -182,27 +193,27 @@ public VariantStats addGenotype(Genotype g, int addedCount, boolean normalize) {
return this;
}

public java.util.Map<java.lang.String,java.lang.Integer> getFilterCount() {
public Map<String, Integer> getFilterCount() {
return impl.getFilterCount();
}

public void setFilterCount(java.util.Map<java.lang.String,java.lang.Integer> value) {
public void setFilterCount(Map<String, Integer> value) {
this.impl.setFilterCount(value);
}

public java.util.Map<java.lang.String,java.lang.Float> getFilterFreq() {
public Map<String, Float> getFilterFreq() {
return impl.getFilterFreq();
}

public void setFilterFreq(java.util.Map<java.lang.String,java.lang.Float> value) {
public void setFilterFreq(Map<String, Float> value) {
this.impl.setFilterFreq(value);
}

public java.lang.Float getQualityAvg() {
public Float getQualityAvg() {
return impl.getQualityAvg();
}

public void setQualityAvg(java.lang.Float value) {
public void setQualityAvg(Float value) {
this.impl.setQualityAvg(value);
}

Expand Down
7 changes: 6 additions & 1 deletion biodata-models/src/main/proto/protobuf/opencb/variant.proto
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ enum VariantType {
}

message VariantStats {
/**
* Cohort identifier
**/
string cohortId = 17;

/**
* Total number of alleles in called genotypeCounters. Does not include missing alleles
**/
Expand Down Expand Up @@ -164,7 +169,7 @@ message StudyEntry {
repeated AlternateCoordinate secondaryAlternates = 3;
repeated string sampleDataKeys = 4;
repeated SampleEntry samples = 5;
map<string, VariantStats> stats = 6;
repeated VariantStats stats = 6;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,23 +190,22 @@ private void addCohortStatsMultiInfoField(StudyEntry studyEntry, Map<String, Obj
if (studyEntry.getStats() == null || studyEntry.getStats().size() == 0) {
return;
}
for (Map.Entry<String, VariantStats> entry : studyEntry.getStats().entrySet()) {
String cohortName = entry.getKey();
VariantStats stats = entry.getValue();
for (VariantStats stats : studyEntry.getStats()) {
String cohortId = stats.getCohortId();

if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) {
cohortName = "";
if (cohortId.equals(StudyEntry.DEFAULT_COHORT)) {
cohortId = "";
int an = stats.getAltAlleleCount();
if (an >= 0) {
attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an));
attributes.put(cohortId + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an));
}
if (stats.getAltAlleleCount() >= 0) {
attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount()));
attributes.put(cohortId + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount()));
}
} else {
cohortName = cohortName + "_";
cohortId = cohortId + "_";
}
attributes.put(cohortName + VCFConstants.ALLELE_FREQUENCY_KEY, DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq()));
attributes.put(cohortId + VCFConstants.ALLELE_FREQUENCY_KEY, DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq()));
}
}

Expand All @@ -217,20 +216,18 @@ private void addCohortStatsSingleInfoField(StudyEntry studyEntry, Map<String, Ob
}

List<String> statsList = new ArrayList<>();
for (Map.Entry<String, VariantStats> entry : studyEntry.getStats().entrySet()) {
String cohortName = entry.getKey();
VariantStats stats = entry.getValue();

// if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) {
for (VariantStats stats : studyEntry.getStats()) {
String cohortId = stats.getCohortId();
// if (cohortId.equals(StudyEntry.DEFAULT_COHORT)) {
// int an = stats.getAltAlleleCount() + stats.getRefAlleleCount();
// if (an >= 0) {
// attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an));
// attributes.put(cohortId + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an));
// }
// if (stats.getAltAlleleCount() >= 0) {
// attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount()));
// attributes.put(cohortId + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount()));
// }
// }
statsList.add(cohortName + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq()));
statsList.add(cohortId + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq()));
}
// set cohort stats attributes
attributes.put(STATS_INFO_KEY, String.join(FIELD_SEPARATOR, statsList));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
*/
public class VariantStatsToPopulationFrequencyConverter {

/**
 * Converts the given stats into a population frequency, taking the
 * population name from the cohort id carried by {@code stats}.
 *
 * Delegates to {@code convert(study, population, stats, reference, alternate)}.
 *
 * @param study     study name
 * @param stats     stats to convert; its cohort id is used as population
 * @param reference reference allele
 * @param alternate alternate allele
 * @return the result of the five-argument {@code convert}
 */
public PopulationFrequency convert(String study, VariantStats stats, String reference, String alternate) {
    final String population = stats.getCohortId();
    return convert(study, population, stats, reference, alternate);
}

public PopulationFrequency convert(String study, String population, VariantStats stats, String reference, String alternate) {
Float refHomGenotypeFreq = 0F;
Float hetGenotypeFreq = 0F;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ public String createHeader() {
return sb.toString();
}

public String convert(Variant variant, Map<String, VariantStats> statsMap, VariantAnnotation annotation) {
public String convert(Variant variant, List<VariantStats> statsList, VariantAnnotation annotation) {
sb.setLength(0);

sb.append(variant.getChromosome());
Expand Down Expand Up @@ -163,7 +163,7 @@ public String convert(Variant variant, Map<String, VariantStats> statsMap, Varia
}
for (Iterator<String> cohortIterator = cohorts.iterator(); cohortIterator.hasNext(); ) {
String cohort = cohortIterator.next();
VariantStats stats = statsMap.get(cohort);
VariantStats stats = statsList.stream().filter(s -> s.getCohortId().equals(cohort)).findFirst().orElse(null);
if (stats == null) {
for (int i = 0; i < STATS_COLUMNS.size() - 1; i++) {
sb.append(".\t");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,9 @@ private VariantProto.StudyEntry.Builder toProto(StudyEntry study) {
studyBuilder.addSamples(VariantProto.SampleEntry.newBuilder().addAllData(sampleEntry.getData()));
}

for (Map.Entry<String, VariantStats> entry : study.getStats().entrySet()) {
VariantStats stats = entry.getValue();
for (VariantStats stats : study.getStats()) {
VariantProto.VariantStats.Builder variantStats = toProto(stats);
studyBuilder.putStats(entry.getKey(), variantStats.build());
studyBuilder.addStats(variantStats.build());
}
for (FileEntry fileEntry : study.getFiles()) {
VariantProto.FileEntry.Builder fileBuilder = toProto(fileEntry);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,14 @@ public VariantProto.Variant convert(VariantContext variantContext, VariantProto.
* being as these value will not be getting from HTSJDK
* currently.
*/
Map<String, VariantProto.VariantStats> stats = new HashMap<>();
List<VariantProto.VariantStats> stats = new ArrayList<>();
//TODO: Call to the Variant Aggregated Stats Parser
// stats.put(
// "2",
// setVariantStatsParams(
// setVariantHardyWeinbergStatsParams(),
// variantContext));
variantSourceEntry.putAllStats(stats);
variantSourceEntry.addAllStats(stats);

studies.add(variantSourceEntry.build());
variant.addAllStudies(studies);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,20 +165,18 @@ private void addCohortStats(VariantProto.StudyEntry studyEntry, Map<String, Obje
}

List<String> statsList = new ArrayList<>();
for (Map.Entry<String, VariantProto.VariantStats> entry : studyEntry.getStats().entrySet()) {
String cohortName = entry.getKey();
VariantProto.VariantStats stats = entry.getValue();

// if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) {
for (VariantProto.VariantStats stats : studyEntry.getStatsList()) {
String cohortId = stats.getCohortId();
// if (cohortId.equals(StudyEntry.DEFAULT_COHORT)) {
// int an = stats.getAlleleCount();
// if (an >= 0) {
// attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an));
// attributes.put(cohortId + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an));
// }
// if (stats.getAltAlleleCount() >= 0) {
// attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount()));
// attributes.put(cohortId + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount()));
// }
// }
statsList.add(cohortName + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq()));
statsList.add(cohortId + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq()));
}
// set cohort stats attributes
attributes.put(STATS_INFO_KEY, String.join(FIELD_SEPARATOR, statsList));
Expand Down
Loading

0 comments on commit c97f2bf

Please sign in to comment.