From c62759629306c81279f8046bfac4b305d3ae83a6 Mon Sep 17 00:00:00 2001 From: Julie Sullivan Date: Fri, 10 May 2024 08:33:44 +0100 Subject: [PATCH] fix truncation of results in phased queries where checkAminoAcidChange=TRUE --- .../AnnotationBasedPhasedQueryManager.java | 76 +++++++++---------- .../core/variant/PhasedQueryManager.java | 2 +- 2 files changed, 37 insertions(+), 41 deletions(-) diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/AnnotationBasedPhasedQueryManager.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/AnnotationBasedPhasedQueryManager.java index cfed5fd5a5..e544856abc 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/AnnotationBasedPhasedQueryManager.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/AnnotationBasedPhasedQueryManager.java @@ -34,57 +34,53 @@ public List> run(List variantList, CellBaseDataResult variantCellBaseDataResult = variantCellBaseDataResultList.get(j); if (variantCellBaseDataResult != null && variantCellBaseDataResult.getResults() != null && !variantCellBaseDataResult.getResults().isEmpty()) { - // Variants are normalised and data from each of the sources (COSMIC, ClinVar, DOCM, etc.) integrated - // during the build process. Only one variant record should be present per assembly. - if (variantCellBaseDataResult.getResults().size() > 1) { - logger.warn("More than one result found either the clinical_variants or variation collection" - + "for variant " + variantCellBaseDataResult.getId() + ". Arbitrarily selecting the first one. " - + "Please, check."); - } - - Variant matchedVariant = variantCellBaseDataResult.getResults().get(0); Variant queryVariant = variantList.get(j); - List annotationObjectList = getAnnotationObjectList(matchedVariant); - // Phase is stored at the evidence entry/population frequency level, e.g.: there might be two ClinVar - // RCVs for one variant: - // - In the first the variant is submitted as part of an MNV and therefore it is phased - // - In the second one the variant is submitted singleton and therefore it is not phased - // both RCVs will be integrated in the same Variant object after decomposition as separate EvidenceEntry - // objects, each with its corresponding phase information - int i = 0; - while (i < annotationObjectList.size()) { - T annotationObject = annotationObjectList.get(i); - List databaseHaplotype = getHaplotype(annotationObject, matchedVariant); - // Haplotype empty if EvidenceEntry/PopulationFrequency is not phased - if (databaseHaplotype.isEmpty()) { - i++; - } else { - // Sample Cellbase Match - // ------------------------------- - // SNV MNV X - // MNV MNV ✓ - // Missing genotypes in the input list will be considered as wildcards towards finding a - // matching haplotype (MNV) in the input list, since otherwise the clinical variant would not be - // returned - if (sameHaplotype(queryVariant, variantList, databaseHaplotype)) { + boolean queryVariantHasTraitAssociations = false; + for (Variant matchedVariant: variantCellBaseDataResult.getResults()) { + List annotationObjectList = getAnnotationObjectList(matchedVariant); + // Phase is stored at the evidence entry/population frequency level, e.g.: there might be two ClinVar + // RCVs for one variant: + // - In the first the variant is submitted as part of an MNV and therefore it is phased + // - In the second one the variant is submitted singleton and therefore it is not phased + // both RCVs will be integrated in the same Variant object after decomposition as separate EvidenceEntry + // objects, each with its corresponding phase information + int i = 0; + while (i < annotationObjectList.size()) { + T annotationObject = annotationObjectList.get(i); + List databaseHaplotype = getHaplotype(annotationObject, matchedVariant); + // Haplotype empty if EvidenceEntry/PopulationFrequency is not phased + if (databaseHaplotype.isEmpty()) { i++; } else { - annotationObjectList.remove(i); + boolean queryVariantInDBHaplotype = getVariant(databaseHaplotype, queryVariant) != null; + // Sample Cellbase Match + // ------------------------------- + // SNV MNV X + // MNV MNV ✓ + // Missing genotypes in the input list will be considered as wildcards towards finding a + // matching haplotype (MNV) in the input list, since otherwise the clinical variant would not be + // returned + if (queryVariantInDBHaplotype && sameHaplotype(queryVariant, variantList, databaseHaplotype)) { + i++; + } else { + annotationObjectList.remove(i); + } + // Sample Cellbase Match + // ------------------------------- + // SNV SNV ✓ + // MNV SNV ✓ } - // Sample Cellbase Match - // ------------------------------- - // SNV SNV ✓ - // MNV SNV ✓ + } + if (!annotationObjectList.isEmpty()) { + queryVariantHasTraitAssociations = true; } } - // Remove whole variant from the query result object if ended up without any evidence entry - if (annotationObjectList.isEmpty()) { + if (!queryVariantHasTraitAssociations) { reset(variantCellBaseDataResult); } } } - return variantCellBaseDataResultList; } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java index 395d823f54..b07a4327d2 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java @@ -87,7 +87,7 @@ public static boolean isMissing(String field) { return StringUtils.isBlank(field) || field.equals(MISSING_VALUE); } - private Variant getVariant(List variantList, Variant variant) { + protected Variant getVariant(List variantList, Variant variant) { for (Variant variant1 : variantList) { // TODO: simple chr, start, ref, alt matching here - shall implement something fancier if (variant.getChromosome().equals(variant1.getChromosome())