Skip to content

Commit

Permalink
lib: the command 'build' supports additional network files, #58
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Nov 30, 2020
1 parent e0a363f commit abba9da
Show file tree
Hide file tree
Showing 6 changed files with 179 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ public class BuildCommandOptions {
@Parameter(names = {"-o", "--output"}, description = "Output directory where to save the CSV files to import", required = true, arity = 1)
public String output;

@Parameter(names = {"--add-network-file"}, description = "JSON file containing a BioNetDB network", arity = 1)
public List<String> networkFiles;

@Parameter(names = {"--exclude"}, description = "Exclude information separated by comma, e.g.:'XREF_DBNAME:Reactome Database ID Release 63'", arity = 1)
public List<String> exclude;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package org.opencb.bionetdb.app.cli.admin.executors;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import htsjdk.samtools.util.StringUtil;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.bionetdb.app.cli.CommandExecutor;
Expand All @@ -11,12 +14,15 @@
import org.opencb.bionetdb.core.io.SbmlParser;
import org.opencb.bionetdb.core.io.SifParser;
import org.opencb.bionetdb.core.models.network.Network;
import org.opencb.bionetdb.core.models.network.Node;
import org.opencb.bionetdb.core.models.network.Relation;
import org.opencb.bionetdb.lib.BioNetDbManager;
import org.opencb.bionetdb.lib.utils.Builder;
import org.opencb.commons.utils.FileUtils;
import org.opencb.commons.utils.ListUtils;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Path;
Expand Down Expand Up @@ -47,7 +53,7 @@ public void execute() {
FileUtils.checkDirectory(outputPath);

BioNetDbManager manager = new BioNetDbManager(configuration);
manager.build(inputPath, outputPath, buildCommandOptions.exclude);
manager.build(inputPath, outputPath, buildCommandOptions.networkFiles, buildCommandOptions.exclude);
} catch (IOException | BioNetDBException e) {
e.printStackTrace();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
package org.opencb.bionetdb.app;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.Test;
import org.opencb.bionetdb.core.models.network.Network;
import org.opencb.bionetdb.core.models.network.Node;
import org.opencb.bionetdb.core.models.network.Relation;

import static org.junit.Assert.*;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

public class BioNetDBMainTest {

Expand All @@ -12,4 +20,58 @@ public void createCsvClinicalAnalysis() {
String cmdLine = "~/appl/bionetdb/build/bin/bionetdb.sh create-csv -i " + caPath + "/input/ -o csv/ --clinical-analysis";
}

/**
 * Writes two small sample networks as JSON files, {@code /tmp/network1.json} and
 * {@code /tmp/network2.json}.
 *
 * A single UID counter is shared by both networks, so every node and relation across
 * the two files receives a distinct UID. Both networks contain a DRUG node named
 * "COCA" with a null id.
 */
private void createNetworks() {
    ObjectMapper jsonMapper = new ObjectMapper();
    jsonMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
    jsonMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);

    // Shared counter: incremented once per node/relation, in creation order
    long nextUid = 0;

    // Network #1: one gene, one drug, one gene-drug relation
    Network firstNetwork = new Network("net1", "net1", "Network #1");
    firstNetwork.setNodes(new ArrayList<>());
    firstNetwork.setRelations(new ArrayList<>());

    Node firstGene = new Node(nextUid++, "ENSG00000078808", "SDF4", Node.Type.GENE);
    firstNetwork.getNodes().add(firstGene);
    Node firstDrug = new Node(nextUid++, null, "COCA", Node.Type.DRUG);
    firstNetwork.getNodes().add(firstDrug);
    firstNetwork.getRelations().add(new Relation(nextUid++, "rel1", firstGene.getUid(), Node.Type.GENE,
            firstDrug.getUid(), Node.Type.DRUG, Relation.Type.GENE__DRUG));

    writeNetworkJson(jsonMapper, firstNetwork, "/tmp/network1.json");

    // Network #2: one gene, two drugs, two gene-drug relations
    Network secondNetwork = new Network("net2", "net2", "Network #2");
    secondNetwork.setNodes(new ArrayList<>());
    secondNetwork.setRelations(new ArrayList<>());

    Node secondGene = new Node(nextUid++, "ENSG00000066666", "SDF666", Node.Type.GENE);
    secondNetwork.getNodes().add(secondGene);
    Node secondDrug = new Node(nextUid++, null, "COCA", Node.Type.DRUG);
    secondNetwork.getNodes().add(secondDrug);
    Node thirdDrug = new Node(nextUid++, "ALCOHOL", "ALCOHOL", Node.Type.DRUG);
    secondNetwork.getNodes().add(thirdDrug);
    secondNetwork.getRelations().add(new Relation(nextUid++, "rel2", secondGene.getUid(), Node.Type.GENE,
            secondDrug.getUid(), Node.Type.DRUG, Relation.Type.GENE__DRUG));
    secondNetwork.getRelations().add(new Relation(nextUid++, "rel3", secondGene.getUid(), Node.Type.GENE,
            thirdDrug.getUid(), Node.Type.DRUG, Relation.Type.GENE__DRUG));

    writeNetworkJson(jsonMapper, secondNetwork, "/tmp/network2.json");
}

/**
 * Serializes a network to the given file path; an I/O failure is reported by printing
 * the stack trace and is not propagated.
 */
private void writeNetworkJson(ObjectMapper jsonMapper, Network network, String targetPath) {
    try {
        jsonMapper.writer().writeValue(new File(targetPath), network);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ public static boolean isPhysicalEntity(Node node) {
}
}

/**
 * Creates a node with a UID of -1, delegating to the five-argument constructor with
 * null for every other argument.
 *
 * NOTE(review): presumably provided so that reflective tools (e.g. JSON mappers) can
 * instantiate a node without arguments; confirm against callers.
 */
public Node() {
this(-1, null, null, null, null);
}

/**
 * Creates a node with the given UID, delegating to the five-argument constructor with
 * null for the four remaining arguments.
 *
 * @param uid UID forwarded to the five-argument constructor
 */
public Node(long uid) {
this(uid, null, null, null, null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,14 @@ public void download(Path outDir) throws IOException {
//-------------------------------------------------------------------------

/**
 * Convenience overload of {@link #build(Path, Path, List, List)} that passes
 * {@code null} as the list of additional network files.
 *
 * @param inputPath  forwarded unchanged to the four-argument overload
 * @param outputPath forwarded unchanged to the four-argument overload
 * @param exclude    forwarded unchanged to the four-argument overload
 * @throws IOException propagated from the four-argument overload
 */
public void build(Path inputPath, Path outputPath, List<String> exclude) throws IOException {
build(inputPath, outputPath, null, exclude);
}

/**
 * Creates a {@link Builder} for the given paths, using the filters obtained from
 * {@code parseFilters(exclude)}, and runs it.
 *
 * @param inputPath    input path handed to the builder
 * @param outputPath   output path handed to the builder
 * @param networkFiles additional network files; registered on the builder only when
 *                     the list is neither null nor empty
 * @param exclude      exclusion expressions handed to {@code parseFilters}
 * @throws IOException declared by this method; propagated from the builder
 */
public void build(Path inputPath, Path outputPath, List<String> networkFiles, List<String> exclude) throws IOException {
    Builder csvBuilder = new Builder(inputPath, outputPath, parseFilters(exclude));

    boolean hasNetworkFiles = networkFiles != null && !networkFiles.isEmpty();
    if (hasNetworkFiles) {
        csvBuilder.setAdditionalNeworkFiles(networkFiles);
    }

    csvBuilder.build();
}

Expand Down
140 changes: 95 additions & 45 deletions bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/utils/Builder.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.opencb.biodata.models.core.Xref;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.bionetdb.core.models.network.Network;
import org.opencb.bionetdb.core.models.network.Node;
import org.opencb.bionetdb.core.models.network.Relation;
import org.opencb.bionetdb.lib.db.Neo4jBioPaxBuilder;
Expand All @@ -26,6 +27,7 @@
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;

public class Builder {
Expand All @@ -45,6 +47,8 @@ public class Builder {

public static final Object CLINICAL_VARIANT_FILENAME = "clinical_variants.full.json";

private List<String> additionalNeworkFiles;

private CsvInfo csv;
private Path inputPath;
private Path outputPath;
Expand All @@ -53,20 +57,30 @@ public class Builder {

protected static Logger logger;

/**
 * Creates a builder that reads from {@code inputPath} and writes to {@code outputPath}.
 *
 * The constructor stores the three arguments, creates the {@link CsvInfo} helper for the
 * same input/output paths, configures the Jackson mapper and initialises the logger.
 *
 * @param inputPath  path stored as the builder input and handed to {@link CsvInfo}
 * @param outputPath path stored as the builder output and handed to {@link CsvInfo}
 * @param filters    filters stored on the builder
 */
public Builder(Path inputPath, Path outputPath, Map<String, Set<String>> filters) {
    this.inputPath = inputPath;
    this.outputPath = outputPath;
    this.filters = filters;

    // Prepare CSV object
    csv = new CsvInfo(inputPath, outputPath);

    // Prepare jackson writer (object to string)
    mapper = new ObjectMapper();
    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
    mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);

    // Name the logger after the class itself: Class.toString() would prefix the name
    // with "class ". The field is static, so it is assigned without "this.".
    logger = LoggerFactory.getLogger(this.getClass());
}

public void build() throws IOException {
long start;

// Open CSV files
csv.openCSVFiles();

long ensemblGeneBuildTime = 0;
long refSeqGeneBuildTime = 0;
long proteinBuildTime = 0;
long genePanelBuildTime = 0;
long bioPaxBuildTime = 0;
long clinvarBuildTime = 0;


// Check input files
File ensemblGeneFile = new File(inputPath + "/" + ENSEMBL_GENE_FILENAME);
if (!ensemblGeneFile.exists()) {
Expand Down Expand Up @@ -105,31 +119,27 @@ public void build() throws IOException {
logger.info("Processing Ensembl genes...");
start = System.currentTimeMillis();
buildGenes(ensemblGeneFile.toPath());
ensemblGeneBuildTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Ensembl gene processing done in {} s", ensemblGeneBuildTime);
logger.info("Ensembl gene processing done in {} s", (System.currentTimeMillis() - start) / 1000);
}

if (refSeqGeneFile.exists()) {
logger.info("Processing RefSeq genes...");
start = System.currentTimeMillis();
buildGenes(refSeqGeneFile.toPath());
refSeqGeneBuildTime = (System.currentTimeMillis() - start) / 1000;
logger.info("RefSeq gene processing done in {} s", refSeqGeneBuildTime);
logger.info("RefSeq gene processing done in {} s", (System.currentTimeMillis() - start) / 1000);
}

// Processing proteins
logger.info("Processing proteins...");
start = System.currentTimeMillis();
buildProteins(proteinFile.toPath());
proteinBuildTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Protein processing done in {} s", proteinBuildTime);
logger.info("Protein processing done in {} s", (System.currentTimeMillis() - start) / 1000);

// Gene panels support
logger.info("Processing gene panels...");
start = System.currentTimeMillis();
buildGenePanels(panelFile.toPath());
genePanelBuildTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Gene panel processing done in {} s", genePanelBuildTime);
logger.info("Gene panel processing done in {} s", (System.currentTimeMillis() - start) / 1000);


// Processing BioPAX file
Expand All @@ -138,44 +148,27 @@ public void build() throws IOException {
start = System.currentTimeMillis();
bioPAXImporter.build(networkFile.toPath());
biopaxProcessing.post();
bioPaxBuildTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Processing BioPax/reactome file done in {} s", (System.currentTimeMillis() - start) / 1000);


// Processing clinical variants
logger.info("Processing clinical variants...");
start = System.currentTimeMillis();
buildClinicalVariants(clinicalVariantFile.toPath());
clinvarBuildTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Processing clinical variants done in {} s", clinvarBuildTime);
logger.info("Processing clinical variants done in {} s", (System.currentTimeMillis() - start) / 1000);

// Processing additional networks (skipped when no additional files were registered)
if (CollectionUtils.isNotEmpty(additionalNeworkFiles)) {
    for (String additionalNeworkFile : additionalNeworkFiles) {
        logger.info("Processing additional network file {}...", additionalNeworkFile);
        start = System.currentTimeMillis();
        processAdditionalNetwork(additionalNeworkFile);
        // Report the file that was actually processed, not the clinical variants step
        logger.info("Processing additional network file {} done in {} s", additionalNeworkFile,
                (System.currentTimeMillis() - start) / 1000);
    }
}

// Close CSV files
csv.close();

logger.info("Ensembl gene build time: {} s", ensemblGeneBuildTime);
logger.info("RefSeq gene build time: {} s", refSeqGeneBuildTime);
logger.info("Protein build time: {} s", proteinBuildTime);
logger.info("Gene panel build time: {} s", genePanelBuildTime);
logger.info("BioPAX build time: {} s", bioPaxBuildTime);
logger.info("Clinical variant build time: {} s", clinvarBuildTime);
}


/**
 * Creates a builder that reads from {@code inputPath} and writes to {@code outputPath}.
 *
 * The constructor stores the three arguments, creates the {@link CsvInfo} helper for the
 * same input/output paths, configures the Jackson mapper and initialises the logger.
 *
 * @param inputPath  path stored as the builder input and handed to {@link CsvInfo}
 * @param outputPath path stored as the builder output and handed to {@link CsvInfo}
 * @param filters    filters stored on the builder
 */
public Builder(Path inputPath, Path outputPath, Map<String, Set<String>> filters) {
    this.inputPath = inputPath;
    this.outputPath = outputPath;
    this.filters = filters;

    // Prepare CSV object
    csv = new CsvInfo(inputPath, outputPath);

    // Prepare jackson writer (object to string)
    mapper = new ObjectMapper();
    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
    mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);

    // Name the logger after the class itself: Class.toString() would prefix the name
    // with "class ". The field is static, so it is assigned without "this.".
    logger = LoggerFactory.getLogger(this.getClass());
}

//-------------------------------------------------------------------------
Expand Down Expand Up @@ -1018,6 +1011,54 @@ private Node createVariantNode(Variant variant, Long varUid) {

return varNode;
}

/**
 * Reads a network from a JSON file and appends its nodes and relations to the CSV output.
 *
 * Nodes are handled first. Each node is looked up with {@code csv.getLong(id, type)};
 * a null result is treated as "not yet present", in which case the node receives a fresh
 * UID from {@code csv.getAndIncUid()} and is written with the CSV writer registered under
 * its type. Otherwise the returned UID is reused. In both cases the UID found in the file
 * is mapped to the UID used in the CSV output.
 * NOTE(review): the exact lookup semantics of {@code csv.getLong} (including its behaviour
 * for a null node id) are not visible here; confirm against CsvInfo.
 *
 * Relations are handled second: their endpoints are translated through that UID map and
 * one relation line is written with the CSV writer registered under the relation type.
 *
 * Entries that cannot be written (missing type, no CSV writer for the type, or a relation
 * endpoint that is not among the network nodes) are skipped with a warning instead of
 * failing with a NullPointerException.
 *
 * @param additionalNeworkFilename path of the JSON file containing the network
 * @throws IOException if the file cannot be read or parsed
 */
private void processAdditionalNetwork(String additionalNeworkFilename) throws IOException {
    // Check file
    File addNetworkFile = Paths.get(additionalNeworkFilename).toFile();
    if (!addNetworkFile.exists()) {
        logger.warn("Additional network file {} does not exist", additionalNeworkFilename);
        return;
    }

    ObjectMapper objectMapper = new ObjectMapper();
    Network network = objectMapper.readValue(addNetworkFile, Network.class);
    if (network == null) {
        // A file containing only the JSON literal null is mapped to a null object
        logger.warn("Additional network file {} does not contain a network", additionalNeworkFilename);
        return;
    }

    // Maps the UIDs used inside the file to the UIDs used in the CSV output
    Map<Long, Long> nodeUidMap = new HashMap<>();

    // First, nodes
    if (CollectionUtils.isNotEmpty(network.getNodes())) {
        for (Node node : network.getNodes()) {
            if (node == null || node.getType() == null) {
                logger.warn("Skipping node without type in additional network file {}", additionalNeworkFilename);
                continue;
            }

            Long fileUid = node.getUid();
            Long existingUid = csv.getLong(node.getId(), node.getType().name());
            if (existingUid != null) {
                // Node already exists: relations must point to the UID already in use
                nodeUidMap.put(fileUid, existingUid);
                continue;
            }

            String nodeType = node.getType().toString();
            if (!csv.getCsvWriters().containsKey(nodeType)) {
                logger.warn("Skipping node {}: no CSV writer for node type {}", node.getId(), nodeType);
                continue;
            }

            // Node does not exist yet: assign a new UID and append the node to its CSV file
            long newUid = csv.getAndIncUid();
            nodeUidMap.put(fileUid, newUid);
            node.setUid(newUid);
            csv.getCsvWriters().get(nodeType).println(csv.nodeLine(node));
        }
    }

    // Second, relations
    if (CollectionUtils.isNotEmpty(network.getRelations())) {
        for (Relation relation : network.getRelations()) {
            if (relation == null || relation.getType() == null) {
                logger.warn("Skipping relation without type in additional network file {}", additionalNeworkFilename);
                continue;
            }

            String relationType = relation.getType().toString();
            Long origUid = nodeUidMap.get(relation.getOrigUid());
            Long destUid = nodeUidMap.get(relation.getDestUid());
            if (origUid == null || destUid == null) {
                logger.warn("Skipping relation of type {}: origin {} or destination {} is not a node of the network",
                        relationType, relation.getOrigUid(), relation.getDestUid());
                continue;
            }
            if (!csv.getCsvWriters().containsKey(relationType)) {
                logger.warn("Skipping relation: no CSV writer for relation type {}", relationType);
                continue;
            }

            relation.setUid(csv.getAndIncUid());
            csv.getCsvWriters().get(relationType).println(csv.relationLine(origUid, destUid));
        }
    }
}


//
// public Long processClinicalAnalysis(ClinicalAnalysis clinicalAnalysis) throws IOException {
// Node clinicalAnalysisNode = null;
Expand Down Expand Up @@ -1710,4 +1751,13 @@ private void createVariantObjectNode(Variant variant, Node variantNode) throws I
pw = csv.getCsvWriters().get(Relation.Type.VARIANT__VARIANT_OBJECT.toString());
pw.println(variantNode.getUid() + CsvInfo.SEPARATOR + variantObjectNode.getUid());
}

/**
 * Returns the list of additional network files registered on this builder.
 *
 * @return the stored list, as set through the corresponding setter (may be null if
 *         it was never set)
 */
public List<String> getAdditionalNeworkFiles() {
return additionalNeworkFiles;
}

/**
 * Stores the list of additional network files on this builder.
 *
 * @param additionalNeworkFiles list to store; kept by reference, not copied
 * @return this builder, so that calls can be chained
 */
public Builder setAdditionalNeworkFiles(List<String> additionalNeworkFiles) {
this.additionalNeworkFiles = additionalNeworkFiles;
return this;
}
}

0 comments on commit abba9da

Please sign in to comment.