From 4f37d13ad290e5e9b540b8483ef0eb7d935db79a Mon Sep 17 00:00:00 2001 From: Luiz Gadelha Date: Wed, 23 Oct 2024 16:30:37 +0200 Subject: [PATCH] 2023-2024 updates --- _bibliography/papers.bib | 172 +++++++++++++++++++++++++++++++++++++++ _pages/about.md | 2 +- 2 files changed, 173 insertions(+), 1 deletion(-) diff --git a/_bibliography/papers.bib b/_bibliography/papers.bib index 73c3ebee47a9..4a90eda1a222 100644 --- a/_bibliography/papers.bib +++ b/_bibliography/papers.bib @@ -1,3 +1,175 @@ +@article{costaHIHISIVDatabaseGene2024, + abbr = {BMC Bioinf.}, + selected = {true}, + title = {{HIHISIV}: a database of gene expression in {HIV} and {SIV} host immune response}, + volume = {25}, + copyright = {All rights reserved}, + issn = {1471-2105}, + shorttitle = {{HIHISIV}}, + url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-024-05740-7}, + doi = {10.1186/s12859-024-05740-7}, + abstract = {Abstract + + In the battle of the host against lentiviral pathogenesis, the immune response is crucial. However, several questions remain unanswered about the interaction with different viruses and their influence on disease progression. The simian immunodeficiency virus (SIV) infecting nonhuman primates (NHP) is widely used as a model for the study of the human immunodeficiency virus (HIV) both because they are evolutionarily linked and because they share physiological and anatomical similarities that are largely explored to understand the disease progression. The HIHISIV database was developed to support researchers to integrate and evaluate the large number of transcriptional data associated with the presence/absence of the pathogen (SIV or HIV) and the host response (NHP and human). The datasets are composed of microarray and RNA-Seq gene expression data that were selected, curated, analyzed, enriched, and stored in a relational database. Six query templates comprise the main data analysis functions and the resulting information can be downloaded. 
The HIHISIV database, available at https://hihisiv.github.io, provides accurate resources for browsing and visualizing results and for more robust analyses of pre-existing data in transcriptome repositories.}, + language = {en}, + number = {1}, + urldate = {2024-03-23}, + journal = {BMC Bioinformatics}, + author = {Costa, Raquel L. and Gadelha, Luiz and D’arc, Mirela and Ribeiro-Alves, Marcelo and Robertson, David L. and Schwartz, Jean-Marc and Soares, Marcelo A. and Porto, Fábio}, + month = mar, + year = {2024}, + pages = {125}, +} + +@article{zulfiqarImplementationFAIRPractices2024a, + abbr = {Metabolites}, + selected = {true}, + title = {Implementation of {FAIR} {Practices} in {Computational} {Metabolomics} {Workflows}—{A} {Case} {Study}}, + volume = {14}, + copyright = {https://creativecommons.org/licenses/by/4.0/}, + issn = {2218-1989}, + url = {https://www.mdpi.com/2218-1989/14/2/118}, + doi = {10.3390/metabo14020118}, + abstract = {Scientific workflows facilitate the automation of data analysis tasks by integrating various software and tools executed in a particular order. To enable transparency and reusability in workflows, it is essential to implement the FAIR principles. Here, we describe our experiences implementing the FAIR principles for metabolomics workflows using the Metabolome Annotation Workflow (MAW) as a case study. MAW is specified using the Common Workflow Language (CWL), allowing for the subsequent execution of the workflow on different workflow engines. MAW is registered using a CWL description on WorkflowHub. During the submission process on WorkflowHub, a CWL description is used for packaging MAW using the Workflow RO-Crate profile, which includes metadata in Bioschemas. 
Researchers can use this narrative discussion as a guideline to commence using FAIR practices for their bioinformatics or cheminformatics workflows while incorporating necessary amendments specific to their research area.}, + language = {en}, + number = {2}, + urldate = {2024-04-16}, + journal = {Metabolites}, + author = {Zulfiqar, Mahnoor and Crusoe, Michael R. and König-Ries, Birgitta and Steinbeck, Christoph and Peters, Kristian and Gadelha, Luiz}, + month = feb, + year = {2024}, + pages = {118}, +} + +@inproceedings{krugerGHGAArchiveSelected2024a, + abbr = {IWSG}, + selected = {true}, + address = {Toulouse, France}, + title = {The {GHGA} {Archive}: {Selected} {Updates}}, + copyright = {Creative Commons Attribution 4.0 International}, + shorttitle = {The {GHGA} {Archive}}, + url = {https://zenodo.org/doi/10.5281/zenodo.13863209}, + doi = {10.5281/ZENODO.13863209}, + abstract = {The German Human Genome-Phenome Archive (GHGA) is a cross-institutional project and German National Research Data Infrastructure (NFDI) consortium for the development of a scientific gateway for secure omics data sharing based on FAIR principles to act as the German node of the federated European Genome Archive (fEGA), participating also in the European Genomics Data Infrastructure (GDI) project. Started in 2020, the GHGA is reaching its first major release milestone: having released an online science gateway for metadata browsing termed the Metadata Catalog, the next step involves the development of a portal that extends the functionality already available to also include authentication, data download (via an external tool), and data access request management: the GHGA Archive. 
We present here a brief overview of the new technologies and functionalities that will be available for the Archive version of the GHGA science gateway.}, + urldate = {2024-10-01}, + booktitle = {16th {International} {Workshop} on {Science} {Gateways} ({IWSG2024})}, + author = {Krüger, Jens and Orellana Figueroa, Jordy Didier and Sezer, Zehra Hazal and Zajac, Thomas Jakob and Breuer, Kersten and Rocha Gadelha Junior, Luiz Manoel and Zwerschke, Christoph and Sürün, Bilge and Nahnsen, Sven}, + month = sep, + year = {2024}, + note = {Publisher: Zenodo}, + keywords = {FAIR, human genome data, NFDI, omics, science gateway, sensitive data, FEGA}, +} +@article{zulfiqarMAWReproducibleMetabolome2023a, + abbr = {J. Cheminf.}, + selected = {true}, + title = {{MAW}: the reproducible {Metabolome} {Annotation} {Workflow} for untargeted tandem mass spectrometry}, + volume = {15}, + copyright = {All rights reserved}, + issn = {1758-2946}, + shorttitle = {{MAW}}, + url = {https://jcheminf.biomedcentral.com/articles/10.1186/s13321-023-00695-y}, + doi = {10.1186/s13321-023-00695-y}, + abstract = {Abstract + + Mapping the chemical space of compounds to chemical structures remains a challenge in metabolomics. Despite the advancements in untargeted liquid chromatography-mass spectrometry (LC–MS) to achieve a high-throughput profile of metabolites from complex biological resources, only a small fraction of these metabolites can be annotated with confidence. Many novel computational methods and tools have been developed to enable chemical structure annotation to known and unknown compounds such as in silico generated spectra and molecular networking. 
Here, we present an automated and reproducible Metabolome Annotation Workflow (MAW) for untargeted metabolomics data to further facilitate and automate the complex annotation by combining tandem mass spectrometry (MS2) input data pre-processing, spectral and compound database matching with computational classification, and in silico annotation. MAW takes the LC-MS2 spectra as input and generates a list of putative candidates from spectral and compound databases. The databases are integrated via the R package Spectra and the metabolite annotation tool SIRIUS as part of the R segment of the workflow (MAW-R). The final candidate selection is performed using the cheminformatics tool RDKit in the Python segment (MAW-Py). Furthermore, each feature is assigned a chemical structure and can be imported to a chemical structure similarity network. MAW is following the FAIR (Findable, Accessible, Interoperable, Reusable) principles and has been made available as the docker images, maw-r and maw-py. The source code and documentation are available on GitHub (https://github.com/zmahnoor14/MAW). The performance of MAW is evaluated on two case studies. MAW can improve candidate ranking by integrating spectral databases with annotation tools like SIRIUS which contributes to an efficient candidate selection procedure. The results from MAW are also reproducible and traceable, compliant with the FAIR guidelines. Taken together, MAW could greatly facilitate automated metabolite characterization in diverse fields such as clinical metabolomics and natural product discovery.}, + language = {en}, + number = {1}, + urldate = {2023-07-27}, + journal = {Journal of Cheminformatics}, + author = {Zulfiqar, Mahnoor and Gadelha, Luiz and Steinbeck, Christoph and Sorokina, Maria and Peters, Kristian}, + month = mar, + year = {2023}, + pages = {32}, + file = {Full Text:/Users/lgadelha/Zotero/storage/N4BBPKXX/Zulfiqar et al. 
- 2023 - MAW the reproducible Metabolome Annotation Workfl.pdf:application/pdf}, +} + +@article{gadelhaGermanHumanGenomePhenome2023, + abbr = {CoRDI}, + title = {German {Human} {Genome}-{Phenome} {Archive} in an {International} {Context}: {Toward} a {Federated} {Infrastructure} for {Managing} and {Analyzing} {Genomics} and {Health} {Data}}, + volume = {1}, + copyright = {All rights reserved}, + issn = {2941-296X}, + shorttitle = {German {Human} {Genome}-{Phenome} {Archive} in an {International} {Context}}, + url = {https://www.tib-op.org/ojs/index.php/CoRDI/article/view/394}, + doi = {10.52825/cordi.v1i.394}, + abstract = {With increasing numbers of human omics data, there is an urgent need for adequate resources for data sharing while also standardizing and harmonizing data processing. As part of the National Research Data Infrastructure (NFDI), the German Human Genome-Phenome Archive (GHGA) strives to connect the data from German researchers and their institutions to the international landscape of genome research. 
To achieve this, GHGA partners up with international activities such as the federated European Genome-Phenome Archive (EGA) [1] and the recently funded European Genomic Data Infrastructure (GDI) project to enable participation in international studies while ensuring at the same time the proper protection of the sensitive patient data included in GHGA.}, + urldate = {2023-09-10}, + journal = {Proceedings of the Conference on Research Data Infrastructure}, + author = {Gadelha, Luiz and Eufinger, Jan}, + month = sep, + year = {2023}, +} + +@inproceedings{gadelhaFrameworkIntegrativeFAIR2022, + abbr = {eScience}, + selected = {true}, + address = {Salt Lake City, UT, USA}, + title = {Toward a {Framework} for {Integrative}, {FAIR}, and {Reproducible} {Management} of {Data} on the {Dynamic} {Balance} of {Microbial} {Communities}}, + copyright = {All rights reserved}, + isbn = {978-1-66546-124-5}, + url = {https://ieeexplore.ieee.org/document/9973522/}, + doi = {10.1109/eScience55777.2022.00080}, + urldate = {2022-12-21}, + booktitle = {2022 {IEEE} 18th {International} {Conference} on e-{Science} (e-{Science})}, + publisher = {IEEE}, + author = {Gadelha, Luiz and Hohmuth, Martin and Zulfiqar, Mahnoor and Schone, David and Samuel, Sheeba and Sorokina, Maria and Steinbeck, Christoph and König-Ries, Birgitta}, + month = oct, + year = {2022}, + pages = {443--449}, +} + +@incollection{ocanaParslRNASeqEfficientScalable2022, + abbr = {CARLA}, + address = {Cham}, + title = {{ParslRNA}-{Seq}: {An} {Efficient} and {Scalable} {RNAseq} {Analysis} {Workflow} for {Studies} of {Differentiated} {Gene} {Expression}}, + volume = {1660}, + copyright = {All rights reserved}, + isbn = {978-3-031-23820-8 978-3-031-23821-5}, + shorttitle = {{ParslRNA}-{Seq}}, + url = {https://link.springer.com/10.1007/978-3-031-23821-5_13}, + language = {en}, + urldate = {2022-12-21}, + booktitle = {High {Performance} {Computing}}, + publisher = {Springer International Publishing}, + author = {Ocaña, Kary and 
Cruz, Lucas and Coelho, Micaella and Terra, Rafael and Galheigo, Marcelo and Carneiro, Andre and Carvalho, Diego and Gadelha, Luiz and Boito, Francieli and Navaux, Philippe and Osthoff, Carla}, + editor = {Navaux, Philippe and Barrios H., Carlos J. and Osthoff, Carla and Guerrero, Ginés}, + year = {2022}, + doi = {10.1007/978-3-031-23821-5_13}, + note = {Series Title: Communications in Computer and Information Science}, + pages = {174--189}, +} + +@article{cruzParallelPerformanceProfiling2022, + abbr = {Computación y Sistemas}, + title = {Parallel {Performance} and {I}/{O} {Profiling} of {HPC} {RNA}-{Seq} {Applications}}, + volume = {26}, + copyright = {All rights reserved}, + issn = {2007-9737, 1405-5546}, + url = {https://cys.cic.ipn.mx/ojs/index.php/CyS/article/view/4437}, + doi = {10.13053/cys-26-4-4437}, + number = {4}, + urldate = {2023-01-05}, + journal = {Computación y Sistemas}, + author = {Cruz, Lucas and Coelho, Micaella and Galheigo, Marcelo and Carneiro, Andre and Carvalho, Diego and Gadelha, Luiz and Boito, Francieli and Navaux, Philippe and Osthoff, Carla and Ocaña, Kary}, + month = dec, + year = {2022}, +} + + @article{Peterson2022, abbr = {Biodiv. Inf.}, author = {Peterson, A. 
Townsend and Aiello-Lammens, Matthew and Amatulli, Giuseppe and Anderson, Robert and Cobos, Marlon and Diniz-Filho, Jos{\'{e}} Alexandre and Escobar, Luis and Feng, Xiao and Franklin, Janet and Gadelha, Luiz and Georges, Damien and Gu{\'{e}}guen, M and Gueta, Tomer and Ingenloff, Kate and Jarvie, Scott and Jim{\'{e}}nez, Laur and Karger, Dirk and Kass, Jamie and Kearney, Michael and Loyola, Rafael and Machado-Stredel, Fernando and Mart{\'{i}}nez-Meyer, Enrique and Merow, Cory and Mondelli, Maria Luiza and Mortara, Sara and Muscarella, Robert and Myers, Corinne and Naimi, Babak and Noesgaard, Daniel and Ondo, Ian and Osorio-Olvera, Luis and Owens, Hannah and Pearson, Richard and Pinilla-Buitrago, Gonzalo and S{\'{a}}nchez-Tapia, Andrea and Saupe, Erin and Thuiller, Wilfried and Varela, Sara and Warren, Dan and Wieczorek, John and Yates, Katherine and Zhu, Gengping and Zuquim, Gabriela and Zurell, Damaris}, diff --git a/_pages/about.md b/_pages/about.md index 2c3cf7760863..39c74151a259 100644 --- a/_pages/about.md +++ b/_pages/about.md @@ -19,4 +19,4 @@ selected_papers: true # includes a list of papers marked as "selected={true}" social: true # includes social icons at the bottom of the page --- -I'm a project coordinator in the German Human Phenome-Genome Archive ([GHGA](https://www.ghga.de)) at the German Cancer Research Center ([DKFZ](https://www.dkfz.de/en/index.html)) in Germany working on the European Genomic Data Infrastructure ([GDI](https://gdi.onemilliongenomes.eu)). I'm currently on leave as a researcher at the National Laboratory for Scientific Computing ([LNCC](https://www.lncc.br)) in Brazil. I received my D.Sc. degree in Computer and Systems Engineering from the Federal University of Rio de Janeiro ([UFRJ](https://www.cos.ufrj.br/index.php/en/)), Brazil. I've been involved in the research and development of parallel and distributed scientific workflow management systems and scientific databases. 
I've participated in research projects in the bioinformatics and biodiversity application areas. My main research interests are scientific workflows, research data management, and parallel computing. I'm a member of the Brazilian Computer Society (SBC) and the Association for Computing Machinery (ACM). +I'm a project coordinator in the German Human Genome-Phenome Archive ([GHGA](https://www.ghga.de)) at the German Cancer Research Center ([DKFZ](https://www.dkfz.de/en/index.html)) in Germany working on the European Genomic Data Infrastructure ([GDI](https://gdi.onemilliongenomes.eu)). I received my D.Sc. degree in Computer and Systems Engineering from the Federal University of Rio de Janeiro ([UFRJ](https://www.cos.ufrj.br/index.php/en/)), Brazil. I've been involved in the research and development of parallel and distributed scientific workflow management systems and scientific databases. I've participated in research projects in the bioinformatics and biodiversity application areas. My main research interests are scientific workflows, research data management, and parallel computing. I'm currently on leave as a researcher at the National Laboratory for Scientific Computing ([LNCC](https://www.lncc.br)) in Brazil. I'm a member of the Brazilian Computer Society (SBC) and the Association for Computing Machinery (ACM).