diff --git a/tests/quicktests/run_pipeline_w_different_input_formats.ipynb b/tests/quicktests/run_pipeline_w_different_input_formats.ipynb index 2b7c102..e9c8026 100644 --- a/tests/quicktests/run_pipeline_w_different_input_formats.ipynb +++ b/tests/quicktests/run_pipeline_w_different_input_formats.ipynb @@ -2,19 +2,39 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../../test_data/system_tests/quicktests does not yet exist\n", + "quicktests.zip successfully downloaded to ../../test_data/system_tests/quicktests.zip\n", + "quicktests.zip successfully unzipped\n" + ] + }, + { + "data": { + "text/plain": [ + "'../../test_data/system_tests/quicktests'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import alphabase\n", "import alphabase.test_data_downloader\n", "\n", - "alphabase.test_data_downloader.DataShareDownloader(url=\"https://datashare.biochem.mpg.de/s/VJm70w0p2P86tE1\", output_dir=\"../../test_data/system_tests\").download()" + "alphabase.test_data_downloader.DataShareDownloader(url=\"https://datashare.biochem.mpg.de/s/VJm70w0p2P86tE1\", output_dir=\"test_data/system_tests\").download()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -31,9 +51,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-16 15:35:28,476 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:28,557 - directlfq.utils - INFO - using input type diann_precursor_ms1_and_ms2\n", + "2024-10-16 15:35:28,678 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:29,872 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:29,873 - directlfq.protein_intensity_estimation - INFO - 46 lfq-groups total\n", + "2024-10-16 15:35:29,889 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:30,778 - directlfq.lfq_manager - INFO - Could not add additional columns to protein table, printing without additional columns.\n", + "2024-10-16 15:35:30,779 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:30,826 - directlfq.lfq_manager - INFO - Analysis finished!\n" + ] + } + ], "source": [ "import directlfq.lfq_manager as lfq_manager\n", "diann_quicktest_file_parquet = f\"{quicktest_folder_diann}/shortened_input.parquet\"\n", @@ -43,9 +79,61 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-16 15:35:30,839 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:31,025 - directlfq.utils - INFO - using input type diann_precursors\n", + "2024-10-16 15:35:31,077 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:31,078 - directlfq.normalization - INFO - to few values for normalization without missing values. Including missing values\n", + "2024-10-16 15:35:31,082 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:31,082 - directlfq.protein_intensity_estimation - INFO - 46 lfq-groups total\n", + "2024-10-16 15:35:31,098 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:31,623 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:31,634 - directlfq.lfq_manager - INFO - Analysis finished!\n", + "2024-10-16 15:35:31,635 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:31,703 - directlfq.utils - INFO - using input type diann_peptide_based_on_precursor_ms1_and_ms2\n", + "2024-10-16 15:35:31,822 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:31,827 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:31,827 - directlfq.protein_intensity_estimation - INFO - 840 lfq-groups total\n", + "2024-10-16 15:35:32,152 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n", + "2024-10-16 15:35:32,173 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:32,237 - directlfq.protein_intensity_estimation - INFO - lfq-object 100\n", + "2024-10-16 15:35:32,298 - directlfq.protein_intensity_estimation - INFO - lfq-object 200\n", + "2024-10-16 15:35:32,443 - directlfq.protein_intensity_estimation - INFO - lfq-object 300\n", + "2024-10-16 15:35:32,492 - directlfq.protein_intensity_estimation - INFO - lfq-object 400\n", + "2024-10-16 15:35:32,554 - directlfq.protein_intensity_estimation - INFO - lfq-object 500\n", + "2024-10-16 15:35:32,609 - directlfq.protein_intensity_estimation - INFO - lfq-object 600\n", + "2024-10-16 15:35:32,710 - directlfq.protein_intensity_estimation - INFO - lfq-object 700\n", + "2024-10-16 15:35:32,771 - directlfq.protein_intensity_estimation - INFO - lfq-object 800\n", + "2024-10-16 15:35:32,976 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:33,004 - directlfq.lfq_manager - INFO - Analysis finished!\n", + "2024-10-16 15:35:33,006 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:33,077 - directlfq.utils - INFO - using input type diann_precursor_ms1_and_ms2\n", + "2024-10-16 15:35:33,179 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:33,184 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:33,185 - directlfq.protein_intensity_estimation - INFO - 46 lfq-groups total\n", + "2024-10-16 15:35:33,280 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n", + "2024-10-16 15:35:33,289 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:33,610 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:33,630 - directlfq.lfq_manager - INFO - Analysis finished!\n", + "2024-10-16 15:35:33,632 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:33,702 - directlfq.utils - INFO - using input type diann_precursor_ms1_and_ms2\n", + "2024-10-16 15:35:33,780 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:33,786 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:33,787 - directlfq.protein_intensity_estimation - INFO - 46 lfq-groups total\n", + "2024-10-16 15:35:33,885 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n", + "2024-10-16 15:35:33,895 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:34,274 - directlfq.lfq_manager - INFO - Could not add additional columns to protein table, printing without additional columns.\n", + "2024-10-16 15:35:34,275 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:34,334 - directlfq.lfq_manager - INFO - Analysis finished!\n" + ] + } + ], "source": [ "# run diann\n", "import directlfq.lfq_manager as lfq_manager\n", @@ -64,9 +152,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-16 15:35:34,343 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:34,488 - directlfq.utils - INFO - using input type maxquant_peptides_leading_razor_protein\n", + "2024-10-16 15:35:34,533 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:34,535 - directlfq.normalization - INFO - to few values for normalization without missing values. Including missing values\n", + "2024-10-16 15:35:34,541 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:34,542 - directlfq.protein_intensity_estimation - INFO - 49 lfq-groups total\n", + "2024-10-16 15:35:34,558 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:35,042 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:35,054 - directlfq.lfq_manager - INFO - Analysis finished!\n" + ] + } + ], "source": [ "#run mq peptides\n", "\n", @@ -82,9 +186,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-16 15:35:35,061 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:35,240 - directlfq.utils - INFO - using input type maxquant_evidence_leading_razor_protein\n", + "2024-10-16 15:35:35,324 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:35,326 - directlfq.normalization - INFO - to few values for normalization without missing values. Including missing values\n", + "2024-10-16 15:35:35,332 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:35,332 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n", + "2024-10-16 15:35:35,424 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n", + "2024-10-16 15:35:35,433 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:35,723 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:35,738 - directlfq.lfq_manager - INFO - Analysis finished!\n", + "2024-10-16 15:35:35,739 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:35,875 - directlfq.utils - INFO - using input type maxquant_evidence_leading_razor_protein\n", + "2024-10-16 15:35:35,960 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:35,961 - root - INFO - Normalizing only selected proteins\n", + "2024-10-16 15:35:35,968 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:35,969 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n", + "2024-10-16 15:35:35,986 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:36,558 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:36,573 - directlfq.lfq_manager - INFO - Analysis finished!\n" + ] + } + ], "source": [ "#run mq evidence\n", "\n", @@ -100,9 +230,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-16 15:35:36,580 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:36,757 - directlfq.utils - INFO - using input type spectronaut_fragion_isotopes\n", + "2024-10-16 15:35:37,122 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:37,123 - root - INFO - Normalizing only selected proteins\n", + "2024-10-16 15:35:37,134 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:37,135 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n", + "2024-10-16 15:35:37,155 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:38,484 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:38,510 - directlfq.lfq_manager - INFO - Analysis finished!\n", + "2024-10-16 15:35:38,511 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n", + "2024-10-16 15:35:38,648 - directlfq.utils - INFO - using input type spectronaut_fragion_isotopes\n", + "2024-10-16 15:35:39,003 - directlfq.lfq_manager - INFO - Performing sample normalization.\n", + "2024-10-16 15:35:39,011 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n", + "2024-10-16 15:35:39,012 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n", + "2024-10-16 15:35:39,103 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n", + "2024-10-16 15:35:39,111 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n", + "2024-10-16 15:35:39,539 - directlfq.lfq_manager - INFO - Writing results files.\n", + "2024-10-16 15:35:39,566 - directlfq.lfq_manager - INFO - Analysis finished!\n" + ] + } + ], "source": [ "# run spectronaut\n", "import directlfq.lfq_manager as lfq_manager\n", @@ -113,6 +268,20 @@ " lfq_manager.run_lfq(spectronaut_quicktest_file, selected_proteins_file=spectronaut_protein_subset, num_cores=1, compile_normalized_ion_table=True)\n", " lfq_manager.run_lfq(spectronaut_quicktest_file)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {