From ac901b611b82129c71a19d5381aa4b1432b5a658 Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Thu, 10 Oct 2024 18:06:20 +0200 Subject: [PATCH 1/6] make if statement more explicit --- directlfq/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/directlfq/utils.py b/directlfq/utils.py index c5d3a30..0e5022f 100644 --- a/directlfq/utils.py +++ b/directlfq/utils.py @@ -257,7 +257,7 @@ def add_columns_to_lfq_results_table(lfq_results_df, input_file, columns_to_add) all_columns = filter_columns_to_existing_columns(all_columns, input_file) lfq_results_df = lfq_results_df[[x is not None for x in lfq_results_df[config.PROTEIN_ID]]] - if len(all_columns) == 1: #if there are no columns to add, return the original dataframe + if (len(columns_to_add) == 0) and (len(standard_columns_for_input_type)==0) : #if there are no columns to add, return the original dataframe return lfq_results_df input_df = pd.read_csv(input_file, sep="\t", usecols=all_columns).drop_duplicates(subset=protein_column_input_table) From ca533615725d65736e2f78c0dd4e0d1cd2e35f92 Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Thu, 10 Oct 2024 18:08:30 +0200 Subject: [PATCH 2/6] change comment --- directlfq/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/directlfq/utils.py b/directlfq/utils.py index 0e5022f..3a5973b 100644 --- a/directlfq/utils.py +++ b/directlfq/utils.py @@ -257,7 +257,7 @@ def add_columns_to_lfq_results_table(lfq_results_df, input_file, columns_to_add) all_columns = filter_columns_to_existing_columns(all_columns, input_file) lfq_results_df = lfq_results_df[[x is not None for x in lfq_results_df[config.PROTEIN_ID]]] - if (len(columns_to_add) == 0) and (len(standard_columns_for_input_type)==0) : #if there are no columns to add, return the original dataframe + if (len(columns_to_add) == 0) and (len(standard_columns_for_input_type)==0) : #if there are no columns to add and no additional standard columns defined return the original dataframe return lfq_results_df input_df = pd.read_csv(input_file, sep="\t", usecols=all_columns).drop_duplicates(subset=protein_column_input_table) From 86fbc5df66784419621eedf5e705f192f8af8c1a Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:16:48 +0200 Subject: [PATCH 3/6] remove alphadia config from this PR --- directlfq/configs/intable_config.yaml | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/directlfq/configs/intable_config.yaml b/directlfq/configs/intable_config.yaml index 5a2e8a9..e97b013 100644 --- a/directlfq/configs/intable_config.yaml +++ b/directlfq/configs/intable_config.yaml @@ -1,31 +1,6 @@ --- #this file determines the parameters used to convert long format tables as e.g. produced by Spectronaut or DIA-NN into a wide table format -alphadia_precursor_protein: - format: longtable - sample_ID: run - quant_ID: - precursor: weighted_ms1_intensity - protein_cols: - - pg_master - ion_hierarchy: - precursor: - order: [SEQ, MOD, CHARGE] - mapping: - SEQ: - - sequence - MOD: - - mods - CHARGE: - - charge - use_iontree: True - ml_level: CHARGE - filters: - protein_qval: - param: pg_qval - comparator: "<=" - value: 0.01 - alphapept_peptides: format: longtable From 915cc494abd6bbb3ec25044bcd9c9b390bd8c76c Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:28:06 +0200 Subject: [PATCH 4/6] remove redundant text --- directlfq/utils.py | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/directlfq/utils.py b/directlfq/utils.py index 3a5973b..457eaf2 100644 --- a/directlfq/utils.py +++ b/directlfq/utils.py @@ -1,35 +1,5 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbdev_nbs/04_utils.ipynb. - -# %% auto 0 -__all__ = ['get_samples_used_from_samplemap_file', 'get_samples_used_from_samplemap_df', 'get_all_samples_from_samplemap_df', - 'get_samplenames_from_input_df', 'filter_df_to_minrep', 'get_condpairname', 'get_quality_score_column', - 'make_dir_w_existcheck', 'get_results_plot_dir_condpair', 'get_middle_elem', 'get_nonna_array', - 'get_non_nas_from_pd_df', 'get_ionints_from_pd_df', 'invert_dictionary', 'get_z_from_p_empirical', - 'count_fraction_outliers_from_expected_fc', 'create_or_replace_folder', - 'add_mq_protein_group_ids_if_applicable_and_obtain_annotated_file', - 'load_input_file_and_de_duplicate_if_evidence', 'create_id_to_protein_df', - 'determine_id_column_from_input_df', 'annotate_mq_df', 'remove_ids_not_occurring_in_mq_df', - 'save_annotated_mq_df', 'add_columns_to_lfq_results_table', 'clean_input_filename_if_necessary', - 'get_protein_column_input_table', 'get_standard_columns_for_input_type', - 'filter_columns_to_existing_columns', 'show_diff', 'write_chunk_to_file', 'index_and_log_transform_input_df', - 'remove_allnan_rows_input_df', 'get_relevant_columns', 'get_relevant_columns_config_dict', - 'get_quant_ids_from_config_dict', 'get_sample_ids_from_config_dict', 'get_channel_ids_from_config_dict', - 'load_config', 'get_type2relevant_cols', 'filter_input', 'merge_protein_and_ion_cols', - 'merge_protein_cols_and_ion_dict', 'get_quantitative_columns', 'get_ionname_columns', - 'adapt_headers_on_extended_df', 'split_extend_df', 'add_merged_ionnames', - 'reformat_and_write_longtable_according_to_config', 'adapt_subtable', 'process_with_dask', - 'reshape_input_df', 'sort_and_add_columns', 'extend_sample_allcolumns_for_plexdia_case', - 'adapt_input_df_columns_in_case_of_plexDIA', 'extend_sampleID_column_for_plexDIA_case', - 'set_mtraq_reduced_ion_column_into_dataframe', 'remove_mtraq_modifications_from_ion_ids', 'is_plexDIA_table', - 'parse_channel_from_peptide_column', 'merge_sample_id_and_channels', 'merge_channel_and_sample_string', - 'reformat_and_write_wideformat_table', 'check_for_processed_runs_in_results_folder', 'import_data', - 'reformat_and_save_input_file', 'add_ion_protein_headers_if_applicable', 'get_input_type_and_config_dict', - 'get_original_file_from_aq_reformat', 'import_config_dict', 'load_samplemap', 'prepare_loaded_tables', - 'LongTableReformater', 'AcquisitionTableHandler', 'AcquisitionTableInfo', 'AcquisitionTableHeaders', - 'AcquisitionTableOutputPaths', 'AcquisitionTableReformater', 'AcquisitionTableHeaderFilter', - 'merge_acquisition_df_parameter_df'] - -# %% ../nbdev_nbs/04_utils.ipynb 2 + + import os import pathlib if "__file__" in globals():#only run in the translated python file, as __file__ is not defined with ipython From 43166f1072aa3abeb6da212e3caa92b71da21cfc Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Mon, 14 Oct 2024 17:22:25 +0200 Subject: [PATCH 5/6] remove redundant comment --- directlfq/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/directlfq/utils.py b/directlfq/utils.py index 457eaf2..9d151f3 100644 --- a/directlfq/utils.py +++ b/directlfq/utils.py @@ -227,7 +227,7 @@ def add_columns_to_lfq_results_table(lfq_results_df, input_file, columns_to_add) all_columns = filter_columns_to_existing_columns(all_columns, input_file) lfq_results_df = lfq_results_df[[x is not None for x in lfq_results_df[config.PROTEIN_ID]]] - if (len(columns_to_add) == 0) and (len(standard_columns_for_input_type)==0) : #if there are no columns to add and no additional standard columns defined return the original dataframe + if (len(columns_to_add) == 0) and (len(standard_columns_for_input_type)==0) : return lfq_results_df input_df = pd.read_csv(input_file, sep="\t", usecols=all_columns).drop_duplicates(subset=protein_column_input_table) From 0e4c55d5f1536ea7760a3759b8dd2e2408977249 Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Mon, 14 Oct 2024 17:50:35 +0200 Subject: [PATCH 6/6] clarify if statement --- directlfq/dashboard_parts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/directlfq/dashboard_parts.py b/directlfq/dashboard_parts.py index 683c5e5..5264b95 100644 --- a/directlfq/dashboard_parts.py +++ b/directlfq/dashboard_parts.py @@ -327,7 +327,7 @@ def run_pipeline(self, *args): file_of_proteins_for_normalization = None if self.protein_subset_for_normalization_file.value == '' else self.protein_subset_for_normalization_file.value num_cores = None if self.num_cores_vals.value == -1 else self.num_cores_vals.value yaml_filt_dict_path = None if self.yaml_filt_dict_path.value == '' else self.yaml_filt_dict_path.value - if type(additional_headers) == str: #the user will enter a string with semicolon separated values + if additional_headers is not None: #the user will enter a string with semicolon separated values additional_headers = additional_headers.split(';') lfq_manager.run_lfq(input_file = input_file, input_type_to_use = input_type_to_use, maximum_number_of_quadratic_ions_to_use_per_protein = 10,