From 189c214e2d55942ca5d907b5f0d62df37e79abc1 Mon Sep 17 00:00:00 2001 From: darrelln32 Date: Tue, 9 Apr 2024 14:17:54 -0400 Subject: [PATCH] CELLRANGER AND PIPELINE directories moving CELLRANGER and PIPELINE directories from STATS to STAGING directory --- demux_run_dag.py | 2 +- scripts/cellranger_config.py | 2 +- scripts/cellranger_multi.py | 8 ++++---- scripts/deliver_cellranger.py | 2 +- scripts/deliver_pipeline.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/demux_run_dag.py b/demux_run_dag.py index 7a743f4..12e3733 100644 --- a/demux_run_dag.py +++ b/demux_run_dag.py @@ -193,7 +193,7 @@ def stats(ds, **kwargs): scripts.cellranger.launch_cellranger_by_sample_sheet(sample_sheet, sequencer_and_run_prefix) # add DONE file when all the 10X pipeline finished, -K to wait until finish - cmd = 'bsub -K -J wait_stats_done_for_{} -w \"ended(create_json___{}*)\" touch /igo/stats/CELLRANGER/{}/DONE'.format(sequencer_and_run_prefix, sequencer_and_run_prefix, sequencer_and_run_prefix) + cmd = 'bsub -K -J wait_stats_done_for_{} -w \"ended(create_json___{}*)\" touch /igo/staging/CELLRANGER/{}/DONE'.format(sequencer_and_run_prefix, sequencer_and_run_prefix, sequencer_and_run_prefix) print(cmd) subprocess.run(cmd, shell=True) diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py index a45d3c1..0d4a590 100644 --- a/scripts/cellranger_config.py +++ b/scripts/cellranger_config.py @@ -1,5 +1,5 @@ # work folder -STATS_AREA = "/igo/stats/CELLRANGER/" +STATS_AREA = "/igo/staging/CELLRANGER/" # config info ACCESS = 0o775 diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py index 583f6a3..3bead5c 100644 --- a/scripts/cellranger_multi.py +++ b/scripts/cellranger_multi.py @@ -66,7 +66,7 @@ def find_fastq_file(sample_ID_list): DRIVE_LOCATION = "/igo/work/igo/Cellranger_Multi_Config/" ORIGIN_DRIVE_LOCATION = "/rtssdc/mohibullahlab/LIMS/LIMS_cellranger_multi/" BAMTOFASTQ = "/igo/work/nabors/tools/cellranger-7.0.0/lib/bin/bamtofastq" -STATS_AREA = "/igo/stats/PIPELINE/" +STATS_AREA = "/igo/staging/PIPELINE/" # endpoint for cellranger multi ENDPOINT= "https://igolims.mskcc.org:8443/LimsRest/getTenxSampleInfo?requestId=" @@ -157,7 +157,7 @@ def new_config_and_generate_cmd(self): # get reads number and sub sample cell number def update_info_from_step1(self, fb_project_id): # get total reads number for gene expression library - reads_file = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/metrics_summary.csv".format(fb_project_id, self.name, list(self.samples.keys())[0]) + reads_file = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/metrics_summary.csv".format(fb_project_id, self.name, list(self.samples.keys())[0]) summary_metrix = pd.read_csv(reads_file) ind = summary_metrix.index[(summary_metrix["Category"] == "Library") & (summary_metrix["Metric Name"] == "Number of reads") & (summary_metrix["Library Type"] == "Gene Expression") & (summary_metrix["Grouped By"] == "Physical library ID")].tolist() reads_number = summary_metrix.iloc[ind[0]]["Metric Value"] @@ -165,7 +165,7 @@ def update_info_from_step1(self, fb_project_id): self.ge_reads_number = reads_number # update sub sample cell number - cell_file = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/multi/multiplexing_analysis/tag_calls_summary.csv".format(fb_project_id, self.name) + cell_file = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/multi/multiplexing_analysis/tag_calls_summary.csv".format(fb_project_id, self.name) cell_matrix = pd.read_csv(cell_file) for key, value in self.samples.items(): if value in cell_matrix["Category"].values: @@ -286,7 +286,7 @@ def cellragner_ch_vdj(config, file_name, ch_project_ID, project_ID, ge): # create bam2fastq cmd per sub sample for key in config.sub_sample_info.keys(): name2 = ge + "_" + key - source_bam = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/count/sample_alignments.bam".format(ch_project_ID, ge, key) + source_bam = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/count/sample_alignments.bam".format(ch_project_ID, ge, key) destination_bam = "{}Project_{}/bamtofastq/{}".format(CONFIG_AREA, project_ID, name2) cmd = "bsub -K -J {}_bamtofastq -o {}_bamtofastq.out -n 8 -M 8 {} --reads-per-fastq={} {} {}".format(name2, name2, BAMTOFASTQ, config.ge_reads_number, source_bam, destination_bam) print(cmd) diff --git a/scripts/deliver_cellranger.py b/scripts/deliver_cellranger.py index 13d80cd..f1c946a 100644 --- a/scripts/deliver_cellranger.py +++ b/scripts/deliver_cellranger.py @@ -4,7 +4,7 @@ # given project ID, look through cellranger folder and return a list of path of folders need to copy -CELLRANGER_DIR = '/igo/stats/CELLRANGER/' +CELLRANGER_DIR = '/igo/staging/CELLRANGER/' # structure '/igo/stats/CELLRANGER/RUNNAME/PROJECTID/SAMPLEFOLDER # find all the cellranger result given project ID, return a list of address diff --git a/scripts/deliver_pipeline.py b/scripts/deliver_pipeline.py index d72b158..26e8af8 100644 --- a/scripts/deliver_pipeline.py +++ b/scripts/deliver_pipeline.py @@ -7,7 +7,7 @@ - Re-run setaccess.py (on a separate server) At time of delivery for all 10X projects: -- Search under folder /igo/stats/CELLRANGER/ for any possible cell ranger output +- Search under folder /igo/staging/CELLRANGER/ for any possible cell ranger output - If existing, then copy to delivery/pipeline/cellranger directory """ @@ -64,7 +64,7 @@ def deliver_pipeline_output(project, pi, recipe): # if recipe is CRISPRSeq or GeoMx, go to pipeline folder and find output, if exists the copy # add cellranger multi output for featurebarcoding project here for now elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding": - pipeline_path = "/igo/stats/PIPELINE/Project_" + project + pipeline_path = "/igo/staging/PIPELINE/Project_" + project if not os.path.exists(pipeline_path): print("No pipeline result available") else: