CELLRANGER AND PIPELINE directories

moving CELLRANGER and PIPELINE directories from STATS to STAGING directory
mskcc · Apr 9, 2024 · 189c214 · 189c214
1 parent e22ea68
commit 189c214
Show file tree

Hide file tree

Showing 5 changed files with 9 additions and 9 deletions.
diff --git a/demux_run_dag.py b/demux_run_dag.py
@@ -193,7 +193,7 @@ def stats(ds, **kwargs):
                 scripts.cellranger.launch_cellranger_by_sample_sheet(sample_sheet, sequencer_and_run_prefix)
 
                 # add a DONE file once all the 10X pipeline jobs have finished; -K makes bsub wait until the job finishes
-                cmd = 'bsub -K -J wait_stats_done_for_{} -w \"ended(create_json___{}*)\" touch /igo/stats/CELLRANGER/{}/DONE'.format(sequencer_and_run_prefix, sequencer_and_run_prefix, sequencer_and_run_prefix)
+                cmd = 'bsub -K -J wait_stats_done_for_{} -w \"ended(create_json___{}*)\" touch /igo/staging/CELLRANGER/{}/DONE'.format(sequencer_and_run_prefix, sequencer_and_run_prefix, sequencer_and_run_prefix)
                 print(cmd)
                 subprocess.run(cmd, shell=True)
 

diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
@@ -1,5 +1,5 @@
 # work folder
-STATS_AREA = "/igo/stats/CELLRANGER/"
+STATS_AREA = "/igo/staging/CELLRANGER/"
 
 # config info 
 ACCESS = 0o775

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
@@ -66,7 +66,7 @@ def find_fastq_file(sample_ID_list):
 DRIVE_LOCATION = "/igo/work/igo/Cellranger_Multi_Config/"
 ORIGIN_DRIVE_LOCATION = "/rtssdc/mohibullahlab/LIMS/LIMS_cellranger_multi/"
 BAMTOFASTQ = "/igo/work/nabors/tools/cellranger-7.0.0/lib/bin/bamtofastq"
-STATS_AREA = "/igo/stats/PIPELINE/"
+STATS_AREA = "/igo/staging/PIPELINE/"
 # endpoint for cellranger multi
 ENDPOINT= "https://igolims.mskcc.org:8443/LimsRest/getTenxSampleInfo?requestId="
 
@@ -157,15 +157,15 @@ def new_config_and_generate_cmd(self):
     # get reads number and sub sample cell number
     def update_info_from_step1(self, fb_project_id):
         # get total reads number for gene expression library
-        reads_file = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/metrics_summary.csv".format(fb_project_id, self.name, list(self.samples.keys())[0])
+        reads_file = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/metrics_summary.csv".format(fb_project_id, self.name, list(self.samples.keys())[0])
         summary_metrix = pd.read_csv(reads_file)
         ind = summary_metrix.index[(summary_metrix["Category"] == "Library") & (summary_metrix["Metric Name"] == "Number of reads") & (summary_metrix["Library Type"] == "Gene Expression") & (summary_metrix["Grouped By"] == "Physical library ID")].tolist()
         reads_number = summary_metrix.iloc[ind[0]]["Metric Value"]
         reads_number = int(reads_number.replace(",", "")) + 10000
         self.ge_reads_number = reads_number
 
         # update sub sample cell number
-        cell_file = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/multi/multiplexing_analysis/tag_calls_summary.csv".format(fb_project_id, self.name)
+        cell_file = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/multi/multiplexing_analysis/tag_calls_summary.csv".format(fb_project_id, self.name)
         cell_matrix = pd.read_csv(cell_file)
         for key, value in self.samples.items():
             if value in cell_matrix["Category"].values:
@@ -286,7 +286,7 @@ def cellragner_ch_vdj(config, file_name, ch_project_ID, project_ID, ge):
     # create bam2fastq cmd per sub sample
     for key in config.sub_sample_info.keys():
         name2 = ge + "_" + key
-        source_bam = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/count/sample_alignments.bam".format(ch_project_ID, ge, key)
+        source_bam = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/count/sample_alignments.bam".format(ch_project_ID, ge, key)
         destination_bam = "{}Project_{}/bamtofastq/{}".format(CONFIG_AREA, project_ID, name2)
         cmd = "bsub -K -J {}_bamtofastq -o {}_bamtofastq.out -n 8 -M 8 {} --reads-per-fastq={} {} {}".format(name2, name2, BAMTOFASTQ, config.ge_reads_number, source_bam, destination_bam)
         print(cmd)

diff --git a/scripts/deliver_cellranger.py b/scripts/deliver_cellranger.py
@@ -4,7 +4,7 @@
 
 # given a project ID, look through the cellranger folder and return a list of paths of the folders that need to be copied
 
-CELLRANGER_DIR = '/igo/stats/CELLRANGER/'
+CELLRANGER_DIR = '/igo/staging/CELLRANGER/'
 # structure: '/igo/staging/CELLRANGER/RUNNAME/PROJECTID/SAMPLEFOLDER'
 
 # find all the cellranger result given project ID, return a list of address

diff --git a/scripts/deliver_pipeline.py b/scripts/deliver_pipeline.py
@@ -7,7 +7,7 @@
 - Re-run setaccess.py (on a separate server)
 
 At time of delivery for all 10X projects:
-- Search under folder /igo/stats/CELLRANGER/ for any possible cell ranger output
+- Search under folder /igo/staging/CELLRANGER/ for any possible cell ranger output
 - If existing, then copy to delivery/pipeline/cellranger directory
 """
 
@@ -64,7 +64,7 @@ def deliver_pipeline_output(project, pi, recipe):
     # if recipe is CRISPRSeq or GeoMx, go to the pipeline folder and look for output; if it exists, then copy it
     # add cellranger multi output for featurebarcoding project here for now
     elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding":
-        pipeline_path = "/igo/stats/PIPELINE/Project_" + project
+        pipeline_path = "/igo/staging/PIPELINE/Project_" + project
         if not os.path.exists(pipeline_path):
             print("No pipeline result available")
         else: