Skip to content

Commit

Permalink
Merge pull request #83 from mskcc/UpdateRecipe
Browse files Browse the repository at this point in the history
Update recipe
  • Loading branch information
CuijieLu authored May 20, 2024
2 parents 3c7a34b + dc946b4 commit dfb15c2
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 99 deletions.
4 changes: 2 additions & 2 deletions SampleSheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def split_sample_sheet(self):
If the sample sheet mixes DLP with other recipes, all DLP samples need to go on a separate sample sheet named "_DLP".
"""
# if 10x DRAGEN demux add to header CreateFastqForIndexReads,1,,,,,,,
if any("10X_" in s for s in self.recipe_set):
if any("SC_Chromium" in s for s in self.recipe_set):
print("Adding CreateFastqForIndexReads,1 to sample sheet header since 10X samples are present")
self.df_ss_header.loc[len(self.df_ss_header.index)-1] = ["CreateFastqForIndexReads",1,"","","","","","",""]
self.df_ss_header.loc[len(self.df_ss_header.index)] = ["[Data]","","","","","","","",""]
Expand All @@ -111,7 +111,7 @@ def split_sample_sheet(self):
split_ss_list = [ss_copy, self]

was_split = False
if "DLP" in self.recipe_set and len(self.recipe_set) > 1:
if "SC_DLP" in self.recipe_set and len(self.recipe_set) > 1:
print("Copying all DLP samples to a new sample sheet")
# copy all DLP rows to a new sample sheet
dlp_data = self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("DLP") == True].copy()
Expand Down
1 change: 1 addition & 0 deletions deliver_pipeline_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
def deliver(ds, **kwargs):
project = kwargs["params"]["project"]
pi = kwargs["params"]["pi"]
# NOTE: despite its name, the "recipe" param here actually carries the request name
recipe = kwargs["params"]["recipe"]
print("Delivering the pipeline output and/or .bams for {} {} {}".format(project, pi, recipe))

Expand Down
14 changes: 5 additions & 9 deletions demux_run_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def demux(ds, **kwargs):

# check if the sample sheet contains DLP project
is_DLP = False
if "DLP" in sample_sheet.recipe_set:
if "SC_DLP" in sample_sheet.recipe_set:
is_DLP = True
dragen_demux = True

Expand Down Expand Up @@ -214,7 +214,7 @@ def stats(ds, **kwargs):

def fingerprinting(ds, **kwargs):
# read in the sample sheet given in the params, then select the projects that need to run fingerprinting
recipe_list_for_fp = [".*IMPACT*", ".*Heme*", "IDT_Exome*", "WholeExomeSequencing", "Twist_Exome", "MSK-ACCESS*", "CMO-CH", "HumanWholeGenome"]
recipe_list_for_fp = ["PED-PEG", "WGS_Deep", "HC_IMPACT", "HC_IMPACT-Heme", "HC_ACCESS", "WES_Human", "HC_CMOCH"]
# call fingerprinting_dag.py for each project
samplesheet_path = kwargs["params"]["samplesheet"]

Expand All @@ -228,13 +228,9 @@ def fingerprinting(ds, **kwargs):
project_list_to_run = []
for project, recipe in sample_sheet.project_dict.items():
# fingerprinting only supports human projects
if project_genome_dict[project] == "Human":
for recipe_list_item in recipe_list_for_fp:
print(project, recipe)
expr = re.compile(recipe_list_item)
if expr.match(recipe):
project_list_to_run.append(project)
break
if project_genome_dict[project] == "Human" and recipe in recipe_list_for_fp:
project_list_to_run.append(project)

print("Projects need to run fp: {}".format(project_list_to_run))
if len(project_list_to_run) == 0:
return "No project need to run fingerprinting"
Expand Down
24 changes: 4 additions & 20 deletions scripts/cellranger_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,6 @@
"Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 "
}
},
"atac_count": {
"tool": " /igo/work/nabors/tools/cellranger-atac-2.1.0/cellranger-atac count ",
"genome": {
"Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-GRCh38-1.0.1 ",
"Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-mm10-1.1.0 "
}
},
"cnv": {
"tool": " /igo/work/nabors/tools/cellranger-dna-1.1.0/cellranger-dna cnv ",
"genome": {
"Human": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCh38-1.0.0 ",
"Mouse": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCm38-1.0.0 "
}
},
"multi": {
"tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
},
Expand Down Expand Up @@ -62,12 +48,10 @@
ARC_OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"

# 10X recipe list for different pipelines
COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]
VDJ_FLAVORS = ["10X_Genomics_VDJ"]
ATAC_FLAVORS = ["10X_Genomics_ATAC"]
CNV_FLAVORS = ["10X_Genomics_CNV"]
ARC_FLAVORS = ["10X_Genomics_Multiome", "10X_Genomics_Multiome_ATAC", "10X_Genomics_Multiome_GeneExpression"]
SPATIAL_FLAVORS = ["10X_Genomics_Visium"]
COUNT_FLAVORS = ["SC_Chromium-GEX-3", "SC_Chromium-GEX-5"]
VDJ_FLAVORS = ["SC_Chromium-TCR", "SC_Chromium-BCR"]
ARC_FLAVORS = ["SC_Chromium-Multiome", "SC_Chromium-Multiome_ATAC", "SC_Chromium-Multiome_GEX"]
SPATIAL_FLAVORS = ["ST_Visium"]

# we do not want to PROCESS SAIL (15500) or SCRI (12437) projects
SCRI = "12437"
Expand Down
106 changes: 40 additions & 66 deletions scripts/deliver_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,70 +27,59 @@
PICARD = "java -jar /igo/home/igo/resources/picard2.23.2/picard.jar "
NGS_STATS_FASTQ_ENDPOINT = "http://igodb.mskcc.org:8080/ngs-stats/permissions/getRequestPermissions/"

def deliver_pipeline_output(project, pi, recipe):
if not project or not pi or not recipe:
def deliver_pipeline_output(project, pi, requestName):
if not project or not pi or not requestName:
return "Project, pi and recipe are all required arguments."
# change pi to all lowercase
pi = pi.lower()
delivery_folder = LAB_SHARE_DIR + "/" + pi + "/Project_" + project + "/pipeline"

if recipe.startswith("RNASeq"):
if requestName == "RNALibraryPrep":
print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, recipe))
bamdict = find_bams(project, STATS_DIR)
bsub_commands = write_bams_to_share(bamdict, delivery_folder)
reconcile_bam_fastq_list(project, bamdict)
return "Completed RNA bams delivery"

# if this is a MissionBio recipe, find the tapestri pipeline output and copy all sample folders
elif recipe == "MissionBio":
tapestri_path = "/igo/staging/stats/MissionBio/Project_" + project
if not os.path.exists(tapestri_path):
print("No tapestri result available")
else:
tapestri_delivery_folder = delivery_folder + "/Tapestri"
if not os.path.exists(tapestri_delivery_folder):
print("Creating pipeline delivery folder {}".format(tapestri_delivery_folder))
os.makedirs(tapestri_delivery_folder)

# copy each sample folder to the delivery folder
tapestri_path = tapestri_path + "/"
sample_list = os.listdir(tapestri_path)
for sample in sample_list:
sample_folder = tapestri_path + sample
destination = tapestri_delivery_folder + "/" + sample
print("copy {}".format(sample_folder))
shutil.copytree(sample_folder, destination, symlinks=True)

# if recipe is CRISPRSeq or GeoMx, go to the pipeline folder and look for output; if it exists, then copy it
# add cellranger multi output for featurebarcoding project here for now
elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding":
pipeline_path = "/igo/staging/PIPELINE/Project_" + project
if not os.path.exists(pipeline_path):
print("No pipeline result available")
else:
if not os.path.exists(delivery_folder):
print("Creating pipeline delivery folder {}".format(delivery_folder))
os.makedirs(delivery_folder)

# copy each sample folder to the delivery folder
pipeline_path = pipeline_path + "/"
sample_list = os.listdir(pipeline_path)
for sample in sample_list:
sample_path = pipeline_path + sample
destination = delivery_folder + "/" + sample
print("copy {}".format(sample_path))
if os.path.isdir(sample_path):
shutil.copytree(sample_path, destination, symlinks=True)
else:
cmd = "cp {} {}".format(sample_path, destination)
print(cmd)
call(cmd, shell=True)

# if 10X recipe or SCRI project starting with 12437, copy cell ranger result to project folder
elif recipe.startswith("10XGenomics") or project.startswith("12437_"):
# TCRseq projects only need the manifest delivered; those files are located under the viale lab drive
# example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
elif requestName == "TCRSeq":
pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
TCR_delivery_folder = delivery_folder + "/Manifest"
if not os.path.exists(TCR_delivery_folder):
print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
os.makedirs(TCR_delivery_folder)

cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
print(cmd)
call(cmd, shell=True)

# For all other projects, check CELLRANGER folder first then PIPELINE folder
else:
folder_list = scripts.deliver_cellranger.find_cellranger(project)
if len(folder_list) == 0:
print("No cellranger result available")
# check PIPELINE folder
pipeline_path = "/igo/staging/PIPELINE/Project_" + project
if not os.path.exists(pipeline_path):
print("No cellranger/pipeline result available")
else:
if not os.path.exists(delivery_folder):
print("Creating pipeline delivery folder {}".format(delivery_folder))
os.makedirs(delivery_folder)

# copy each sample folder to the delivery folder
pipeline_path = pipeline_path + "/"
sample_list = os.listdir(pipeline_path)
for sample in sample_list:
sample_path = pipeline_path + sample
destination = delivery_folder + "/" + sample
print("copy {}".format(sample_path))
if os.path.isdir(sample_path):
shutil.copytree(sample_path, destination, symlinks=True)
else:
cmd = "cp {} {}".format(sample_path, destination)
print(cmd)
call(cmd, shell=True)
else:
# create pipeline folder if not exists
cellranger_delivery_folder = delivery_folder + "/cellranger"
Expand All @@ -105,21 +94,6 @@ def deliver_pipeline_output(project, pi, recipe):
print("copy {}".format(folder))
shutil.copytree(folder, sample_delivery_name, symlinks=True)

# TCRseq projects only need the manifest delivered; those files are located under the viale lab drive
# example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
elif recipe == "TCRSeq-IGO":
pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
TCR_delivery_folder = delivery_folder + "/Manifest"
if not os.path.exists(TCR_delivery_folder):
print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
os.makedirs(TCR_delivery_folder)

cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
print(cmd)
call(cmd, shell=True)

else:
print("Pipeline delivery is not needed for recipe {} and project {}".format(recipe, project))
return "Completed pipeline delivery"

def find_bams(project, stats_base_dir):
Expand Down
4 changes: 2 additions & 2 deletions stats_by_project_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def run_stats(ds, **kwargs):
print(cmd)
subprocess.run(cmd, shell=True)

elif "10X_" in recipe:
elif "SC_Chromium" in recipe:
scripts.cellranger.launch_cellranger_by_project_location(project_directory, recipe, species)
elif "ONT" in recipe:
elif "Nanopore" in recipe:
cmd = "bsub -J ont_stats_{} -n 16 -M 16 /igo/work/nabors/tools/venvpy3/bin/python /igo/work/igo/igo-demux/scripts/ont_stats.py {}".format(project_id, project_directory)
print(cmd)
subprocess.run(cmd, shell=True)
Expand Down

0 comments on commit dfb15c2

Please sign in to comment.