From 2d56c10383c9af6960a6455bca7915f6d31b35f3 Mon Sep 17 00:00:00 2001 From: sreichl Date: Fri, 13 Sep 2024 17:33:01 +0200 Subject: [PATCH] adapt to Snakemake 8; move env, config, annot export into the result folder to be self-contained --- README.md | 2 +- config/README.md | 6 ++++-- config/config.yaml | 1 - workflow/Snakefile | 17 ++++++++++------- workflow/envs/global.yaml | 7 +++++++ workflow/profiles/default/config.yaml | 3 +++ workflow/rules/envs_export.smk | 24 +++++++++--------------- workflow/rules/mixscape.smk | 4 ---- workflow/rules/visualize.smk | 2 -- 9 files changed, 34 insertions(+), 32 deletions(-) create mode 100644 workflow/envs/global.yaml create mode 100644 workflow/profiles/default/config.yaml diff --git a/README.md b/README.md index fa8ed76..d24fbdf 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![DOI](https://zenodo.org/badge/481635018.svg)](https://zenodo.org/badge/latestdoi/481635018) # scCRISPR-seq Perturbation Analysis Snakemake Workflow using Seurat's Mixscape -A [Snakemake](https://snakemake.readthedocs.io/en/stable/) workflow for performing perturbation analyses of pooled (multimodal) CRISPR screens with scRNA-seq read-out (scCRISPR-seq, CROP-seq, Perturb-seq) powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) method [Mixscape](https://satijalab.org/seurat/articles/mixscape_vignette.html). +A [Snakemake 8](https://snakemake.readthedocs.io/en/stable/) workflow for performing perturbation analyses of pooled (multimodal) CRISPR screens with scRNA-seq read-out (scCRISPR-seq, CROP-seq, Perturb-seq) powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) method [Mixscape](https://satijalab.org/seurat/articles/mixscape_vignette.html). This workflow adheres to the module specifications of [MR.PARETO](https://github.com/epigen/mr.pareto), an effort to augment research by modularizing (biomedical) data science. For more details, instructions and modules check out the project's repository. 
Please consider starring and sharing modules that are useful to you, this helps me in prioritizing my efforts! diff --git a/config/README.md b/config/README.md index c289d3e..02ad380 100644 --- a/config/README.md +++ b/config/README.md @@ -1,8 +1,10 @@ # Configuration -You need one configuration file and one annotation file to run the complete workflow. You can use the provided example as starting point. If in doubt read the comments in the config and/or try the default values. +You need one configuration file and one annotation file to run the complete workflow. If in doubt read the comments in the config and/or try the default values. -- project configuration (config/config.yaml): different for every project/dataset and configures the analyses to be performed. +- project configuration (`config/config.yaml`): different for every project/dataset and configures the analyses to be performed. - sample annotation (sample_annotation): CSV file consisting of two columns - name: name of the dataset (tip: keep it short). - data: absolute path to the Seurat object as .rds. + +Set workflow-specific `resources` or command line arguments (CLI) in the workflow profile `workflow/profiles/default/config.yaml`, which supersedes global Snakemake profiles. 
\ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index f445a75..7429d1b 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -2,7 +2,6 @@ ##### RESOURCES ##### mem: '32000' threads: 1 -partition: 'shortq' ##### GENERAL ##### annotation: /path/to/mixscape_seurat_annotation.csv diff --git a/workflow/Snakefile b/workflow/Snakefile index 3c570f1..23ec8a9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -1,3 +1,7 @@ + +##### global workflow dependencies ##### +conda: "envs/global.yaml" + ##### libraries ##### import os import sys @@ -5,7 +9,8 @@ import pandas as pd import yaml from snakemake.utils import min_version -min_version("7.15.2") +##### set minimum snakemake version ##### +min_version("8.20.1") ##### module name ##### module_name = "mixscape_seurat" @@ -27,18 +32,16 @@ rule all: sample=samples), prtb_score_plots = expand(os.path.join(result_path,'{sample}','plots','PerturbScore'), sample=samples), - envs = expand(os.path.join(config["result_path"],'envs',module_name,'{env}.yaml'),env=['seurat_mixscape','seurat_lda']), - configs = os.path.join(config["result_path"],'configs',module_name,'{}_config.yaml'.format(config["project_name"])), - annotations = os.path.join(config["result_path"],'configs',module_name,'{}_annot.csv'.format(config["project_name"])), + envs = expand(os.path.join(result_path,'envs','{env}.yaml'),env=['seurat_mixscape','seurat_lda']), + configs = os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])), + annotations = os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])), resources: mem_mb=config.get("mem", "8000"), threads: config.get("threads", 1) log: os.path.join("logs","rules","all.log"), - params: - partition=config.get("partition"), - + ##### load rules ##### include: os.path.join("rules", "common.smk") include: os.path.join("rules", "mixscape.smk") diff --git a/workflow/envs/global.yaml b/workflow/envs/global.yaml new file mode 
100644 index 0000000..482aa8e --- /dev/null +++ b/workflow/envs/global.yaml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - numpy=2.0.1 + - pandas=2.2.2 \ No newline at end of file diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml new file mode 100644 index 0000000..29bebdd --- /dev/null +++ b/workflow/profiles/default/config.yaml @@ -0,0 +1,3 @@ +default-resources: + slurm_partition: shortq + slurm_extra: "'--qos=shortq'" \ No newline at end of file diff --git a/workflow/rules/envs_export.smk b/workflow/rules/envs_export.smk index a59877b..89e60e2 100644 --- a/workflow/rules/envs_export.smk +++ b/workflow/rules/envs_export.smk @@ -1,20 +1,18 @@ # one rule per used conda environment to document the exact versions and builds of the used software rule env_export: output: - report(os.path.join(config["result_path"],'envs','mixscape_seurat','{env}.yaml'), + report(os.path.join(result_path,'envs','{env}.yaml'), caption="../report/software.rst", category="Software", - subcategory="{}_mixscape_seurat".format(config["project_name"]) + subcategory="{}_{}".format(config["project_name"],module_name) ), conda: "../envs/{env}.yaml" resources: - mem_mb=1000, #config.get("mem", "16000"), + mem_mb=1000, threads: config.get("threads", 1) log: os.path.join("logs","rules","env_{env}.log"), - params: - partition=config.get("partition"), shell: """ conda env export > {output} @@ -23,18 +21,16 @@ rule env_export: # add configuration files to report rule config_export: output: - configs = report(os.path.join(config["result_path"],'configs','mixscape_seurat','{}_config.yaml'.format(config["project_name"])), + configs = report(os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])), caption="../report/configs.rst", category="Configuration", - subcategory="{}_mixscape_seurat".format(config["project_name"]) + subcategory="{}_{}".format(config["project_name"],module_name) ) 
resources: - mem_mb=1000, #config.get("mem", "16000"), + mem_mb=1000, threads: config.get("threads", 1) log: os.path.join("logs","rules","config_export.log"), - params: - partition=config.get("partition"), run: with open(output["configs"], 'w') as outfile: yaml.dump(config, outfile) @@ -44,18 +40,16 @@ rule annot_export: input: config["annotation"], output: - annot = report(os.path.join(config["result_path"],'configs','mixscape_seurat','{}_annot.csv'.format(config["project_name"])), + annot = report(os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])), caption="../report/configs.rst", category="Configuration", - subcategory="{}_mixscape_seurat".format(config["project_name"]) + subcategory="{}_{}".format(config["project_name"],module_name) ) resources: - mem_mb=1000, #config.get("mem_small", "16000"), + mem_mb=1000, threads: config.get("threads", 1) log: os.path.join("logs","rules","annot_export.log"), - params: - partition=config.get("partition"), shell: """ cp {input} {output} diff --git a/workflow/rules/mixscape.smk b/workflow/rules/mixscape.smk index b3309c7..a12155b 100644 --- a/workflow/rules/mixscape.smk +++ b/workflow/rules/mixscape.smk @@ -25,8 +25,6 @@ rule mixscape: "../envs/seurat_mixscape.yaml" log: os.path.join("logs","rules","mixscape_{sample}.log"), - params: - partition=config.get("partition"), script: "../scripts/mixscape.R" @@ -56,7 +54,5 @@ rule lda: "../envs/seurat_lda.yaml" log: os.path.join("logs","rules","lda_{sample}.log"), - params: - partition=config.get("partition"), script: "../scripts/lda.R" \ No newline at end of file diff --git a/workflow/rules/visualize.smk b/workflow/rules/visualize.smk index 6edb1f5..cfc4579 100644 --- a/workflow/rules/visualize.smk +++ b/workflow/rules/visualize.smk @@ -41,7 +41,5 @@ rule visualize: "../envs/seurat_mixscape.yaml" log: os.path.join("logs","rules","visualize_{sample}.log"), - params: - partition=config.get("partition"), script: "../scripts/visualize.R"