From 2d56c10383c9af6960a6455bca7915f6d31b35f3 Mon Sep 17 00:00:00 2001
From: sreichl <reichl.stephan@gmail.com>
Date: Fri, 13 Sep 2024 17:33:01 +0200
Subject: [PATCH] adapt to Snakemake 8; move env, config, annot export into the
 result folder to be self-contained

---
 README.md                             |  2 +-
 config/README.md                      |  6 ++++--
 config/config.yaml                    |  1 -
 workflow/Snakefile                    | 17 ++++++++++-------
 workflow/envs/global.yaml             |  7 +++++++
 workflow/profiles/default/config.yaml |  3 +++
 workflow/rules/envs_export.smk        | 24 +++++++++---------------
 workflow/rules/mixscape.smk           |  4 ----
 workflow/rules/visualize.smk          |  2 --
 9 files changed, 34 insertions(+), 32 deletions(-)
 create mode 100644 workflow/envs/global.yaml
 create mode 100644 workflow/profiles/default/config.yaml

diff --git a/README.md b/README.md
index fa8ed76..d24fbdf 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 [![DOI](https://zenodo.org/badge/481635018.svg)](https://zenodo.org/badge/latestdoi/481635018)
 
 # scCRISPR-seq Perturbation Analysis Snakemake Workflow using Seurat's Mixscape
-A [Snakemake](https://snakemake.readthedocs.io/en/stable/) workflow for performing perturbation analyses of pooled (multimodal) CRISPR screens with scRNA-seq read-out (scCRISPR-seq, CROP-seq, Perturb-seq) powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) method [Mixscape](https://satijalab.org/seurat/articles/mixscape_vignette.html).
+A [Snakemake 8](https://snakemake.readthedocs.io/en/stable/) workflow for performing perturbation analyses of pooled (multimodal) CRISPR screens with scRNA-seq read-out (scCRISPR-seq, CROP-seq, Perturb-seq) powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) method [Mixscape](https://satijalab.org/seurat/articles/mixscape_vignette.html).
 
 This workflow adheres to the module specifications of [MR.PARETO](https://github.com/epigen/mr.pareto), an effort to augment research by modularizing (biomedical) data science. For more details, instructions and modules check out the project's repository. Please consider starring and sharing modules that are useful to you, this helps me in prioritizing my efforts!
 
diff --git a/config/README.md b/config/README.md
index c289d3e..02ad380 100644
--- a/config/README.md
+++ b/config/README.md
@@ -1,8 +1,10 @@
 # Configuration
 
-You need one configuration file and one annotation file to run the complete workflow. You can use the provided example as starting point. If in doubt read the comments in the config and/or try the default values.
+You need one configuration file and one annotation file to run the complete workflow. If in doubt, read the comments in the config and/or try the default values.
 
-- project configuration (config/config.yaml): different for every project/dataset and configures the analyses to be performed.
+- project configuration (`config/config.yaml`): different for every project/dataset and configures the analyses to be performed.
 - sample annotation (sample_annotation): CSV file consisting of two columns
     -  name: name of the dataset (tip: keep it short).
     -  data: absolute path to the Seurat object as .rds.
+
+Set workflow-specific `resources` or command line interface (CLI) arguments in the workflow profile `workflow/profiles/default/config.yaml`, which supersedes global Snakemake profiles.
\ No newline at end of file
diff --git a/config/config.yaml b/config/config.yaml
index f445a75..7429d1b 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -2,7 +2,6 @@
 ##### RESOURCES #####
 mem: '32000'
 threads: 1
-partition: 'shortq'
 
 ##### GENERAL #####
 annotation: /path/to/mixscape_seurat_annotation.csv
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 3c570f1..23ec8a9 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1,3 +1,7 @@
+
+##### global workflow dependencies #####
+conda: "envs/global.yaml"
+
 ##### libraries #####
 import os
 import sys
@@ -5,7 +9,8 @@ import pandas as pd
 import yaml
 from snakemake.utils import min_version
 
-min_version("7.15.2")
+##### set minimum snakemake version #####
+min_version("8.20.1")
 
 ##### module name #####
 module_name = "mixscape_seurat"
@@ -27,18 +32,16 @@ rule all:
                                sample=samples),
         prtb_score_plots = expand(os.path.join(result_path,'{sample}','plots','PerturbScore'),
                                  sample=samples),
-        envs = expand(os.path.join(config["result_path"],'envs',module_name,'{env}.yaml'),env=['seurat_mixscape','seurat_lda']),
-        configs = os.path.join(config["result_path"],'configs',module_name,'{}_config.yaml'.format(config["project_name"])),
-        annotations = os.path.join(config["result_path"],'configs',module_name,'{}_annot.csv'.format(config["project_name"])),
+        envs = expand(os.path.join(result_path,'envs','{env}.yaml'),env=['seurat_mixscape','seurat_lda']),
+        configs = os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])),
+        annotations = os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])),
     resources:
         mem_mb=config.get("mem", "8000"),
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","all.log"),
-    params:
-        partition=config.get("partition"),
 
-        
+
 ##### load rules #####
 include: os.path.join("rules", "common.smk")
 include: os.path.join("rules", "mixscape.smk")
diff --git a/workflow/envs/global.yaml b/workflow/envs/global.yaml
new file mode 100644
index 0000000..482aa8e
--- /dev/null
+++ b/workflow/envs/global.yaml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - numpy=2.0.1
+  - pandas=2.2.2
\ No newline at end of file
diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml
new file mode 100644
index 0000000..29bebdd
--- /dev/null
+++ b/workflow/profiles/default/config.yaml
@@ -0,0 +1,3 @@
+default-resources:
+    slurm_partition: shortq
+    slurm_extra: "'--qos=shortq'"
\ No newline at end of file
diff --git a/workflow/rules/envs_export.smk b/workflow/rules/envs_export.smk
index a59877b..89e60e2 100644
--- a/workflow/rules/envs_export.smk
+++ b/workflow/rules/envs_export.smk
@@ -1,20 +1,18 @@
 # one rule per used conda environment to document the exact versions and builds of the used software        
 rule env_export:
     output:
-        report(os.path.join(config["result_path"],'envs','mixscape_seurat','{env}.yaml'),
+        report(os.path.join(result_path,'envs','{env}.yaml'),
                       caption="../report/software.rst", 
                       category="Software", 
-                      subcategory="{}_mixscape_seurat".format(config["project_name"])
+                      subcategory="{}_{}".format(config["project_name"],module_name)
                      ),
     conda:
         "../envs/{env}.yaml"
     resources:
-        mem_mb=1000, #config.get("mem", "16000"),
+        mem_mb=1000,
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","env_{env}.log"),
-    params:
-        partition=config.get("partition"),
     shell:
         """
         conda env export > {output}
@@ -23,18 +21,16 @@ rule env_export:
 # add configuration files to report
 rule config_export:
     output:
-        configs = report(os.path.join(config["result_path"],'configs','mixscape_seurat','{}_config.yaml'.format(config["project_name"])), 
+        configs = report(os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])), 
                          caption="../report/configs.rst", 
                          category="Configuration", 
-                         subcategory="{}_mixscape_seurat".format(config["project_name"])
+                         subcategory="{}_{}".format(config["project_name"],module_name)
                         )
     resources:
-        mem_mb=1000, #config.get("mem", "16000"),
+        mem_mb=1000,
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","config_export.log"),
-    params:
-        partition=config.get("partition"),
     run:
         with open(output["configs"], 'w') as outfile:
             yaml.dump(config, outfile)
@@ -44,18 +40,16 @@ rule annot_export:
     input:
         config["annotation"],
     output:
-        annot = report(os.path.join(config["result_path"],'configs','mixscape_seurat','{}_annot.csv'.format(config["project_name"])), 
+        annot = report(os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])), 
                          caption="../report/configs.rst", 
                          category="Configuration", 
-                         subcategory="{}_mixscape_seurat".format(config["project_name"])
+                         subcategory="{}_{}".format(config["project_name"],module_name)
                         )
     resources:
-        mem_mb=1000, #config.get("mem_small", "16000"),
+        mem_mb=1000,
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","annot_export.log"),
-    params:
-        partition=config.get("partition"),
     shell:
         """
         cp {input} {output}
diff --git a/workflow/rules/mixscape.smk b/workflow/rules/mixscape.smk
index b3309c7..a12155b 100644
--- a/workflow/rules/mixscape.smk
+++ b/workflow/rules/mixscape.smk
@@ -25,8 +25,6 @@ rule mixscape:
         "../envs/seurat_mixscape.yaml"
     log:
         os.path.join("logs","rules","mixscape_{sample}.log"),
-    params:
-        partition=config.get("partition"),
     script:
         "../scripts/mixscape.R"
 
@@ -56,7 +54,5 @@ rule lda:
         "../envs/seurat_lda.yaml"
     log:
         os.path.join("logs","rules","lda_{sample}.log"),
-    params:
-        partition=config.get("partition"),
     script:
         "../scripts/lda.R"
\ No newline at end of file
diff --git a/workflow/rules/visualize.smk b/workflow/rules/visualize.smk
index 6edb1f5..cfc4579 100644
--- a/workflow/rules/visualize.smk
+++ b/workflow/rules/visualize.smk
@@ -41,7 +41,5 @@ rule visualize:
         "../envs/seurat_mixscape.yaml"
     log:
         os.path.join("logs","rules","visualize_{sample}.log"),
-    params:
-        partition=config.get("partition"),
     script:
         "../scripts/visualize.R"