From 7612fd4a8828c9e670a1c6e8ed8684de5e4952d6 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 25 Oct 2024 13:49:06 +0200
Subject: [PATCH 1/5] Added Pan1c prefix instead of pan1c

---
 Snakefile | 200 ++++++++++++++++++++++++++----------------------------
 1 file changed, 97 insertions(+), 103 deletions(-)

diff --git a/Snakefile b/Snakefile
index fbbc6ef..12975d7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -36,17 +36,17 @@ nHAP = len(SAMPLES)
 with gzip.open("data/haplotypes/"+config['reference'], "r") as handle:
     CHRLIST = [line.decode().split("#")[-1].split('\n')[0] for line in handle.readlines() if line.decode()[0] == ">"]
 
-graph_tools = ["pan1c"] + (config["get_MC"] == "True")*["MC"] 
+graph_tools = ["PGGB"] + (config["get_MC"] == "True")*["MC"] 
 
 # Adding optionnal output based on config.yaml, using the following function
 def which_analysis():
     
     ## Default analysis
     analysis_inputs = [     
-        expand("output/stats/{gtool}."+config['name']+".core.stats.tsv", gtool=graph_tools), # core stats
-        expand("output/panacus.reports/{gtool}."+config['name']+".{chromosome}.histgrowth.html", chromosome=CHRLIST, gtool=graph_tools), # panacus histgrowth 
-        expand("output/chrGraphs.figs/{gtool}."+config['name']+".{chromosome}.1Dviz.png", chromosome=CHRLIST, gtool=graph_tools), # visualizations from odgi on chromosome graphs
-        expand("output/stats/{gtool}."+config['name']+".chrGraph.general.stats.tsv", gtool=graph_tools) # chromosomes graph statistics
+        expand("output/stats/Pan1c.{gtool}."+config['name']+".core.stats.tsv", gtool=graph_tools), # core stats
+        expand("output/panacus.reports/Pan1c.{gtool}."+config['name']+".{chromosome}.histgrowth.html", chromosome=CHRLIST, gtool=graph_tools), # panacus histgrowth 
+        expand("output/chrGraphs.figs/Pan1c.{gtool}."+config['name']+".{chromosome}.1Dviz.png", chromosome=CHRLIST, gtool=graph_tools), # visualizations from odgi on chromosome graphs
+        expand("output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.general.stats.tsv", gtool=graph_tools) # chromosomes graph statistics
     ]
     
     ## Optionals analysis steps
@@ -55,41 +55,35 @@ def which_analysis():
 
     if config["get_ASMs_SyRI"] == "True": # Creating SyRI for each input assembly 
         analysis_inputs.append(
-            expand("output/asm.syri.figs/"+config['name']+".{haplotype}.syri.{tool}.png", haplotype=SAMPLES_NOREF, tool=["mm2"])
+            expand("output/asm.syri.figs/Pan1c."+config['name']+".{haplotype}.syri_{tool}.png", haplotype=SAMPLES_NOREF, tool=["mm2"])
         )
     if config["get_chrInputs_SyRI"] == "True": # Creating SyRI figures for each PGGB input
         analysis_inputs.append(
-            expand("output/chrInput.syri.figs/"+config['name']+".{chromosome}.syri.png", chromosome=CHRLIST)
+            expand("output/chrInput.syri.figs/Pan1c."+config['name']+".{chromosome}.syri_mm2.png", chromosome=CHRLIST)
         )
     if config["run_Quast"] == "True": # Running Quast on input haplotypes
         analysis_inputs.append(
-            "output/"+config['name']+".quast.report.html"
+            "output/Pan1c."+config['name']+".quast.report.html"
         )
     if config["get_contig_pos"] == "True": # Chromosome decomposition into its contig figure
         analysis_inputs.append(
-            expand("output/chr.contig/{haplotype}.contig.png", haplotype=CHRLIST) 
+            expand("output/chr.contig/Pan1c."+config['name']+"{chromosome}.contig.png", chromosome=CHRLIST) 
         )
 
         if config["create_report"] == "True": # Creating report (need contig)
             analysis_inputs.append(
-                expand("output/{gtool}."+config['name']+".report.md", gtool=graph_tools)
+                expand("output/Pan1c.{gtool}."+config['name']+".report.md", gtool=graph_tools)
             )
-            analysis_inputs.append("output/report_data/"+config['name']+".assembly.json")
-            analysis_inputs.append("output/report_data/"+config['name']+".graph.json")
+            analysis_inputs.append("output/report_data/Pan1c."+config['name']+".assembly.json")
+            analysis_inputs.append("output/report_data/Pan1c."+config['name']+".graph.json")
     if config["get_VCF"] == "True": # VCF from the final graph against the "reference"
         analysis_inputs.append(
             expand("output/{gtool}.vcf.figs", gtool=graph_tools)
         )
-        analysis_inputs.append("output/report_data/"+config['name']+".var.json")
+        analysis_inputs.append("output/report_data/Pan1c."+config['name']+".var.json")
 
     return analysis_inputs
 
-"""
-Functions   ---------------------------------------------------------------------------------------
-"""
-def get_mem_mb(wildcards, attempt, threads, multiplier=config["mem_multiplier"]):
-    return attempt * multiplier * threads
-
 """
 Rules   -------------------------------------------------------------------------------------------
 """
@@ -97,8 +91,8 @@ Rules   ------------------------------------------------------------------------
 # Main target rule
 rule all:
     input:
-        expand("output/{gtool}."+config['name']+".gfa.gz", gtool=graph_tools), # Final graph (main output)
-        "output/pan1c."+config['name']+".gfa.metadata", # Metadata for the final (also in top of gfa files as # line)
+        expand("output/Pan1c.{gtool}."+config['name']+".gfa.gz", gtool=graph_tools), # Final graph (main output)
+        "output/Pan1c."+config['name']+".gfa.metadata", # Metadata for the final (also in top of gfa files as # line)
         which_analysis()
 
 """
@@ -153,7 +147,7 @@ rule quast_stats:
         fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
         ref="data/haplotypes/"+config['reference']
     output:
-        report="output/"+config['name']+".quast.report.html"
+        report="output/Pan1c."+config['name']+".quast.report.html"
     threads: 16
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 4000
@@ -213,13 +207,13 @@ rule assemblathon_stats:
         mv data/haplotypes/{wildcards.haplotype}.csv {output.csv}
         """
 
-rule contig_position:
+rule contig_positions:
     # Produce figures with contig positions
     input:
         fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
         fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
     output:
-        fig="output/chr.contig/{chromosome}.contig.png",
+        fig="output/chr.contig/Pan1c."+config['name']+"{chromosome}.contig.png",
         outdir=temp(directory("output/chr.contig/{chromosome}"))
     threads: 1
     resources:
@@ -290,8 +284,8 @@ rule SyRI_on_ASM_mm2:
         ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
         qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
     output:
-        fig="output/asm.syri.figs/"+config['name']+".{haplotype}.syri.mm2.png",
-        vcf="data/asm.syri.mm2/"+config['name']+".{haplotype}.syri.mm2.vcf.gz"
+        fig="output/asm.syri.figs/Pan1c."+config['name']+".{haplotype}.syri_mm2.png",
+        vcf="data/asm.syri.mm2/Pan1c."+config['name']+".{haplotype}.syri.mm2.vcf.gz"
     log: 
         cmd="logs/SyRI_ASM/{haplotype}.SyRI_ASM.mm2.cmd.log",
         time="logs/SyRI_ASM/{haplotype}.SyRI_ASM.mm2.time.log"
@@ -335,8 +329,8 @@ rule SyRI_on_ASM_wfm:
         ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
         qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
     output:
-        fig="output/asm.syri.figs/"+config['name']+".{haplotype}.syri.wfm.png",
-        vcf="data/asm.syri.wfm/"+config['name']+".{haplotype}.syri.wfm.vcf.gz"
+        fig="output/asm.syri.figs/Pan1c."+config['name']+".{haplotype}.syri_wfm.png",
+        vcf="data/asm.syri.wfm/Pan1c."+config['name']+".{haplotype}.syri.wfm.vcf.gz"
     log: 
         cmd="logs/SyRI_ASM/{haplotype}.SyRI_ASM.wfm.cmd.log",
         time="logs/SyRI_ASM/{haplotype}.SyRI_ASM.wfm.time.log"
@@ -384,19 +378,19 @@ def asm_json_inputs(wildcards):
 
     if config["get_contig_pos"] == "True":
         sections["contig_pos"] = expand(
-            "output/chr.contig/{chromosome}.contig.png",
+            "output/chr.contig/Pan1c."+config['name']+"{chromosome}.contig.png",
             chromosome=CHRLIST
         )
 
     if config["get_ASMs_SyRI"] == "True":
         sections["SyRI_on_ASMs_figs"] = expand(
-            "output/asm.syri.figs/"+config['name']+".{haplotype}.syri.mm2.png", 
+            "output/asm.syri.figs/Pan1c."+config['name']+".{haplotype}.syri_mm2.png", 
             haplotype=SAMPLES_NOREF
         )
 
     if config["get_chrInputs_SyRI"] == "True":
         sections["SyRI_on_chrInputs_figs"] = expand(
-            "output/chrInput.syri.figs/"+config['name']+".{chromosome}.syri.png", 
+            "output/chrInput.syri.figs/Pan1c."+config['name']+".{chromosome}.syri_mm2.png", 
             chromosome=CHRLIST
         )
 
@@ -407,8 +401,8 @@ rule asm_json:
     input:
         unpack(asm_json_inputs)
     output:
-        json="output/report_data/"+config['name']+".assembly.json",
-        merged="output/report_data/"+config['name']+".assemblathon_stats.tsv"
+        json="output/report_data/Pan1c."+config['name']+".assembly.json",
+        merged="output/report_data/Pan1c."+config['name']+".assemblathon_stats.tsv"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 16000
@@ -444,7 +438,7 @@ rule SyRI_on_chrInput:
     input:
         fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
     output:
-        fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.syri.png"
+        fig="output/chrInput.syri.figs/Pan1c."+config['name']+".{chromosome}.syri_mm2.png"
     threads: 8
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 12000
@@ -497,10 +491,10 @@ rule wfmash_on_chr:
         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
         fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
     output:
-        mapping=temp("data/chrGraphs/pan1c.{chromosome}/pan1c.{chromosome}.wfmash.mapping.paf"),
-        aln=temp("data/chrGraphs/pan1c.{chromosome}/pan1c.{chromosome}.wfmash.aln.paf"),
-        mapping_gz="data/chrGraphs/pan1c.{chromosome}/pan1c.{chromosome}.wfmash.mapping.paf.gz",
-        aln_gz="data/chrGraphs/pan1c.{chromosome}/pan1c.{chromosome}.wfmash.aln.paf.gz"
+        mapping=temp("data/chrGraphs/PGGB.{chromosome}/Pan1c."+config['name']+".{chromosome}.wfmash.mapping.paf"),
+        aln=temp("data/chrGraphs/PGGB.{chromosome}/Pan1c."+config['name']+".{chromosome}.wfmash.aln.paf"),
+        mapping_gz="data/chrGraphs/PGGB.{chromosome}/Pan1c."+config['name']+".{chromosome}.wfmash.mapping.paf.gz",
+        aln_gz="data/chrGraphs/PGGB.{chromosome}/Pan1c."+config['name']+".{chromosome}.wfmash.aln.paf.gz"
     threads: 16
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 2000
@@ -553,7 +547,7 @@ rule seqwish:
         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
         aln=rules.wfmash_on_chr.output.aln_gz
     output:
-        gfa_gz="data/chrGraphs/pan1c.{chromosome}/pan1c.{chromosome}.seqwish.gfa.gz"
+        gfa_gz="data/chrGraphs/PGGB.{chromosome}/Pan1c."+config['name']+".{chromosome}.seqwish.gfa.gz"
     threads: 8
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 4000
@@ -585,8 +579,8 @@ rule gfaffix_on_chr:
     input:
         rules.seqwish.output.gfa_gz
     output:
-        gfa_gz="data/chrGraphs/pan1c.{chromosome}/pan1c.{chromosome}.seqwish.gfaffixD.gfa.gz",
-        transform="data/chrGraphs/pan1c.{chromosome}/pan1c.{chromosome}.seqwish.gfaffixD.transform.txt"
+        gfa_gz="data/chrGraphs/PGGB.{chromosome}/Pan1c."+config['name']+".{chromosome}.seqwish.gfaffixD.gfa.gz",
+        transform="data/chrGraphs/PGGB.{chromosome}/Pan1c."+config['name']+".{chromosome}.seqwish.gfaffixD.transform.txt"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 24000
@@ -617,10 +611,10 @@ rule gfaffix_on_chr:
 rule odgi_postprocessing:
     # Running pggb's postprocessing (mainly odgi) steps with gfaffix graph
     input:
-        tags="output/pan1c."+config['name']+".gfa.metadata",
+        tags="output/Pan1c."+config['name']+".gfa.metadata",
         gfa_gz=rules.gfaffix_on_chr.output.gfa_gz
     output:
-        gfa_gz='data/chrGraphs/pan1c.'+config['name']+'.{chromosome}.gfa.gz'
+        gfa_gz="data/chrGraphs/Pan1c.PGGB."+config['name']+".{chromosome}.gfa.gz"
     threads: 8
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 4000
@@ -686,10 +680,10 @@ rule odgi_postprocessing:
 
 rule MC_graph:
     input:
-        tags="output/pan1c."+config['name']+".gfa.metadata",
+        tags="output/Pan1c."+config['name']+".gfa.metadata",
         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
     output:
-        gfa_gz='data/chrGraphs/MC.'+config['name']+'.{chromosome}.gfa.gz'
+        gfa_gz='data/chrGraphs/Pan1c.MC.'+config['name']+'.{chromosome}.gfa.gz'
     threads: 16
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 32000
@@ -740,9 +734,9 @@ rule MC_graph:
 rule generate_graph_list:
     # Generate a text file containing all created graphs
     input:
-        gfas=expand('data/chrGraphs/{{gtool}}.'+config['name']+'.{chromosome}.tmp.gfa', chromosome=CHRLIST)
+        gfas=expand('data/chrGraphs/Pan1c.{{gtool}}.'+config['name']+'.{chromosome}.tmp.gfa', chromosome=CHRLIST)
     output:
-        "data/chrGraphs/graphsList.{gtool}.txt"
+        temp("data/chrGraphs/graphsList.{gtool}.txt")
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 4000
@@ -756,13 +750,13 @@ rule graph_squeeze:
     # Using odgi to merge every subgraphs into a final one
     input:
         glist="data/chrGraphs/graphsList.{gtool}.txt",
-        tags="output/pan1c."+config['name']+".gfa.metadata",
-        graphs=expand('data/chrGraphs/{{gtool}}.'+config['name']+'.{chromosome}.tmp.gfa', chromosome=CHRLIST)
+        tags="output/Pan1c."+config['name']+".gfa.metadata",
+        graphs=expand('data/chrGraphs/Pan1c.{{gtool}}.'+config['name']+'.{chromosome}.tmp.gfa', chromosome=CHRLIST)
     output:
-        gfa_gz="output/{gtool}."+config['name']+".gfa.gz"
+        gfa_gz="output/Pan1c.{gtool}."+config['name']+".gfa.gz"
     log: 
-        cmd="logs/squeeze/{gtool}."+config['name']+".squeeze.cmd.log",
-        time="logs/squeeze/{gtool}."+config['name']+".squeeze.time.log",
+        cmd="logs/squeeze/Pan1c.{gtool}."+config['name']+".squeeze.cmd.log",
+        time="logs/squeeze/Pan1c.{gtool}."+config['name']+".squeeze.time.log",
     threads: 16
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 2000
@@ -794,10 +788,10 @@ rule graph_squeeze:
 rule graph_stats:
     # Using GFAstats to produce stats on every chromosome graphs
     input:
-        graph='data/chrGraphs/{gtool}.'+config['name']+'.{chromosome}.gfa.gz'
+        graph='data/chrGraphs/Pan1c.{gtool}.'+config['name']+'.{chromosome}.gfa.gz'
     output:
-        genstats="output/stats/chrGraphs.{gtool}/{gtool}."+config['name']+".{chromosome}.general.stats.tsv",
-        pathstats="output/stats/chrGraphs.{gtool}/{gtool}."+config['name']+".{chromosome}.path.stats.tsv"
+        genstats="output/stats/chrGraphs.{gtool}/Pan1c.{gtool}."+config['name']+".{chromosome}.general.stats.tsv",
+        pathstats="output/stats/chrGraphs.{gtool}/Pan1c.{gtool}."+config['name']+".{chromosome}.path.stats.tsv"
     threads: 4
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 8000
@@ -815,10 +809,10 @@ rule graph_stats:
 rule graph_figs:
     # Creating figures using odgi viz 
     input:
-        graph='data/chrGraphs/{gtool}.'+config['name']+'.{chromosome}.tmp.gfa'
+        graph='data/chrGraphs/Pan1c.{gtool}.'+config['name']+'.{chromosome}.tmp.gfa'
     output:
-        oneDviz="output/chrGraphs.figs/{gtool}."+config['name']+".{chromosome}.1Dviz.png",
-        pcov="output/chrGraphs.figs/{gtool}."+config['name']+".{chromosome}.pcov.png"
+        oneDviz="output/chrGraphs.figs/Pan1c.{gtool}."+config['name']+".{chromosome}.1Dviz.png",
+        pcov="output/chrGraphs.figs/Pan1c.{gtool}."+config['name']+".{chromosome}.pcov.png"
     threads: 4
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 4000
@@ -841,10 +835,10 @@ rule graph_figs:
 rule aggregate_graphs_stats:
     # Reading and merging all stats files from chromosome graphs into a .tsv.
     input:
-        genstats=expand("output/stats/chrGraphs.{{gtool}}/{{gtool}}."+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
+        genstats=expand("output/stats/chrGraphs.{{gtool}}/Pan1c.{{gtool}}."+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
     output:
-        genstats="output/stats/{gtool}."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/{gtool}."+config['name']+".chrGraph.path.stats.tsv"
+        genstats="output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.general.stats.tsv",
+        pathstats="output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.path.stats.tsv"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 2000
@@ -865,8 +859,8 @@ rule get_graph_tags:
     input:
         "config.yaml"
     output:
-        md="output/pan1c."+config['name']+".gfa.metadata",
-        json="output/report_data/"+config['name']+".tags.json"
+        md="output/Pan1c."+config['name']+".gfa.metadata",
+        json="output/report_data/Pan1c."+config['name']+".tags.json"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 8000
@@ -883,9 +877,9 @@ rule get_graph_tags:
 rule pggb_input_stats:
     # Produces statistics on pggb input sequences
     input:
-        flag="output/stats/{gtool}."+config['name']+".chrGraph.general.stats.tsv"
+        flag="output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.general.stats.tsv"
     output:
-        "output/stats/{gtool}."+config['name']+".chrInput.stats.tsv"
+        "output/stats/Pan1c.{gtool}."+config['name']+".chrInput.stats.tsv"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 32000
@@ -901,10 +895,10 @@ rule pggb_input_stats:
 rule core_statistics:
     # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
     input:
-        chrInputStats = "output/stats/{gtool}."+config['name']+".chrInput.stats.tsv",
-        chrGraphStats = "output/stats/{gtool}."+config['name']+".chrGraph.general.stats.tsv"
+        chrInputStats = "output/stats/Pan1c.{gtool}."+config['name']+".chrInput.stats.tsv",
+        chrGraphStats = "output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.general.stats.tsv"
     output:
-        tsv = "output/stats/{gtool}."+config['name']+".core.stats.tsv"
+        tsv = "output/stats/Pan1c.{gtool}."+config['name']+".core.stats.tsv"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 2000
@@ -921,11 +915,11 @@ rule core_statistics:
 rule graph_json:
     # Produce the Graph JSON for Pan1c QC
     input:
-        genstats = expand("output/stats/{gtool}."+config['name']+".chrGraph.general.stats.tsv", gtool=graph_tools),
-        pathstats = expand("output/stats/{gtool}."+config['name']+".chrGraph.path.stats.tsv", gtool=graph_tools),
-        odgifigs = expand("output/report/{gtool}."+config['name']+".{chromosome}.report.fig.png", gtool=graph_tools, chromosome=CHRLIST)
+        genstats = expand("output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.general.stats.tsv", gtool=graph_tools),
+        pathstats = expand("output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.path.stats.tsv", gtool=graph_tools),
+        odgifigs = expand("output/report/Pan1c.{gtool}."+config['name']+".{chromosome}.report.fig.png", gtool=graph_tools, chromosome=CHRLIST)
     output:
-        json="output/report_data/"+config['name']+".graph.json"
+        json="output/report_data/Pan1c."+config['name']+".graph.json"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 16000
@@ -968,12 +962,12 @@ rule get_pav:
 rule panacus_stats:
     # Produces panacus reports for a chromosome graph
     input:
-        graph='data/chrGraphs/{gtool}.'+config['name']+'.{chromosome}.tmp.gfa'
+        graph='data/chrGraphs/Pan1c.{gtool}.'+config['name']+'.{chromosome}.tmp.gfa'
     output:
-        html='output/panacus.reports/{gtool}.'+config['name']+'.{chromosome}.histgrowth.html'
+        html='output/panacus.reports/Pan1c.{gtool}.'+config['name']+'.{chromosome}.histgrowth.html'
     log: 
-        cmd="logs/panacus/{gtool}.{chromosome}.panacus.cmd.log",
-        time="logs/panacus/{gtool}.{chromosome}.panacus.time.log"
+        cmd="logs/panacus/Pan1c.{gtool}.{chromosome}.panacus.cmd.log",
+        time="logs/panacus/Pan1c.{gtool}.{chromosome}.panacus.time.log"
     params:
         app_path=config['app.path'],
         pan_name=config['name'],
@@ -997,9 +991,9 @@ rule panacus_stats:
 rule vg_deconstruct:
     # Produce a VCF based on the "reference" haplotype
     input:
-        graph="output/{gtool}."+config['name']+".xg",
+        graph="output/Pan1c.{gtool}."+config['name']+".xg",
     output:
-        vcf=temp("output/{gtool}."+config['name']+".vcf"),
+        vcf=temp("output/Pan1c.{gtool}."+config['name']+".vcf"),
     threads: 8
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 32000
@@ -1007,8 +1001,8 @@ rule vg_deconstruct:
         app_path=config['app.path'],
         ref=config['reference']
     log: 
-        cmd="logs/vg_deconstruct/{gtool}.vg_deconstruct.cmd.log",
-        time="logs/vg_deconstruct/{gtool}.vg_deconstruct.time.log"
+        cmd="logs/vg_deconstruct/Pan1c.{gtool}.vg_deconstruct.cmd.log",
+        time="logs/vg_deconstruct/Pan1c.{gtool}.vg_deconstruct.time.log"
     shell:
         """
         /usr/bin/time -v -o {log.time} \
@@ -1024,8 +1018,8 @@ rule vg_deconstruct:
 rule vcf_fig:
     # Produce a figure describing INS/DEL length distribution from vg deconstruct and SyRI
     input:
-        vg="output/{gtool}."+config['name']+".vcf.gz",
-        syris_mm2=expand("data/asm.syri.mm2/"+config['name']+".{haplotype}.syri.mm2.vcf.gz", haplotype=SAMPLES_NOREF)
+        vg="output/Pan1c.{gtool}."+config['name']+".vcf.gz",
+        syris_mm2=expand("data/asm.syri.mm2/Pan1c."+config['name']+".{haplotype}.syri.mm2.vcf.gz", haplotype=SAMPLES_NOREF)
     output:
         vcf_fig=directory("output/{gtool}.vcf.figs")
     threads: 1
@@ -1077,7 +1071,7 @@ rule vcf_fig:
 
 rule vg_vcf_2_tsv:
     input:
-        "output/{gtool}."+config['name']+".vcf.gz"
+        "output/Pan1c.{gtool}."+config['name']+".vcf.gz"
     output:
         temp("tmp/var_json/vg_{gtool}.tsv")
     threads: 1
@@ -1090,7 +1084,7 @@ rule vg_vcf_2_tsv:
 
 rule syri_vcf_2_tsv:
     input:
-        expand("data/asm.syri.mm2/"+config['name']+".{haplotype}.syri.mm2.vcf.gz", haplotype=SAMPLES_NOREF)
+        expand("data/asm.syri.mm2/Pan1c."+config['name']+".{haplotype}.syri.mm2.vcf.gz", haplotype=SAMPLES_NOREF)
     output:
         temp("tmp/var_json/syri_mm2.tsv")
     threads: 1
@@ -1138,7 +1132,7 @@ rule var_json:
     input:
         unpack(var_json_inputs)
     output:
-        json="output/report_data/"+config['name']+".var.json"
+        json="output/report_data/Pan1c."+config['name']+".var.json"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 48000
@@ -1155,12 +1149,12 @@ rule var_json:
 rule create_pan1c_report_fig:
     # Produces a markdown report figure of chromosomes graphs
     input:
-        graph='data/chrGraphs/{gtool}.'+config['name']+'.{chromosome}.tmp.gfa',
-        contigfig="output/chr.contig/{chromosome}.contig.png",
+        graph='data/chrGraphs/Pan1c.{gtool}.'+config['name']+'.{chromosome}.tmp.gfa',
+        contigfig="output/chr.contig/Pan1c."+config['name']+"{chromosome}.contig.png",
     output:
-        odgifig=temp("tmp/{gtool}.{chromosome}.odgi.png"),
-        namefig=temp("tmp/{gtool}.{chromosome}.name.png"),
-        reportfig="output/report/{gtool}."+config['name']+".{chromosome}.report.fig.png"
+        odgifig=temp("tmp/Pan1c.{gtool}.{chromosome}.odgi.png"),
+        namefig=temp("tmp/Pan1c.{gtool}.{chromosome}.name.png"),
+        reportfig="output/report/Pan1c.{gtool}."+config['name']+".{chromosome}.report.fig.png"
     threads: 4
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 2000
@@ -1198,14 +1192,14 @@ rule create_pan1c_report_fig:
 rule create_chrGraphs_figs:
     # Produce figures based on aggregated path stats
     input:
-        pathstats="output/stats/{gtool}."+config['name']+".chrGraph.path.stats.tsv"
+        pathstats="output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.path.stats.tsv"
     output:
         barplots=expand("output/chrGraphs.stats.figs/{{gtool}}."+config['name']+".path.decomp.{chromosome}.png", chromosome=CHRLIST),
         scatters=expand("output/chrGraphs.stats.figs/{{gtool}}."+config['name']+".2D.scatter.{chromosome}.png", chromosome=CHRLIST),
         heatmaps=expand("output/chrGraphs.stats.figs/{{gtool}}."+config['name']+".sharred.content.{chromosome}.png", chromosome=CHRLIST),
-        barplot_mean="output/chrGraphs.stats.figs/{gtool}."+config['name']+".path.decomp.mean.png",
-        scatter_mean="output/chrGraphs.stats.figs/{gtool}."+config['name']+".2D.scatter.mean.png",
-        heatmap_diff="output/chrGraphs.stats.figs/{gtool}."+config['name']+".shared.content.diff.png"
+        barplot_mean="output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".path.decomp.mean.png",
+        scatter_mean="output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".2D.scatter.mean.png",
+        heatmap_diff="output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".shared.content.diff.png"
     threads: 1
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 16000
@@ -1232,26 +1226,26 @@ def get_report_sections(wildcards):
     """
     sections = dict()
 
-    sections["metadata"] = "output/pan1c."+config['name']+".gfa.metadata"
-    sections["odgifigs"] = expand("output/report/{gtool}."+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
+    sections["metadata"] = "output/Pan1c."+config['name']+".gfa.metadata"
+    sections["odgifigs"] = expand("output/report/Pan1c.{gtool}."+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
     sections["genstats"] = f"output/stats/{wildcards.gtool}."+config['name']+".chrGraph.general.stats.tsv"
     sections["pathstats"] = f"output/stats/{wildcards.gtool}."+config['name']+".chrGraph.path.stats.tsv"
-    sections["barplots"] = expand("output/chrGraphs.stats.figs/{gtool}."+config['name']+".path.decomp.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
-    sections["scatters"] = expand("output/chrGraphs.stats.figs/{gtool}."+config['name']+".2D.scatter.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
-    sections["heatmaps"] = expand("output/chrGraphs.stats.figs/{gtool}."+config['name']+".sharred.content.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
+    sections["barplots"] = expand("output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".path.decomp.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
+    sections["scatters"] = expand("output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".2D.scatter.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
+    sections["heatmaps"] = expand("output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".sharred.content.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
     sections["barplot_mean"] = f"output/chrGraphs.stats.figs/{wildcards.gtool}."+config['name']+".path.decomp.mean.png"
     sections["scatter_mean"] = f"output/chrGraphs.stats.figs/{wildcards.gtool}."+config['name']+".2D.scatter.mean.png"
     sections["heatmap_diff"] = f"output/chrGraphs.stats.figs/{wildcards.gtool}."+config['name']+".shared.content.diff.png"
 
     if config["get_ASMs_SyRI"] == "True":
         sections["SyRI_on_ASMs_figs"] = expand(
-            "output/asm.syri.figs/"+config['name']+".{haplotype}.syri.mm2.png", 
+            "output/asm.syri.figs/Pan1c."+config['name']+".{haplotype}.syri_mm2.png", 
             haplotype=SAMPLES_NOREF
             )
 
     if config["get_chrInputs_SyRI"] == "True":
         sections["SyRI_on_chrInputs_figs"] = expand(
-            "output/chrInput.syri.figs/"+config['name']+".{chromosome}.syri.png", 
+            "output/chrInput.syri.figs/Pan1c."+config['name']+".{chromosome}.syri_mm2.png", 
             chromosome=CHRLIST
             )
 
@@ -1265,8 +1259,8 @@ rule create_pan1c_report:
     input:
         unpack(get_report_sections)
     output:
-        report="output/{gtool}."+config['name']+".report.md",
-        html="output/{gtool}."+config['name']+".report.html"
+        report="output/Pan1c.{gtool}."+config['name']+".report.md",
+        html="output/Pan1c.{gtool}."+config['name']+".report.html"
     threads: 4
     resources:
         mem_mb = lambda wildcards, threads: threads * config["mem_multiplier"] * 500
-- 
GitLab


From 230d9103fe5d6f4ae90a844ae887a2ccd160afd2 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 25 Oct 2024 14:48:27 +0200
Subject: [PATCH 2/5] Testing: restrict run_bgzip rule to .fa inputs

---
 rules/tools.smk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rules/tools.smk b/rules/tools.smk
index db1f566..490e74a 100644
--- a/rules/tools.smk
+++ b/rules/tools.smk
@@ -27,9 +27,9 @@ rule samtools_index:
 rule run_bgzip:
     # Run BGZIP on the file
     input: 
-        "{file}"
+        "{file}.fa"
     output:
-        "{file}.gz"
+        "{file}.fa.gz"
     threads: 4
     retries: 1
     resources:
-- 
GitLab


From b23eea95d1796bc4e91631ff873951519181c79e Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 25 Oct 2024 14:50:58 +0200
Subject: [PATCH 3/5] Fixed missing Pan1c

---
 Snakefile                 | 6 +++---
 scripts/graph.pan1c_QC.py | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/Snakefile b/Snakefile
index 12975d7..db81dc7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1194,9 +1194,9 @@ rule create_chrGraphs_figs:
     input:
         pathstats="output/stats/Pan1c.{gtool}."+config['name']+".chrGraph.path.stats.tsv"
     output:
-        barplots=expand("output/chrGraphs.stats.figs/{{gtool}}."+config['name']+".path.decomp.{chromosome}.png", chromosome=CHRLIST),
-        scatters=expand("output/chrGraphs.stats.figs/{{gtool}}."+config['name']+".2D.scatter.{chromosome}.png", chromosome=CHRLIST),
-        heatmaps=expand("output/chrGraphs.stats.figs/{{gtool}}."+config['name']+".sharred.content.{chromosome}.png", chromosome=CHRLIST),
+        barplots=expand("output/chrGraphs.stats.figs/Pan1c.{{gtool}}."+config['name']+".path.decomp.{chromosome}.png", chromosome=CHRLIST),
+        scatters=expand("output/chrGraphs.stats.figs/Pan1c.{{gtool}}."+config['name']+".2D.scatter.{chromosome}.png", chromosome=CHRLIST),
+        heatmaps=expand("output/chrGraphs.stats.figs/Pan1c.{{gtool}}."+config['name']+".sharred.content.{chromosome}.png", chromosome=CHRLIST),
         barplot_mean="output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".path.decomp.mean.png",
         scatter_mean="output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".2D.scatter.mean.png",
         heatmap_diff="output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".shared.content.diff.png"
diff --git a/scripts/graph.pan1c_QC.py b/scripts/graph.pan1c_QC.py
index 3c1bb07..8c41d82 100644
--- a/scripts/graph.pan1c_QC.py
+++ b/scripts/graph.pan1c_QC.py
@@ -2,7 +2,7 @@
 Graph JSON creator for Pan1c-QC
 
 @author: alexis.mergez@inrae.fr
-@version: 1.0
+@version: 1.1
 """
 
 import os
@@ -99,6 +99,8 @@ for tsv in args.path:
         for query in chrdf["Query.name"].unique():
             shared_table[gtool][chrid][query] = chrdf[chrdf["Query.name"] == query].drop(columns="Query.name").set_index("Target.name").to_dict(orient="index")
 
+
+
 ## Assembling output JSON
 
 Graph_JSON = {
-- 
GitLab


From 2658cabfd90535217bc26488dcb60883ae7408ce Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 25 Oct 2024 14:57:47 +0200
Subject: [PATCH 4/5] Fixed other missing 'Pan1c.' prefixes in get_report_sections

---
 Snakefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index db81dc7..6684e67 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1228,14 +1228,14 @@ def get_report_sections(wildcards):
 
     sections["metadata"] = "output/Pan1c."+config['name']+".gfa.metadata"
     sections["odgifigs"] = expand("output/report/Pan1c.{gtool}."+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
-    sections["genstats"] = f"output/stats/{wildcards.gtool}."+config['name']+".chrGraph.general.stats.tsv"
-    sections["pathstats"] = f"output/stats/{wildcards.gtool}."+config['name']+".chrGraph.path.stats.tsv"
+    sections["genstats"] = f"output/stats/Pan1c.{wildcards.gtool}."+config['name']+".chrGraph.general.stats.tsv"
+    sections["pathstats"] = f"output/stats/Pan1c.{wildcards.gtool}."+config['name']+".chrGraph.path.stats.tsv"
     sections["barplots"] = expand("output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".path.decomp.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
     sections["scatters"] = expand("output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".2D.scatter.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
     sections["heatmaps"] = expand("output/chrGraphs.stats.figs/Pan1c.{gtool}."+config['name']+".sharred.content.{chromosome}.png", chromosome=CHRLIST, gtool=[wildcards.gtool])
-    sections["barplot_mean"] = f"output/chrGraphs.stats.figs/{wildcards.gtool}."+config['name']+".path.decomp.mean.png"
-    sections["scatter_mean"] = f"output/chrGraphs.stats.figs/{wildcards.gtool}."+config['name']+".2D.scatter.mean.png"
-    sections["heatmap_diff"] = f"output/chrGraphs.stats.figs/{wildcards.gtool}."+config['name']+".shared.content.diff.png"
+    sections["barplot_mean"] = f"output/chrGraphs.stats.figs/Pan1c.{wildcards.gtool}."+config['name']+".path.decomp.mean.png"
+    sections["scatter_mean"] = f"output/chrGraphs.stats.figs/Pan1c.{wildcards.gtool}."+config['name']+".2D.scatter.mean.png"
+    sections["heatmap_diff"] = f"output/chrGraphs.stats.figs/Pan1c.{wildcards.gtool}."+config['name']+".shared.content.diff.png"
 
     if config["get_ASMs_SyRI"] == "True":
         sections["SyRI_on_ASMs_figs"] = expand(
-- 
GitLab


From a2bb964af3aecb66402509e10348f01a07e2e1d3 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 25 Oct 2024 14:58:30 +0200
Subject: [PATCH 5/5] Reverted modification to run_bgzip

---
 rules/tools.smk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rules/tools.smk b/rules/tools.smk
index 490e74a..db1f566 100644
--- a/rules/tools.smk
+++ b/rules/tools.smk
@@ -27,9 +27,9 @@ rule samtools_index:
 rule run_bgzip:
     # Run BGZIP on the file
     input: 
-        "{file}.fa"
+        "{file}"
     output:
-        "{file}.fa.gz"
+        "{file}.gz"
     threads: 4
     retries: 1
     resources:
-- 
GitLab