# Metagenome Orchestra V2.2a configuration file template
#
# Last modification of this document: 2019-08-12
#
#
#
# =================================================
# Mago web site: http://mago.fe.uni-lj.si
# =================================================
#
# Metagenome Orchestra (magoOrchestra, Orchestra or Mago)
# is distributed under the
# Creative Commons Attribution CC BY license
# https://creativecommons.org/licenses/
#
#
#
# ------------------------------------------------------
# If you use Orchestra or its derivatives, please cite:
# ...submitted for publication; please check back at the
# Orchestra web site for a reference update.
#
# Please also cite the software that Orchestra contains
# and that you use for your pipeline processing.
# Please see credits.txt at the Orchestra web site
# for details about the included software.
# ------------------------------------------------------
#
#
#
# IMPORTANT: Orchestra is developed and disseminated in good
# faith and with the desire that it work according to
# expectations, but the authors DO NOT give any guarantees
# about its correctness.
#
# USE IT AT YOUR OWN RISK.
#
# The authors cannot be held legally or morally responsible
# for any consequences that may arise from using or misusing
# Orchestra.
#
# IMPORTANT: Orchestra is a skeleton application for the
# synergistic execution of many externally developed pieces
# of software. These are disseminated as integrated parts of
# Orchestra to provide a user-friendly, out-of-the-box
# experience. Nonetheless, every included piece of software
# remains OWNED and COPYRIGHTED by its respective developers.
# Please see the document credits.txt at the Orchestra web site.
#
# =================================================
# Mago web site: http://mago.fe.uni-lj.si
# =================================================
#
#
#
# Metagenome Orchestra is developed by:
#
# Blaz Stres (blaz.stres@gmail.com)
#    University of Ljubljana
#    Biotechnical Faculty,
#    Faculty of Medicine,
#    Faculty of Geodetic and Civil Engineering
#
#    University of Innsbruck (blaz.stres@uibk.ac.at)
#    Department of Microbiology
#
#
#
# Bostjan Murovec (bostjan.murovec@fe.uni-lj.si)
#    University of Ljubljana
#    Faculty of Electrical Engineering
#
# Please contact us with comments, suggestions for
# improvements, or descriptions of discovered bugs.
# Your feedback is most welcome.
#--------------------------------------------------------------------------
# Usage:
#--------------------------------------------------------------------------
#
# 1. Fill in the parameters below according to your preferences.
#
#    A. This document is fairly long because it contains
#       complete instructions for setting Orchestra parameters.
#       For production, a user is expected to remove the
#       majority of comments from this file to make it easier
#       to navigate.
#
#    B. Section titles ([Global], [FastQC], etc.) must appear
#       in the file before the parameters that belong to them.
#       Only sections in which a user actually specifies some
#       parameters need to be present.
#
#    C. Many parameters do not need to be set; in many cases
#       the default values suffice. Please do not be
#       overwhelmed by the number of available settings below.
#       Generally, to start using Orchestra it suffices to
#       specify the input files, an output directory, and the
#       components to be run as part of the processing.
#
#
#
# 2. Start Orchestra by following the container-specific
#    instructions in separate documents.
#
#    A. For the Singularity image:
#
#       http://mago.fe.uni-lj.si/singularity_instructions_V2_2.txt
#
#       Brief help is also available through the command:
#       singularity help /path/to/mago.simg
#
#
#
#    B. For the Docker container:
#
#       http://mago.fe.uni-lj.si/docker_instructions_V2_2.txt
#
#
#
#    C. For the Oracle VirtualBox virtual machine:
#
#       http://mago.fe.uni-lj.si/virtualbox_instructions_V2_2.txt
#
#
#
# 3. After Orchestra is run, its results will be located within
#    the specified output directory, organized in
#    subdirectories. The subdirectory 02_report within the
#    output directory contains the exact command line as well
#    as the captured STDOUT and STDERR outputs of each executed
#    external program, for post-mortem inspection of execution
#    progress, if such a need arises.
#--------------------------------------------------------------------------
# General information about configuration files:
#--------------------------------------------------------------------------
# Lines that begin with a hash (#) are ignored.
# Parameters that are supposed to remain unspecified
# may be commented out with a hash (#) instead of
# being deleted (for documentation purposes).
# Empty lines and lines that contain only spaces and
# tab characters are ignored as well.
# NOTE: absolute paths are advised when specifying
# file and directory names.
# NOTE: the home directory cannot be abbreviated as '~'.
# Instead, please use e.g. /home/user_name/some_dir
# While running, Orchestra checks for the
# presence of intermediate results from its potential
# previous runs. If these are available, they are
# reused in order to speed up execution.
#
# This is useful for experimenting with different
# parameters. If a user changes only the parameters for e.g.
# the CheckM taxonomy workflow, then Orchestra re-executes
# only this part, AND any other parts that may be affected
# by the changes. Steps that are not affected by the
# introduced parameter change are skipped, which saves
# an appreciable amount of execution time.
#
# In order for this to work, please specify the same
# output directory (parameter out_directory_root below)
# on all consecutive runs that logically belong
# together. Please do NOT modify any files within this
# directory yourself.
#
# NOTE 1: if Orchestra crashes during its operation, or
# if it is terminated prematurely by an operator, the
# left-over results may be corrupted. During its NEXT run,
# Orchestra tries to recover from such situations
# automatically by re-running steps with corrupted or
# incomplete results. It also re-runs all additional steps
# that may be affected by these very results. Consequently,
# Orchestra implements a full crash-recovery mechanism.
# If a crash or premature termination occurs, the only
# intervention required from an operator is to
# re-run Orchestra. The configuration file for this
# very re-run can and SHOULD stay the same. An operator
# does not need to, and should NOT, tell Orchestra which
# steps should be re-executed in such cases.
#
# NOTE 2: please do not manually delete intermediate results.
# Orchestra determines the steps to be re-executed by
# inspecting its own status files, and it has almost no
# ability to recover gracefully from manually deleted
# intermediate files.
#
# NOTE 3: Orchestra does not re-execute a certain step of its
# pipeline processing if it believes that the associated
# results are up to date.
# If for some reason you want to force a
# re-execution of such a step, delete the ENTIRE subdirectory
# that is associated with the step. Orchestra is deliberately
# designed to handle deletions of entire subdirectories
# gracefully, since this provides an easy way for an operator
# to force a re-execution, if a need arises. For example, if
# Orchestra believes that the results of the Concoct binner
# are up to date, but you want to re-execute it anyway, then
# simply delete the ENTIRE subdirectory that is associated
# with Concoct.
#
# NOTE 4: a more fine-grained forcing of re-execution is
# possible. Within an output directory, there exists the
# directory 01_status. Within it there is a status file for
# each step that has already run, whether or not it completed
# its execution successfully.
# It is safe to delete any or several of these files, which
# causes Orchestra to consider the associated steps as not
# yet executed. Consequently, it will attempt to execute them
# on the next re-run of the pipeline.
# The association of these files with processing steps should
# be self-evident from their descriptive file names.
#
# NOTE 5: please perform the above-suggested interventions in
# the output directory only when Orchestra is NOT being
# executed.
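# For illustration, and reusing the hypothetical output
# directory /home/johnDoe/Analysis from the examples below, a
# forced re-execution (NOTES 3 and 4) could be performed with
# commands along these lines; the actual subdirectory and
# status file names depend on your run, so inspect the output
# directory first:
#
#    ls /home/johnDoe/Analysis/01_status
#    rm /home/johnDoe/Analysis/01_status/<status_file_of_step>
#
# or, to force an entire step (NOTE 3):
#
#    rm -r /home/johnDoe/Analysis/<subdirectory_of_step>
#
# Again, perform such interventions only while Orchestra is
# NOT running (NOTE 5).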
#
# NOTE 6: if you change the parameters for the execution of a
# certain step, then Orchestra will detect the changes and
# consider the available results outdated. In this case a
# re-run happens automatically, and there is no need for an
# operator to intervene and delete subdirectories and/or
# status files.
#
# ------------------------------------------------------------
# IMPORTANT IMPORTANT IMPORTANT IMPORTANT
# ------------------------------------------------------------
# NOTE 7: Orchestra assumes that a re-run with an EXISTING
# output directory is done with the SAME input files
# (input reads and a potential externally provided scaffold)
# as in its previous run. It is assumed that only the
# parameters of execution are changed (or that the previous
# run was interrupted).
# If your intention is to process different input data,
# then set the output directory to a new, NON-existing
# directory. Failing to do so will corrupt the already
# generated files in the given output directory and
# produce gibberish results.
#
# ------------------------------------------------------------
# IMPORTANT IMPORTANT IMPORTANT IMPORTANT
# ------------------------------------------------------------
# NOTE 8: when you attempt to re-run some steps with
# changed parameters, do NOT disable the already completed
# steps in an attempt to save time. If you leave these
# settings intact, Orchestra will figure out by itself that
# the resulting files are already up to date, and it will NOT
# re-execute unnecessary steps. However, if you disable
# an already completed step, then Orchestra will assume that
# your intention is to execute a different workflow than
# in the previous run, and it will modify its behavior
# accordingly, which is not the intended behavior.
#
# As an example, let us suppose that a CheckM coverage
# file was generated in a previous Orchestra run.
# This file was then input to the CheckM taxonomy workflow.
# In the second Orchestra run, you intend to change
# some parameters of the CheckM taxonomy workflow.
# Hence, this workflow needs to be re-executed, but the
# CheckM coverage file does not need to be re-generated.
# The proper way to handle this situation is to leave the
# generation of the CheckM coverage file ENABLED.
# Orchestra will determine by itself that the coverage
# file is already generated and up to date, so it will
# not waste time re-generating it. The
# already present coverage file will be input to the
# CheckM taxonomy workflow, which will be re-executed
# (as expected, due to the changes of its parameters).
# On the other hand, if the generation of the CheckM coverage
# file had been disabled in a MISGUIDED attempt to save some
# execution time, then Orchestra would assume that the CheckM
# taxonomy workflow is supposed to be executed without the
# CheckM coverage file as one of its inputs.
#
# In conclusion, do NOT try to tell Orchestra
# which pipeline steps need to be re-executed. Instead,
# on each run, just indicate how Orchestra should proceed,
# as if it were its first execution.
# The rest is (hopefully) taken care of automatically.
#************************************************************
# Global parameters that affect the execution
# of Orchestra as a whole.
#************************************************************
[Global]
# OPTIONAL: memory_limit_type (default: None)
#
# Upper bound on the memory that Orchestra
# is allowed to consume during its execution.
#
# It is a good idea to limit memory consumption,
# since many programs that are included in
# Orchestra may exhaust the available resources
# fairly quickly and destabilize the underlying
# operating system by causing excessive swapping
# of working memory to disk.
#
# The available memory-limiting options are:
#
# None:   no limitations; this may be a good
#         option if you run Orchestra on your
#         own computer and you want to maximize
#         the chance that Orchestra completes
#         its processing. On the other hand,
#         if Orchestra is run on a High
#         Performance Computing (HPC) facility,
#         then some memory limiting is the
#         recommended choice, since excessive
#         swapping may impair the execution of
#         other programs (from other users) on
#         the same machine.
#
# Free:   set the memory limit to the amount of free
#         memory as determined at the beginning
#         of an Orchestra run.
#
# Manual: manually specify the memory limit in GigaBytes
#         with parameter manual_memory_limit_GB (below).
#
# If some processing step requires more memory than
# the specified limit allows, then such a step will
# be terminated abruptly and its results will
# be corrupted. Orchestra handles such situations
# gracefully by not executing any further
# processing steps that rely on these very results.
#memory_limit_type = None
#memory_limit_type = Free
#memory_limit_type = Manual
# REQUIRED IF PARAMETER memory_limit_type IS SET TO Manual:
# manual_memory_limit_GB
#
# Manual specification of the memory limit in GigaBytes.
# It is relevant only when parameter memory_limit_type
# is set to Manual. Floating point values are allowed,
# so it is possible to specify the limit with a finer
# granularity than whole GigaBytes.
#manual_memory_limit_GB = 50     # 50 GigaBytes
#manual_memory_limit_GB = 20.3   # 20 GigaBytes + 300 MegaBytes
# REQUIRED: out_directory_root
#
# Specification of an output directory.
#
# This directory is created during Orchestra execution
# to store pipeline results. Alternatively, results
# of previous executions may already be present in
# the specified output directory, in which case
# Orchestra avoids re-running certain steps.
#
# ----------------------------------------------------
# This is NOT a directory with input files.
#
# Input files may be, and SHOULD be, located at
# arbitrary different locations.
# ----------------------------------------------------
#
# Example: by making the following setting:
#
#    out_directory_root = /home/johnDoe/Analysis
#
# it is expected that the directory /home/johnDoe/Analysis
# does NOT already exist prior to Orchestra execution,
# or that it was created by Orchestra itself during
# one of its previous runs.
# On the other hand, the parent directory
# /home/johnDoe should already exist in any case.
#out_directory_root = /home/user/some_out_dir
# BOTH REQUIRED: input_R1_reads_file
#                input_R2_reads_file
#
# File names of input R1 and R2 reads in fastq format.
#
# A. One possibility is to specify the file names of one R1
#    and one R2 file.
#
# B. Several R1 and R2 files may be targeted by standard Linux
#    file patterns like "sequences_R1_*.fastq" and
#    "sequences_R2_*.fastq".
#
# C. It is NOT possible to enumerate several R1 and/or R2
#    files by specifying these two parameters more than once.
#
#
#
# NOTE 1: each individual file pattern must target only R1 or
#    R2 reads, and there must be no mix-up between the two
#    types of files. If in doubt, please open a Linux terminal
#    in the directory with the R1 and/or R2 files, and execute
#    something like "ls some_R1_*.fastq" and
#    "ls some_R2_*.fastq" to check the list of files that are
#    targeted by a specified file pattern (some_R1_*.fastq and
#    some_R2_*.fastq in this example).
#
# NOTE 2: all R1 files to be matched by a specified file
#    pattern must be located in the same directory. Likewise
#    for R2 files. Both sets of files may be (and typically
#    are) located in the same directory, but it is also
#    possible that the R1 files are located in a different
#    directory, or even on a different disk/partition, than
#    the R2 files.
#
# NOTE 3: each R1 file that is targeted by the R1 file pattern
#    must be matched with the corresponding R2 file of the R2
#    file pattern. The file names of corresponding files must
#    differ only in the R1 and R2 name fragment.
#    E.g. Orchestra will match file "some_file_R1_00037.fastq"
#    with "some_file_R2_00037.fastq". Any mismatches of the R1
#    and R2 file names will produce gibberish results.
#
#
#
# TIP: if your directory with R1 (and/or) R2 files contains
#    more files than you intend to process within a single
#    Orchestra run, and if it is impossible to target only the
#    appropriate subset of them with a file pattern, then it
#    is possible to proceed as follows.
#
#    SOLUTION 1: create a new directory
#       (say /home/me/special_reads).
#       Then copy or move the appropriate subset of R1 and
#       R2 files to the new directory.
#       This way it is easy to target only these files
#       with generic file patterns like
#       /home/me/special_reads/*R1*.fastq and
#       /home/me/special_reads/*R2*.fastq.
#
#    SOLUTION 2: create a new directory
#       (say /home/me/special_reads).
#       Within this directory create symbolic links to the
#       appropriate files. Symbolic links are created with the
#       command "ln -s /path/to/physical_file /path/to/symlink".
#       Then target all of the linked files with the generic
#       patterns /home/me/special_reads/*R1*.fastq and
#       /home/me/special_reads/*R2*.fastq.
#       A brief sketch of this approach is shown below.
#
#    Solution 1 is probably easier to do, whereas solution 2
#    has the advantage that no files are actually moved around
#    the file system. This way, the same files may be linked
#    into different directories and simultaneously take part
#    in different input combinations without occupying disk
#    space more than once.
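# As a minimal sketch of SOLUTION 2 (all directory and file
# names here are hypothetical), the subset of reads could be
# linked and then verified like this:
#
#    mkdir /home/me/special_reads
#    ln -s /home/me/all_reads/sampleA_R1.fastq /home/me/special_reads/
#    ln -s /home/me/all_reads/sampleA_R2.fastq /home/me/special_reads/
#    ls /home/me/special_reads/*R1*.fastq
#    ls /home/me/special_reads/*R2*.fastq
#
# The two ls commands show exactly which files the patterns
# specified below will target (see NOTE 1 above).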
#input_R1_reads_file = /home/user/input_dir/some_name_R1.fastq
#input_R2_reads_file = /home/user/input_dir/some_name_R2.fastq
#input_R1_reads_file = /home/user/input_dir/some_pattern_*R1*.fastq
#input_R2_reads_file = /home/user/input_dir/some_pattern_*R2*.fastq
#input_R1_reads_file = /home/user/separate_dir_R1/some_pattern_*R1*.fastq
#input_R2_reads_file = /home/user/separate_dir_R2/some_pattern_*R2*.fastq
# OPTIONAL: input_scaffold_file_1
#           input_scaffold_file_2
#           input_scaffold_file_3
#           ...
#           input_scaffold_file_n
#
# File names of an arbitrary number of externally provided
# input scaffold files in fasta format.
#
# The suffixes "_1", "_2", "_3", etc. enumerate these
# scaffolds, which Orchestra tracks internally under
# the respective tags "external_1", "external_2",
# "external_3", etc.
#
# External scaffold files may be provided instead of, or in
# addition to, the scaffolds that are built with the internal
# assemblers (see below).
#
# It is valid to provide a scaffold file that was built by
# an UNRELATED Orchestra run. On the other hand, if you merely
# repeat the Orchestra processing with the same input files
# and with the same target output directory, then Orchestra
# will reuse the already present scaffolds; in that case,
# please do not specify these scaffolds as external.
# Use the input_scaffold_file_n parameters only for scaffolds
# that were built by separate and independent Orchestra
# runs. Consequently, these external scaffolds should be
# located outside the current output directory.
#
# Orchestra attempts to read as many parameters of
# the form "input_scaffold_file_x" as are specified.
# However, their enumeration must be consecutive. Orchestra
# stops enumerating external scaffolds as soon as a
# certain external scaffold parameter within the sequence
# does not exist.
#
# For example, by specifying the following parameters:
#
#    input_scaffold_file_1 = /some_dir/some_scaffold.fasta
#    input_scaffold_file_2 = /another_dir/another_scaffold.fasta
#    input_scaffold_file_4 = /still_some_dir/still_another_scaffold.fasta
#    input_scaffold_file_5 = /distant_dir/excelent_scaffold.fasta
#    input_scaffold_file_6 = /near_dir/suspicious_scaffold.fasta
#
# only the external scaffolds some_scaffold.fasta (_1) and
# another_scaffold.fasta (_2) will be processed. Orchestra
# will try to read parameter input_scaffold_file_3, which
# does not exist, so it will stop its attempt to enumerate
# further external scaffolds. Hence, it is vital that input
# scaffolds are specified as a consecutive sequence of
# integers, starting at 1.
#
# Orchestra recognizes the literal "None" in place of
# an external scaffold file name. Such an entry instructs
# Orchestra to ignore the parameter in question but to
# continue enumerating further external scaffolds.
#
# For example, the above example may be augmented in
# the following manner:
#
#    input_scaffold_file_1 = /some_dir/some_scaffold.fasta
#    input_scaffold_file_2 = /another_dir/another_scaffold.fasta
#    input_scaffold_file_3 = None
#    input_scaffold_file_4 = /still_some_dir/still_another_scaffold.fasta
#    input_scaffold_file_5 = /distant_dir/excelent_scaffold.fasta
#    input_scaffold_file_6 = /near_dir/suspicious_scaffold.fasta
#
# This time, Orchestra enumerates all 5 specified scaffolds
# and tags them according to their specification. E.g.
# suspicious_scaffold.fasta is tagged internally as external_6.
#
# There can be as many "None" specifications as desired.
# The following example is fully valid.
#
#    input_scaffold_file_1  = /some_dir/some_scaffold.fasta
#    input_scaffold_file_2  = /another_dir/another_scaffold.fasta
#    input_scaffold_file_3  = None
#    input_scaffold_file_4  = None
#    input_scaffold_file_5  = None
#    input_scaffold_file_6  = None
#    input_scaffold_file_7  = None
#    input_scaffold_file_8  = /still_some_dir/still_another_scaffold.fasta
#    input_scaffold_file_9  = None
#    input_scaffold_file_10 = None
#    input_scaffold_file_11 = /distant_dir/excelent_scaffold.fasta
#    input_scaffold_file_12 = None
#    input_scaffold_file_13 = None
#    input_scaffold_file_14 = None
#    input_scaffold_file_15 = None
#    input_scaffold_file_16 = None
#    input_scaffold_file_17 = /near_dir/suspicious_scaffold.fasta
#    input_scaffold_file_18 = None
#    input_scaffold_file_19 = None
#    input_scaffold_file_20 = None
#
#
#
# IMPORTANT: the "None" specification is provided in order to
# easily exclude some scaffolds from repeated analyses, while
# at the same time the tags of the remaining scaffolds are
# preserved. THIS POINT IS CRUCIAL.
#
# For example, let us suppose that an initial Orchestra
# analysis is done with the following three external scaffolds.
#
#    input_scaffold_file_1 = /some_dir/some_scaffold.fasta
#    input_scaffold_file_2 = /another_dir/another_scaffold.fasta
#    input_scaffold_file_3 = /still_some_dir/still_another_scaffold.fasta
#
# Upon an Orchestra re-run, it is decided that external
# scaffold 2 should be excluded from the analysis. The WRONG
# approach is to re-enumerate the third scaffold into the
# second one.
#
# WRONG: input_scaffold_file_1 = /some_dir/some_scaffold.fasta
# WRONG: input_scaffold_file_2 = /still_some_dir/still_another_scaffold.fasta
#
# Orchestra preserves all data about previously consumed
# scaffolds (their indexing, generation of SAM, BAM and
# indexed BAM files, abundances, constellated bins, ...).
# By renaming, and consequently re-tagging, a scaffold,
# Orchestra is forced to delete all this information and
# regenerate it once more. This is a huge waste of time.
#
# Instead, care should be taken that external scaffolds from
# RELATED previous Orchestra runs (which target the same
# output directory) preserve their tags. The above WRONG
# situation should be remedied in the following CORRECT way.
#
# CORRECT:
#    input_scaffold_file_1 = /some_dir/some_scaffold.fasta
#    input_scaffold_file_2 = None
#    input_scaffold_file_3 = /still_some_dir/still_another_scaffold.fasta
# If no external scaffold is provided, then at least one
# built-in assembler needs to be enabled.
#
# NOTE 1: do not specify a scaffold file that was generated
#    within a previous Orchestra run (and is, therefore,
#    located within the output directory). If such a scaffold
#    exists, and if its associated assembler is ENABLED by the
#    appropriate setting below, then Orchestra will consume
#    the available previous results WITHOUT re-running the
#    corresponding assembler.
#
#    Of course, you may copy Orchestra-generated scaffold
#    files to another place and use them as an input to some
#    other Orchestra run, instead of assembling them
#    from scratch every time.
#
# NOTE 2: Orchestra assumes that a re-run with an EXISTING
#    output directory is done with the SAME input files.
#    This holds for a scaffold file specified by
#    input_scaffold_file_x as well. If you change the
#    scaffold file between different pipeline runs,
#    Orchestra will not detect the change, and it will
#    reuse the potentially already generated intermediate
#    files, which are derived from the previous
#    version of the scaffold. Consequently, the end results
#    will be gibberish.
#
#    If your intention is to process the same input
#    data in conjunction with a different scaffold,
#    then set the output directory to a new, NON-existing
#    directory.
#
#    However, it is fully supported to leave
#    input_scaffold_file_x undefined on a consecutive run,
#    and then to enable it again in some future Orchestra
#    re-run, provided that the specified scaffold
#    file remains the same.
#
# When several scaffolds are available for pipeline processing
# (for example due to the specification of
# input_scaffold_file_x parameter(s) together with enabling at
# least one assembler, and/or by enabling several assemblers),
# then Orchestra executes a Cartesian product of the enabled
# binners with all available scaffolds and enabled SAM
# generation methods (as described further on).
#
# For example, if two scaffolds are available and three SAM
# generation methods are enabled, then each enabled binner is
# run 2x3=6 times.
#
# A separate DasTool analysis (if DasTool is enabled) is
# performed for each available scaffold. An individual DasTool
# run takes into account the results of all binning steps that
# are associated with a certain scaffold.
#
# Please note that by enabling several scaffolds and SAM
# methods, the execution time of the entire pipeline, as well
# as its disk space consumption, may increase significantly.
#
# Generally, only one external scaffold or one
# scaffold-generation method, together with only one
# SAM-generation method, is supposed to be enabled, unless the
# intention is to compare the relative performance of these
# methods.
#input_scaffold_file_1 = /home/user/scaffold_dir/some_scaffold.fa
# OPTIONAL: delete_intermediate_files (default NO)
#
# Set to Yes to make Orchestra delete intermediate
# files during processing, as soon as they are not
# needed any more, in order to save disk space.
# Orchestra deletes only those files that are not
# needed for the rest of the current run, nor
# in the future for its potential re-executions.
# Therefore, setting this parameter to Yes does
# not prevent efficient re-execution of Orchestra
# in the same output directory (e.g. to experiment
# with different parameters).
# In addition, Orchestra only deletes files that
# are related to a certain processing step after
# the very step has completed successfully. In the
# case of an error, all files that are related
# to the erroneous step are left untouched as an
# aid for a post-mortem analysis of the error.
#delete_intermediate_files = Yes
# OPTIONAL: number_of_threads
#
# Number of threads to use for parallel execution.
#
# If the parameter is not specified, Orchestra
# will try to determine the number of available
# processors by querying the underlying
# operating system, and it will consume as many of
# them as available.
#
# If your intention is not to consume all available
# resources (e.g. because the same hardware will
# execute some other calculations in parallel), then
# you may manually set the value of this parameter to
# a LOWER value than the number of available CPUs.
# Setting this number to a value larger than the
# number of CPUs will DECREASE computation speed
# (but will have no other adverse consequences).
#
# Another reason for lowering this number below the
# actual CPU count is to lower memory consumption.
# If experience shows that Orchestra (in fact,
# some of its external programs) consumes too
# much memory, sometimes (but not always) the issue
# may be alleviated by reducing the number of
# threads that execute in parallel.
#number_of_threads = 4
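# To tie the [Global] parameters together: a minimal working
# configuration might look like the sketch below. All paths
# are hypothetical, and MegaHIT is just one possible choice of
# component; the sections that follow describe all of them.
#
#    [Global]
#    out_directory_root  = /home/johnDoe/Analysis
#    input_R1_reads_file = /home/johnDoe/reads/sample_R1.fastq
#    input_R2_reads_file = /home/johnDoe/reads/sample_R2.fastq
#
#    [MegaHIT]
#    include_MegaHIT = Yes
#
# Everything else may initially be left at its default value.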
#************************************************************
# Parameters for the FastQC Quality Control application
#
# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
#************************************************************
[FastQC]
# OPTIONAL: include_FastQC (default NO)
#
# Set to Yes to run the FastQC quality control application
# on the input data. Its results are placed in a separate
# subdirectory within the specified output directory
# for later inspection. Aside from this, these results
# are not used in any way in the later steps of pipeline
# processing. In order to filter input sequences based
# on their quality, use the FastP program (see below).
#include_FastQC = yes
# OPTIONAL: params_FastQC (quote delimited)
#
# Specify any algorithmic parameters that FastQC knows about.
#
# Example: params_FastQC = "--kmers 4"
#
# NOTE 1: this option is available for experienced users.
#    Leave params_FastQC unset, if you do not have a
#    specific reason to do otherwise, or if you are
#    not familiar with FastQC parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#    since Orchestra handles these; only parameters
#    that affect calculations are feasible here.
#
# NOTE 3: the value of this parameter is passed directly
#    to FastQC without any checking or formatting.
#    It is the responsibility of the user to assure
#    the correctness of these parameters.
#
# NOTE 4: each parameter that is not set in this
#    manner is set to its default value by FastQC itself.
#    Please consult the FastQC documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from FastQC help.
# If you have questions regarding these parameters,
# please consult the documentation or authors of FastQC.
# -----------------------------------------------------------
#
# --nogroup       Disable grouping of bases for reads >50bp.
#                 All reports will show data for every base
#                 in the read. WARNING: Using this option will
#                 cause fastqc to crash and burn if you use it
#                 on really long reads, and your plots may end
#                 up a ridiculous size. You have been warned!
#
# -c              Specifies a non-default file which contains
# --contaminants  the list of contaminants to screen
#                 overrepresented sequences against. The file
#                 must contain sets of named contaminants in
#                 the form name[tab]sequence. Lines prefixed
#                 with a hash will be ignored.
#
# -a              Specifies a non-default file which contains
# --adapters      the list of adapter sequences which will be
#                 explicitly searched against the library. The
#                 file must contain sets of named adapters in
#                 the form name[tab]sequence. Lines prefixed
#                 with a hash will be ignored.
#
# -l              Specifies a non-default file which contains
# --limits        a set of criteria which will be used to
#                 determine the warn/error limits for the
#                 various modules. This file can also be used
#                 to selectively remove some modules from the
#                 output all together. The format needs to
#                 mirror the default limits.txt file found in
#                 the Configuration folder.
#
# -k --kmers      Specifies the length of Kmer to look for in
#                 the Kmer content module. Specified Kmer
#                 length must be between 2 and 10. Default
#                 length is 7 if not specified.
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_FastQC unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with FastQC parameters.
#params_FastQC = "..."
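# As one concrete (hypothetical) illustration, combining two
# of the options listed above could look like:
#
#    params_FastQC = "--kmers 8 --nogroup"
#
# This is only a sketch; whether such settings are appropriate
# depends on your read lengths and on the FastQC warning about
# --nogroup quoted above.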
#************************************************************
# Parameters for the FastP Quality Control application
#
# https://github.com/OpenGene/fastp
#************************************************************
[FastP]
# OPTIONAL: include_FastP (default NO)
#
# Set to Yes to run the FastP quality control application
# on the input data. FastP results are placed in a separate
# subdirectory within the specified output directory for
# later inspection.
#include_FastP = yes
# OPTIONAL: FastP_generate_output (default NO)
#
# Set to Yes to generate FastP-filtered R1 and R2 files,
# which contain only sequences that fulfil the prescribed
# quality criteria. Set this parameter to No, if you would
# only like to generate FastP reports about the input data.
#FastP_generate_output = yes
# OPTIONAL: FastP_filter (default NO)
#
# Set to Yes to use the FastP-filtered input data in further
# steps of pipeline processing. This way only sequences
# of a prescribed quality take part in scaffold building,
# binning, and other analyses.
# Set this parameter to No, if FastP should merely
# generate reports, and possibly filtered R1 and R2 files,
# which are stored on disk but otherwise do not take
# part in further analysis. The entire set of original
# R1 and R2 reads then takes part in scaffold building,
# binning, and other analyses.
#
# Note 1: if this parameter is set to Yes, then FastP
#    output is going to be produced regardless
#    of the above setting FastP_generate_output.
#    FastP_generate_output will be set to Yes
#    automatically by Orchestra.
#
# Note 2: if the above setting FastP_generate_output
#    is set to Yes, but parameter FastP_filter
#    is set to No, then the FastP output sequences
#    will be placed in the FastP subdirectory
#    within the output directory, but they will
#    not be used further on in the analysis.
#    The analysis will be executed on the
#    unaltered input files.
#
# Note 3: if you set FastP_filter to Yes, then it is
#    advised to set FastP_generate_output to Yes
#    manually as well. If you intend to check the
#    influence of filtering on further results, then
#    you will probably run Orchestra one time with
#    FastP_filter = Yes, and another time with
#    FastP_filter = No. If FastP_generate_output
#    is set to Yes manually in both cases, then
#    Orchestra will notice that the FastP parameters
#    have not changed between the two runs, and it
#    will avoid re-running FastP. In contrast, if
#    FastP_generate_output is set to Yes by Orchestra
#    as a consequence of the setting FastP_filter = Yes,
#    then this setting is not preserved when
#    FastP_filter is set to No. Therefore, the FastP
#    parameters are not preserved either, which
#    results in an unnecessary re-run of FastP.
#    However, the only adverse consequence of this
#    outcome is a bit of wasted processing time.
#FastP_filter = yes
# OPTIONAL: params_FastP (quote delimited)
#
# Specify any algorithmic parameters that FastP knows about.
#
# Example: params_FastP = "--phred64"
#
# NOTE 1: this option is available for experienced users.
#    Leave params_FastP unset, if you do not have a
#    specific reason to do otherwise, or if you are
#    not familiar with FastP parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#    since Orchestra handles these; only parameters
#    that affect calculations are feasible here.
#
# NOTE 3: the value of this parameter is passed directly to
#    FastP without any checking or formatting. It is the
#    responsibility of the user to assure the correctness of
#    these parameters.
#
# NOTE 4: each parameter that is not set in this
#    manner is set to its default value by FastP itself.
#    Please consult the FastP documentation for details.
#
#
#
# -----------------------------------------------------------
#
# Leave params_FastP unset, if you are not familiar with FastP
# parameters. In this case FastP will perform quality scanning
# according to its generic settings, which may or may not be
# a good starting point.
#
# Note 1: often, adapters (see below) do not need to be
#    trimmed by FastP, since sequencers do it by themselves.
#
# Note 2: the default value of parameter
#    --qualified_quality_phred is 15, which is generally too
#    low. Please consider increasing it to 30, maybe 35, or
#    even higher.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from FastP help.
# If you have questions regarding these parameters,
# please consult the documentation or authors of FastP.
# -----------------------------------------------------------
#
# -6, --phred64                 indicate the input is using
#                               phred64 scoring (it'll be
#                               converted to phred33, so the
#                               output will still be phred33)
#
# -R, --report_title            report title string, default
#                               is "fastp report"
#
# -A, --disable_adapter_trimming  adapter trimming is enabled
#                               by default. If this option is
#                               specified, adapter trimming is
#                               disabled
#
# -a, --adapter_sequence        the adapter for read1. For SE
#                               data, if not specified, the
#                               adapter will be auto-detected.
#                               For PE data, this is used if
#                               R1/R2 are found not overlapped.
#                               (string [=auto])
#
# --adapter_sequence_r2         the adapter for read2 (PE data
#                               only). This is used if R1/R2
#                               are found not overlapped. If
#                               not specified, it will be the
#                               same as --adapter_sequence
#                               (string [=auto])
#
# --detect_adapter_for_pe       by default, the auto-detection
#                               for adapter is for SE data
#                               input only, turn on this
#                               option to enable it for PE
#                               data.
#
# -f, --trim_front1             trimming how many bases in
#                               front for read1, default is 0
#                               (int [=0])
#
# -t, --trim_tail1              trimming how many bases in
#                               tail for read1, default is 0
#                               (int [=0])
#
# -b, --max_len1                if read1 is longer than
#                               max_len1, then trim read1 at
#                               its tail to make it as long as
#                               max_len1. Default 0 means no
#                               limitation (int [=0])
#
# -F, --trim_front2             trimming how many bases in
#                               front for read2. If it's not
#                               specified, it will follow
#                               read1's settings (int [=0])
#
# -T, --trim_tail2              trimming how many bases in
#                               tail for read2. If it's not
#                               specified, it will follow
#                               read1's settings (int [=0])
#
# -B, --max_len2                if read2 is longer than
#                               max_len2, then trim read2 at
#                               its tail to make it as long as
#                               max_len2. Default 0 means no
#                               limitation. If it's not
#                               specified, it will follow
#                               read1's settings (int [=0])
#
# -g, --trim_poly_g             force polyG tail trimming; by
#                               default trimming is
#                               automatically enabled for
#                               Illumina NextSeq/NovaSeq data
#
# --poly_g_min_len              the minimum length to detect
#                               polyG in the read tail. 10 by
#                               default. (int [=10])
#
# -G, --disable_trim_poly_g     disable polyG tail trimming;
#                               by default trimming is
#                               automatically enabled for
#                               Illumina NextSeq/NovaSeq data
#
# -x, --trim_poly_x             enable polyX trimming in 3'
#                               ends.
#
# --poly_x_min_len              the minimum length to detect
#                               polyX in the read tail. 10 by
#                               default. (int [=10])
#
# -5, --cut_front               move a sliding window from
#                               front (5') to tail, drop the
#                               bases in the window if its
#                               mean quality < threshold,
#                               stop otherwise.
#
# -3, --cut_tail                move a sliding window from
#                               tail (3') to front, drop the
#                               bases in the window if its
#                               mean quality < threshold,
#                               stop otherwise.
#
# -r, --cut_right               move a sliding window from
#                               front to tail, if meet one
#                               window with mean quality
#                               < threshold, drop the bases in
#                               the window and the right part,
#                               and then stop.
#
# -W, --cut_window_size         the window size option shared
#                               by cut_front, cut_tail or
#                               cut_sliding. Range: 1~1000,
#                               default: 4 (int [=4])
#
# -M, --cut_mean_quality        the mean quality requirement
#                               option shared by cut_front,
#                               cut_tail or cut_sliding.
#                               Range: 1~36, default: 20 (Q20)
#                               (int [=20])
#
# --cut_front_window_size       the window size option of
#                               cut_front, default to
#                               cut_window_size if not
#                               specified (int [=4])
#
# --cut_front_mean_quality      the mean quality requirement
#                               option for cut_front, default
#                               to cut_mean_quality if not
#                               specified (int [=20])
#
# --cut_tail_window_size        the window size option of
#                               cut_tail, default to
#                               cut_window_size if not
#                               specified (int [=4])
#
# --cut_tail_mean_quality       the mean quality requirement
#                               option for cut_tail, default
#                               to cut_mean_quality if not
#                               specified (int [=20])
#
# --cut_right_window_size       the window size option of
#                               cut_right, default to
#                               cut_window_size if not
#                               specified (int [=4])
#
# --cut_right_mean_quality      the mean quality requirement
#                               option for cut_right, default
#                               to cut_mean_quality if not
#                               specified (int [=20])
#
# -Q, --disable_quality_filtering  quality filtering is
#                               enabled by default. If this
#                               option is specified, quality
#                               filtering is disabled
#
# -q, --qualified_quality_phred the quality value that a base
#                               is qualified. Default 15 means
#                               phred quality >=Q15 is
#                               qualified. (int [=15])
#
# -u, --unqualified_percent_limit  how many percents of bases
#                               are allowed to be unqualified
#                               (0~100). Default 40 means 40%
#                               (int [=40])
#
# -n, --n_base_limit            if one read's number of N base
#                               is >n_base_limit, then this
#                               read/pair is discarded.
#                               Default is 5 (int [=5])
#
# -L, --disable_length_filtering  length filtering is enabled
#                               by default. If this option is
#                               specified, length filtering is
#                               disabled
#
# -l, --length_required         reads shorter than
#                               length_required will be
#                               discarded, default is 15.
#                               (int [=15])
#
# --length_limit                reads longer than length_limit
#                               will be discarded, default 0
#                               means no limitation. (int [=0])
#
# -y, --low_complexity_filter   enable low complexity filter.
#                               The complexity is defined as
#                               the percentage of base that is
#                               different from its next base
#                               (base[i] != base[i+1]).
#
# -Y, --complexity_threshold    the threshold for low
#                               complexity filter (0~100).
#                               Default is 30, which means 30%
#                               complexity is required.
#                               (int [=30])
#
# --filter_by_index1            specify a file contains a list
#                               of barcodes of index1 to be
#                               filtered out, one barcode per
#                               line (string [=])
#
# --filter_by_index2            specify a file contains a list
#                               of barcodes of index2 to be
#                               filtered out, one barcode per
#                               line (string [=])
#
# --filter_by_index_threshold   the allowed difference of
#                               index barcode for index
#                               filtering, default 0 means
#                               completely identical.
#                               (int [=0])
#
# -c, --correction              enable base correction in
#                               overlapped regions (only for
#                               PE data), default is disabled
#
# --overlap_len_require         the minimum length of the
#                               overlapped region for overlap
#                               analysis based adapter
#                               trimming and correction. 30 by
#                               default. (int [=30])
#
# --overlap_diff_limit          the maximum difference of the
#                               overlapped region for overlap
#                               analysis based adapter
#                               trimming and correction. 5 by
#                               default. (int [=5])
#
# -U, --umi                     enable unique molecular
#                               identifier (UMI) preprocessing
#
# --umi_loc                     specify the location of UMI,
#                               can be index1/index2/read1/
#                               read2/per_index/per_read,
#                               default is none (string [=])
#
# --umi_len                     if the UMI is in read1/read2,
#                               its length should be provided
#                               (int [=0])
#
# --umi_prefix                  if specified, an underline
#                               will be used to connect prefix
#                               and UMI (i.e. prefix=UMI,
#                               UMI=AATTCG, final=UMI_AATTCG).
#                               No prefix by default
#                               (string [=])
#
# --umi_skip                    if the UMI is in read1/read2,
#                               fastp can skip several bases
#                               following UMI, default is 0
#                               (int [=0])
#
# -p, --overrepresentation_analysis  enable overrepresented
#                               sequence analysis.
#
# -P, --overrepresentation_sampling  one in
#                               (--overrepresentation_sampling)
#                               reads will be computed for
#                               overrepresentation analysis
#                               (1~10000), smaller is slower,
#                               default is 20. (int [=20])
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_FastP unset, if you are not familiar with FastP
# parameters. In this case FastP will perform quality scanning
# according to its generic settings, which may or may not be
# a good starting point.
#
# Note 1: often, adapters (see above) do not need to be
#    trimmed by FastP, since sequencers do it by themselves.
#
# Note 2: the default value of parameter
#    --qualified_quality_phred is 15, which is generally too
#    low. Please consider increasing it to 30, maybe 35, or
#    even higher.
#params_FastP = "--qualified_quality_phred 40"
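# A slightly richer (hypothetical) sketch, combining several
# of the options listed above -- sliding-window trimming from
# the 3' end plus a stricter per-base quality threshold:
#
#    params_FastP = "--cut_right --cut_right_window_size 4 --cut_right_mean_quality 20 --qualified_quality_phred 30"
#
# Treat this only as a starting point, and adjust the
# thresholds to your data.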
#************************************************************
# Parameters for the Idba_UD Assembler
#
# https://i.cs.hku.hk/~alse/hkubrg/projects/idba_ud/
#
# This section is relevant only if an input scaffold file
# is not specified with the input_scaffold_file_x parameters
#************************************************************
[Idba_UD]
# OPTIONAL: include_Idba_UD (default NO)
#
# Set to Yes to run Idba_UD scaffold assembling and
# include its results in further analysis.
#
# When several scaffolds are available (for example due to
# specification of the previous parameter
# input_scaffold_file_x together with enabling at least one
# assembler, or by enabling several assemblers), then
# Orchestra executes a Cartesian product of analyses with all
# available scaffolds and other multi-choice selections, as
# described further on.
#include_Idba_UD = yes
# OPTIONAL: params_Idba_UD (quote delimited)
#
# Specify any algorithmic parameters that Idba_UD knows about.
#
# Example: params_Idba_UD = "--seed_kmer=35"
#
# NOTE 1: this option is available for experienced users.
#    Leave params_Idba_UD unset, if you do not have a
#    specific reason to do otherwise, or if you are
#    not familiar with Idba_UD parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#    since Orchestra handles these; only parameters
#    that affect calculations are feasible here.
#
# NOTE 3: the value of this parameter is passed directly to
#    Idba_UD without any checking or formatting. It is the
#    responsibility of the user to assure the correctness of
#    these parameters.
#
# NOTE 4: each parameter that is not set in this
#    manner is set to its default value by Idba_UD itself.
#    Please consult the Idba_UD documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from Idba_UD help.
# If you have questions regarding these parameters,
# please consult the documentation or authors of Idba_UD.
# -----------------------------------------------------------
#
# --mink arg (=20)        minimum k value (<=124)
# --maxk arg (=100)       maximum k value (<=124)
# --step arg (=20)        increment of k-mer of each iteration
# --inner_mink arg (=10)  inner minimum k value
# --inner_step arg (=5)   inner increment of k-mer
# --prefix arg (=3)       prefix length used to build sub
#                         k-mer table
# --min_count arg (=2)    minimum multiplicity for filtering
#                         k-mer when building the graph
# --min_support arg (=1)  minimum support in each iteration
# --seed_kmer arg (=30)   seed kmer size for alignment
# --min_contig arg (=200) minimum size of contig
# --similar arg (=0.95)   similarity for alignment
# --max_mismatch arg (=3) max mismatch of error correction
# --min_pairs arg (=3)    minimum number of pairs
# --no_bubble             do not merge bubble
# --no_local              do not use local assembly
# --no_coverage           do not iterate on coverage
# --no_correct            do not do correction
# --pre_correction        perform pre-correction before
#                         assembly
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_Idba_UD unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with Idba_UD parameters.
#params_Idba_UD = "--seed_kmer=35"
# OPTIONAL: Idba_UD_variant
#
# Orchestra provides four modifications of the original
# Idba_UD algorithm. These are offered due to
# discovered or reported issues with the program's usage.
#
# Possible values are:
#
# original: the unmodified Idba_UD, as distributed by its
#    authors.
#
# maxShort512 (default): this variant increases the permitted
#    length of short sequences from 128bp to 512bp. This limit
#    is believed to be merely a programming quirk, which does
#    not affect the generation of a scaffold (but this is not
#    verified). Idba_UD sometimes crashes without this
#    modification.
#
# maxShort512_noMergeSimilarPaths: in addition to the change
#    provided with "maxShort512", this variant also disables
#    the function contig_graph.MergeSimilarPath(). According
#    to some reports, this function has a bug and/or consumes
#    a lot of RAM, which leads to program crashes. You should
#    try this variant, if Idba_UD runs out of memory when
#    running the variants "original" or "maxShort512".
#
# maxShort512_kmer16: in addition to the change provided with
#    "maxShort512" (but not "maxShort512_noMergeSimilarPaths"),
#    this variant also increases the (presumed) max kmer
#    length from 4 to 16. The change is introduced because one
#    of the Idba_UD users reported a more satisfactory
#    operation with this modification.
#
# maxShort512_kmer16_noMergeSimilarPaths: in addition to both
#    changes provided with "maxShort512_kmer16", this variant
#    also disables the function
#    contig_graph.MergeSimilarPath(). You should try this
#    variant, if Idba_UD runs out of memory when running the
#    variant "maxShort512_kmer16".
#
# Leave this parameter unset, if you do not have a
# specific reason to do otherwise.
#Idba_UD_variant = original
#Idba_UD_variant = maxShort512
#Idba_UD_variant = maxShort512_noMergeSimilarPaths
#Idba_UD_variant = maxShort512_kmer16
#Idba_UD_variant = maxShort512_kmer16_noMergeSimilarPaths
#************************************************************
# Parameters for the MegaHIT Assembler
#
# https://github.com/voutcn/megahit
#
# This section is relevant only if an input scaffold file
# is not specified with the input_scaffold_file_x parameters
#************************************************************
[MegaHIT]
# OPTIONAL: include_MegaHIT (default NO)
#
# Set to Yes to run MegaHIT scaffold assembling
# and include its results in further analysis.
#
# When several scaffolds are available (for example due to
# specification of the previous parameter
# input_scaffold_file_x together with enabling at least one
# assembler, or by enabling several assemblers), then
# Orchestra executes a Cartesian product of analyses with all
# available scaffolds and other multi-choice selections, as
# described further on.
#include_MegaHIT = yes
# OPTIONAL: params_MegaHIT (quote delimited)
#
# Specify any algorithmic parameters that MegaHIT knows about.
#
# Example: params_MegaHIT = "..."
#
# NOTE 1: this option is available for experienced users.
#    Leave params_MegaHIT unset, if you do not have a
#    specific reason to do otherwise, or if you are
#    not familiar with MegaHIT parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#    since Orchestra handles these; only parameters
#    that affect calculations are feasible here.
#
# NOTE 3: the value of this parameter is passed directly to
#    MegaHIT without any checking or formatting. It is the
#    responsibility of the user to assure the correctness of
#    these parameters.
#
# NOTE 4: each parameter that is not set in this
#    manner is set to its default value by MegaHIT itself.
#    Please consult the MegaHIT documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from MegaHIT help.
# If you have questions regarding these parameters,
# please consult the documentation or authors of MegaHIT.
# -----------------------------------------------------------
#
# Basic assembly options:
#   --min-count        minimum multiplicity for filtering
#                      (k_min+1)-mers [2]
#   --k-list           comma-separated list of kmer sizes
#                      (all must be odd, in the range 15-255,
#                      increment <= 28) [21,29,39,59,79,99,119,141]
#
# Another way to set --k-list (overrides --k-list if one of
# them is set):
#   --k-min            minimum kmer size (<= 255), must be
#                      odd number [21]
#   --k-max            maximum kmer size (<= 255), must be
#                      odd number [141]
#   --k-step           increment of kmer size of each
#                      iteration (<= 28), must be even
#                      number [12]
#
# Advanced assembly options:
#   --no-mercy         do not add mercy kmers
#   --bubble-level     intensity of bubble merging (0-2),
#                      0 to disable [2]
#   --merge-level      merge complex bubbles of length
#                      <= l*kmer_size and similarity >= s
#                      [20,0.95]
#   --prune-level      strength of low depth pruning (0-3) [2]
#   --prune-depth      remove unitigs with avg kmer depth
#                      less than this value [2]
#   --low-local-ratio  ratio threshold to define low local
#                      coverage contigs [0.2]
#   --max-tip-len      remove tips less than this value [2*k]
#   --no-local         disable local assembly
#   --kmin-1pass       use 1pass mode to build SdBG of k_min
#
# Presets parameters:
#   --presets          override a group of parameters;
#                      possible values:
#                      meta-sensitive: '--min-count 1
#                      --k-list 21,29,39,49,...,129,141'
#                      meta-large: '--k-min 27 --k-max 127
#                      --k-step 10' (large & complex
#                      metagenomes, like soil)
#
# Hardware options:
#   -m/--memory        max memory in byte to be used in SdBG
#                      construction (if set between 0-1,
#                      fraction of the machine's total
#                      memory) [0.9]
#   --mem-flag         SdBG builder memory mode
#                      0: minimum; 1: moderate; others: use
#                      all memory specified by '-m/--memory'
#                      [1]
#   --use-gpu          use GPU
#   --gpu-mem          GPU memory in byte to be used.
#                      Default: auto detect to use up all
#                      free GPU memory.
#
# Output options:
#   --min-contig-len   minimum length of contigs to
#                      output [200]
#   --keep-tmp-files   keep all temporary files
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_MegaHIT unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with MegaHIT parameters.
#params_MegaHIT = "..."
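# As one concrete (hypothetical) illustration based on the
# presets listed above, a large and complex metagenome (e.g.
# soil) might be assembled with:
#
#    params_MegaHIT = "--presets meta-large"
#
# This is only a sketch; please verify it against the MegaHIT
# documentation before relying on it.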
#************************************************************
# Parameters for the MetaSPAdes Assembler
#
# http://cab.spbu.ru/software/spades/
#
# This section is relevant only if an input scaffold file
# is not specified with the input_scaffold_file_x parameters
#************************************************************
[MetaSPAdes]
# OPTIONAL: include_MetaSPAdes (default NO)
#
# Set to Yes to run MetaSPAdes scaffold assembling
# and include its results in further analysis.
#
# When several scaffolds are available (for example due to
# specification of the previous parameter
# input_scaffold_file_x together with enabling at least one
# assembler, or by enabling several assemblers), then
# Orchestra executes a Cartesian product of analyses with all
# available scaffolds and other multi-choice selections, as
# described further on.
#include_MetaSPAdes = yes
# OPTIONAL: params_MetaSPAdes (quote delimited)
#
# Specify any algorithmic parameters that MetaSPAdes knows
# about.
#
# Example: params_MetaSPAdes = "--rna"
#
# NOTE 1: this option is available for experienced users.
#    Leave params_MetaSPAdes unset, if you do not have a
#    specific reason to do otherwise, or if you are
#    not familiar with MetaSPAdes parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#    since Orchestra handles these; only parameters
#    that affect calculations are feasible here.
#
# NOTE 3: the value of this parameter is passed directly to
#    MetaSPAdes without any checking or formatting. It is the
#    responsibility of the user to assure the correctness of
#    these parameters.
#
# NOTE 4: each parameter that is not set in this
#    manner is set to its default value by MetaSPAdes itself.
#    Please consult the MetaSPAdes documentation for details.
#
# NOTE 5: according to the MetaSPAdes instructions, it is
#    recommended to set the parameter/flag "--meta" for
#    metagenomic assembling. Other supported options are:
#       --rna: RNA-Seq data sets
#       --sc: single-cell data sets
#       --plasmid: plasmids from WGS data sets
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from MetaSPAdes help.
# If you have questions regarding these parameters,
# please consult the documentation or authors of MetaSPAdes.
# -----------------------------------------------------------
#
# Basic options:
#   --sc          this flag is required for MDA (single-cell)
#                 data
#   --meta        this flag is required for metagenomic sample
#                 data
#   --rna         this flag is required for RNA-Seq data
#   --plasmid     runs plasmidSPAdes pipeline for plasmid
#                 detection
#   --iontorrent  this flag is required for IonTorrent data
#
# Pipeline options:
#   --careful     tries to reduce number of mismatches and
#                 short indels
#   --disable-rr  disables repeat resolution stage of
#                 assembling
#
# Advanced options:
#   -m/--memory   RAM limit for SPAdes in Gb
#                 (terminates if exceeded) [default: 250]
#   -k            comma-separated list of k-mer sizes
#                 (must be odd and less than 128)
#                 [default: 'auto']
#   --cov-cutoff  coverage cutoff value (a positive float
#                 number, or 'auto', or 'off')
#                 [default: 'off']
#   --phred-offset <33 or 64>  PHRED quality offset in the
#                 input reads (33 or 64)
#                 [default: auto-detect]
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_MetaSPAdes unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with MetaSPAdes parameters.
#params_MetaSPAdes = "--meta"
#************************************************************
# Parameters for SAM, BAM and abundance file generation
#
# http://bio-bwa.sourceforge.net/
# http://bowtie-bio.sourceforge.net/bowtie2/
# https://sourceforge.net/projects/bbmap/
# http://www.htslib.org/
#************************************************************
[SAM_BAM]
# NOTE: conversion of input files to SAM and BAM format,
# as well as generation of abundance files, is done
# only if other enabled software (as set below)
# requires these files as its input.
# There are no parameters for explicitly enabling the
# generation of these files, since Orchestra handles
# these decisions automatically.
#============================================================
# Parameters for SAM file generation
#============================================================
# OPTIONAL: include_SAM_generation_Bwa     (default no)
#           include_SAM_generation_Bowtie2 (default no)
#           include_SAM_generation_BBMap   (default no)
#
# Enumeration of the desired SAM file generation methods.
# Possible choices are Bwa, Bowtie2 and BBMap,
# which refer to the names of the respective programs
# for SAM file generation (internet links above).
#
# It is possible to select several methods. Orchestra
# generates SAM files according to a Cartesian product
# of all above specified scaffolds (enabled assemblers
# and potential external scaffold file) and enabled
# SAM generation methods. For example, if two assemblers
# and all three SAM generation methods are enabled,
# then Orchestra generates six different SAM files,
# and, consequently, runs each enabled binner
# six times as well.
#
# Generally, only one SAM-generation method is supposed
# to be enabled, unless the intention is to compare
# relative performance of these methods.
#
# If no SAM generation method is enabled, then Orchestra
# automatically enables method Bwa to preserve
# continuity with Orchestra versions V1.x, where this
# was the only available method.

#include_SAM_generation_Bwa = Yes
#include_SAM_generation_Bowtie2 = Yes
#include_SAM_generation_BBMap = Yes

#************************************************************
# relevant only if Bwa SAM file generation is enabled
#************************************************************
#
# Generation of SAM file with Bwa is a two-step process.
# First, the scaffold is indexed, and then input sequences
# are aligned to the scaffold. This two-step process is
# reflected in the choice of parameters below for
# fine-tuning each of the two steps.
#
#
#
# OPTIONAL: params_bwa_index (quote delimited)
#
# Specify any algorithmic parameters that Bwa Index
# command knows about.
#
# Example: params_bwa_index = "-a bwtsw"
#
# NOTE 1: this option is available for experienced users.
#         Leave params_bwa_index unset, if you do not have a
#         specific reason to do otherwise, or if you are
#         not familiar with Bwa index parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 3: do not specify option '-6' or '-p' since these change
#         output file names. This would crash Orchestra processing.
#
# NOTE 4: the value of each parameter that is not set in this
#         manner is set to its default value by Bwa itself.
#         Please consult Bwa documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from Bwa help.
# If you have questions regarding these parameters,
# please consult documentation or authors of Bwa.
# -----------------------------------------------------------
#
#   -a STR   BWT construction algorithm: bwtsw, is or rb2 [auto]
#   -b INT   block size for the bwtsw algorithm
#            (effective with -a bwtsw) [10000000]
#
#   Warning: `-a bwtsw' does not work for short genomes,
#            while `-a is' and `-a div' do not work
#            for long genomes.
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_bwa_index unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with Bwa index parameters.

#params_bwa_index = "-a bwtsw"

# OPTIONAL: bwa_alignment_algorithm
#
# Specify Bwa alignment algorithm. Possible values are:
# mem (default), bwasw, aln.
#
# Example: bwa_alignment_algorithm = mem
#
# This option is available for experienced users.
# Leave bwa_alignment_algorithm unset, if you do not
# have a specific reason to do otherwise, or if you
# are not familiar with Bwa alignment algorithms.
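#
# Purely as an illustrative sketch (not a recommendation): one
# could select the default mem algorithm and tighten its seeding
# via the params_bwa_mem parameter described below; -k (minimum
# seed length) and -T (minimum score to output an alignment) are
# standard bwa mem options:
#
#   bwa_alignment_algorithm = mem
#   params_bwa_mem = "-k 25 -T 40"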
#bwa_alignment_algorithm = mem

# OPTIONAL: params_bwa_mem   (quote delimited)
#           params_bwa_bwasw (quote delimited)
#           params_bwa_aln   (quote delimited)
#
# Specify any algorithmic parameters that the respective
# Bwa alignment algorithms know about.
#
# NOTE 1: this option is available for experienced users.
#         Leave these parameters unset, if you do not have a
#         specific reason to do otherwise, or if you are
#         not familiar with Bwa alignment parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 3: do not specify any options that modify file names.
#         This would crash Orchestra processing.
#
# NOTE 4: do not specify number of threads here.
#         Orchestra does this by itself.
#
# NOTE 5: the value of each parameter that is not set in this
#         manner is set to its default value by Bwa itself.
#         Please consult Bwa documentation for details.
#
# Leave these parameters unset, if you do not have a
# specific reason to do otherwise, or if you are not
# familiar with Bwa alignment algorithms.

#params_bwa_mem = ""
#params_bwa_bwasw = ""
#params_bwa_aln = ""

#************************************************************
# relevant only if Bowtie2 SAM file generation is enabled
#************************************************************
#
# Generation of SAM file with Bowtie2 is a two-step process.
# First, the scaffold is indexed, and then input sequences
# are aligned to the scaffold. This two-step process is
# reflected in the choice of parameters below for
# fine-tuning each of the two steps.
#
#
#
# OPTIONAL: params_bowtie2_index (quote delimited)
#
# Specify any algorithmic parameters that bowtie2-build
# command for building index knows about.
#
# Example: params_bowtie2_index = "--noauto"
#
# NOTE 1: this option is available for experienced users.
#         Leave params_bowtie2_index unset, if you do not
#         have a specific reason to do otherwise, or if
#         you are not familiar with bowtie2-build parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 3: the value of each parameter that is not set in
#         this manner is set to its default value by
#         bowtie2-build itself. Please consult Bowtie2
#         documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from Bowtie2 help.
# If you have questions regarding these parameters,
# please consult documentation or authors of Bowtie2.
# -----------------------------------------------------------
#
#   --large-index    force generated index to be 'large', even if ref
#                    has fewer than 4 billion nucleotides
#
#   --verbose        log the issued command
#
#   -a/--noauto      disable automatic -p/--bmax/--dcv memory-fitting
#
#   -p/--packed      use packed strings internally; slower, less memory
#
#   -q/--quiet       disable verbose output (for debugging)
#
#   --bmax           max bucket sz for blockwise suffix-array builder
#
#   --bmaxdivn       max bucket sz as divisor of ref len (default: 4)
#
#   --dcv            diff-cover period for blockwise (default: 1024)
#
#   --nodc           disable diff-cover (algorithm becomes quadratic)
#
#   -o/--offrate     SA is sampled every 2^<int> BWT chars (default: 5)
#
#   -t/--ftabchars   # of chars consumed in initial lookup (default: 10)
#
#   --seed           seed for random number generator
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_bowtie2_index unset, if you do not have a
# specific reason to do otherwise, or if you are not
# familiar with bowtie2-build parameters.

#params_bowtie2_index = "--noauto"

# OPTIONAL: params_bowtie2_alignment (quote delimited)
#
# Specify any algorithmic parameters that bowtie2
# command for sequence alignment knows about.
#
# Example: params_bowtie2_alignment = "--very-sensitive"
#
# NOTE 1: this option is available for experienced users.
#         Leave params_bowtie2_alignment unset, if you do not
#         have a specific reason to do otherwise, or if
#         you are not familiar with bowtie2 parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 3: the value of each parameter that is not set in
#         this manner is set to its default value by
#         bowtie2 itself. Please consult Bowtie2
#         documentation for details.
#
# -----------------------------------------------------------
# There are very many viable bowtie2 parameters,
# and for that reason they are not listed here.
# Please consult Bowtie2 documentation.
# -----------------------------------------------------------
#
# Leave params_bowtie2_alignment unset, if you do not have a
# specific reason to do otherwise, or if you are not
# familiar with bowtie2 parameters.

#params_bowtie2_alignment = "--very-sensitive"

#************************************************************
# relevant only if BBMap SAM file generation is enabled
#************************************************************
#
# Generation of SAM file with BBMap is a two-step process.
# First, the scaffold is indexed, and then input sequences
# are aligned to the scaffold. This two-step process is
# reflected in the choice of parameters below for
# fine-tuning each of the two steps.
#
#
#
# OPTIONAL: params_bbmap_index (quote delimited)
#
# Specify any algorithmic parameters that BBMap
# command for building index knows about.
#
# Example: params_bbmap_index = "k=13"
#
# NOTE 1: this option is available for experienced users.
#         Leave params_bbmap_index unset, if you do not
#         have a specific reason to do otherwise, or if
#         you are not familiar with BBMap parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 3: the value of each parameter that is not set in
#         this manner is set to its default value by
#         BBMap itself. Please consult BBMap
#         documentation for details.
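#
# Purely as an illustrative sketch (not a recommendation): BBMap
# builds its index with kmer length k=13 by default; a longer kmer
# is generally faster, while a shorter kmer tends to be more
# sensitive. For example:
#
#   params_bbmap_index = "k=12"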
#
# -----------------------------------------------------------
# There are very many viable BBMap parameters,
# and for that reason they are not listed here.
# Please consult BBMap documentation.
# -----------------------------------------------------------
#
# Leave params_bbmap_index unset, if you do not have a
# specific reason to do otherwise, or if you are not
# familiar with BBMap parameters.

#params_bbmap_index = "k=13"

# OPTIONAL: params_bbmap_alignment (quote delimited)
#
# Specify any algorithmic parameters that BBMap
# command for sequence alignment knows about.
#
# Example: params_bbmap_alignment = "tipsearch=100"
#
# NOTE 1: this option is available for experienced users.
#         Leave params_bbmap_alignment unset, if you do not
#         have a specific reason to do otherwise, or if
#         you are not familiar with BBMap parameters.
#
# NOTE 2: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 3: the value of each parameter that is not set in
#         this manner is set to its default value by
#         BBMap itself. Please consult BBMap
#         documentation for details.
#
# -----------------------------------------------------------
# There are very many viable BBMap parameters,
# and for that reason they are not listed here.
# Please consult BBMap documentation.
# -----------------------------------------------------------
#
# Leave params_bbmap_alignment unset, if you do not have a
# specific reason to do otherwise, or if you are not
# familiar with BBMap parameters.

#params_bbmap_alignment = "tipsearch=100"

#************************************************************
# parameters for converting resulting SAM files
# to a sorted BAM file format
#************************************************************

# OPTIONAL: params_samtools_raw_BAM (quote delimited)
#
# Specify any samtools-view algorithmic parameters for
# conversion of a SAM file to a raw (unsorted) BAM file.
#
# NOTE 1: parameters "-h -b -@" are already provided by
#         Orchestra and must not be repeated here.
#
# NOTE 2: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 3: do not specify any options that modify file names.
#         This would crash Orchestra processing.
#
# NOTE 4: do not specify number of threads here.
#         Orchestra does this by itself.
#
# NOTE 5: the value of each parameter that is not set in this
#         manner is set to its default value by Samtools itself.
#         Please consult Samtools documentation for details.
#
# Leave this parameter unset, if you do not have a
# specific reason to do otherwise, or if you are not
# familiar with Samtools parameters.
#
# However, you may consider the frequently used
# option "-F 4" for including only mapped
# sequences in resulting BAM files.
# This option was forced in Orchestra V1.x,
# whereas now it is under the control of
# the pipeline configurator.

#params_samtools_raw_BAM = "-F 4"

#************************************************************
# Parameters for execution of MaxBin
#
# https://sourceforge.net/projects/maxbin/
#************************************************************

[MaxBin]

# OPTIONAL: include_MaxBin (default NO)
#
# Set to Yes to run MaxBin and include its
# results in further analysis.
#include_MaxBin = yes

# OPTIONAL: MaxBin_own_abundance_file (default: Yes)
#
# Set to Yes to let MaxBin build its own abundance
# file, or to No to use the abundance file that is
# produced by the BBMap method (from pileup file).
# The first choice takes longer to compute, but it
# makes MaxBin results more reproducible.

#MaxBin_own_abundance_file = No

# OPTIONAL: params_MaxBin (quote delimited)
#
# Specify any algorithmic parameters that MaxBin knows about.
#
# Example: params_MaxBin = "-prob_threshold 0.7 -markerset 40"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to MaxBin
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: this setting also indirectly affects other binners,
#         which rely on MaxBin results for their processing.
#
# NOTE 4: the value of each parameter that is not set in this
#         manner is set to its default value by MaxBin itself.
#         Please consult MaxBin documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from MaxBin help.
# If you have questions regarding these parameters,
# please consult documentation or authors of MaxBin.
# -----------------------------------------------------------
#
#   -min_contig_length  minimum contig length. Default 1000
#   -max_iteration      maximum Expectation-Maximization algorithm
#                       iteration number. Default 50
#   -prob_threshold     probability threshold for EM final
#                       classification. Default 0.9
#   -plotmarker
#   -markerset          marker gene sets, 107 (default) or 40
#
#   for debug purpose:
#   -verbose
#   -preserve_intermediate
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_MaxBin unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with MaxBin parameters.

#params_MaxBin = "-prob_threshold 0.7 -markerset 40"

#************************************************************
# Parameters for execution of Concoct
#
# https://concoct.readthedocs.io/en/latest/
#************************************************************

[Concoct]

# OPTIONAL: include_Concoct (default NO)
#
# Set to Yes to run Concoct and include its
# results in further analysis.

#include_Concoct = yes

# OPTIONAL: Concoct_abundance_from_MaxBin (default: Yes)
#
# Set to Yes to let Concoct use the abundance file
# that was produced by MaxBin, or to No
# to use the abundance file that is produced by the
# BBMap method (from pileup file).
#
# NOTE: if MaxBin does not produce its own abundance
#       file (because MaxBin is not enabled or not
#       configured to do so), then Concoct automatically
#       uses the BBMap abundance file.

#Concoct_abundance_from_MaxBin = No

# OPTIONAL: params_Concoct (quote delimited)
# Specify any algorithmic parameters that Concoct knows about.
#
# Example: params_Concoct = "--clusters 300 --kmer_length 3"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to Concoct
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by Concoct itself.
#         Please consult Concoct documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from Concoct help.
# If you have questions regarding these parameters,
# please consult documentation or authors of Concoct.
# -----------------------------------------------------------
#
#   -c CLUSTERS, --clusters CLUSTERS
#       specify maximal number of clusters
#       for VGMM, default 400.
#
#   -k KMER_LENGTH, --kmer_length KMER_LENGTH
#       specify kmer length, default 4.
#
#   -l LENGTH_THRESHOLD, --length_threshold LENGTH_THRESHOLD
#       specify the sequence length threshold, contigs shorter
#       than this value will not be included. Defaults to 1000.
#
#   -r READ_LENGTH, --read_length READ_LENGTH
#       specify read length for coverage, default 100
#
#   --total_percentage_pca TOTAL_PERCENTAGE_PCA
#       The percentage of variance explained by the principal
#       components for the combined data.
#
#   -s SEED, --seed SEED
#       Specify an integer to use as seed for clustering.
#       0 gives a random seed, 1 is the default seed and any
#       other positive integer can be used. Other values give
#       ArgumentTypeError.
#
#   -i ITERATIONS, --iterations ITERATIONS
#       Specify maximum number of iterations for the VBGMM.
#       Default value is 500
#
#   -e EPSILON, --epsilon EPSILON
#       Specify the epsilon for VBGMM. Default value is 1.0e-6
#
#   --no_cov_normalization
#       By default the coverage is normalized with regards to
#       samples, then normalized with regards of contigs and
#       finally log transformed. By setting this flag you skip
#       the normalization and only do log transform of the
#       coverage.
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_Concoct unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with Concoct parameters.

#params_Concoct = "-l 1000 --total_percentage_pca 60"
#params_Concoct = "--converge_out"

#************************************************************
# Parameters for execution of MetaBat2
#
# https://bitbucket.org/berkeleylab/metabat
#************************************************************

[MetaBat2]

# OPTIONAL: include_MetaBat2 (default NO)
# Set to Yes to include results of MetaBat2 in further analysis.
# MetaBat2 is run through its script runMetaBat2.sh.

#include_MetaBat2 = yes

# OPTIONAL: params_MetaBat2 (quote delimited)
# Specify any algorithmic parameters that MetaBat2 SCRIPT knows about.
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to MetaBat2
#         script without any checking or formatting. It is the
#         responsibility of a user to assure the correctness of
#         these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by MetaBat2 itself.
#         Please consult MetaBat2 documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from Metabat2 help.
# If you have questions regarding these parameters,
# please consult documentation or authors of MetaBat2.
# -----------------------------------------------------------
#
#   -m [ --minContig ] arg (=2500)
#            Minimum size of a contig for binning (should be >=1500).
#
#   --maxP arg (=95)
#            Percentage of 'good' contigs considered for binning decided
#            by connection among contigs. The greater, the more sensitive.
#
#   --minS arg (=60)
#            Minimum score of an edge for binning (should be between 1 and 99).
#            The greater, the more specific.
#
#   --maxEdges arg (=200)
#            Maximum number of edges per node. The greater, the more sensitive.
#
#   --pTNF arg (=0)
#            TNF probability cutoff for building TNF graph.
#            Use it to skip the preparation step. (0: auto).
#
#   --noAdd  Turning off additional binning for lost or small contigs.
#
#   -x [ --minCV ] arg (=1)
#            Minimum mean coverage of a contig in each library for binning.
#
#   --minCVSum arg (=1)
#            Minimum total effective mean coverage of a contig
#            (sum of depth over minCV) for binning.
#
#   -s [ --minClsSize ] arg (=200000)  Minimum size of a bin as the output.
#
#   --seed arg (=0)  For exact reproducibility. (0: use random seed)
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_MetaBat2 unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with MetaBat2 parameters.

#params_MetaBat2 = "-m 1500 --minS 10 -s 200"

#************************************************************
# Parameters for execution of BinSanity
#
# https://github.com/edgraham/BinSanity
#************************************************************

[BinSanity]

# OPTIONAL: include_BinSanity_plain (default NO)
# Set to Yes to include results of the plain BinSanity binner
# (and not BinSanity workflow) in further analysis.

#include_BinSanity_plain = yes

# OPTIONAL: include_BinSanity_workflow (default NO)
# Set to Yes to include results of the BinSanity workflow
# (and not plain BinSanity binner) in further analysis.
#
# Both BinSanity methods may be enabled at the same time
# and are executed independently, when both are enabled.

#include_BinSanity_workflow = yes

# OPTIONAL: include_BinSanity_lc (default NO)
# Set to Yes to include results of the BinSanity lc
# in further analysis. This binning method consumes
# less memory, and can therefore process larger jobs
# on the same hardware configuration than the BinSanity
# plain and workflow incarnations.
#
# All three BinSanity methods may be enabled at the same time
# and are executed independently, when more than
# one of them is enabled.

#include_BinSanity_lc = yes

# OPTIONAL: BinSanity_get_ids_cutoff_size (integer)
# Specify cutoff size for the BinSanity get-ids script, which builds
# a list of valid contigs to be input to BinSanity binning. Contigs
# smaller than this value are going to be ignored.
#
# Leave this parameter unset, if you do not have a
# specific reason to do otherwise.

#BinSanity_get_ids_cutoff_size = 200

# OPTIONAL: BinSanity_profile_transform (quote delimited)
# Specify one of the BinSanity-profile transform options.
# Valid values are:
#   scale --> Scaled by multiplying by 100 and log transformed (recommended)
#   None  --> Raw Coverage Values
#   log   --> Log transform
#   X5    --> Multiplication by 5
#   X10   --> Multiplication by 10
#   X100  --> Multiplication by 100
#   SQR   --> Square root
#
# Leave this parameter unset, if you do not have a
# specific reason to do otherwise.
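#
# As a rough worked illustration (assuming a base-10 logarithm in
# BinSanity's implementation): under the 'scale' transform a raw
# per-contig coverage of 2.5 becomes log10(2.5 * 100) = log10(250),
# i.e. approximately 2.4, which compresses the dynamic range of
# coverage values before clustering.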
#BinSanity_profile_transform = "scale"

# OPTIONAL: params_BinSanity_plain (quote delimited)
# Specify any algorithmic parameters that plain BinSanity binner knows about.
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to BinSanity
#         binner without any checking or formatting. It is the
#         responsibility of a user to assure the correctness of
#         these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by BinSanity itself.
#         Please consult BinSanity documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from BinSanity help.
# If you have questions regarding these parameters,
# please consult documentation or authors of BinSanity.
# -----------------------------------------------------------
#
#   -p PREFERENCE   Specify a preference (default is -3)
#                   Note: decreasing the preference leads to more lumping,
#                   increasing will lead to more splitting. If your range
#                   of coverages are low you will want to decrease the
#                   preference, if you have 10 or less replicates increasing
#                   the preference could benefit you.
#
#   -m MAXITER      Specify a max number of iterations [default is 2000]
#
#   -v CONVITER     Specify the convergence iteration number (default is 200)
#                   e.g. Number of iterations with no change in the number
#                   of estimated clusters that stops the convergence.
#
#   -d DAMP         Specify a damping factor between 0.5 and 1, default is 0.9
#
#   -x CONTIGSIZE   Specify the contig size cut-off [Default 1000 bp]
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_BinSanity_plain unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with BinSanity parameters.

#params_BinSanity_plain = "-p -6 -m 500 -v 100 -x 200"

# OPTIONAL: params_BinSanity_workflow (quote delimited)
# Specify any algorithmic parameters that BinSanity workflow knows about.
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to BinSanity
#         workflow without any checking or formatting. It is the
#         responsibility of a user to assure the correctness of
#         these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by BinSanity itself.
#         Please consult BinSanity documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from BinSanity help.
# If you have questions regarding these parameters,
# please consult documentation or authors of BinSanity.
# -----------------------------------------------------------
#
#   -p PREFERENCE   Specify a preference (default is -3)
#                   Note: decreasing the preference leads to more lumping,
#                   increasing will lead to more splitting. If your range
#                   of coverages are low you will want to decrease the
#                   preference, if you have 10 or less replicates increasing
#                   the preference could benefit you.
#
#   -m MAXITER      Specify a max number of iterations [default is 4000]
#
#   -v CONVITER     Specify the convergence iteration number (default is 200)
#                   e.g. Number of iterations with no change in the number
#                   of estimated clusters that stops the convergence.
#
#   -d DAMP         Specify a damping factor between 0.5 and 1, default is 0.95
#
#   -x CONTIGSIZE   Specify the contig size cut-off [Default 1000 bp]
#
#   --kmer KMER     Indicate a number for the kmer calculation [Default: 4]
#
#   --refine-preference INPUTREFINEDPREF
#                   Specify a preference for refinement. [Default: -25]
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_BinSanity_workflow unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with BinSanity-wf parameters.

#params_BinSanity_workflow = "-p -6 -m 500 -v 100 -x 200"

# OPTIONAL: params_BinSanity_lc (quote delimited)
# Specify any algorithmic parameters that BinSanity lc knows about.
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to
#         BinSanity lc without any checking or formatting.
#         It is the responsibility of a user to assure the
#         correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by BinSanity itself.
#         Please consult BinSanity documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from BinSanity help.
# If you have questions regarding these parameters,
# please consult documentation or authors of BinSanity.
# -----------------------------------------------------------
#
#   -p Preference             Specify a preference [Default: -3]
#                             Note: decreasing the preference leads to more
#                             lumping, increasing will lead to more splitting.
#                             If your range of coverages are low you will want
#                             to decrease the preference, if you have 10 or
#                             less replicates increasing the preference could
#                             benefit you.
#
#   -m MaximumIterations      Specify a max number of iterations [Default: 4000]
#
#   -v ConvergenceIterations  Specify the convergence iteration number
#                             [Default: 400]
#                             e.g. Number of iterations with no change in the
#                             number of estimated clusters that stops the
#                             convergence.
#
#   -d DampeningFactor        Specify a damping factor between 0.5 and 1
#                             [Default: 0.95]
#
#   -x SizeCutOff             Specify the contig size cut-off [Default: 1000 bp]
#
#   --kmer Kmer               Indicate a number for the kmer calculation
#                             [Default: 4]
#
#   --refine-preference       Specify a preference for refinement [Default: -25]
#
#   -C ClusterNumber          Indicate a number of initial clusters
#                             for kmean [Default: 100]
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_BinSanity_lc unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with BinSanity-lc parameters.

#params_BinSanity_lc = "-p -6 -m 500 -v 100 -x 200"

#************************************************************
# Parameters for execution of DasTool
#
# https://github.com/cmks/DAS_Tool
#************************************************************

[DasTool]

# OPTIONAL: include_DasTool (default NO)
# Set to Yes to run DasTool.
#
# Please note that at least one scaffold,
# binner and SAM generation method must
# be enabled to run DasTool.
#
# A separate DasTool analysis is performed
# for each available scaffold. An individual
# DasTool run takes into account results of
# all binning steps that are associated with
# a certain scaffold.
#
# If DasTool is not enabled, then bins that
# are produced by individual binners are
# directly fed to further steps of pipeline
# processing (CheckM, ezTree, Prokka-Roary,
# FastANI).
# If DasTool is enabled, then bins from
# individual binners are filtered and
# refined by DasTool, and only DasTool-
# generated bins are fed to further
# steps of analyses.

#include_DasTool = yes

# OPTIONAL: DasTool_Diamond_search_engine (default NO)
#
# Set to Yes to select Diamond as a DasTool search engine.
#
# Default selection is Blast. Engine usearch is not available.
#
# Leave this parameter unset, if you do not have a
# specific reason to do otherwise.

#DasTool_Diamond_search_engine = yes

# OPTIONAL: params_DasTool (quote delimited)
# Specify any algorithmic parameters that DasTool knows about.
#
# Example: params_DasTool = "--score_threshold 0.7"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to DasTool
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by DasTool itself.
#         Please consult DasTool documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from DasTool help.
# If you have questions regarding these parameters,
# please consult documentation or authors of DasTool.
# -----------------------------------------------------------
#
#   --score_threshold    Score threshold until selection algorithm will
#                        keep selecting bins [0..1]. (default: 0.5)
#
#   --duplicate_penalty  Penalty for duplicate single copy genes per bin
#                        (weight b). Only change if you know what you're
#                        doing. [0..3] (default: 0.6)
#
#   --megabin_penalty    Penalty for megabins (weight c). Only change if
#                        you know what you're doing. [0..3] (default: 0.5)
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_DasTool unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with DasTool parameters.

#params_DasTool = "--score_threshold 0.7"

#************************************************************
# Parameters for execution of CheckM
# analysis of DasTool generated bins
#
# http://ecogenomics.github.io/CheckM/
#************************************************************

[CheckM]

# OPTIONAL: include_CheckM_lineage_wf (default NO)
# Set to Yes to run CheckM lineage workflow (lineage_wf).

#include_CheckM_lineage_wf = yes

# OPTIONAL: params_CheckM_lineage_wf (quote delimited)
# Specify any algorithmic parameters that CheckM lineage workflow knows about.
#
# Example: params_CheckM_lineage_wf = "--length 0.8 --aai_strain 0.85"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to CheckM
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by CheckM itself.
#         Please consult CheckM documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from CheckM help.
# If you have questions regarding these parameters,
# please consult documentation or authors of CheckM.
# -----------------------------------------------------------
#
#   -u, --unique UNIQUE    minimum number of unique phylogenetic markers
#                          required to use lineage-specific marker set
#                          (default: 10)
#
#   -m, --multi MULTI      maximum number of multi-copy phylogenetic markers
#                          before defaulting to domain-level marker set
#                          (default: 10)
#
#   --force_domain         use domain-level sets for all bins
#
#   --no_refinement        do not perform lineage-specific marker set
#                          refinement
#
#   --individual_markers   treat marker as independent (i.e., ignore
#                          co-located set structure)
#
#   --skip_adj_correction  do not exclude adjacent marker genes when
#                          estimating contamination
#
#   --skip_pseudogene_correction
#                          skip identification and filtering of pseudogenes
#
#   --aai_strain AAI_STRAIN
#                          AAI threshold used to identify strain
#                          heterogeneity (default: 0.9)
#
#   --ignore_thresholds    ignore model-specific score thresholds
#
#   -e, --e_value E_VALUE  e-value cut off (default: 1e-10)
#
#   -l, --length LENGTH    percent overlap between target and query
#                          (default: 0.7)
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_CheckM_lineage_wf unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with CheckM parameters.

#params_CheckM_lineage_wf = "--length 0.8 --aai_strain 0.85"

# OPTIONAL: number_of_threads_CheckM_lineage_pplacer
# Specify number of threads for executing pplacer as a part of
# CheckM lineage workflow. Default is to use the same number of threads
# as for CheckM lineage workflow itself. However, memory demand of pplacer
# increases linearly with the number of its threads. Consequently,
# if Orchestra runs out of memory because pplacer exceeds
# memory capacity of the running hardware, then you can manually
# decrease its number of threads without unnecessarily lowering
# the number of threads of other Orchestra parts.

#number_of_threads_CheckM_lineage_pplacer = 4

# OPTIONAL: reduced_tree_CheckM_lineage (default NO)
# Set to Yes to use reduced tree (requires <16GB of memory) for determining
# lineage of each bin. Use this option if your computer has less than about
# 40 GB of RAM, or if CheckM_lineage runs out of memory.

#reduced_tree_CheckM_lineage = Yes

# OPTIONAL: collect_CheckM_filtered_bins (default NO)
# Set to Yes to collect bins that fulfill the below specified
# criterion into separate directory 61_CheckM_filtered_bins.
# This eases further downstream analysis of bins outside
# of the Orchestra pipeline.
#
# NOTE: CheckM lineage workflow needs to be enabled
#       to filter bins. Otherwise, this parameter is ignored.

#collect_CheckM_filtered_bins = Yes

# OPTIONAL: copy_CheckM_filtered_bins (default YES)
# This parameter is relevant only if
# collect_CheckM_filtered_bins is set to Yes.
# Set copy_CheckM_filtered_bins to Yes to make copies of filtered
# bins in directory 61_CheckM_filtered_bins. If this parameter
# is set to No, then symbolic links to bins are created, which
# makes it harder to export bins, but consumes less disk space.

#copy_CheckM_filtered_bins = No

# ALL FOUR OPTIONAL (numerical values between 0 and 100):
#   filter_CheckM_lineage_completeness_min
#   filter_CheckM_lineage_contamination_max
#
#   filter_CheckM_lineage_completeness_max
#   filter_CheckM_lineage_contamination_min
#
# Intervals of minimal and maximal completeness and contamination
# (as reported by CheckM lineage_wf) that collected bins
# must possess in order to be copied or linked to directory
# 61_CheckM_filtered_bins.
#
# Often, only the minimal value of completeness and the maximal
# value of contamination are prescribed, but specifying the
# other two as well (or instead of them) enables easier studying
# of less prominent bins, if such a need arises.
#
# Example 1: filter_CheckM_lineage_completeness_min = 90
#            filter_CheckM_lineage_contamination_max = 5
#            ...the other two parameters unspecified
#
# Example 2: filter_CheckM_lineage_completeness_min = 70
#            filter_CheckM_lineage_completeness_max = 90
#            filter_CheckM_lineage_contamination_max = 7
#            ...the fourth parameter unspecified
#
# Example 3: filter_CheckM_lineage_completeness_min = 70
#            filter_CheckM_lineage_completeness_max = 92
#            filter_CheckM_lineage_contamination_min = 4
#            filter_CheckM_lineage_contamination_max = 9
#
# If any of these parameters is disabled, it does not
# impose its respective restriction on collected bins.
#
# To collect all bins, disable all four limiting parameters.
#
# NOTE: the above recommendations are only
#       general and generic. There are situations,
#       where different settings also make sense.
#       If you are an expert, then you probably know
#       better how to set these parameters.

#filter_CheckM_lineage_completeness_min = 30
#filter_CheckM_lineage_contamination_max = 90
#filter_CheckM_lineage_completeness_max = 70
#filter_CheckM_lineage_contamination_min = 10

# OPTIONAL: filter_CheckM_FastANI_analysis (default NO)
# Set to Yes to run a special FastANI analysis on the
# resulting set of filtered bins.
# Unlike the separate FastANI step, described further on,
# no external references need to be provided here,
# since all resulting bins serve both as queries and as
# references for this multi-query multi-reference FastANI run.
#
# NOTE: at least two bins need to be collected by
#       the above filter for this step to run.

#filter_CheckM_FastANI_analysis = Yes

# OPTIONAL: params_CheckM_FastANI_analysis (quote delimited)
# Specify any algorithmic parameters that FastANI knows about.
#
# Example: params_CheckM_FastANI_analysis = "--fragLen 2000"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to FastANI
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by FastANI itself.
#         Please consult FastANI documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from FastANI help.
# If you have questions regarding these parameters,
# please consult documentation or authors of FastANI.
# -----------------------------------------------------------
#
#   -k , --kmer  kmer size <= 16 [default : 16]
#
#   --fragLen    fragment length [default : 3,000]
#
#   --minFrag    minimum matched fragments for
#                trusting ANI [default : 50]
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_CheckM_FastANI_analysis unset, if you do not have
# a specific reason to do otherwise, or if you are not familiar
# with FastANI parameters.

#params_CheckM_FastANI_analysis = "--fragLen 500"

# OPTIONAL: include_CheckM_coverage_generation (default NO)
# Set to Yes to run the CheckM coverage command to produce
# a coverage file of collected bins.

#include_CheckM_coverage_generation = yes

# OPTIONAL: params_CheckM_coverage (quote delimited)
# Specify any algorithmic parameters that CheckM coverage knows about.
#
# Example: params_CheckM_coverage = "--min_align 0.95"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to CheckM
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by CheckM itself.
#         Please consult CheckM documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from CheckM help.
# If you have questions regarding these parameters,
# please consult documentation or authors of CheckM.
# -----------------------------------------------------------
#
#   -a, --min_align MIN_ALIGN
#       minimum alignment length as percentage of read length (default: 0.98)
#
#   -e, --max_edit_dist MAX_EDIT_DIST
#       maximum edit distance as percentage of read length (default: 0.02)
#
#   -m, --min_qc MIN_QC
#       minimum quality score (in phred) (default: 15)
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_CheckM_coverage unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with CheckM parameters.

#params_CheckM_coverage = "--min_align 0.95"

# OPTIONAL: include_CheckM_taxonomy_wf (default NO)
# Set to Yes to run CheckM taxonomy workflow (taxonomy_wf).

#include_CheckM_taxonomy_wf = yes

# BOTH REQUIRED if CheckM taxonomy workflow is enabled:
#   CheckM_taxonomy_wf_taxonomic_rank
#   CheckM_taxonomy_wf_taxon
#
# Specifies taxonomic rank and taxon, respectively, of interest.
#
# Permissible values for taxonomic rank are:
# life, domain, phylum, class, order, family, genus or species.
#
# The set of plausible values for taxon changes with the
# selected taxonomic rank.
#
# Example 1: CheckM_taxonomy_wf_taxonomic_rank = Family
#            CheckM_taxonomy_wf_taxon = Proteobacteriaceae
#
# Example 2: CheckM_taxonomy_wf_taxonomic_rank = Genus
#            CheckM_taxonomy_wf_taxon = Bacteroides

#CheckM_taxonomy_wf_taxonomic_rank = Genus
#CheckM_taxonomy_wf_taxon = Prevotella

# OPTIONAL: params_CheckM_taxonomy_wf (quote delimited)
# Specify any algorithmic parameters that CheckM taxonomy workflow knows about.
#
# Example: params_CheckM_taxonomy_wf = "--aai_strain 0.85"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to CheckM
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by CheckM itself.
#         Please consult CheckM documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from CheckM help.
# If you have questions regarding these parameters,
# please consult documentation or authors of CheckM.
# -----------------------------------------------------------
#
#   --individual_markers
#       treat marker as independent (i.e., ignore co-located set structure)
#
#   --skip_adj_correction
#       do not exclude adjacent marker genes when estimating contamination
#
#   --skip_pseudogene_correction
#       skip identification and filtering of pseudogenes
#
#   --aai_strain AAI_STRAIN
#       AAI threshold used to identify strain heterogeneity (default: 0.9)
#
#   --ignore_thresholds
#       ignore model-specific score thresholds
#
#   -e, --e_value E_VALUE
#       e-value cut off (default: 1e-10)
#
#   -l, --length LENGTH
#       percent overlap between target and query (default: 0.7)
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_CheckM_taxonomy_wf unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with CheckM parameters.

#params_CheckM_taxonomy_wf = "--aai_strain 0.85"

#************************************************************
# Parameters for execution of ezTree
# analysis of collected bins
#
# https://github.com/yuwwu/ezTree
#************************************************************

[ezTree]

# OPTIONAL: include_ezTree (default NO)
# Set to Yes to run ezTree.
#
# CheckM lineage workflow is going to be enabled
# regardless of its above settings, if user-specified
# bin filtering is applied by setting at
# least one of the following parameters (see below):
#   ezTree_CheckM_lineage_completeness_min
#   ezTree_CheckM_lineage_contamination_max
#   ezTree_CheckM_lineage_completeness_max
#   ezTree_CheckM_lineage_contamination_min

#include_ezTree = yes

# ALL FOUR OPTIONAL (numerical values between 0 and 100):
#   ezTree_CheckM_lineage_completeness_min
#   ezTree_CheckM_lineage_contamination_max
#
#   ezTree_CheckM_lineage_completeness_max
#   ezTree_CheckM_lineage_contamination_min
#
# Intervals of minimal and maximal completeness and contamination
# (as reported by CheckM lineage_wf) that collected bins
# must possess in order to be included in ezTree analysis.
#
# Often, only the minimal value of completeness and the maximal
# value of contamination are prescribed, but specifying the
# other two as well (or instead of them) enables easier studying
# of less prominent bins, if such a need arises.
#
# Example 1: ezTree_CheckM_lineage_completeness_min = 90
#            ezTree_CheckM_lineage_contamination_max = 5
#            ...the other two parameters unspecified
#
# Example 2: ezTree_CheckM_lineage_completeness_min = 70
#            ezTree_CheckM_lineage_completeness_max = 90
#            ezTree_CheckM_lineage_contamination_max = 7
#            ...the fourth parameter unspecified
#
# Example 3: ezTree_CheckM_lineage_completeness_min = 70
#            ezTree_CheckM_lineage_completeness_max = 92
#            ezTree_CheckM_lineage_contamination_min = 4
#            ezTree_CheckM_lineage_contamination_max = 9
#
# If any of these parameters is disabled, it does not
# impose its respective restriction on collected bins.
#
# To select all collected bins for analysis, disable all
# four limiting parameters. In this case CheckM lineage
# workflow does not have to be executed (although it
# can be, if it is needed for other purposes).
#
# NOTE 1: ezTree requires bins of a sufficient quality
#         to produce its results. Also, too many bins
#         may cause ezTree to fail, because it becomes
#         impossible to find any PFAM families
#         that exist once and only once in all genomes.
#
# NOTE 2: consequently, it is generally advised
#         to set parameter
#         ezTree_CheckM_lineage_completeness_min
#         to at least a value of 50 (percent) or
#         greater. Other typical choices are between
#         50.0001 and 70, between 70.0001 and 90, and
#         between 90.0001 and 100.
#         At the same time, it is generally advised
#         to set parameter
#         ezTree_CheckM_lineage_contamination_max
#         to at most a value of 5 or 10 (percent).
#         Failing to do so will typically result
#         in inclusion of bins with insufficient
#         quality in ezTree analysis.
#
# NOTE 3: the above recommendations are only
#         general and generic. There are situations,
#         where different settings also make sense.
#         If you are an expert, then you probably know
#         better how to set these parameters.

#ezTree_CheckM_lineage_completeness_min = 30
#ezTree_CheckM_lineage_contamination_max = 90
#ezTree_CheckM_lineage_completeness_max = 70
#ezTree_CheckM_lineage_contamination_min = 10

# OPTIONAL: param_ezTree_evalue (default 1e-10)
#
# Specify ezTree parameter evalue.
#
# Example: param_ezTree_evalue = 5e-10
#
# Leave this parameter unset, if you do not have a
# specific reason to do otherwise.

#param_ezTree_evalue = 5e-10

# OPTIONAL: param_ezTree_model (default JTT)
#
# Specify ezTree evolutionary model.
# Permissible values are JTT, WAG, or LG.
#
# Leave this parameter unset, if you do not have a
# specific reason to do otherwise.

#param_ezTree_model = WAG

#************************************************************
# Parameters for execution of Prokka
# annotation of collected bins
#
# https://github.com/tseemann/prokka
#************************************************************

[Prokka]

# OPTIONAL: include_Prokka (default NO)
# Set to Yes to run Prokka.
#
# CheckM lineage workflow is going to be enabled
# regardless of its above settings, if user-specified
# bin filtering is applied by setting at
# least one of the following parameters (see below):
#   Prokka_CheckM_lineage_completeness_min
#   Prokka_CheckM_lineage_contamination_max
#   Prokka_CheckM_lineage_completeness_max
#   Prokka_CheckM_lineage_contamination_min

#include_Prokka = yes

# OPTIONAL: Prokka_add_Pfam_database (default NO)
# Set to Yes to also take into account the Pfam HMM
# database in addition to databases that are
# distributed with Prokka.
#Prokka_add_Pfam_database = yes

# ALL FOUR OPTIONAL (numerical values between 0 and 100):
#   Prokka_CheckM_lineage_completeness_min
#   Prokka_CheckM_lineage_contamination_max
#
#   Prokka_CheckM_lineage_completeness_max
#   Prokka_CheckM_lineage_contamination_min
#
# Intervals of minimal and maximal completeness and contamination
# (as reported by CheckM lineage_wf) that collected bins
# must possess in order to be included in Prokka annotation.
#
# If any of these parameters is disabled, it does not
# impose its respective restriction on collected bins.
#
# To select all collected bins for analysis, disable all
# four limiting parameters. In this case CheckM lineage
# workflow does not have to be executed (although it
# can be, if it is needed for other purposes).
#
# Please see a more detailed description of analogous
# parameters in the ezTree section above.

#Prokka_CheckM_lineage_completeness_min = 30
#Prokka_CheckM_lineage_contamination_max = 90
#Prokka_CheckM_lineage_completeness_max = 70
#Prokka_CheckM_lineage_contamination_min = 10

# OPTIONAL: params_Prokka (quote delimited)
# Specify any algorithmic parameters that Prokka knows about.
#
# Example: params_Prokka = "--metagenome"
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to Prokka
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by Prokka itself.
#         Please consult Prokka documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from Prokka help.
# If you have questions regarding these parameters,
# please consult documentation or authors of Prokka.
# -----------------------------------------------------------
#
#   --addgenes        Add 'gene' features for each 'CDS' feature
#                     (default OFF)
#
#   --addmrna         Add 'mRNA' features for each 'CDS' feature
#                     (default OFF)
#
#   --locustag [X]    Locus tag prefix [auto] (default '')
#
#   --increment [N]   Locus tag counter increment (default '1')
#
#   --gffver [N]      GFF version (default '3')
#
#   --compliant       Force Genbank/ENA/DDJB compliance: --addgenes
#                     --mincontiglen 200 --centre XXX (default OFF)
#
#   --centre [X]      Sequencing centre ID.
#                     (default '')
#
#   --accver [N]      Version to put in Genbank file (default '1')
#
#
#
# Organism details:
#
#   --genus [X]       Genus name (default 'Genus')
#
#   --species [X]     Species name (default 'species')
#
#   --strain [X]      Strain name (default 'strain')
#
#   --plasmid [X]     Plasmid name or identifier (default '')
#
#
#
# Annotations:
#
#   --kingdom [X]     Annotation mode:
#                     Archaea|Bacteria|Mitochondria|Viruses
#                     (default 'Bacteria')
#
#   --gcode [N]       Genetic code / Translation table
#                     (set if --kingdom is set) (default '0')
#
#   --gram [X]        Gram: -/neg +/pos (default '')
#
#   --usegenus        Use genus-specific BLAST databases
#                     (needs --genus) (default OFF)
#
#   --proteins [X]    FASTA or GBK file to use as 1st priority (default '')
#
#   --hmms [X]        Trusted HMM to first annotate from (default '')
#
#   --metagenome      Improve gene predictions for highly fragmented genomes
#                     (default OFF)
#
#   --rawproduct      Do not clean up /product annotation (default OFF)
#
#   --cdsrnaolap      Allow [tr]RNA to overlap CDS (default OFF)
#
#
#
# Matching:
#
#   --evalue [n.n]    Similarity e-value cut-off (default '1e-09')
#
#   --coverage [n.n]  Minimum coverage on query protein (default '80')
#
#
#
# Computation:
#
#   --fast            Fast mode - only use basic BLASTP databases
#                     (default OFF)
#
#   --noanno          For CDS just set /product="unannotated protein"
#                     (default OFF)
#
#   --mincontiglen [N] Minimum contig size [NCBI needs 200] (default '1')
#
#   --rfam            Enable searching for ncRNAs with Infernal+Rfam
#                     (SLOW!) (default '0')
#
#   --norrna          Don't run rRNA search (default OFF)
#
#   --notrna          Don't run tRNA search (default OFF)
#
#   --rnammer         Prefer RNAmmer over Barrnap for rRNA prediction
#                     (default OFF)
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_Prokka unset, if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with Prokka parameters.
#
# However, according to Orchestra's typical/expected use cases,
# flag "--metagenome" should probably always be set.

#params_Prokka = "--metagenome"

#************************************************************
# Parameters for execution of Roary
# building of pan genome from
# collected bins and Prokka annotations
#
# https://github.com/sanger-pathogens/Roary
#************************************************************

[Roary]

# OPTIONAL: include_Roary (default NO)
# Set to Yes to run Roary.
#
# If this step is enabled, then Prokka is
# going to be enabled by Orchestra regardless
# of the above Prokka settings.
#
# Please note that the subset of collected
# bins that enters Roary's pan genome build process
# is the same as the one input to Prokka by the
# user's specified criteria in the Prokka
# section of this configuration file.

#include_Roary = yes

# OPTIONAL: params_Roary (quote delimited)
# Specify any algorithmic parameters that Roary knows about.
#
# Example: params_Roary = "-ap"
#          (allow paralogs in core alignment)
#
# NOTE 1: you should NOT specify any input and output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to Roary
#         without any checking or formatting. It is the responsibility
#         of a user to assure the correctness of these parameters.
#
# NOTE 3: the value of each parameter that is not set in this
#         manner is set to its default value by Roary itself.
#         Please consult Roary documentation for details.
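#
# Purely as an illustrative sketch (not a recommendation): options
# from the list below can be combined, e.g. a fast MAFFT core-gene
# alignment together with a relaxed blastp identity threshold:
#
#   params_Roary = "-e -n -i 90"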
#************************************************************
# Parameters for execution of Roary
#     building of a pan genome from
#     collected bins and Prokka annotations
#
#     https://github.com/sanger-pathogens/Roary
#************************************************************

[Roary]

# OPTIONAL: include_Roary (default NO)
# Set to Yes to run Roary.
#
# If this step is enabled, then Prokka is
# going to be enabled by Orchestra regardless
# of the above Prokka settings.
#
# Please note that the subset of collected
# bins that enters Roary's pan genome build process
# is the same one that enters Prokka, as selected
# by the user-specified criteria in the Prokka
# section of this configuration file.

#include_Roary = yes



# OPTIONAL: params_Roary (quote delimited)
# Specify any algorithmic parameters that Roary knows about.
#
# Example: params_Roary = "-ap"
#          (allow paralogs in core alignment)
#
# NOTE 1: you should NOT specify any input or output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to Roary
#         without any checking or formatting. It is the responsibility
#         of the user to ensure the correctness of these parameters.
#
# NOTE 3: each parameter that is not set in this manner is set
#         to its default value by Roary itself.
#         Please consult the Roary documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from Roary help.
# If you have questions regarding these parameters,
# please consult the documentation or authors of Roary.
# -----------------------------------------------------------
#
# -e          create a multiFASTA alignment of core genes using PRANK
#
# -n          fast core gene alignment with MAFFT, use with -e
#
# -i          minimum percentage identity for blastp [95]
#
# -cd FLOAT   percentage of isolates a gene must be in to be core [99]
#
# -g INT      maximum number of clusters [50000]
#
# -s          don't split paralogs
#
# -t INT      translation table [11]
#
# -ap         allow paralogs in core alignment
#
# -y          add gene inference information to spreadsheet,
#             doesn't work with -e
#
# -iv STR     change the MCL inflation value [1.5]
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_Roary unset if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with Roary parameters.

#params_Roary = "-ap"
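# EXAMPLE (illustrative values, not recommendations): to enable
# Roary and request a fast MAFFT-based core gene alignment with
# a relaxed blastp identity threshold, one might set:
#
# include_Roary = yes
# params_Roary = "-e -n -i 90"
#
# Here "-e" and "-n" are combined as the Roary help above
# suggests, and "-i 90" lowers the minimum blastp identity
# from its default of 95.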
#************************************************************
# Parameters for execution of FastANI
#     alignment-free computation of whole-genome
#     Average Nucleotide Identity between genomes
#
#     https://github.com/ParBLiSS/FastANI
#************************************************************

[FastANI]

# In order to successfully utilize the FastANI part of
# Orchestra processing, it is necessary to follow the
# FastANI instructions and provide Orchestra with an appropriate
# external database for genome comparison. The file or directory
# with the external database is specified through the configuration
# parameters below.
#
# The database can be either a collection of draft genomes
# produced within one's own study, an external narrow database
# for analyses of a single specific phylogenetic group
# (for example, databases D1 to D5 at
# http://enve-omics.ce.gatech.edu/data/fastani),
# or a general database containing a large number of high
# quality genomes (for example, the NCBI database at the
# same link http://enve-omics.ce.gatech.edu/data/fastani)
# for assessing species boundaries of a novel cohort of draft
# metagenome assembled genomes.
#
# Genome Taxonomy Database
# Users may also consider Orchestra as the first step
# (taking raw sequencing data to metagenome assembled genomes)
# towards the emerging Genome Taxonomy Database platform
# (https://gtdb.ecogenomic.org), which provides ongoing
# improvements in exact prokaryotic genome taxonomy.



# OPTIONAL: include_FastANI (default NO)
# Set to Yes to run FastANI.
#
# The CheckM lineage workflow is going to be enabled
# regardless of its above settings, if user-specified
# bin filtering is applied by setting at
# least one of the following parameters (see below):
# FastANI_CheckM_lineage_completeness_min
# FastANI_CheckM_lineage_contamination_max
# FastANI_CheckM_lineage_completeness_max
# FastANI_CheckM_lineage_contamination_min

#include_FastANI = yes



# ALL FOUR OPTIONAL (numerical values between 0 and 100):
# FastANI_CheckM_lineage_completeness_min
# FastANI_CheckM_lineage_contamination_max
#
# FastANI_CheckM_lineage_completeness_max
# FastANI_CheckM_lineage_contamination_min
#
# Intervals of minimal and maximal completeness and contamination
# (as reported by CheckM lineage_wf) that collected bins
# must possess in order to be included in FastANI analysis.
#
# If any of these parameters is disabled, it does not
# impose its respective restriction on collected bins.
#
# To select all collected bins for analysis, disable all
# four limiting parameters. In this case the CheckM lineage
# workflow does not have to be executed (although it
# can be, if it is needed for other purposes).
#
# Please see a more detailed description of analogous
# parameters in the ezTree section above.

#FastANI_CheckM_lineage_completeness_min = 30
#FastANI_CheckM_lineage_contamination_max = 90

#FastANI_CheckM_lineage_completeness_max = 70
#FastANI_CheckM_lineage_contamination_min = 10



# REQUIRED if include_FastANI=yes: FastANI_analysis_variant
# Selects one of the following FastANI processing variants.
#
# 1ref-1query: the user specifies one reference genome; Orchestra
#              runs a separate FastANI analysis of each eligible
#              collected bin against the specified reference genome
#              (behind the scenes Orchestra supplies to FastANI
#              the parameters "--ref" and "--query").
#              This is the only mode of operation with the
#              possibility of generating output mappings for
#              visualization (FastANI parameter "--visualize",
#              which is automatically provided by Orchestra).
#
# 1ref-Mquery: the user specifies one reference genome; Orchestra
#              runs one FastANI analysis for the entire set
#              of eligible collected bins (behind the scenes
#              Orchestra supplies to FastANI the parameters
#              "--ref" and "--queryList").
#
# Mref-1query: the user specifies several reference genomes; Orchestra
#              runs a separate FastANI analysis of each eligible
#              collected bin against the specified set of reference
#              genomes (behind the scenes Orchestra supplies to
#              FastANI the parameters "--refList" and "--query").
#
# Mref-Mquery: the user specifies several reference genomes; Orchestra
#              runs one FastANI analysis for the entire set of
#              eligible collected bins (behind the scenes Orchestra
#              supplies to FastANI the parameters "--refList" and
#              "--queryList").
#
# One and only one of the following selections must be enabled,
# if FastANI processing is enabled.

# FastANI_analysis_variant = 1ref-1query
# FastANI_analysis_variant = 1ref-Mquery
# FastANI_analysis_variant = Mref-1query
# FastANI_analysis_variant = Mref-Mquery



# REQUIRED if FastANI_analysis_variant is 1ref-1query or 1ref-Mquery:
# FastANI_reference_genome
#
# File name of a single reference genome (fasta/fastq/fna)[.gz]
# against which the collected bins' Average Nucleotide
# Identity is computed. Some reference genomes may be downloaded from
# http://enve-omics.ce.gatech.edu/data/fastani

#FastANI_reference_genome = '/home/johnDoe/data/refGenome.fna'



# REQUIRED if FastANI_analysis_variant is Mref-1query or Mref-Mquery:
# FastANI_reference_genome_pattern
#
# Linux file pattern that targets several reference genome files.
# For example: '/home/johnDoe/data/refGenome*.fna'
#
# NOTE: it is NOT possible to enumerate several reference genome
#       files by specifying this parameter more than once. All
#       required reference genome files must be targeted by
#       the specified Linux file pattern.
#       If in doubt, please open a Linux terminal in the directory
#       with the reference genome files, and execute something like
#       "ls refGenome*.fna".
#
# TIP: If your directory with reference genome files contains more files
#      than you intend to process within an isolated Orchestra run,
#      and if it is impossible to target only the appropriate subset
#      of them with a file pattern, then it is possible to proceed
#      as follows.
#
#      SOLUTION 1: create a new directory (say /home/me/special_refs).
#                  Then copy or move the appropriate subset of
#                  reference genome files to the new directory.
#                  This way it is easy to target only these files
#                  with generic file patterns like
#                  /home/me/special_refs/refGenome*.fna
#
#      SOLUTION 2: create a new directory (say /home/me/special_refs).
#                  Within this directory create symbolic links to the
#                  appropriate files. Symbolic links are created with
#                  the Linux command "ln -s /path/to/file /path/to/symlink".
#                  Then target all of the linked files with generic
#                  patterns like /home/me/special_refs/refGenome*.fna
#                  (see the shell example below).
#
#      Solution 1 is probably easier to do, whereas solution 2 has the
#      advantage that no files are actually moved around the file
#      system. This way, the same files may be linked into different
#      directories and simultaneously take part in different analysis
#      combinations without occupying disk space more than once.
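# EXAMPLE (hypothetical paths): a shell session implementing
# SOLUTION 2 could look like the following; adjust directory
# and file names to your own setup.
#
#   mkdir /home/me/special_refs
#   ln -s /home/me/all_refs/refGenome01.fna /home/me/special_refs/refGenome01.fna
#   ln -s /home/me/all_refs/refGenome07.fna /home/me/special_refs/refGenome07.fna
#   ls /home/me/special_refs/refGenome*.fna
#
# The final "ls" verifies that the pattern
# /home/me/special_refs/refGenome*.fna targets exactly the
# linked files; the same pattern can then be used as
# FastANI_reference_genome_pattern below.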
#FastANI_reference_genome_pattern = '/home/johnDoe/data/refGenome*.fna'



# OPTIONAL: params_FastANI (quote delimited)
# Specify any algorithmic parameters that FastANI knows about.
#
# Example: params_FastANI = "--fragLen 2000"
#
# NOTE 1: you should NOT specify any input or output files,
#         since Orchestra handles these; only parameters
#         that affect calculations are feasible here.
#
# NOTE 2: the value of this parameter is passed directly to FastANI
#         without any checking or formatting. It is the responsibility
#         of the user to ensure the correctness of these parameters.
#
# NOTE 3: each parameter that is not set in this manner is set
#         to its default value by FastANI itself.
#         Please consult the FastANI documentation for details.
#
# -----------------------------------------------------------
# VIABLE PARAMETERS ARE THE FOLLOWING.
# Descriptions are copied verbatim
# (or slightly modified) from FastANI help.
# If you have questions regarding these parameters,
# please consult the documentation or authors of FastANI.
# -----------------------------------------------------------
#
# -k, --kmer   kmer size <= 16 [default : 16]
#
# --fragLen    fragment length [default : 3,000]
#
# --minFrag    minimum matched fragments for
#              trusting ANI [default : 50]
#
# -----------------------------------------------------------
# END OF VIABLE PARAMETERS.
# -----------------------------------------------------------
#
# Leave params_FastANI unset if you do not have a specific
# reason to do otherwise, or if you are not familiar
# with FastANI parameters.

#params_FastANI = "--fragLen 500"
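# EXAMPLE (illustrative, all values are placeholders): a minimal
# FastANI setup that compares the entire set of eligible collected
# bins against several reference genomes in one run might read:
#
# include_FastANI = yes
# FastANI_analysis_variant = Mref-Mquery
# FastANI_reference_genome_pattern = '/home/johnDoe/data/refGenome*.fna'
# params_FastANI = "--fragLen 2000"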