###############PROJECT PARAMETERS####################### mode = site # main analysis mode. Currently POTION supports only site-models analysis. CDS_dir_path = /home/mviana/POTION-1.1.2/Lc_prokka_potion/Lc_prokka_ffn/ homology_file_path = /home/mviana/POTION-1.1.2/Lc_prokka_potion/Lc_prokka_fastortho_t2.out project_dir_path = /home/mviana/POTION-1.1.2/Lc_prokka_potion/Lc_prokka_potion_t2 max_processors = 24 remove_identical = yes # "yes" to remove 100% identical nucleotide groups at the very beginning of # analysis, "no" otherwise verbose = 1 # 1 to print nice log messages telling you what is going on. 0 otherwise ############SEQUENCE/GROUP PARAMETERS################ groups_to_process = all # Defines which lines of the cluster file (ortholog groups) will be processed. # Use "all" to process every group, "-" to set groups between two given lines # (including the said lines). # Use "!" to not process a specific line, can be used with "-" to specify a # set to not be processed. Useful if groups are taking too long to finish. # Use "," or ";" to set distinct sets # Examples: 1;4-10;12 will process groups 1, 4 to 10 and group 12 # all;!3 will process all groups, except group 3 # all;!3-5 will process all groups, except groups 3 to 5 behavior_about_bad_clusters = 1 # what should POTION do if it finds a cluster with a sequence removed # due to any filter? Possible options are: # 0 - does not filter any sequence (not recommended) # 1 - removal of any flagged sequence # 2 - removal of any group with flagged sequences homology_filter = 1 # this variable controls for what POTION will do if a group with paralogous # genes is found. Possible options are: # 0 - analyze all sequences within group # 1 - remove all paralogous within group, analyzing only single-copy genes # 2 - remove groups with paralogous genes # 3 - remove single-copy genes, analyzing all paralogous within group together # 4 - remove single-copy genes and split remaining paralogous into individual # species, evaluating each subgroup individually validation_criteria = all # quality criteria to remove sequences. Possible values are: # 1 - checks for valid start codons # 2 - checks for valid stop codons # 3 - checks for sequence size multiple of 3 # 4 - checks for nucleotides outside ATCG # 'all' applies every verification additional_start_codons = () # these codons, plus the ones specified in codon table, will be the valid start # codons for validation purposes additional_stop_codons = () # same as start codons codon_table = 11 # codon table id (http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) absolute_min_sequence_size = 150 # minimum sequence length cutoff for sequence/group further evaluation absolute_max_sequence_size = 10000 # maximum sequence length cutoff for sequence/group further evaluation relative_min_sequence_size = 0.8 # sequences smaller than mean|meadian times this value will be filtered relative_max_sequence_size = 1.2 # sequences greater than mean|meadian times this value will be filtered sequence_size_average_metric = mean # which average metric will be calculated to determine the # minimum/maximum relative lengths ranges for sequence removal # Possible values are "mean" and "median" min_group_identity = 80 # mean minimum group identity cutoff in pairwise sequence alignments max_group_identity = 98 # mean maximum group identity cutoff in pairwise sequence alignments group_identity_comparison = nt # the kind of sequence that will be used when computing mean group identity # possible values are "nt" or "aa" min_sequence_identity = 80 # minimum (mean/median) sequence identity cutoff in pairwise sequence alignments max_sequence_identity = 100 # maximum (mean/median) sequence identity cutoff in pairwise sequence alignemnts sequence_identity_average_metric = mean # would you like to use mean or median to measure sequence identity? # possible values are "mean" and "median" sequence_identity_comparison = nt # the kind of sequence that will be used when computing sequence identity # possible values are "nt" and "aa" min_gene_number_per_cluster = 4 # minimum # genes in group after all filtering steps max_gene_number_per_cluster = 46 # maximum # genes in group after all filtering steps min_specie_number_per_cluster = 4 # minimum # species in group after all filtering steps max_specie_number_per_cluster = 46 # maximum # species in group after all filtering steps reference_genome_file = # genome reference name, leave blank for none (same name used in fasta file) ############THIRD-PARTY SOFTWARE CONFIGURATION################ multiple_alignment = prank # program used for multiple sequence alignment. Possible values are # muscle, mafft and prank bootstrap = 100 # number of bootstraps in phylogenetic analysis phylogenetic_tree_speed = fast # fast or slow analysis? Used in phylip dnaml or proml only phylogenetic_tree = dnaml # program used for phylogenetic tree reconstruction. Possible values are # proml dnaml, phyml_aa and phyml_nt recombination_qvalue = 0.1 # q-value for recombination detection. Must occur for all the specified tests rec_minimum_confirmations = 2 # minimum number of significant recombination tests positives rec_mandatory_tests = phi # any combination of the three test names, separated by spaces, or N.A. to use # any test remove_gaps = strict # numeric values between 0 and 1 will remove columns with that percentage of # gaps. Values of "strict" or "strictplus" will use respectively these # filters to remove unreliable regions (described in trimal article) PAML_models = m12 m78 # codeml models to be generated. "m12" and/or "m78" values acceptable. pvalue = 0.05 # p-values for positive selection detection qvalue = 0.05 # q-values for positive selection detection