update splice handling

sinamajidian · sinamajidian · commit 2ac5a28d2f68 · 2023-10-23T11:10:51.000+02:00
diff --git a/FastOMA.nf b/FastOMA.nf
@@ -16,7 +16,7 @@ params.genetrees_folder = params.output_folder + "/genetrees"
 
 process omamer_run{
   time {1.h}
-  memory {90.GB}
+  memory {40.GB}
   publishDir params.hogmap_folder
   input:
     path proteomes_omamerdb_inputhog
@@ -29,7 +29,7 @@ process omamer_run{
     then
         cp ${proteomes_omamerdb_inputhog[2]}/${proteomes_omamerdb_inputhog[0]}.hogmap  ${proteomes_omamerdb_inputhog[0]}.hogmap
     else
-        omamer search --db ${proteomes_omamerdb_inputhog[1]} --query ${proteomes_omamerdb_inputhog[0]} --out ${proteomes_omamerdb_inputhog[0]}.hogmap
+        omamer search -n 10 --db ${proteomes_omamerdb_inputhog[1]} --query ${proteomes_omamerdb_inputhog[0]} --out ${proteomes_omamerdb_inputhog[0]}.hogmap
     fi
   """  // --nthreads 10
 }
@@ -70,7 +70,7 @@ process hog_big{
   publishDir params.pickles_temp
   cpus  6
   time {60.h}     // for very big rhog it might need more, or you could re-run and add `-resume`
-  memory {80.GB}
+  memory {50.GB}
   input:
     val rhogsbig_tree_ready
   output:
@@ -101,7 +101,7 @@ process hog_rest{
 
 
 process collect_subhogs{
-  memory {200.GB}
+  memory {150.GB}
   publishDir params.output_folder, mode: 'copy'
   input:
     val ready_hog_rest
diff --git a/README.md b/README.md
@@ -2,16 +2,14 @@ FastOMA
 ======
 FastOMA is a scalable software package to infer orthology relationship.
 
-
-
 # Input and Output: 
 
 ### Input: 
 1- Sets of protein sequences in FASTA format (with `.fa` extension) in the folder `proteome`.
 The name of each fasta file is the name of species. Please make sure that the name of fasta records do not contain `||`. 
 
 
-2- The omamer database which you can download [this](https://omabrowser.org/All/LUCA.h5) 
+2- The omamer database, which you can download [here](https://omabrowser.org/All/LUCA-v2.0.0.h5) 
 which is from [OMA browser](https://omabrowser.org/oma/current/). 
 This file is `13 Gb` containing all the gene families of the Tree of Life or you can download it for a subset of them, e.g. Primates (352MB). 
 
@@ -110,10 +108,10 @@ If you face any difficulty during installation, feel free to create a [github is
 Then, cd to the `testdata` folder and download the omamer database and change its name to `omamerdb.h5`.
 ```
 cd FastOMA/testdata
-wget https://omabrowser.org/All/Primates.h5    # 352MB
+wget https://omabrowser.org/All/Primates-v2.0.0.h5 -O Primates.h5     # 105MB, saved as Primates.h5
 mv Primates.h5  in_folder/omamerdb.h5 
 ```
-(This is for the test however, I would suggest downloading the `LUCA.h5` instead of `Primates.h5` for your real analysis.). Check the item 2 in the [input section](https://github.com/sinamajidian/FastOMA#input) for details.
+(This is for the test; however, I would suggest downloading the `LUCA-v2.0.0.h5` instead of `Primates-v2.0.0.h5` for your real analysis.) Check the item 2 in the [input section](https://github.com/sinamajidian/FastOMA#input) for details.
 
 Now we have such a structure in our  testdata folder.
 ``` 
@@ -265,7 +263,7 @@ HUMAN00034;HUMAN00035
 - phylostragraphy 
 
 ## Change log
-- Update  v0.1.2: improve rootHOG inference, splice 
+- Update   v0.1.2: improve rootHOG inference, splice handling, OMAmer v2 with multi-hits
 - Release  v0.1.0: improve nextflow pipeline and outputs. 
 - prelease v.0.0.6: use `--fragment-detection` for `infer-subhogs` and `--low-so-detection --fragment-detection`
 - prelease v.0.0.6: using input hogmpa