Vicky-Hunt-Lab
diff --git a/‎bin/build_coord_files‎
Lines changed: 1 addition & 1 deletion b/‎bin/build_coord_files‎
Lines changed: 1 addition & 1 deletion
diff --git a/hlsmallrna/create_noncoding.py b/bin/extract_nc
hlsmallrna/create_noncoding.py renamed to bin/extract_nc
Lines changed: 18 additions & 9 deletions
diff --git a/‎bin/overlap_ss.sh‎
Lines changed: 2 additions & 2 deletions b/‎bin/overlap_ss.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/bin/revcomp_rna b/bin/revcomp_rna
old mode 100644
new mode 100755
Lines changed: 2 additions & 2 deletions
diff --git a/‎config.toml‎
Lines changed: 0 additions & 28 deletions b/‎config.toml‎
Lines changed: 0 additions & 28 deletions
diff --git a/‎environment.yml‎
Lines changed: 17 additions & 16 deletions b/‎environment.yml‎
Lines changed: 17 additions & 16 deletions
diff --git a/‎example_config.yml‎
Lines changed: 39 additions & 0 deletions b/‎example_config.yml‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎hlsmallrna/__init__.py‎
Lines changed: 8 additions & 3 deletions b/‎hlsmallrna/__init__.py‎
Lines changed: 8 additions & 3 deletions
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2022 Vicky Hunt Lab Members
+# Copyright 2022 - 2025 Vicky Hunt Lab Members
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 
@@ -1,4 +1,5 @@
-# Copyright 2022 Vicky Hunt Lab Members
+#!/usr/bin/env python3
+# Copyright 2022 - 2025 Vicky Hunt Lab Members
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,8 +17,7 @@
 
 from sys import argv
 from collections import defaultdict
-
-from .config import do_log
+from argparse import ArgumentParser
 
 def get_pairs(array):
     '''
@@ -115,14 +115,14 @@ def merge_cds(coding_reigon):
 
     return n_merged
 
-def extract_noncoding(genome, gff_path, quiet=0, output='result.fasta'):
+def extract_noncoding(genome, gff_path, output='result.fasta'):
     '''
     Extract the noncoding reigon from the genome, basied on a GFF file
     '''
     gff_iter = DataIterator(gff_path)
     genome_data = SeqIO.parse(genome, 'fasta')
 
-    do_log(quiet, '====> Calculating coordinates')
+    print('====> Calculating coordinates')
 
     coordinates = defaultdict(lambda: [])
     mRNAs = defaultdict(lambda: [])
@@ -141,13 +141,13 @@ def extract_noncoding(genome, gff_path, quiet=0, output='result.fasta'):
         if item[2] == 'CDS':
             coding_reigon[item[0] + item[6]].append([int(item[3]), int(item[4])])
 
-    do_log(quiet, '====> Merging and validating coordinates')
+    print('====> Merging and validating coordinates')
 
     cds_merged = merge_cds(coding_reigon)
     mRNA_merged = merge_cds(mRNAs)
     validate_gff(mRNAs, coding_reigon)
 
-    do_log(quiet, f'Merged {cds_merged} coding reigons and {mRNA_merged} mRNAs')
+    print(f'Merged {cds_merged} coding reigons and {mRNA_merged} mRNAs')
 
     for key in mRNAs.keys():
         for item in mRNAs[key]:
@@ -159,12 +159,21 @@ def extract_noncoding(genome, gff_path, quiet=0, output='result.fasta'):
             coordinates[key].append(item[0])
             coordinates[key].append(item[1])
 
-    do_log(quiet, '====> Extarcting fragments')
+    print('====> Extarcting fragments')
 
     for key in coordinates.keys():
         coordinates[key].sort()
 
     SeqIO.write(extract_fragments(genome_data, coordinates, mRNA_start, mRNA_end), output, 'fasta')
 
 if __name__ == '__main__':
-    extract_noncoding(argv[1], argv[2])
+    # Leave prog unset so argparse derives the program name from sys.argv[0] (ArgumentParser('') set it to an empty string)
+    parser = ArgumentParser(description='Extract the noncoding regions of a genome, based on a GFF file')
+    parser.add_argument('genome', help='FASTA containing the genome to extract from')
+    parser.add_argument('gff_file', help='GFF file containing annotations of CDS and mRNA regions')
+    # The output path is optional; it defaults to result.fasta, matching extract_noncoding's own default
+    parser.add_argument('-o', '--output', help='FASTA file to write output to', default='result.fasta')
+
+    args = parser.parse_args()
+
+    extract_noncoding(args.genome, args.gff_file, output=args.output)
@@ -1,5 +1,5 @@
 #!/bin/sh
-# Copyright 2022 Vicky Hunt Lab Members
+# Copyright 2022 - 2025 Vicky Hunt Lab Members
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -177,4 +177,4 @@ tar -cf Ouput_zip.tar unique.txt genome* Main* Pass* beg* end* length* overhang*
 rm  unique.txt genome* Main* Pass* beg* end* length* overhang* cleanup.txt No* ps* Rev* ms*
 
 echo "Distribution of sequences counted"
-echo "Complete"
+echo "Complete"
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2022 Vicky Hunt Lab Members
+# Copyright 2022 - 2025 Vicky Hunt Lab Members
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,4 +37,4 @@ def reverse_complement(seq):
 seqs = SeqIO.parse(args.input, 'fasta')
 seqs = map(reverse_complement, seqs)
 
-SeqIO.write(seqs, args.output, 'fasta')
+SeqIO.write(seqs, args.output, 'fasta')
@@ -3,22 +3,23 @@ channels:
   - bioconda
   - conda-forge
 dependencies:
-  - python=3.8
-  - toml=0.10.*
-  - pysam=0.19.*
-  - biopython=1.*
-  - gffutils=0.11.*
-  - matplotlib=3.5.*
-  - cutadapt=4.*
-  - fastqc=0.11.*
-  - bbmap=38.*
-  - unitas=1.6.1
-  - perl-archive-extract=0.88
-  - bioconda/label/main::perl-lwp-simple=6.39
-  - bowtie2=2.4.*
-  - samtools=1.*
-  - bedtools=2.*
+  - python
+  - pyyaml
+  - pysam
+  - biopython
+  - gffutils
+  - matplotlib
+  - cutadapt
+  - unitas
+  - perl-archive-extract
+  - bioconda/label/main::perl-lwp-simple
+  - bowtie2
+  - samtools
+  - bedtools
+  - eggnog-mapper
+  - scipy
+  - r
+  - bioconductor-topgo
   - pip
   - pip:
     - .
-
 
@@ -0,0 +1,39 @@
+smallRNA_fastq: smallrna.fastq
+# size_sorted_fastqs: binned_reads
+compress_output: true
+keep_intermediate_files: true
+cds: cds.fasta
+unspliced_transcriptome: unspliced.fasta
+trim:
+  # input: smallrna.fastq
+  kit: qiagen
+  # 5_prime: ACGTTTAG
+  # 3_prime: CGTAGGAT
+  min_quality: 20
+# new behaviour: output by first base as well e.g. file of 26G
+sort:
+  # input: output/trimmed_reads.fq
+  genome: genome.fasta
+  align_to_cds: True
+  min_length: 5
+  max_length: 50
+  mismatches: 0
+unitas:
+  # input: output/binned_rna
+  refseq:
+    - gene
+    - miRNA: test/miRNA.fasta
+    - piRNA: test/piRNA.fasta
+    - tRNA: test/tRNA.fasta
+    - TE: test/transposable_elements.fasta
+  species: x
+targetid:
+  min_seq_length: 5
+  target_files:
+    - test/file1.fasta
+    - test/file2.fasta
+  mismatches: 0
+  enrich:
+    eggnog_data_dir: /home/user/eggnog-mapper-data
+    exclude_files:
+      - test/file2.fasta
@@ -1,4 +1,4 @@
-# Copyright 2022 Vicky Hunt Lab Members
+# Copyright 2022 - 2025 Vicky Hunt Lab Members
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,10 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .__main__ import main, main_ssoverlap
+from .__main__ import main
+from .ss_overlap import main_ssoverlap
+from .label_for_unitas import label_for_unitas_cli
 
 def climain():
     main()
 
 def ssoverlap_main():
-    main_ssoverlap()
+    main_ssoverlap()
+
+def labelforunitas_main():
+    label_for_unitas_cli()
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`#!/usr/bin/env python3`
`2`		`-# Copyright 2022 Vicky Hunt Lab Members`
	`2`	`+# Copyright 2022 - 2025 Vicky Hunt Lab Members`
`3`	`3`	`#`
`4`	`4`	`# Licensed under the Apache License, Version 2.0 (the "License");`
`5`	`5`	`# you may not use this file except in compliance with the License.`