Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
# Changes

## Branch: stoptrain_after_trinity

### Feature: `--stop_after_trinity` flag for `funannotate train`

Adds a new `--stop_after_trinity` flag to `funannotate train`. When the flag is set,
the command exits cleanly (exit status 0) after the Trinity genome-guided assembly
step and before PASA runs. This supports pipelines that process multiple strains per
species and need to reuse a single Trinity assembly across annotation runs.

**`funannotate/train.py`**
- Added `--stop_after_trinity` argument (store_true).
- After Trinity transcripts are confirmed/built, logs the output path and calls
`sys.exit(0)` when the flag is set.

**`funannotate/funannotate.py`**
- Added `--stop_after_trinity` to the `train` subcommand help text.

### Feature: Normalized FASTQ files compressed in place after Trinity normalization

After `in silico` read normalization, the output `.norm.fq` files are now gzip-compressed
in place (via `lib.Fzip_inplace`) to reduce disk usage. Cache detection on re-runs is
updated to look for `.norm.fq.gz` instead of symlinks to plain `.norm.fq`.

**`funannotate/train.py`** (`runNormalization`)
- After normalization, `left.norm.fq`, `right.norm.fq`, and `single.norm.fq` are each
compressed to `.gz` and the function returns the `.gz` paths.
- Re-run checks now call `lib.checkannotations()` on the `.gz` paths instead of
testing the plain `.norm.fq` files with `os.path.islink()`.

## Branch: eggnog_geneprod_issue

### Fix: `funannotate --version` always reported base version in egg installs
Expand Down
27 changes: 16 additions & 11 deletions funannotate/aux_scripts/tbl2asn_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,17 @@ def split_tbl2asn(folder):
tblout.write(''.join(contig))


def tbl2asn_safe_run(*args, **kwargs):
"""Call run(), catch exceptions."""
def tbl2asn_safe_run(cmd, dir, dialect='tbl2asn'):
"""Call tbl2asn_runner(), catch exceptions."""
try:
tbl2asn_runner(*args, **kwargs)
tbl2asn_runner(cmd, dir, dialect)
except Exception as e:
print(("error: %s run(*%r, **%r)" % (e, args, kwargs)))
print(("error: %s run(%r, %r, %r)" % (e, cmd, dir, dialect)))


def tbl2asn_runner(cmd, dir):
cmd = cmd + ['-Z', os.path.join(dir, 'discrepency.report.txt'), '-p', dir]
def tbl2asn_runner(cmd, dir, dialect='tbl2asn'):
indir_flag = '-indir' if dialect == 'table2asn' else '-p'
cmd = cmd + ['-Z', os.path.join(dir, 'discrepency.report.txt'), indir_flag, dir]
FNULL = open(os.path.devnull, 'w')
subprocess.call(cmd, stdout=FNULL, stderr=FNULL)

Expand All @@ -102,7 +103,8 @@ def runtbl2asn_parallel(folder, template, discrepency, organism, isolate, strain
if not organism:
sys.stderr.write("tbl2asn error: organism not specified\n")
sys.exit(1)
# build dialect-aware cmd; -p is appended per-subdir below
# build dialect-aware cmd; indir flag is appended per-subdir below
binary, dialect = lib.resolve_tbl2asn_binary()
cmd = lib.build_tbl2asn_cmd(
folder=folder,
template=template,
Expand All @@ -114,10 +116,13 @@ def runtbl2asn_parallel(folder, template, discrepency, organism, isolate, strain
discrepancy=None, # appended per-subdir in tbl2asn_runner
gcode=gcode,
mgcode=mgcode,
binary=binary,
dialect=dialect,
)
# strip the global -p folder; tbl2asn_runner appends -p <subdir>
if '-p' in cmd:
idx = cmd.index('-p')
# strip the global indir flag; tbl2asn_runner appends it per-subdir
indir_flag = '-indir' if dialect == 'table2asn' else '-p'
if indir_flag in cmd:
idx = cmd.index(indir_flag)
del cmd[idx:idx + 2]
# check for folders in the input folder, if present, run tbl2asn on each folder and then combine
multiple = []
Expand All @@ -129,7 +134,7 @@ def runtbl2asn_parallel(folder, template, discrepency, organism, isolate, strain
p = multiprocessing.Pool(cpus)
results = []
for i in multiple:
results.append(p.apply_async(tbl2asn_safe_run, (cmd, i)))
results.append(p.apply_async(tbl2asn_safe_run, (cmd, i, dialect)))
p.close()
p.join()
# now collect the results make in main folder
Expand Down
92 changes: 73 additions & 19 deletions funannotate/config/test.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Submit-block ::= {
contact {
name name {
last "Palmer",
first "Jonathan"
first "Jonathan",
middle "",
initials "",
suffix "",
title ""
},
affil std {
affil "USDA Forest Service",
Expand All @@ -13,8 +17,7 @@ Submit-block ::= {
country "USA",
street "1 Gifford Pinchot Drive",
email "nextgenusfs@gmail.com",
fax "",
phone "555-555-5555",
phone "",
postal-code "53726"
}
}
Expand All @@ -26,8 +29,20 @@ Submit-block ::= {
name name {
last "Palmer",
first "Jonathan",
initials "J.M.",
suffix ""
middle "",
initials "M.",
suffix "",
title ""
}
},
{
name name {
last "Stajich",
first "Jason",
middle "",
initials "E.",
suffix "",
title ""
}
}
},
Expand All @@ -44,7 +59,6 @@ Submit-block ::= {
},
subtype new
}

Seqdesc ::= pub {
pub {
gen {
Expand All @@ -55,23 +69,63 @@ Seqdesc ::= pub {
name name {
last "Palmer",
first "Jonathan",
initials "J.M.",
suffix ""
middle "",
initials "M.",
suffix "",
title ""
}
},
{
name name {
last "Stajich",
first "Jason",
middle "",
initials "E.",
suffix "",
title ""
}
}
},
affil std {
affil "USDA Forest Service",
div "CFMR",
city "Madison",
sub "WI",
country "USA",
street "1 Gifford Pinchot Drive",
postal-code "53726"
}
},
title “Annotate generated by FunAnnotate: fungal automated genome annotation”
title "Annotate generated by Funannotate: fungal automated genome
annotation"
}
}
}
Seqdesc ::= user {
type str "DBLink",
data {
{
label str "BioProject",
num 1,
data strs {
"PRJNAXXXXXXXXX"
}
},
{
label str "BioSample",
num 1,
data strs {
"SAMNXXXXXXXXX"
}
}
}
}
Seqdesc ::= user {
type str "Submission",
data {
{
label str "AdditionalComment",
data str "ALT EMAIL:nextgenusfs@gmail.com"
}
}
}
Seqdesc ::= user {
type str "Submission",
data {
{
label str "AdditionalComment",
data str "Submission Title:None"
}
}
}

1 change: 1 addition & 0 deletions funannotate/funannotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@
--isolate Isolate name
--cpus Number of CPUs to use. Default: 2
--no-progress Do not print progress to stdout for long sub jobs
--stop_after_trinity Stop pipeline after Trinity genome-guided assembly

ENV Vars: If not passed, will try to load from your $PATH.
--PASAHOME
Expand Down
11 changes: 9 additions & 2 deletions funannotate/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,13 @@ def line_count(fname):

def countfasta(input):
count = 0
with open(input, "r") as f:
with open(input, "rb") as probe:
magic = probe.read(2)
opener = open
if magic == b"\x1f\x8b":
opener = gzip.open
log.debug("countfasta: opening {:} as gzip (gzip={})".format(input, magic == b"\x1f\x8b"))
with opener(input, "rt") as f:
for line in f:
if line.startswith(">"):
count += 1
Expand Down Expand Up @@ -8505,13 +8511,14 @@ def build_tbl2asn_cmd(
if mgcode and int(mgcode) != 1:
meta_parts.append("[mgcode={}]".format(int(mgcode)))
meta = " ".join(meta_parts)
indir_flag = "-indir" if dialect == "table2asn" else "-p"
cmd = [
binary,
"-y",
'"Annotated using ' + fun_version + '"',
"-N",
str(version),
"-p",
indir_flag,
folder,
"-t",
template,
Expand Down
Loading