seq2pipe/pipeline_runner.py at master · qiime-lab/seq2pipe · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
#!/usr/bin/env python3
"""
pipeline_runner.py
==================
qiime2_agent.tool_run_qiime2_pipeline を Streamlit から呼び出すための
ラッパーモジュール。グローバル変数を注入して stdout をキャプチャする。
"""

import sys
import datetime
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, Optional

# qiime2_agent をインポート（if __name__ == "__main__" ガード済み）
sys.path.insert(0, str(Path(__file__).parent))
import qiime2_agent as _agent


# ─────────────────────────────────────────────────────────────────────────────
# 設定・結果の値オブジェクト
# ─────────────────────────────────────────────────────────────────────────────

@dataclass
class PipelineConfig:
    """Parameters for one pipeline run, consumed by ``run_pipeline``.

    Every field from ``fastq_dir`` through ``group_column`` is forwarded
    unchanged as a keyword argument to
    ``qiime2_agent.tool_run_qiime2_pipeline``; their exact meaning is defined
    there. The last three fields are interpreted by ``run_pipeline`` itself.
    """
    fastq_dir: str
    paired_end: bool = True
    trim_left_f: int = 17
    trim_left_r: int = 21
    trunc_len_f: int = 270
    trunc_len_r: int = 220
    metadata_path: str = ""
    classifier_path: str = ""
    n_threads: int = 4
    sampling_depth: int = 5000
    group_column: str = ""
    output_dir: str = ""   # if empty, ~/seq2pipe_results/<timestamp>/ is created automatically
    manifest_path: str = ""  # pre-built manifest; used only if the path exists, otherwise one is generated
    model: str = ""          # Ollama model name; if empty, the agent's DEFAULT_MODEL is left as is


@dataclass
class PipelineResult:
    """Outcome of one ``run_pipeline`` call."""
    success: bool  # False if the pipeline raised or its report flags a failure near the top
    output_dir: str  # directory the run wrote into
    export_dir: str  # "<output_dir>/exported"
    log_lines: list = field(default_factory=list)  # text captured from stdout during the run
    error_message: str = ""  # empty on success; truncated report or exception + traceback on failure
    completed_steps: list = field(default_factory=list)  # report lines starting with "✅"
    failed_steps: list = field(default_factory=list)  # report lines starting with "❌"


# ─────────────────────────────────────────────────────────────────────────────
# stdout キャプチャ用 Tee クラス
# ─────────────────────────────────────────────────────────────────────────────

class _Tee:
    """File-like stand-in for ``sys.stdout`` that routes text to a callback.

    Routing rules applied by :meth:`write`:

    * Text that is non-blank after right-stripping, with a callback set:
      the stripped text is handed to the callback and is NOT written to the
      wrapped stream directly. Whatever the callback itself writes through
      this object while it is running goes straight to the wrapped stream,
      so a callback that prints its argument produces the text exactly once.
    * Blank / whitespace-only text, or no callback: written straight to the
      wrapped stream.

    All writes to the wrapped stream are best-effort: exceptions raised by
    the stream are swallowed so that logging can never abort the pipeline.
    """

    def __init__(self, original, callback):
        """Wrap *original* (a writable stream) and forward text to *callback*.

        *callback* is called with one ``str`` argument; it may be ``None``.
        """
        self._orig = original
        self._cb = callback
        self.encoding = getattr(original, 'encoding', 'utf-8')
        self._in_callback = False  # re-entrancy guard, True while the callback runs

    def _write_through(self, s):
        """Write *s* to the wrapped stream, ignoring any stream error."""
        try:
            self._orig.write(s)
        except Exception:
            # Deliberately best-effort: a broken stdout must not kill the run.
            pass

    def write(self, s):
        """Route *s* according to the class rules.

        Returns the number of characters accepted (``len(s)``), matching the
        ``io.TextIOBase.write`` contract, so callers that check the return
        value keep working.
        """
        if self._in_callback:
            # Re-entered from inside the callback: emit directly, once.
            self._write_through(s)
            return len(s)

        line = s.rstrip()
        if line and self._cb:
            # Let the callback deliver the text; do not also write it here,
            # otherwise a printing callback would duplicate every line.
            self._in_callback = True
            try:
                self._cb(line)
            except Exception:
                # Callback failed: fall back to the wrapped stream so the
                # text is not lost.
                self._write_through(s)
            finally:
                self._in_callback = False
        else:
            # Blank text / bare newline (or no callback): bypass the callback.
            self._write_through(s)
        return len(s)

    def flush(self):
        """Flush the wrapped stream, ignoring any stream error."""
        try:
            self._orig.flush()
        except Exception:
            pass

    def reconfigure(self, **kwargs):
        """No-op stub so that ``sys.stdout.reconfigure(...)`` does not fail."""
        pass

    def isatty(self):
        """Always report a non-interactive stream."""
        return False


# ─────────────────────────────────────────────────────────────────────────────
# パイプライン実行
# ─────────────────────────────────────────────────────────────────────────────

def run_pipeline(
    config: PipelineConfig,
    log_callback: Optional[Callable[[str], None]] = None,
) -> PipelineResult:
    """Run the full QIIME2 pipeline and return a ``PipelineResult``.

    Steps performed:

    1. Resolve the output directory (``config.output_dir`` or
       ``~/seq2pipe_results/<timestamp>/``) and create it together with a
       ``figures`` sub-directory.
    2. Inject ``SESSION_OUTPUT_DIR``, ``SESSION_FIGURE_DIR``, ``AUTO_YES``
       and, if ``config.model`` is set, ``DEFAULT_MODEL`` into the
       ``qiime2_agent`` module. These globals are NOT restored afterwards.
    3. If ``config.manifest_path`` points to an existing file, temporarily
       replace ``qiime2_agent.tool_generate_manifest`` with a function that
       copies that file to the requested output path.
    4. Replace ``sys.stdout`` with a ``_Tee`` so printed text is collected in
       ``log_lines`` and forwarded to *log_callback*.
    5. Call ``qiime2_agent.tool_run_qiime2_pipeline`` and derive the result
       from the text it returns.

    ``sys.stdout`` and ``tool_generate_manifest`` are always restored.

    Args:
        config: Run parameters.
        log_callback: Optional function called with each captured log line.

    Returns:
        A ``PipelineResult``. Exceptions raised while the pipeline runs are
        caught and reported as ``success=False`` with the traceback in
        ``error_message``; a failing *log_callback* never masks that report.
    """
    import shutil as _shutil
    import traceback

    # -- Resolve the output directory ------------------------------------
    if config.output_dir:
        out_dir = Path(config.output_dir)
    else:
        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        out_dir = Path.home() / "seq2pipe_results" / ts
    out_dir.mkdir(parents=True, exist_ok=True)
    fig_dir = out_dir / "figures"
    fig_dir.mkdir(parents=True, exist_ok=True)

    # -- Inject globals into the agent module ----------------------------
    _agent.SESSION_OUTPUT_DIR = str(out_dir)
    _agent.SESSION_FIGURE_DIR = str(fig_dir)
    _agent.AUTO_YES = True   # skip input() prompts so the run is unattended
    if config.model:
        _agent.DEFAULT_MODEL = config.model

    # -- Monkey-patch manifest generation for a custom manifest ----------
    _orig_generate_manifest = _agent.tool_generate_manifest
    if config.manifest_path and Path(config.manifest_path).exists():
        _manifest_src = config.manifest_path

        def _patched_manifest(fastq_dir, output_path, **kw):
            _shutil.copy(_manifest_src, output_path)
            return f"✅ カスタムマニフェストを使用: {_manifest_src}"

        _agent.tool_generate_manifest = _patched_manifest

    # -- Start capturing stdout ------------------------------------------
    log_lines = []

    def _log(line: str):
        # Record first so the line is kept even if the callback fails.
        log_lines.append(line)
        if log_callback:
            log_callback(line)

    orig_stdout = sys.stdout
    sys.stdout = _Tee(orig_stdout, _log)

    try:
        result_text = _agent.tool_run_qiime2_pipeline(
            fastq_dir=config.fastq_dir,
            paired_end=config.paired_end,
            trim_left_f=config.trim_left_f,
            trim_left_r=config.trim_left_r,
            trunc_len_f=config.trunc_len_f,
            trunc_len_r=config.trunc_len_r,
            metadata_path=config.metadata_path,
            classifier_path=config.classifier_path,
            n_threads=config.n_threads,
            sampling_depth=config.sampling_depth,
            group_column=config.group_column,
        )

        lines = result_text.splitlines()
        # A failure marker among the first five report lines means failure.
        success = not any(ln.startswith("❌") for ln in lines[:5])

        return PipelineResult(
            success=success,
            output_dir=str(out_dir),
            export_dir=str(out_dir / "exported"),
            log_lines=log_lines,
            error_message="" if success else result_text[:500],
            completed_steps=[ln for ln in lines if ln.startswith("✅")],
            failed_steps=[ln for ln in lines if ln.startswith("❌")],
        )

    except Exception as e:
        tb = traceback.format_exc()
        try:
            _log(f"パイプラインエラー: {e}")
        except Exception:
            # log_callback itself failed. The line is already in log_lines;
            # do not let the callback error replace the pipeline error.
            pass
        return PipelineResult(
            success=False,
            output_dir=str(out_dir),
            export_dir=str(out_dir / "exported"),
            log_lines=log_lines,
            error_message=f"{e}\n{tb}",
        )

    finally:
        sys.stdout = orig_stdout
        _agent.tool_generate_manifest = _orig_generate_manifest  # undo the monkey-patch


# ─────────────────────────────────────────────────────────────────────────────
# エクスポートファイルの分類
# ─────────────────────────────────────────────────────────────────────────────

def get_exported_files(export_dir: str) -> dict:
    """Scan an ``exported/`` directory and group its TSV files by category.

    Args:
        export_dir: Path of the export directory to scan.

    Returns:
        A dict with the keys ``feature_table``, ``taxonomy``, ``denoising``,
        ``alpha`` and ``beta``, each mapping to a list of file paths as
        strings. Every list is empty if *export_dir* does not exist. Lists
        are in a deterministic order: sub-directories are visited in sorted
        order and the files inside each directory are sorted by path.

    Example return value::

        {
            "feature_table": ["/path/exported/feature-table.tsv"],
            "taxonomy":      ["/path/exported/taxonomy/taxonomy.tsv"],
            "denoising":     ["/path/exported/denoising_stats/stats.tsv"],
            "alpha":         ["/path/exported/alpha/shannon_vector/alpha-diversity.tsv", ...],
            "beta":          ["/path/exported/beta/bray_curtis.../distance-matrix.tsv", ...],
        }
    """
    base = Path(export_dir)
    result = {
        "feature_table": [],
        "taxonomy": [],
        "denoising": [],
        "alpha": [],
        "beta": [],
    }

    if not base.exists():
        return result

    def _tsv_files(directory):
        # glob() yields entries in arbitrary, filesystem-dependent order;
        # sort so that callers get a stable listing.
        return sorted(str(f) for f in directory.glob("*.tsv"))

    def _tsv_files_in_subdirs(parent):
        # Collect <parent>/<subdir>/*.tsv, skipping stray non-directory entries.
        found = []
        for sub in sorted(parent.iterdir()):
            if sub.is_dir():
                found += _tsv_files(sub)
        return found

    # feature-table.tsv
    ft = base / "feature-table.tsv"
    if ft.exists():
        result["feature_table"].append(str(ft))

    # taxonomy/taxonomy.tsv
    tax = base / "taxonomy" / "taxonomy.tsv"
    if tax.exists():
        result["taxonomy"].append(str(tax))

    # denoising_stats/*.tsv
    ds = base / "denoising_stats"
    if ds.exists():
        result["denoising"] = _tsv_files(ds)

    # alpha/<metric>/*.tsv
    alpha_base = base / "alpha"
    if alpha_base.exists():
        result["alpha"] += _tsv_files_in_subdirs(alpha_base)

    # beta/<matrix>/*.tsv
    beta_base = base / "beta"
    if beta_base.exists():
        result["beta"] += _tsv_files_in_subdirs(beta_base)

    return result