Skip to content

Commit cf4d154

Browse files
authored
VCF Annotator CLI support for reading/writing stdin/stdout (#531)
Closes #529

Tested with stdin/stdout and this VCF file:

```
$ curl -O https://raw.githubusercontent.com/c-BIG/NPM-sample-qc/refs/heads/master/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/NA12878-chr14-AKT1.vcf.gz
$ export GA4GH_VRS_DATAPROXY_URI=seqrepo+file://$HOME/dev/data/seqrepo/2024-12-20

# existing functionality (file in, file out)
$ vrs-annotate vcf NA12878-chr14-AKT1.vcf.gz --vcf-out NA12878-chr14-AKT1.VRS-ANNOTATED.vcf.gz

# file in, stdout out
$ vrs-annotate vcf NA12878-chr14-AKT1.vcf.gz --vcf-out -

# stdin in, file out
$ cat NA12878-chr14-AKT1.vcf.gz | vrs-annotate vcf - --vcf-out NA12878-chr14-AKT1.VRS-ANNOTATED.vcf.gz

# stdin in, stdout out
$ cat NA12878-chr14-AKT1.vcf.gz | vrs-annotate vcf - --vcf-out -
```
1 parent 884f905 commit cf4d154

2 files changed

Lines changed: 46 additions & 17 deletions

File tree

src/ga4gh/vrs/extras/annotator/cli.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,19 +70,45 @@ def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: AR
7070
)(func)
7171

7272

73+
class PathOrDash(click.ParamType):
    """click ParamType that validates paths via ``click.Path`` but keeps '-' as-is.

    A value of ``"-"`` (the conventional stand-in for stdin/stdout) is returned
    unchanged as a ``str`` instead of being coerced to ``path_type``. Every other
    value is delegated to an inner ``click.Path`` for validation and conversion.
    """

    name = "path-or-dash"

    def __init__(self, **kwargs) -> None:
        """Initialize PathOrDash.

        :param kwargs: keyword arguments forwarded to ``click.Path``. ``path_type``
            defaults to ``Path``. ``allow_dash`` defaults to ``True``; passing
            ``allow_dash=False`` disables the special handling of ``"-"``.
        """
        self._path_type = kwargs.pop("path_type", Path)
        # Default to True so a bare PathOrDash() still accepts "-", but remember an
        # explicit False so convert() does not silently ignore the caller's choice.
        self._allow_dash = bool(kwargs.setdefault("allow_dash", True))
        self._inner = click.Path(**kwargs, path_type=self._path_type)

    def convert(
        self, value: str, param: click.Parameter | None, ctx: click.Context | None
    ) -> str | Path:
        """Convert the value using click.Path unless it is an allowed '-'.

        :param value: raw parameter value
        :param param: parameter being converted, passed through to ``click.Path``
        :param ctx: current click context, passed through to ``click.Path``
        :return: the literal ``"-"`` when dash is allowed and given; otherwise
            whatever the inner ``click.Path`` conversion returns
        """
        if self._allow_dash and value == "-":
            return value
        # Use click.Path for validation
        return self._inner.convert(value, param, ctx)
91+
92+
7393
@_cli.command(name="vcf")
7494
@_log_level_option
7595
@click.argument(
7696
"vcf-in",
7797
nargs=1,
78-
type=click.Path(exists=True, readable=True, dir_okay=False, path_type=Path),
98+
type=PathOrDash(
99+
allow_dash=True,
100+
exists=True,
101+
readable=True,
102+
dir_okay=False,
103+
path_type=Path,
104+
),
79105
)
80106
@click.option(
81107
"--vcf-out",
82108
required=False,
83-
type=click.Path(writable=True, allow_dash=False, path_type=Path),
109+
type=PathOrDash(writable=True, allow_dash=True, path_type=Path),
84110
help=(
85-
"Declare save location for output annotated VCF. At least one form of output must be declared."
111+
'Declare save location for output annotated VCF (or "-" to write to stdout). At least one form of output must be declared.'
86112
),
87113
)
88114
@click.option(
@@ -141,8 +167,8 @@ def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: AR
141167
help="Suppress messages printed to stdout",
142168
)
143169
def _annotate_vcf_cli(
144-
vcf_in: Path,
145-
vcf_out: Path | None,
170+
vcf_in: Path | str,
171+
vcf_out: Path | str | None,
146172
pkl_out: Path | None,
147173
ndjson_out: Path | None,
148174
vrs_attributes: bool,
@@ -152,7 +178,7 @@ def _annotate_vcf_cli(
152178
require_validation: bool,
153179
silent: bool,
154180
) -> None:
155-
"""Extract VRS objects from VCF located at VCF_IN.
181+
"""Extract VRS objects from VCF located at VCF_IN. VCF_IN can be "-" to read from stdin.
156182
157183
$ vrs-annotate vcf input.vcf.gz --vcf-out output.vcf.gz --pkl-out vrs_objects.pkl
158184
@@ -186,11 +212,10 @@ def _annotate_vcf_cli(
186212
start = timer()
187213
msg = f"Annotating {vcf_in} with the VCF Annotator..."
188214
_logger.info(msg)
189-
if not silent:
190-
click.echo(msg)
215+
191216
try:
192217
annotator.annotate(
193-
vcf_in.absolute(),
218+
vcf_in,
194219
output_vcf_path=vcf_out,
195220
vrs_attributes=vrs_attributes,
196221
assembly=assembly,

src/ga4gh/vrs/extras/annotator/vcf.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ def _update_vcf_header(
191191
@use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.MISSING)
192192
def annotate(
193193
self,
194-
input_vcf_path: Path,
195-
output_vcf_path: Path | None = None,
194+
input_vcf_path: Path | Literal["-"],
195+
output_vcf_path: Path | Literal["-"] | None = None,
196196
vrs_attributes: bool = False,
197197
assembly: str = "GRCh38",
198198
compute_for_ref: bool = True,
@@ -202,8 +202,8 @@ def annotate(
202202
"""Given a VCF, produce an output VCF annotated with VRS allele IDs, and/or
203203
additional storage outputs as implemented in a specific child class.
204204
205-
:param input_vcf_path: Location of input VCF
206-
:param output_vcf_path: The path for the output VCF file
205+
:param input_vcf_path: Location of input VCF (or "-" to read from stdin)
206+
:param output_vcf_path: The path for the output VCF file (or "-" to write to stdout)
207207
:param vrs_attributes: If `True`, include VRS_Start, VRS_End, VRS_State
208208
properties in the VCF INFO field. If `False` will not include these
209209
properties. Only used if `output_vcf_path` is defined.
@@ -217,13 +217,17 @@ def annotate(
217217
:raise VCFAnnotatorError: if no output formats are selected
218218
"""
219219
self.raise_for_output_args(output_vcf_path, **kwargs)
220-
221-
vcf = pysam.VariantFile(filename=str(input_vcf_path.absolute()))
220+
# This can be pushed up to the click arg parsing too
221+
pysam_in_filename = (
222+
"-" if input_vcf_path == "-" else str(input_vcf_path.absolute())
223+
)
224+
vcf = pysam.VariantFile(filename=pysam_in_filename, mode="r")
222225
if output_vcf_path:
223226
self._update_vcf_header(vcf, compute_for_ref, vrs_attributes)
224-
vcf_out = pysam.VariantFile(
225-
str(output_vcf_path.absolute()), "w", header=vcf.header
227+
pysam_out_filename = (
228+
"-" if output_vcf_path == "-" else str(output_vcf_path.absolute())
226229
)
230+
vcf_out = pysam.VariantFile(pysam_out_filename, mode="w", header=vcf.header)
227231
else:
228232
vcf_out = None
229233

0 commit comments

Comments
 (0)