Skip to content

Commit 314f408

Browse files
Merge pull request #14 from Riminder/fix/text-parsing-retrocompatibility-text-vs-texts
fix: text vs texts
2 parents (3d12be3 + a243717), commit 314f408

4 files changed

Lines changed: 56 additions & 24 deletions

File tree

hrflow/hrflow/text/parsing.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,9 @@ def __init__(self, api):
1010
"""Init."""
1111
self.client = api
1212

13-
def post(self, texts: t.List[str]) -> t.Dict[str, t.Any]:
13+
def post(
14+
self, text: t.Optional[str] = None, texts: t.Optional[t.List[str]] = None
15+
) -> t.Dict[str, t.Any]:
1416
"""
1517
Parse a raw Text. Extract over 50 data point from any raw input text.
1618
@@ -23,7 +25,16 @@ def post(self, texts: t.List[str]) -> t.Dict[str, t.Any]:
2325
`/text/parsing` response
2426
"""
2527

26-
payload = dict(texts=texts)
28+
if text is not None:
29+
if texts is not None:
30+
raise ValueError("Only one of text or texts must be provided.")
31+
else:
32+
payload = dict(text=text)
33+
else:
34+
if texts is None:
35+
raise ValueError("Either text or texts must be provided.")
36+
else:
37+
payload = dict(texts=texts)
2738

2839
response = self.client.post("text/parsing", json=payload)
2940

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "hrflow"
3-
version = "3.2.0"
3+
version = "3.2.1"
44
description = "Python hrflow.ai API package"
55
authors = ["HrFlow.ai <contact@hrflow.ai>"]
66
license = "MIT"

tests/test_text.py

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -73,12 +73,9 @@ def _content_is_png(content: bytes) -> bool:
7373
return content.startswith(b"\x89PNG\r\n\x1a\n")
7474

7575

76-
def _imaging_test_valid_size(width: t.Literal[256, 512]):
76+
def _imaging_test_valid_size(hrflow_client, width: t.Literal[256, 512]):
7777
model = TextImagingResponse.model_validate(
78-
Hrflow(
79-
api_secret=_var_from_env_get("HRFLOW_API_KEY"),
80-
api_user=_var_from_env_get("HRFLOW_USER_EMAIL"),
81-
).text.imaging.post(text="plumber", width=width)
78+
hrflow_client.text.imaging.post(text="plumber", width=width)
8279
)
8380
assert model.code == requests.codes.ok
8481
response = requests.get(model.data.image_url)
@@ -89,14 +86,14 @@ def _imaging_test_valid_size(width: t.Literal[256, 512]):
8986

9087
@pytest.mark.text
9188
@pytest.mark.imaging
92-
def test_imaging_basic_256():
93-
_imaging_test_valid_size(256)
89+
def test_imaging_basic_256(hrflow_client):
90+
_imaging_test_valid_size(hrflow_client, 256)
9491

9592

9693
@pytest.mark.text
9794
@pytest.mark.imaging
98-
def test_imaging_basic_512():
99-
_imaging_test_valid_size(512)
95+
def test_imaging_basic_512(hrflow_client):
96+
_imaging_test_valid_size(hrflow_client, 512)
10097

10198

10299
@pytest.mark.text
@@ -372,10 +369,37 @@ def test_ocr_basic(hrflow_client):
372369

373370
@pytest.mark.text
374371
@pytest.mark.parsing
375-
def test_parsing_basic(hrflow_client):
372+
def test_parsing_basic_with_texts_param(hrflow_client):
376373
texts = ["John Doe can be contacted on john.doe@hrflow.ai"]
377374
model = TextParsingResponse.model_validate(
378375
hrflow_client.text.parsing.post(texts=texts)
379376
)
380377
assert model.code == requests.codes.ok
381378
assert len(model.data) == len(texts)
379+
380+
381+
@pytest.mark.text
382+
@pytest.mark.parsing
383+
def test_parsing_basic_with_text_param(hrflow_client):
384+
text = "John Doe can be contacted on john.doe@hrflow.ai"
385+
model = TextParsingResponse.model_validate(
386+
hrflow_client.text.parsing.post(text=text)
387+
)
388+
assert model.code == requests.codes.ok
389+
390+
391+
@pytest.mark.text
392+
@pytest.mark.parsing
393+
def test_parsing_basic_with_no_text_or_texts_param(hrflow_client):
394+
with pytest.raises(ValueError):
395+
TextParsingResponse.model_validate(hrflow_client.text.parsing.post())
396+
397+
398+
@pytest.mark.text
399+
@pytest.mark.parsing
400+
def test_parsing_basic_with_text_and_texts_param(hrflow_client):
401+
text = "John Doe can be contacted on john.doe@hrflow.ai"
402+
with pytest.raises(ValueError):
403+
TextParsingResponse.model_validate(
404+
hrflow_client.text.parsing.post(text=text, texts=[text])
405+
)

tests/utils/schemas.py

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -126,9 +126,7 @@ class TextParsingDataItemEntity(BaseModel):
126126
def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
127127
start = values.get("start")
128128
end = values.get("end")
129-
assert start <= end or fail(
130-
f"start={start} is expected to be smaller than end={end}"
131-
)
129+
assert start <= end or fail(f"{start=} is expected to be smaller than {end=}")
132130
return values
133131

134132

@@ -161,6 +159,9 @@ class TextParsingDataItem(BaseModel):
161159
@model_validator(mode="before")
162160
@classmethod
163161
def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
162+
if isinstance(values, list):
163+
return [cls._check(item) for item in values]
164+
164165
text = values.get("text")
165166
entities = values.get("entities")
166167
parsing = values.get("parsing")
@@ -179,15 +180,13 @@ def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
179180
parsed = text[entity["start"] : entity["end"]]
180181
holder = parsing[parsing_key_name]
181182

182-
assert parsed in holder or fail(
183-
f"parsed='{parsed}' is expected to be in holder='{holder}'"
184-
)
183+
assert parsed in holder or fail(f"{parsed=} is expected to be in {holder=}")
185184

186185
return values
187186

188187

189188
class TextParsingResponse(HrFlowAPIResponse):
190-
data: t.Optional[t.List[TextParsingDataItem]] = None
189+
data: t.Optional[t.Union[TextParsingDataItem, t.List[TextParsingDataItem]]] = None
191190

192191

193192
class TextOCRDataItemPage(BaseModel):
@@ -365,8 +364,7 @@ def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
365364
board_key = values.get("board_key")
366365
board = values.get("board")
367366
assert board_key == board["key"] or fail(
368-
f"job.board_key='{board_key}' is expected to be the same as"
369-
f" job.board.key='{board['key']}'"
367+
f"{board_key=} is expected to be the same as {board['key']=}"
370368
)
371369
return values
372370

@@ -461,8 +459,7 @@ def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
461459
source_key = values["source_key"]
462460
source = values["source"]
463461
assert source_key == source["key"] or fail(
464-
f"profile.source_key='{source_key}' is expected to be the same as"
465-
f" profile.source.key='{source['key']}'"
462+
f"{source_key=} is expected to be the same as {source['key']=}"
466463
)
467464
return values
468465

0 commit comments

Comments (0)