Merge pull request #14 from Riminder/fix/text-parsing-retrocompatibility-text-vs-texts

corentin-hrflow · web-flow · commit 314f40827c38 · 2024-01-02T14:24:22.000+01:00
fix: text vs texts
diff --git a/hrflow/hrflow/text/parsing.py b/hrflow/hrflow/text/parsing.py
@@ -10,7 +10,9 @@ def __init__(self, api):
         """Init."""
         self.client = api
 
-    def post(self, texts: t.List[str]) -> t.Dict[str, t.Any]:
+    def post(
+        self, text: t.Optional[str] = None, texts: t.Optional[t.List[str]] = None
+    ) -> t.Dict[str, t.Any]:
         """
         Parse a raw Text. Extract over 50 data point from any raw input text.
 
@@ -23,7 +25,16 @@ def post(self, texts: t.List[str]) -> t.Dict[str, t.Any]:
             `/text/parsing` response
         """
 
-        payload = dict(texts=texts)
+        if text is not None:
+            if texts is not None:
+                raise ValueError("Only one of text or texts must be provided.")
+            else:
+                payload = dict(text=text)
+        else:
+            if texts is None:
+                raise ValueError("Either text or texts must be provided.")
+            else:
+                payload = dict(texts=texts)
 
         response = self.client.post("text/parsing", json=payload)
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "hrflow"
-version = "3.2.0"
+version = "3.2.1"
 description = "Python hrflow.ai API package"
 authors = ["HrFlow.ai <contact@hrflow.ai>"]
 license = "MIT"
diff --git a/tests/test_text.py b/tests/test_text.py
@@ -73,12 +73,9 @@ def _content_is_png(content: bytes) -> bool:
     return content.startswith(b"\x89PNG\r\n\x1a\n")
 
 
-def _imaging_test_valid_size(width: t.Literal[256, 512]):
+def _imaging_test_valid_size(hrflow_client, width: t.Literal[256, 512]):
     model = TextImagingResponse.model_validate(
-        Hrflow(
-            api_secret=_var_from_env_get("HRFLOW_API_KEY"),
-            api_user=_var_from_env_get("HRFLOW_USER_EMAIL"),
-        ).text.imaging.post(text="plumber", width=width)
+        hrflow_client.text.imaging.post(text="plumber", width=width)
     )
     assert model.code == requests.codes.ok
     response = requests.get(model.data.image_url)
@@ -89,14 +86,14 @@ def _imaging_test_valid_size(width: t.Literal[256, 512]):
 
 @pytest.mark.text
 @pytest.mark.imaging
-def test_imaging_basic_256():
-    _imaging_test_valid_size(256)
+def test_imaging_basic_256(hrflow_client):
+    _imaging_test_valid_size(hrflow_client, 256)
 
 
 @pytest.mark.text
 @pytest.mark.imaging
-def test_imaging_basic_512():
-    _imaging_test_valid_size(512)
+def test_imaging_basic_512(hrflow_client):
+    _imaging_test_valid_size(hrflow_client, 512)
 
 
 @pytest.mark.text
@@ -372,10 +369,37 @@ def test_ocr_basic(hrflow_client):
 
 @pytest.mark.text
 @pytest.mark.parsing
-def test_parsing_basic(hrflow_client):
+def test_parsing_basic_with_texts_param(hrflow_client):
     texts = ["John Doe can be contacted on john.doe@hrflow.ai"]
     model = TextParsingResponse.model_validate(
         hrflow_client.text.parsing.post(texts=texts)
     )
     assert model.code == requests.codes.ok
     assert len(model.data) == len(texts)
+
+
+@pytest.mark.text
+@pytest.mark.parsing
+def test_parsing_basic_with_text_param(hrflow_client):
+    text = "John Doe can be contacted on john.doe@hrflow.ai"
+    model = TextParsingResponse.model_validate(
+        hrflow_client.text.parsing.post(text=text)
+    )
+    assert model.code == requests.codes.ok
+
+
+@pytest.mark.text
+@pytest.mark.parsing
+def test_parsing_basic_with_no_text_or_texts_param(hrflow_client):
+    with pytest.raises(ValueError):
+        TextParsingResponse.model_validate(hrflow_client.text.parsing.post())
+
+
+@pytest.mark.text
+@pytest.mark.parsing
+def test_parsing_basic_with_text_and_texts_param(hrflow_client):
+    text = "John Doe can be contacted on john.doe@hrflow.ai"
+    with pytest.raises(ValueError):
+        TextParsingResponse.model_validate(
+            hrflow_client.text.parsing.post(text=text, texts=[text])
+        )
diff --git a/tests/utils/schemas.py b/tests/utils/schemas.py
@@ -126,9 +126,7 @@ class TextParsingDataItemEntity(BaseModel):
     def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
         start = values.get("start")
         end = values.get("end")
-        assert start <= end or fail(
-            f"start={start} is expected to be smaller than end={end}"
-        )
+        assert start <= end or fail(f"{start=} is expected to be smaller than {end=}")
         return values
 
 
@@ -161,6 +159,9 @@ class TextParsingDataItem(BaseModel):
     @model_validator(mode="before")
     @classmethod
     def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
+        if isinstance(values, list):
+            return [cls._check(item) for item in values]
+
         text = values.get("text")
         entities = values.get("entities")
         parsing = values.get("parsing")
@@ -179,15 +180,13 @@ def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
             parsed = text[entity["start"] : entity["end"]]
             holder = parsing[parsing_key_name]
 
-            assert parsed in holder or fail(
-                f"parsed='{parsed}' is expected to be in holder='{holder}'"
-            )
+            assert parsed in holder or fail(f"{parsed=} is expected to be in {holder=}")
 
         return values
 
 
 class TextParsingResponse(HrFlowAPIResponse):
-    data: t.Optional[t.List[TextParsingDataItem]] = None
+    data: t.Optional[t.Union[TextParsingDataItem, t.List[TextParsingDataItem]]] = None
 
 
 class TextOCRDataItemPage(BaseModel):
@@ -365,8 +364,7 @@ def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
         board_key = values.get("board_key")
         board = values.get("board")
         assert board_key == board["key"] or fail(
-            f"job.board_key='{board_key}' is expected to be the same as"
-            f" job.board.key='{board['key']}'"
+            f"{board_key=} is expected to be the same as {board['key']=}"
         )
         return values
 
@@ -461,8 +459,7 @@ def _check(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
         source_key = values["source_key"]
         source = values["source"]
         assert source_key == source["key"] or fail(
-            f"profile.source_key='{source_key}' is expected to be the same as"
-            f" profile.source.key='{source['key']}'"
+            f"{source_key=} is expected to be the same as {source['key']=}"
         )
         return values