mathematica-pub · tcoile · Dec 15, 2025 · Nov 4, 2025 · Nov 5, 2025 · Dec 15, 2025
diff --git a/README.md b/README.md
@@ -1,10 +1,21 @@
 # dkany
 Python package for accessing open data websites powered by dkan
 
-# `uv` and package management
-We're using [`uv`](https://docs.astral.sh/uv/), since pipenv was having a hard time building.  Install it with `pip install uv`, then it should work almost identically to `pipenv`
+# How to use
+
+## Installation
+Add the library to your project with your preferred package manager, for example:
+- `pip install dkany`
+- `uv add dkany`
+- `pipenv install dkany`
+
+## Usage
+See `./scratch/basic_run.py` for example code using the library to create, update, and delete datasets.
 
 # Local Development
+## `uv` and package management
+We're using [`uv`](https://docs.astral.sh/uv/) because `pipenv` was having a hard time building.  Install it with `pip install uv`; after that, it should work almost identically to `pipenv`.
+
 
 To install the package and it's dependences for development, run 
 ```
@@ -28,8 +39,8 @@ There are many ways to run the tests associated with this app.
 
 
 # Ideas for Improvement
-TODO: Validate dataset file (All columns have column names)
+- Validate dataset file (All columns have column names)
 
-# Deploying
+# Deploying to PyPI
 
-See [our confluence doc on deploying to AWS CodeArtifact](https://mathematicampr.atlassian.net/wiki/spaces/WEB/pages/2514354711/Deploying+to+AWS+CodeArtifact)
+See `.github/workflows/build-and-publish.yml` for the workflow that publishes this library.  New versions are automatically published to test.pypi.org when a pre-release is made, and to pypi.org when a release is published.
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "pyyaml>=6.0.3",
     "requests>=2.32.5",
     "requests-toolbelt>=1.0.0",
+    "typing-extensions>=4.0.0",
 ]
 
 [dependency-groups]

diff --git a/scratch/basic_run.py b/scratch/basic_run.py
@@ -1,15 +1,65 @@
 from dkany.client import DKANClient as DkanyClient
 
-def main():
-    client = DkanyClient(
-        base_url = "https://edit.data.medicaid.gov"
+
+def create_client() -> DkanyClient:
+    return DkanyClient(
+        base_url="https://edit.data.medicaid.gov", user_name="DEMO", password="your_api_key"
     )
 
-    test_dataset_id = "9e407144-9ed9-5cee-937a-17d65b07a9a7"
 
-    exists = client.check_dataset_exists(test_dataset_id)
+def create_dataset() -> str:
+    client = create_client()
+
+    body = {
+        "title": "Test Dataset from DKAN Client",
+        "type": ["dataset"],
+        "license": "http://opendatacommons.org/licenses/odc-by/1.0/",
+        "accessLevel": "published",
+    }
+
+    response = client.create_dataset(body)
+
+    print(f"Created dataset with ID: {response['identifier']}")
+    return response["identifier"]
+
+
+def dataset_exists(dataset_id: str) -> None:
+    client = create_client()
+
+    exists = client.check_dataset_exists(dataset_id)
 
     print(f"dataset {exists} exits")
 
+
+def update_dataset(dataset_id: str):
+    client = create_client()
+
+    body = {
+        "title": "Updated Test Dataset from DKAN Client",
+        "type": ["dataset"],
+        "license": "http://opendatacommons.org/licenses/odc-by/1.0/",
+        "accessLevel": "hidden",
+    }
+
+    response = client.update_dataset(dataset_id, body)
+
+    print(f"Updated dataset with ID: {response['identifier']}")
+
+
+def remove_dataset(dataset_id: str):
+    client = create_client()
+
+    client.delete_dataset(dataset_id)
+
+    print(f"Deleted dataset with ID: {dataset_id}")
+
+
+def main():
+    dataset_id = create_dataset()
+    dataset_exists(dataset_id)
+    update_dataset(dataset_id)
+    remove_dataset(dataset_id)
+
+
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/src/dkany/__about__.py b/src/dkany/__about__.py
@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"
diff --git a/src/dkany/client/client.py b/src/dkany/client/client.py
@@ -1,13 +1,24 @@
 import logging
 from copy import deepcopy as copy
 from datetime import datetime as dt
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 
 import requests
 from requests.cookies import RequestsCookieJar
-from requests_toolbelt import sessions # type: ignore
+from requests.models import Response
+from requests_toolbelt import sessions  # type: ignore
 
 from dkany.client.errors import BadResponse
+from dkany.client.types import (
+    DkanSearchResponse,
+    DkanSearchParams,
+    DkanDatasetMetadataResponse,
+    DkanCreateDatasetResponse,
+    DkanUpdateDatasetResponse,
+    DkanDeleteDatasetResponse,
+    DkanGetDatasetResponse,
+    DkanMetadataFilterParams,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -18,13 +29,19 @@ def url_join(url_part_list):
 
 class DKANClient:
     """
-    docstring
+    The main interface with the DKAN API.
+
+    Arguments:
+        base_url: The base URL of the DKAN instance.
+        cookie_dict: A dictionary of cookies to attach to requests
+        user_name: The CMS Username (Four characters)
+        password: The DKAN API key associated with the user_name
     """
 
     def __init__(
         self,
         base_url: Optional[str] = None,
-        cookie_dict: Optional[dict] = None,
+        cookie_dict: Optional[Dict[str, str]] = None,
         user_name: Optional[str] = None,
         password: Optional[str] = None,
     ):
@@ -51,7 +68,9 @@ def __init__(
         self.existing_dataset_url = (
             "api/1/metastore/schemas/dataset/items/{dataset_identifier}?_format=json"
         )
-        self.revise_dataset_url = "api/1/metastore/schemas/dataset/items/{dataset_identifier}/revisions?_format=json"
+        self.revise_dataset_url = (
+            "api/1/metastore/schemas/dataset/items/{dataset_identifier}/revisions?_format=json"
+        )
         self.query_datastore_url = (
             "api/1/datastore/query/{dataset_identifier}/{datastore_idx}?_format=json"
         )
@@ -71,22 +90,29 @@ def __str__(self) -> str:
         return f"DKAN client for {self.base_url} with user {self.user_name}"
 
     def _process_response(
-        self, response, acceptable_responses: Optional[List[int]] = None
+        self, response: Response, acceptable_responses: Optional[List[int]] = None
     ):
         acceptable_responses = acceptable_responses or [200, 201]
         if response.status_code not in acceptable_responses:
             raise BadResponse(response, acceptable_responses)
         out = response.json()
         return out
 
-    def _paged_search(self, params, page):
+    def _paged_search(self, params: DkanSearchParams, page) -> DkanSearchResponse:
         params["page"] = page
 
         response = self.session.get(self.search_url, params=params)
+        json = self._process_response(response)
+        if any(k not in json for k in ("total", "results", "facets")):
+            err = (
+                "Malformed search response received from DKAN instance.  "
+                + "Expected keys 'total', 'results', and 'facets' got: "
+                + ", ".join(json.keys())
+            )
+            raise SystemError(err)
+        return json
 
-        return self._process_response(response)
-
-    def _search_all_pages(self, params):
+    def _search_all_pages(self, params: DkanSearchParams) -> Dict[str, DkanDatasetMetadataResponse]:
         page = 1
         out = self._paged_search(params, page)
         total = int(out["total"])
@@ -105,10 +131,10 @@ def _search_all_pages(self, params):
 
     def search(
         self, title: Optional[str] = None, tags=None, categories=None, page="ALL"
-    ):
-        params = {}
+    ) -> Dict[str, DkanDatasetMetadataResponse]:
+        params = DkanSearchParams()
         if title is not None:
-            params["title"] = title
+            params["fulltext"] = title  # todo: is this what's intended by this param?
         if tags is not None:
             params["keyword"] = tags
         if categories is not None:
@@ -128,7 +154,11 @@ def search(
 
         return out
 
-    def filter_search_results(self, search_results, filter_params):
+    def filter_search_results(
+        self,
+        search_results: Dict[str, DkanDatasetMetadataResponse],
+        filter_params: Optional[DkanMetadataFilterParams],
+    ) -> Dict[str, DkanDatasetMetadataResponse]:
         if filter_params is None:
             return search_results
         if len(filter_params.keys()) == 0:
@@ -138,30 +168,34 @@ def filter_search_results(self, search_results, filter_params):
 
         for search_key, search_result_value in inital_search_results:
             for filter_key, filter_value in filter_params.items():
-                if search_result_value[filter_key] != filter_value:
+                if search_result_value[filter_key] != filter_value:  # type: ignore # mypy issue with TypedDict optional keys
                     search_results.pop(search_key)
                     break
 
         return search_results
 
-    def create_dataset(self, body):
+    def create_dataset(self, body: Dict[str, Any]) -> DkanCreateDatasetResponse:
         response = self.session.post(self.post_new_dataset_url, json=body)
         return self._process_response(response)
 
-    def delete_dataset(self, dataset_identifier):
+    def delete_dataset(self, dataset_identifier: str) -> DkanDeleteDatasetResponse:
         response = self.session.delete(
             self.existing_dataset_url.format(dataset_identifier=dataset_identifier)
         )
         return self._process_response(response)
 
-    def update_dataset(self, dataset_identifier, body):
+    def update_dataset(
+        self, dataset_identifier: str, body: DkanDatasetMetadataResponse
+    ) -> DkanUpdateDatasetResponse:
         response = self.session.put(
             self.existing_dataset_url.format(dataset_identifier=dataset_identifier),
             json=body,
         )
         return self._process_response(response)
 
-    def mark_dataset_hidden(self, dataset_identifier, message=""):
+    def mark_dataset_hidden(
+        self, dataset_identifier: str, message: str = ""
+    ) -> DkanUpdateDatasetResponse:
         """
         Sets dataset accesslevel to "hidden"
         Hides dataset from searches made on data.medicare.gov user interface
@@ -174,7 +208,7 @@ def mark_dataset_hidden(self, dataset_identifier, message=""):
         )
         return self._process_response(response)
 
-    def mark_dataset_public(self, dataset_identifier, message=""):
+    def mark_dataset_public(self, dataset_identifier: str, message="") -> DkanCreateDatasetResponse:
         """
         Sets dataset accesslevel to "published"
         Makes a dataset searchable through data.medicare.gov user interface
@@ -187,21 +221,21 @@ def mark_dataset_public(self, dataset_identifier, message=""):
         )
         return self._process_response(response)
 
-    def get_dataset_metadata(self, dataset_identifier):
+    def get_dataset_metadata(self, dataset_identifier: str) -> DkanDatasetMetadataResponse:
         response = self.session.get(
             self.existing_dataset_url.format(dataset_identifier=dataset_identifier),
             params={"_format": "json"},
         )
         return self._process_response(response)
 
-    def check_dataset_exists(self, dataset_identifier):
+    def check_dataset_exists(self, dataset_identifier) -> bool:
         try:
             _ = self.get_dataset_metadata(dataset_identifier)
             return True
         except BadResponse:
             return False
 
-    def trigger_dataset_reimport(self, dataset_identifier):
+    def trigger_dataset_reimport(self, dataset_identifier) -> DkanUpdateDatasetResponse:
         body = self.get_dataset_metadata(dataset_identifier)
         body["modified"] = dt.now().strftime(self.dkan_time_format)
         return self.update_dataset(dataset_identifier, body)
@@ -216,7 +250,9 @@ def get_full_query_url(self, dataset_identifier, datastore_idx=0):
             ]
         )
 
-    def get_data_by_dataset_identifier(self, dataset_identifier, datastore_idx=0):
+    def get_data_by_dataset_identifier(
+        self, dataset_identifier, datastore_idx=0
+    ) -> DkanGetDatasetResponse:
         response = self.session.get(
             self.query_datastore_url.format(
                 dataset_identifier=dataset_identifier, datastore_idx=datastore_idx

diff --git a/src/dkany/client/errors.py b/src/dkany/client/errors.py
@@ -1,5 +1,10 @@
+from typing import List
+from requests.models import Response
+
+
 class Error(Exception):
     """Base class for exceptions in this module."""
+
     message: str
 
     def __str__(self):
@@ -10,12 +15,10 @@ def __repr__(self):
 
 
 class BadResponse(Error):
-    def __init__(self, response, acceptable_status_codes):
+    def __init__(self, response: Response, acceptable_status_codes: List[int]):
         status_code = response.status_code
         message = []
-        message.append(
-            "Status code returned not in acceptable status codes for this response"
-        )
+        message.append("Status code returned not in acceptable status codes for this response")
         message.append(
             f"Returned: {status_code}:{response.reason}, Acceptable Codes {acceptable_status_codes}"
         )