Skip to content

Commit e5cb0a9

Browse files
committed
feat: Enhance project structure and add utility functions
- Refactored main entry point in `main.py` to use logging instead of print statements. - Introduced regex utilities in `regex.py` for email, URL, and time pattern matching. - Updated async utilities in `async_utils.py` to include an `AsyncResource` base class for managing concurrency. - Added CLI configuration loading and parsing functions in `cli_utils.py` using Pydantic for type safety. - Implemented file handling utilities for JSON, TOML, YAML, and JSON Lines formats. - Created comprehensive unit tests for JSON, TOML, YAML, and async utilities. - Updated `tox.ini` to specify directories for linting and type checking.
1 parent 6ca393a commit e5cb0a9

21 files changed

Lines changed: 859 additions & 79 deletions

scripts/main.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,16 @@
1+
import logging
2+
3+
from project.env import PACKAGE_DIR
4+
5+
logger = logging.getLogger(__name__)
6+
7+
8+
def main() -> None:
    """Sample entry point: configure logging and emit the demo messages."""
    logging.basicConfig(level=logging.INFO)
    # Emit each (template, args) pair through the module logger.
    for template, args in (
        ('Hello, World!', ()),
        ('PACKAGE_DIR=%s', (PACKAGE_DIR,)),
    ):
        logger.info(template, *args)


if __name__ == '__main__':
    main()

src/project/common/regex.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import re
2+
from typing import Final
3+
4+
from project.common.utils.regex_utils import concat, unmatched_group
5+
6+
# --- Time --------------------------------------------------------------------
# Matches clock-like "digits:digits" tokens, e.g. "12:34".
TIME_PATTERN: Final[re.Pattern] = re.compile(r'\d+:\d+')
# Backward-compatible alias preserving the original (misspelled) public name.
TIME_PATTRN: Final[re.Pattern] = TIME_PATTERN


# --- Email -------------------------------------------------------------------
# Character classes for the three parts of an address.
LOCAL_PART_CHARS = r'[\w\-._]'
DOMAIN_CHARS = r'[\w\-._]'
TLD_CHARS = r'[A-Za-z]'

local_part = concat([LOCAL_PART_CHARS], without_grouping=True) + r'+'
domain = concat([DOMAIN_CHARS], without_grouping=True) + r'+'
tld = concat([TLD_CHARS], without_grouping=True) + r'+'

EMAIL_REGEX = local_part + r'@' + domain + r'\.' + tld
EMAIL_PATTERN = re.compile(EMAIL_REGEX)


# --- URL ---------------------------------------------------------------------
SCHEME = r'https?'
CHARS = r'[\w!?/+\-_~;.,*&@#$%()\[\]]'

url_chars = concat([CHARS], without_grouping=True) + r'+'

HTTP_URL_REGEX = SCHEME + r'://' + url_chars

# data: URLs, e.g. "data:image/png;base64,AAAA...".
DATA_SCHEME = r'data:'
MEDIATYPE = r'[\w/+.-]+'
BASE64 = r'base64'
DATA = r'[\w+/=]+'

mediatype_part = unmatched_group(MEDIATYPE) + r'?'
base64_part = unmatched_group(BASE64) + r'?'
data_part = unmatched_group(DATA)

DATA_URL_REGEX = DATA_SCHEME + mediatype_part + r'(?:;' + base64_part + r')?,' + data_part

# Either an http(s) URL or a data: URL.
URL_REGEX = concat([HTTP_URL_REGEX, DATA_URL_REGEX])

HTTP_URL_PATTERN = re.compile(HTTP_URL_REGEX)
DATA_URL_PATTERN = re.compile(DATA_URL_REGEX)
URL_PATTERN = re.compile(URL_REGEX)
Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,55 @@
import asyncio
import functools
from abc import ABC, abstractmethod
from collections.abc import Awaitable, Callable, Coroutine
from typing import Any
35

46

5-
def sync_to_async_func(sync_func: Callable) -> Callable:
6-
"""
7-
同期関数を非同期関数として使えるように変換する
8-
"""
7+
def sync_to_async_func[R](sync_func: Callable[..., R]) -> Callable[..., Awaitable[R]]:
8+
"""Convert a synchronous callable into an asynchronous callable."""
99

10-
async def wrapper(*args: Any, **kwargs: Any) -> Any:
10+
async def wrapper(*args: object, **kwargs: object) -> R:
1111
return await asyncio.to_thread(sync_func, *args, **kwargs)
1212

1313
wrapper.__name__ = sync_func.__name__
1414
wrapper.__doc__ = sync_func.__doc__
1515
return wrapper
1616

1717

18-
def async_to_sync_func(async_func: Callable) -> Callable:
19-
"""
20-
非同期関数を同期関数として使えるように変換する
21-
"""
18+
def async_to_sync_func[R](async_func: Callable[..., Coroutine[Any, Any, R]]) -> Callable[..., R]:
19+
"""Convert an asynchronous callable into a synchronous callable."""
2220

23-
def wrapper(*args: Any, **kwargs: Any) -> Any:
21+
def wrapper(*args: object, **kwargs: object) -> R:
2422
return asyncio.run(async_func(*args, **kwargs))
2523

2624
wrapper.__name__ = async_func.__name__
2725
wrapper.__doc__ = async_func.__doc__
2826
return wrapper
2927

3028

31-
async def run_async_function_with_semaphore(
32-
async_func: Callable, concurrency_sema: asyncio.Semaphore | None, *args: Any, **kwargs: Any
33-
) -> Any:
34-
"""
35-
指定した関数 func を、セマフォで同時実行数を制限して呼び出す関数。
36-
concurrency_sema が None の場合は制限しない。
37-
"""
29+
async def run_async_function_with_semaphore[R](
30+
async_func: Callable[..., Awaitable[R]],
31+
concurrency_sema: asyncio.Semaphore | None,
32+
*args: object,
33+
**kwargs: object,
34+
) -> R:
35+
"""Execute async_func with an optional semaphore limiting concurrency."""
3836
if concurrency_sema is not None:
3937
async with concurrency_sema:
4038
return await async_func(*args, **kwargs)
41-
else:
42-
return await async_func(*args, **kwargs)
39+
return await async_func(*args, **kwargs)
40+
41+
42+
class AsyncResource[R](ABC):
43+
"""Base class for async resources protected by a semaphore."""
44+
45+
def __init__(self, concurrency: int = 1) -> None:
46+
self.semaphore = asyncio.Semaphore(concurrency)
47+
48+
async def task(self, *args: object, **kwargs: object) -> R:
49+
async with self.semaphore:
50+
return await self.call(*args, **kwargs)
51+
52+
@abstractmethod
53+
async def call(self, *args: object, **kwargs: object) -> R:
54+
"""Execute the concrete asynchronous operation."""
55+
raise NotImplementedError

src/project/common/utils/cli_utils.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,58 @@
22
from pathlib import Path
33
from typing import Any
44

5+
from pydantic import BaseModel
6+
57
from project.common.utils.file.config import load_config
68

79
logging.basicConfig(level=logging.INFO)
810
logger = logging.getLogger(__name__)
911

1012

def load_cli_config(config_file_path: str | Path | None = None, **kwargs: object) -> dict[str, Any]:
    """Load configuration from a file and merge it with runtime arguments."""
    # Guard clause: no file means the runtime arguments are the whole config.
    if not config_file_path:
        logger.info('No config file provided; using runtime arguments only.')
        return dict(kwargs)

    logger.info('Loading configuration from %s', config_file_path)
    merged = load_config(config_file_path)
    # Runtime overrides win over values read from the file.
    merged.update(kwargs)
    logger.info('Merged config with overrides: %s', list(kwargs) if kwargs else [])
    return merged
24+
25+
26+
def load_and_parse_config[T: BaseModel](
27+
config_class: type[T],
28+
config_file_path: str | Path | None = None,
29+
**kwargs: object,
30+
) -> T:
31+
"""Load configuration from file, merge with kwargs, and parse into Pydantic model.
32+
33+
This function provides type-safe configuration loading by:
34+
1. Loading config from file (if provided)
35+
2. Merging with CLI overrides
36+
3. Validating and parsing into the specified Pydantic model
37+
38+
Args:
39+
config_class: Pydantic BaseModel subclass to parse into
40+
config_file_path: Path to config file (JSON/YAML/TOML)
41+
**kwargs: CLI overrides to merge with file config
42+
43+
Returns:
44+
Validated instance of config_class
45+
46+
Raises:
47+
ValidationError: If configuration is invalid
48+
49+
Example:
50+
>>> from pydantic import BaseModel, Field
51+
>>> class MyConfig(BaseModel):
52+
... name: str = Field(...)
53+
... value: int = Field(default=0)
54+
>>> cfg = load_and_parse_config(MyConfig, 'config.json', value=42)
55+
>>> assert isinstance(cfg, MyConfig)
56+
57+
"""
58+
raw_config = load_cli_config(config_file_path, **kwargs)
59+
return config_class(**raw_config)

src/project/common/utils/file/config.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
from pathlib import Path
2-
from typing import Any, cast
2+
from typing import Any
33

44
from project.common.utils.file.json import load_json
55
from project.common.utils.file.toml import load_toml
66
from project.common.utils.file.yaml import load_yaml
77

88

99
def load_config(path: str | Path) -> dict[str, Any]:
10-
"""
11-
Load configuration from a file (JSON, YAML, or TOML).
12-
"""
10+
"""Load configuration from a file (JSON, YAML, or TOML)."""
1311
ext = Path(path).suffix.lower()
1412

1513
if ext == '.json':
@@ -24,4 +22,4 @@ def load_config(path: str | Path) -> dict[str, Any]:
2422
if not isinstance(data, dict):
2523
raise TypeError(f'Config file {path!r} did not return a dict, got {type(data).__name__}')
2624

27-
return cast(dict[str, Any], data)
25+
return data

src/project/common/utils/file/json.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
import json
22
from pathlib import Path
3+
from typing import Any
34

5+
JsonValue = dict[Any, Any] | list[Any] | str | int | float | bool | None
46

5-
def load_json(path: str | Path) -> dict | list:
7+
8+
def load_json(path: str | Path) -> JsonValue:
69
with Path(path).open(mode='r', encoding='utf-8') as fin:
7-
data = json.load(fin)
8-
return data
10+
return json.load(fin)
911

1012

1113
def save_as_indented_json(
12-
data: dict | list,
14+
data: JsonValue,
1315
path: str | Path,
1416
parents: bool = True,
1517
exist_ok: bool = True,
1618
) -> None:
17-
path = Path(path)
18-
path.parent.mkdir(parents=parents, exist_ok=exist_ok)
19-
with path.open(mode='w', encoding='utf-8') as fout:
19+
target = Path(path)
20+
target.parent.mkdir(parents=parents, exist_ok=exist_ok)
21+
with target.open(mode='w', encoding='utf-8') as fout:
2022
json.dump(data, fout, ensure_ascii=False, indent=4, separators=(',', ': '))
21-
return

src/project/common/utils/file/jsonlines.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,19 @@
33
import jsonlines
44

55

def load_jsonlines(path: str | Path) -> list[dict[str, object]]:
    """Read the JSON Lines file at path and return its records as dicts."""
    records: list[dict[str, object]] = []
    with jsonlines.open(str(Path(path))) as reader:
        for record in reader:
            records.append(dict(record))
    return records
129

1310

1411
def save_as_jsonlines(
    data: list[dict[str, object]],
    path: str | Path,
    parents: bool = True,
    exist_ok: bool = True,
) -> None:
    """Write each record in data as one line of the JSON Lines file at path.

    Missing parent directories are created according to parents/exist_ok.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=parents, exist_ok=exist_ok)
    with jsonlines.open(str(destination), mode='w') as writer:
        # write_all writes each record in order, same as a per-record loop.
        writer.write_all(data)
Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,21 @@
11
from pathlib import Path
2+
from typing import Any
23

34
import toml
45

56

def load_toml(path: str | Path) -> dict[str, Any]:
    """Parse the UTF-8 TOML document at path into a dict."""
    source = Path(path)
    with source.open(encoding='utf-8') as fin:
        return toml.load(fin)
1010

1111

1212
def save_as_toml(
    data: dict[str, Any],
    path: str | Path,
    parents: bool = True,
    exist_ok: bool = True,
) -> None:
    """Serialize data as TOML to path.

    Missing parent directories are created according to parents/exist_ok.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=parents, exist_ok=exist_ok)
    with destination.open(mode='w', encoding='utf-8') as fout:
        toml.dump(data, fout)

src/project/common/utils/file/yaml.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,23 @@
11
from pathlib import Path
2+
from typing import Any
23

34
import yaml
45

# Union of value shapes yaml.safe_load can return for ordinary documents.
YamlValue = dict[str, Any] | list[Any] | str | int | float | bool | None


def load_yaml(path: str | Path) -> YamlValue:
    """Parse the UTF-8 YAML document at path with yaml.safe_load."""
    source = Path(path)
    with source.open(encoding='utf-8') as fin:
        return yaml.safe_load(fin)
1012

1113

1214
def save_as_indented_yaml(
    data: YamlValue,
    path: str | Path,
    parents: bool = True,
    exist_ok: bool = True,
) -> None:
    """Write data as block-style YAML (4-space indent, unicode preserved) to path.

    Missing parent directories are created according to parents/exist_ok.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=parents, exist_ok=exist_ok)
    with destination.open(mode='w', encoding='utf-8') as fout:
        yaml.dump(data, fout, allow_unicode=True, indent=4, default_flow_style=False)

src/project/common/utils/import_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
import importlib
22
import inspect
33
import sys
4+
from collections.abc import Callable
45
from pathlib import Path
5-
from typing import Callable
66

77

88
def import_function(function_file_path: str, function_name: str | None = None) -> Callable:
9-
function_file_path = Path(function_file_path).resolve()
10-
function_name = function_name or function_file_path.stem
9+
resolved_path: Path = Path(function_file_path).resolve()
10+
function_name = function_name or resolved_path.stem
1111

1212
project_root = Path.cwd()
1313
if str(project_root) not in sys.path:
1414
sys.path.append(str(project_root))
1515

16-
module_path = '.'.join(function_file_path.relative_to(project_root).with_suffix('').parts)
16+
module_path = '.'.join(resolved_path.relative_to(project_root).with_suffix('').parts)
1717
module = importlib.import_module(module_path)
1818
return getattr(module, function_name)
1919

0 commit comments

Comments
 (0)