Skip to content

Commit 2f0e923

Browse files
committed
first commit
0 parents  commit 2f0e923

26 files changed

Lines changed: 507 additions & 0 deletions

.gitignore

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
*.egg-info/
24+
.installed.cfg
25+
*.egg
26+
MANIFEST
27+
28+
# PyInstaller
29+
# Usually these files are written by a python script from a template
30+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
31+
*.manifest
32+
*.spec
33+
34+
# Installer logs
35+
pip-log.txt
36+
pip-delete-this-directory.txt
37+
38+
# Unit test / coverage reports
39+
htmlcov/
40+
.tox/
41+
.coverage
42+
.coverage.*
43+
.cache
44+
nosetests.xml
45+
coverage.xml
46+
*.cover
47+
.hypothesis/
48+
49+
# Translations
50+
*.mo
51+
*.pot
52+
53+
# Django stuff:
54+
*.log
55+
.static_storage/
56+
.media/
57+
local_settings.py
58+
59+
# Flask stuff:
60+
instance/
61+
.webassets-cache
62+
63+
# Scrapy stuff:
64+
.scrapy
65+
66+
# Sphinx documentation
67+
docs/_build/
68+
69+
# PyBuilder
70+
target/
71+
72+
# Jupyter Notebook
73+
.ipynb_checkpoints
74+
75+
# pyenv
76+
.python-version
77+
78+
# celery beat schedule file
79+
celerybeat-schedule
80+
81+
# SageMath parsed files
82+
*.sage.py
83+
84+
# Environments
85+
.env
86+
.venv
87+
env/
88+
venv/
89+
ENV/
90+
env.bak/
91+
venv.bak/
92+
93+
# Spyder project settings
94+
.spyderproject
95+
.spyproject
96+
97+
# Rope project settings
98+
.ropeproject
99+
100+
# mkdocs documentation
101+
/site
102+
103+
# mypy
104+
.mypy_cache/
105+
106+
# pycharm
107+
.idea/

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2019 Kurohashi-Kawahara lab, Kyoto University
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

MANIFEST.in

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
include README.md
2+
include LICENSE
3+
graft tests
4+
global-exclude __pycache__
5+
global-exclude *.py[co]

README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# python-textformatting
2+
3+
## Requirements
4+
5+
- Python 3.6.8
6+
7+
## Installation
8+
9+
```
10+
$ python setup.py install
11+
```
12+
13+
## Example
14+
15+
```python
16+
from textformatting import ssplit
17+
18+
text = "日本語のテキストを文単位に分割します。Pythonで書かれています。"
19+
sentences = ssplit(text) # ['日本語のテキストを文単位に分割します。', 'Pythonで書かれています。']
20+
```
21+
22+
## License
23+
24+
- MIT
25+
26+
## Authors
27+
28+
- Kyoto University (contact [at] nlp.ist.i.kyoto-u.ac.jp)
29+
- Hirokazu Kiyomaru

setup.cfg

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[aliases]
2+
test=pytest
3+
4+
[tool:pytest]
5+
addopts = --verbose

setup.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python
2+
import io
3+
import os
4+
5+
from setuptools import find_packages, setup
6+
7+
# Package metadata.
NAME = 'textformatting'
DESCRIPTION = 'A Japanese text formatter'
EMAIL = 'contact@nlp.ist.i.kyoto-u.ac.jp'
AUTHOR = 'Kurohashi-Kawahara Lab, Kyoto University'
# Not referenced below: the version handed to setup() is read from the
# package's __version__.py.
VERSION = ''

# Requirements for running, for executing setup.py, and for the test suite.
INSTALL_REQUIRES = []
SETUP_REQUIRES = ['pytest-runner']
TEST_REQUIRES = ['pytest==4.6.5']

# Directory containing this setup.py; files are resolved relative to it.
here = os.path.abspath(os.path.dirname(__file__))


def _read_utf8(*parts):
    """Return the UTF-8 decoded text of the file at ``here`` joined with ``parts``."""
    with io.open(os.path.join(here, *parts), encoding='utf-8') as handle:
        return handle.read()


# The README is used as the long description (declared as Markdown below).
long_description = '\n' + _read_utf8('README.md')

# Execute <NAME>/__version__.py into a scratch namespace so that
# ``about['__version__']`` can be read without importing the package.
about = {}
exec(_read_utf8(NAME, '__version__.py'), about)

setup(
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    author_email=EMAIL,
    packages=find_packages(exclude=('tests',)),
    install_requires=INSTALL_REQUIRES,
    setup_requires=SETUP_REQUIRES,
    tests_require=TEST_REQUIRES,
    license='MIT',
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python',
    ],
)

tests/test_ssplit.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import glob
2+
import json
3+
import typing
4+
5+
import os
6+
import pytest
7+
8+
from textformatting import ssplit
9+
10+
11+
def read_test_file(path):
    """Read a test file.

    Parameters
    ----------
    path : str
        The path to a JSON test file with the keys ``text`` and ``sentences``.

    Returns
    -------
    typing.Tuple[str, typing.List[str]]
        The value stored under ``text`` followed by the value stored under
        ``sentences``.

    Raises
    ------
    KeyError
        If the JSON object lacks the ``text`` or ``sentences`` key.
    """
    # The fixtures contain non-ASCII (Japanese) text. Decode explicitly as
    # UTF-8 instead of relying on the platform's locale-dependent default.
    with open(path, encoding='utf-8') as f:
        dct = json.load(f)
    return dct['text'], dct['sentences']
26+
27+
28+
# Each JSON file under the sibling ``test_ssplit`` directory provides one case;
# the paths are sorted so the cases are collected in a stable order.
test_file_path_pattern = os.path.join(os.path.dirname(__file__), 'test_ssplit', '*.json')
test_cases = list(map(read_test_file, sorted(glob.glob(test_file_path_pattern))))


@pytest.mark.parametrize('test_case', test_cases)
def test_ssplit(test_case):
    """Check that ``ssplit`` splits a fixture's text into its expected sentences."""
    source_text, expected_sentences = test_case
    assert ssplit(source_text) == expected_sentences

tests/test_ssplit/000.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"text": "日本語のテキストを文単位に分割します。Pythonで書かれています。",
3+
"sentences": [
4+
"日本語のテキストを文単位に分割します。",
5+
"Pythonで書かれています。"
6+
]
7+
}

tests/test_ssplit/001.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"text": "今何時ですか?次の予定があるので失礼します。",
3+
"sentences": [
4+
"今何時ですか?",
5+
"次の予定があるので失礼します。"
6+
]
7+
}

tests/test_ssplit/002.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"text": "お疲れ様です!次の予定があるので失礼します。",
3+
"sentences": [
4+
"お疲れ様です!",
5+
"次の予定があるので失礼します。"
6+
]
7+
}

0 commit comments

Comments
 (0)