Skip to content

Commit 776a99e

Browse files
committed
Convert Chinese characters to Pinyin
1 parent b625818 commit 776a99e

1 file changed

Lines changed: 81 additions & 0 deletions

File tree

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created by PyCharm.
5+
File Name: LinuxBashShellScriptForOps:convert-chinese-to-pinyin.py
6+
Version: 0.0.1
7+
Author: dgden
8+
Author Email: dgdenterprise@gmail.com
9+
URL: https://github.com/DingGuodong/LinuxBashShellScriptForOps
10+
Download URL: https://github.com/DingGuodong/LinuxBashShellScriptForOps/tarball/master
11+
Create Date: 2021/3/16
12+
Create Time: 14:30
13+
Description: Convert Chinese characters to Pinyin
14+
Long Description:
15+
References:
16+
Prerequisites: pip install pypinyin
17+
pip install jieba
18+
Development Status: 3 - Alpha, 5 - Production/Stable
19+
Environment: Console
20+
Intended Audience: System Administrators, Developers, End Users/Desktop
21+
License: Freeware, Freely Distributable
22+
Natural Language: English, Chinese (Simplified)
23+
Operating System: POSIX :: Linux, Microsoft :: Windows
24+
Programming Language: Python :: 2.6
25+
Programming Language: Python :: 2.7
26+
Topic: Utilities
27+
"""
28+
import jieba
29+
from pypinyin import lazy_pinyin
30+
31+
# company_type_set = {
32+
# u"股份有限公司",
33+
# u"有限责任公司",
34+
# u"有限公司",
35+
# }
36+
37+
# Company-form words; remove_company_type() deletes every occurrence of each
# of these substrings from a company name.
company_type_list = [
    u"公司",
    u"有限",
    u"责任",
    u"股份",
]
38+
# Business-line phrases; remove_biz_type() deletes them in list order, so the
# longer phrases are listed before the bare u"保险" they both start with.
biz_type_list = [
    u"保险代理",
    u"保险经纪",
    u"保险",
]
39+
40+
41+
def remove_company_type(company_name, company_types=None):
    """Return company_name with every company-form substring removed.

    :param company_name: the company name to clean.
    :param company_types: optional iterable of substrings to delete; when
        omitted (None) the module-level ``company_type_list`` is used, which
        is the original behaviour.
    :return: the name with all occurrences of each substring deleted.
    """
    # Compare against None explicitly so that an empty iterable means
    # "remove nothing" instead of silently falling back to the default list.
    if company_types is None:
        company_types = company_type_list

    # Substrings are removed one after another, in iteration order.
    for company_type in company_types:
        company_name = company_name.replace(company_type, u"")
    return company_name
45+
46+
47+
def remove_biz_type(company_name, biz_types=None):
    """Return company_name with every business-line substring removed.

    :param company_name: the company name (or name fragment) to clean.
    :param biz_types: optional iterable of substrings to delete; when omitted
        (None) the module-level ``biz_type_list`` is used, which is the
        original behaviour.
    :return: the name with all occurrences of each substring deleted.
    """
    # Compare against None explicitly so that an empty iterable means
    # "remove nothing" instead of silently falling back to the default list.
    if biz_types is None:
        biz_types = biz_type_list

    # Substrings are removed one after another, in iteration order, so a
    # longer phrase must come before any shorter phrase it contains.
    for biz_type in biz_types:
        company_name = company_name.replace(biz_type, u"")
    return company_name
51+
52+
53+
def get_company_name_and_brief(company_name):
54+
jieba_list = list(jieba.cut(company_name))
55+
56+
if 2 >= len(jieba_list) >= 1:
57+
company_name_brief = jieba_list[0]
58+
elif 3 >= len(jieba_list) >= 2:
59+
company_name_brief = u"".join(jieba_list[0:1])
60+
elif 5 >= len(jieba_list) >= 3:
61+
company_name_brief = u"".join(jieba_list[0:2])
62+
else:
63+
company_name_brief = u"".join(jieba_list[0:3])
64+
65+
company_name_brief = remove_biz_type(company_name_brief)
66+
67+
print company_name.encode("utf-8"), company_name_brief.encode("utf-8"), "".join(lazy_pinyin(company_name_brief))
68+
69+
70+
if __name__ == "__main__":
    # Sample company names used as a manual smoke test: each one is run
    # through get_company_name_and_brief(), which prints its result.
    company_name_list = [
        u"山东大有保险代理股份有限公司",  # test case
        u"升宏保险代理有限公司",
        u"安信联合保险经纪有限公司",
        u"重庆恒蕴汽车保险代理有限公司",
        u"黑龙江善邦保险代理有限公司",
        u"国泰家和保险代理有限公司",  # "和"
        u"北京易才宏业保险经纪有限公司",
    ]

    for com_name in company_name_list:
        get_company_name_and_brief(com_name)

0 commit comments

Comments
 (0)