-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathtextrank_utils.py
More file actions
68 lines (56 loc) · 2.59 KB
/
textrank_utils.py
File metadata and controls
68 lines (56 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Copyright 2023 piglake
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import spacy
import pytextrank
from collections import Counter
from string import punctuation
def top_sentence(text, limit, nlp):
    """Return the highest-scoring sentences of *text* as an extractive summary.

    Every token that is neither a stop word nor punctuation and is tagged
    PROPN, ADJ, NOUN or VERB counts as a keyword. Keyword counts are
    normalised by the count of the most frequent keyword, and a sentence's
    score is the sum of the normalised weights of the keyword tokens it
    contains.

    Args:
        text: The document to summarise (str). It is lower-cased before
            being handed to ``nlp``.
        limit: Maximum number of sentences to return. Zero or a negative
            value yields an empty list.
        nlp: A callable pipeline (presumably a loaded spaCy ``Language``;
            confirm against the caller). It must return a document that
            iterates over tokens exposing ``text`` and ``pos_`` and that
            provides ``sents``; ``nlp.Defaults.stop_words`` supplies the
            stop words.

    Returns:
        A list of at most ``limit`` sentence strings, each passed through
        ``str.capitalize``, ordered from highest to lowest score. Sentences
        that contain no keyword are never returned, and the list is empty
        when the text yields no keywords at all.
    """
    if limit <= 0:
        return []

    keyword_pos = {"PROPN", "ADJ", "NOUN", "VERB"}
    stop_words = nlp.Defaults.stop_words
    doc = nlp(text.lower())

    counts = Counter(
        token.text
        for token in doc
        if token.text not in stop_words
        and token.text not in punctuation
        and token.pos_ in keyword_pos
    )
    if not counts:
        # No keywords means there is nothing to rank (and no maximum to
        # normalise by).
        return []

    max_count = max(counts.values())
    weights = {word: count / max_count for word, count in counts.items()}

    # Only sentences with at least one keyword take part in the ranking.
    scored = []
    for sent in doc.sents:
        hits = [weights[token.text] for token in sent if token.text in weights]
        if hits:
            scored.append((sent, sum(hits)))

    # sorted() is stable, so equally scored sentences keep document order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    summary = []
    for sent, _score in ranked:
        if len(summary) >= limit:
            break
        summary.append(str(sent).capitalize())
    return summary
if __name__ == "__main__":
    # Example text.
    text = "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types."
    # Load a spaCy model, depending on language, scale, etc. top_sentence
    # reads POS tags and sentence boundaries from it, so the chosen model
    # must provide both.
    nlp = spacy.load("en_core_web_sm")
    # The PyTextRank "textrank" pipe is not added here: top_sentence ranks
    # sentences from keyword frequencies alone.
    print(top_sentence(text, 3, nlp))