Skip to content

Commit 22425d4

Browse files
committed
Code files added
1 parent 343b512 commit 22425d4

2,051 files changed

Lines changed: 103925 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Lesson6/Activity11.ipynb

Lines changed: 295 additions & 0 deletions
Large diffs are not rendered by default.

Lesson6/Exercise51.ipynb

Lines changed: 708 additions & 0 deletions
Large diffs are not rendered by default.

Lesson6/Exercise52.ipynb

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Text Summarizer"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 15,
13+
"metadata": {},
14+
"outputs": [
15+
{
16+
"name": "stderr",
17+
"output_type": "stream",
18+
"text": [
19+
"[nltk_data] Downloading package stopwords to\n",
20+
"[nltk_data] C:\\Users\\dwight\\AppData\\Roaming\\nltk_data...\n",
21+
"[nltk_data] Package stopwords is already up-to-date!\n"
22+
]
23+
},
24+
{
25+
"data": {
26+
"text/plain": [
27+
"True"
28+
]
29+
},
30+
"execution_count": 15,
31+
"metadata": {},
32+
"output_type": "execute_result"
33+
}
34+
],
35+
"source": [
36+
"import nltk\n",
37+
"nltk.download('stopwords')"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": 16,
43+
"metadata": {},
44+
"outputs": [],
45+
"source": [
46+
"from collections import Counter\n",
47+
"from nltk.tokenize import sent_tokenize,word_tokenize\n",
48+
"from nltk.corpus import stopwords\n",
49+
"from string import punctuation\n",
50+
"from heapq import nlargest"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 22,
56+
"metadata": {
57+
"scrolled": false
58+
},
59+
"outputs": [],
60+
"source": [
61+
"STOPWORDS = set(stopwords.words('english') + list(punctuation))  # tokens left out of the word counts\n",
62+
"MIN_WORD_PROP, MAX_WORD_PROP = 0.1, 0.9  # exclusive bounds on a word's count relative to the highest count\n",
63+
"\n",
64+
"def compute_word_frequencies(word_sentences):\n",
65+
" words = [word for sentence in word_sentences \n",
66+
" for word in sentence \n",
67+
" if word not in STOPWORDS]\n",
68+
" counter = Counter(words)\n",
69+
" limit = float(max(counter.values()))\n",
70+
" word_frequencies = {word: freq/limit \n",
71+
" for word,freq in counter.items()}\n",
72+
" # Drop words if too common or too uncommon\n",
73+
" word_frequencies = {word: freq \n",
74+
" for word,freq in word_frequencies.items() \n",
75+
" if freq > MIN_WORD_PROP \n",
76+
" and freq < MAX_WORD_PROP}\n",
77+
" return word_frequencies\n",
78+
"\n",
79+
"def sentence_score(word_sentence, word_frequencies):\n",
80+
" return sum([ word_frequencies.get(word,0) \n",
81+
" for word in word_sentence])\n",
82+
"\n",
83+
"def summarize(text:str, num_sentences=3):\n",
84+
" \"\"\"\n",
85+
" Summarize the text, by return the most relevant sentences\n",
86+
" :text the text to summarize\n",
87+
" :num_sentences the number of sentences to return\n",
88+
" \"\"\"\n",
89+
" # Make the text lowercase\n",
90+
" text = text.lower()\n",
91+
" \n",
92+
" # Break text into sentences \n",
93+
" sentences = sent_tokenize(text)\n",
94+
" \n",
95+
" # Break sentences into words\n",
96+
" word_sentences = [word_tokenize(sentence) \n",
97+
" for sentence in sentences]\n",
98+
" \n",
99+
" # Compute the word frequencies\n",
100+
" word_frequencies = compute_word_frequencies(word_sentences)\n",
101+
" \n",
102+
" # Calculate the scores for each of the sentences\n",
103+
" scores = [sentence_score(word_sentence, word_frequencies)\n",
104+
" for word_sentence in word_sentences]\n",
105+
" sentence_scores = list(zip(sentences, scores))\n",
106+
" \n",
107+
" # Rank the sentences\n",
108+
" top_sentence_scores = nlargest(num_sentences, \n",
109+
" sentence_scores,\n",
110+
" key=lambda t: t[1])\n",
111+
" \n",
112+
" # Return the top sentences\n",
113+
" return [t[0] for t in top_sentence_scores]"
114+
]
115+
},
116+
{
117+
"cell_type": "code",
118+
"execution_count": 18,
119+
"metadata": {},
120+
"outputs": [],
121+
"source": [
122+
"with open('data/PolarVortex.txt', 'r') as vortex_file:  # the with-block closes the file once the read is done\n",
123+
" vortex_article = vortex_file.read()"
124+
]
125+
},
126+
{
127+
"cell_type": "code",
128+
"execution_count": 19,
129+
"metadata": {},
130+
"outputs": [
131+
{
132+
"data": {
133+
"text/plain": [
134+
"'On the coldest day in two decades on his fifth-generation dairy farm, Chris Pollack grabbed a thick black hose from the barn and ventured into the subzero cold,\\nwhere his beef cattle were chomping cud and waiting for water.\\nThe power had briefly gone out the previous morning, long enough to freeze the line that automatically fills the animals’ heated water trough. Pollack was here to replace it.\\n\\n\"Are you serious?\" Pollack said, peering inside the black hose. \"There’s water frozen in the end already.\"\\nHe lifted it up to a small space heater and waited for it to thaw.\\nSuch is life in the Deep Freeze of 2019.\\nThe past 48 hours in the American Midwest have been about endurance, as a breathtaking cold settled in over a massive stretch of the country. \\nThe record-setting frigid temperatures, some of the coldest on the planet Thursday, have frozen the Great Lakes, taxed electrical and natural gas infrastructure,\\n endangered livestock and tested the mettle of millions who are used to the cold but had never experienced anything like this.\\nIn some areas Thursday, temperatures dropped below minus-50 degrees, and the extreme weather was blamed for several deaths across the region,\\n including people who appear to have frozen to death in Milwaukee, Detroit and Rochester, Minn.\\nFrom Minnesota to New York, the polar vortex again prompted school closures, mail service interruptions and thousands of flight cancellations, \\nmany of them in and out of Chicago, which appeared otherworldly in a coating of frost and ice. Eighteen factories run by General Motors, \\nFiat Chrysler and Ford shut down Thursday because of the brutal weather and a fire at a natural gas compressor station.\\n'"
135+
]
136+
},
137+
"execution_count": 19,
138+
"metadata": {},
139+
"output_type": "execute_result"
140+
}
141+
],
142+
"source": [
143+
"vortex_article"
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"execution_count": 24,
149+
"metadata": {},
150+
"outputs": [
151+
{
152+
"data": {
153+
"text/plain": [
154+
"12"
155+
]
156+
},
157+
"execution_count": 24,
158+
"metadata": {},
159+
"output_type": "execute_result"
160+
}
161+
],
162+
"source": [
163+
"len(sent_tokenize(vortex_article))"
164+
]
165+
},
166+
{
167+
"cell_type": "code",
168+
"execution_count": 20,
169+
"metadata": {},
170+
"outputs": [
171+
{
172+
"data": {
173+
"text/plain": [
174+
"['in some areas thursday, temperatures dropped below minus-50 degrees, and the extreme weather was blamed for several deaths across the region,\\n including people who appear to have frozen to death in milwaukee, detroit and rochester, minn.\\nfrom minnesota to new york, the polar vortex again prompted school closures, mail service interruptions and thousands of flight cancellations, \\nmany of them in and out of chicago, which appeared otherworldly in a coating of frost and ice.',\n",
175+
" 'the record-setting frigid temperatures, some of the coldest on the planet thursday, have frozen the great lakes, taxed electrical and natural gas infrastructure,\\n endangered livestock and tested the mettle of millions who are used to the cold but had never experienced anything like this.',\n",
176+
" 'on the coldest day in two decades on his fifth-generation dairy farm, chris pollack grabbed a thick black hose from the barn and ventured into the subzero cold,\\nwhere his beef cattle were chomping cud and waiting for water.']"
177+
]
178+
},
179+
"execution_count": 20,
180+
"metadata": {},
181+
"output_type": "execute_result"
182+
}
183+
],
184+
"source": [
185+
"summarize(vortex_article)"
186+
]
187+
},
188+
{
189+
"cell_type": "code",
190+
"execution_count": 26,
191+
"metadata": {},
192+
"outputs": [
193+
{
194+
"data": {
195+
"text/plain": [
196+
"['in some areas thursday, temperatures dropped below minus-50 degrees, and the extreme weather was blamed for several deaths across the region,\\n including people who appear to have frozen to death in milwaukee, detroit and rochester, minn.\\nfrom minnesota to new york, the polar vortex again prompted school closures, mail service interruptions and thousands of flight cancellations, \\nmany of them in and out of chicago, which appeared otherworldly in a coating of frost and ice.']"
197+
]
198+
},
199+
"execution_count": 26,
200+
"metadata": {},
201+
"output_type": "execute_result"
202+
}
203+
],
204+
"source": [
205+
"summarize(vortex_article, num_sentences=1)"
206+
]
207+
},
208+
{
209+
"cell_type": "code",
210+
"execution_count": null,
211+
"metadata": {},
212+
"outputs": [],
213+
"source": []
214+
}
215+
],
216+
"metadata": {
217+
"kernelspec": {
218+
"display_name": "Python [conda env:packt]",
219+
"language": "python",
220+
"name": "conda-env-packt-py"
221+
},
222+
"language_info": {
223+
"codemirror_mode": {
224+
"name": "ipython",
225+
"version": 3
226+
},
227+
"file_extension": ".py",
228+
"mimetype": "text/x-python",
229+
"name": "python",
230+
"nbconvert_exporter": "python",
231+
"pygments_lexer": "ipython3",
232+
"version": "3.6.7"
233+
}
234+
},
235+
"nbformat": 4,
236+
"nbformat_minor": 2
237+
}

0 commit comments

Comments
 (0)