Skip to content

Commit 8d67175

Browse files
Fix duplicates in random/wikipedia generators and add timeout to Wikipedia requests
1 parent 7d74d1d commit 8d67175

3 files changed

Lines changed: 84 additions & 71 deletions

File tree

trdg/generators/from_random.py

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,42 @@ class GeneratorFromRandom:
1010
"""Generator that uses randomly generated words"""
1111

1212
def __init__(
13-
self,
14-
count=-1,
15-
length=1,
16-
allow_variable=False,
17-
use_letters=True,
18-
use_numbers=True,
19-
use_symbols=True,
20-
fonts=[],
21-
language="en",
22-
size=32,
23-
skewing_angle=0,
24-
random_skew=False,
25-
blur=0,
26-
random_blur=False,
27-
background_type=0,
28-
distorsion_type=0,
29-
distorsion_orientation=0,
30-
is_handwritten=False,
31-
width=-1,
32-
alignment=1,
33-
text_color="#282828",
34-
orientation=0,
35-
space_width=1.0,
36-
character_spacing=0,
37-
margins=(5, 5, 5, 5),
38-
fit=False,
39-
output_mask=False,
40-
word_split=False,
41-
image_dir=os.path.join(
42-
"..", os.path.split(os.path.realpath(__file__))[0], "images"
43-
),
44-
stroke_width=0,
45-
stroke_fill="#282828",
46-
image_mode="RGB",
13+
self,
14+
count=-1,
15+
length=1,
16+
allow_variable=False,
17+
use_letters=True,
18+
use_numbers=True,
19+
use_symbols=True,
20+
fonts=[],
21+
language="en",
22+
size=32,
23+
skewing_angle=0,
24+
random_skew=False,
25+
blur=0,
26+
random_blur=False,
27+
background_type=0,
28+
distorsion_type=0,
29+
distorsion_orientation=0,
30+
is_handwritten=False,
31+
width=-1,
32+
alignment=1,
33+
text_color="#282828",
34+
orientation=0,
35+
space_width=1.0,
36+
character_spacing=0,
37+
margins=(5, 5, 5, 5),
38+
fit=False,
39+
output_mask=False,
40+
word_split=False,
41+
image_dir=os.path.join(
42+
"..", os.path.split(os.path.realpath(__file__))[0], "images"
43+
),
44+
stroke_width=0,
45+
stroke_fill="#282828",
46+
image_mode="RGB",
4747
):
48+
self.generated_count = 0
4849
self.count = count
4950
self.length = length
5051
self.allow_variable = allow_variable
@@ -91,9 +92,12 @@ def __init__(
9192
)
9293

9394
def __iter__(self):
94-
return self.generator
95+
return self
9596

9697
def __next__(self):
98+
if self.generated_count == self.count:
99+
raise StopIteration
100+
self.generated_count += 1
97101
return self.next()
98102

99103
def next(self):

trdg/generators/from_wikipedia.py

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,37 +10,38 @@ class GeneratorFromWikipedia:
1010
"""Generator that uses sentences taken from random Wikipedia articles"""
1111

1212
def __init__(
13-
self,
14-
count=-1,
15-
minimum_length=1,
16-
fonts=[],
17-
language="en",
18-
size=32,
19-
skewing_angle=0,
20-
random_skew=False,
21-
blur=0,
22-
random_blur=False,
23-
background_type=0,
24-
distorsion_type=0,
25-
distorsion_orientation=0,
26-
is_handwritten=False,
27-
width=-1,
28-
alignment=1,
29-
text_color="#282828",
30-
orientation=0,
31-
space_width=1.0,
32-
character_spacing=0,
33-
margins=(5, 5, 5, 5),
34-
fit=False,
35-
output_mask=False,
36-
word_split=False,
37-
image_dir=os.path.join(
38-
"..", os.path.split(os.path.realpath(__file__))[0], "images"
39-
),
40-
stroke_width=0,
41-
stroke_fill="#282828",
42-
image_mode="RGB",
13+
self,
14+
count=-1,
15+
minimum_length=1,
16+
fonts=[],
17+
language="en",
18+
size=32,
19+
skewing_angle=0,
20+
random_skew=False,
21+
blur=0,
22+
random_blur=False,
23+
background_type=0,
24+
distorsion_type=0,
25+
distorsion_orientation=0,
26+
is_handwritten=False,
27+
width=-1,
28+
alignment=1,
29+
text_color="#282828",
30+
orientation=0,
31+
space_width=1.0,
32+
character_spacing=0,
33+
margins=(5, 5, 5, 5),
34+
fit=False,
35+
output_mask=False,
36+
word_split=False,
37+
image_dir=os.path.join(
38+
"..", os.path.split(os.path.realpath(__file__))[0], "images"
39+
),
40+
stroke_width=0,
41+
stroke_fill="#282828",
42+
image_mode="RGB",
4343
):
44+
self.generated_count = 0
4445
self.count = count
4546
self.minimum_length = minimum_length
4647
self.language = language
@@ -75,9 +76,12 @@ def __init__(
7576
)
7677

7778
def __iter__(self):
78-
return self.generator
79+
return self
7980

8081
def __next__(self):
82+
if self.generated_count == self.count:
83+
raise StopIteration
84+
self.generated_count += 1
8185
return self.next()
8286

8387
def next(self):

trdg/string_generator.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def create_strings_from_file(filename, count):
1919
raise Exception("No lines could be read in file")
2020
while len(strings) < count:
2121
if len(lines) >= count - len(strings):
22-
strings.extend(lines[0 : count - len(strings)])
22+
strings.extend(lines[0: count - len(strings)])
2323
else:
2424
strings.extend(lines)
2525

@@ -50,7 +50,12 @@ def create_strings_from_wikipedia(minimum_length, count, lang):
5050

5151
while len(sentences) < count:
5252
# We fetch a random page
53-
page = requests.get("https://{}.wikipedia.org/wiki/Special:Random".format(lang))
53+
54+
page_url = "https://{}.wikipedia.org/wiki/Special:Random".format(lang)
55+
try:
56+
page = requests.get(page_url, timeout=3.0) # take into account timeouts
57+
except requests.exceptions.Timeout:
58+
continue
5459

5560
soup = BeautifulSoup(page.text, "html.parser")
5661

@@ -61,8 +66,8 @@ def create_strings_from_wikipedia(minimum_length, count, lang):
6166
lines = list(
6267
filter(
6368
lambda s: len(s.split(" ")) > minimum_length
64-
and not "Wikipedia" in s
65-
and not "wikipedia" in s,
69+
and not "Wikipedia" in s
70+
and not "wikipedia" in s,
6671
[
6772
" ".join(re.findall(r"[\w']+", s.strip()))[0:200]
6873
for s in soup.get_text().splitlines()
@@ -71,7 +76,7 @@ def create_strings_from_wikipedia(minimum_length, count, lang):
7176
)
7277

7378
# Remove the last lines that talk about contributing
74-
sentences.extend(lines[0 : max([1, len(lines) - 5])])
79+
sentences.extend(lines[0: max([1, len(lines) - 5])])
7580

7681
return sentences[0:count]
7782

0 commit comments

Comments
 (0)