|
| 1 | +# encoding: utf-8 |
| 2 | +from __future__ import unicode_literals, print_function |
| 3 | + |
| 4 | +import json |
| 5 | +import re |
| 6 | +import sys |
| 7 | +import unittest |
| 8 | +import warnings |
| 9 | + |
| 10 | +import twitter |
| 11 | +from twitter import twitter_utils |
| 12 | + |
| 13 | +import responses |
| 14 | +from responses import GET, POST |
| 15 | + |
| 16 | +warnings.filterwarnings('ignore', category=DeprecationWarning) |
| 17 | + |
| 18 | + |
| 19 | +DEFAULT_URL = re.compile(r'https?://.*\.twitter.com/1\.1/.*') |
| 20 | +URLS = { |
| 21 | + "is_url": [ |
| 22 | + "t.co/test" |
| 23 | + "http://foo.com/blah_blah", |
| 24 | + "http://foo.com/blah_blah/", |
| 25 | + "http://foo.com/blah_blah_(wikipedia)", |
| 26 | + "http://foo.com/blah_blah_(wikipedia)_(again)", |
| 27 | + "http://www.example.com/wpstyle/?p=364", |
| 28 | + "https://www.example.com/foo/?bar=baz&inga=42&quux", |
| 29 | + # "http://✪df.ws/123", |
| 30 | + # "https://➡.ws/", |
| 31 | + # "http://➡.ws/䨹", |
| 32 | + # "http://⌘.ws", |
| 33 | + # "http://⌘.ws/", |
| 34 | + "http://foo.com/blah_(wikipedia)#cite-1", |
| 35 | + "http://foo.com/blah_(wikipedia)_blah#cite-1", |
| 36 | + "http://foo.com/(something)?after=parens", |
| 37 | + # "http://☺.damowmow.com/", |
| 38 | + "http://code.google.com/events/#&product=browser", |
| 39 | + "http://j.mp", |
| 40 | + "http://foo.bar/?q=Test%20URL-encoded%20stuff", |
| 41 | + "http://1337.net", |
| 42 | + "http://example.com/2.3.1.3/" |
| 43 | + "http://a.b-c.de", |
| 44 | + "foo.com" |
| 45 | + ], |
| 46 | + "is_not_url": [ |
| 47 | + "http://userid:password@example.com:8080", |
| 48 | + "http://userid:password@example.com:8080/", |
| 49 | + "http://userid@example.com", |
| 50 | + "http://userid@example.com/", |
| 51 | + "http://userid@example.com:8080", |
| 52 | + "http://userid@example.com:8080/", |
| 53 | + "http://userid:password@example.com", |
| 54 | + "http://userid:password@example.com/", |
| 55 | + # "http://142.42.1.1/", |
| 56 | + "2.3", |
| 57 | + ".hello.com", |
| 58 | + # "http://142.42.1.1:8080/", |
| 59 | + "ftp://foo.bar/baz", |
| 60 | + "http://مثال.إختبار", |
| 61 | + "http://例子.测试", |
| 62 | + "http://उदाहरण.परीक्षा", |
| 63 | + "http://", |
| 64 | + "http://.", |
| 65 | + "http://..", |
| 66 | + "http://../", |
| 67 | + "http://?", |
| 68 | + "http://??", |
| 69 | + "http://??/", |
| 70 | + "http://#", |
| 71 | + "http://##", |
| 72 | + "http://##/", |
| 73 | + "//", |
| 74 | + "//a", |
| 75 | + "///a", |
| 76 | + "///", |
| 77 | + "http:///a", |
| 78 | + "rdar://1234", |
| 79 | + "h://test", |
| 80 | + ":// should fail", |
| 81 | + "ftps://foo.bar/", |
| 82 | + "http://-error-.invalid/", |
| 83 | + # "http://a.b--c.de/", |
| 84 | + # "http://-a.b.co", |
| 85 | + # "http://a.b-.co", |
| 86 | + # "http://223.255.255.254", |
| 87 | + # "http://0.0.0.0", |
| 88 | + # "http://10.1.1.0", |
| 89 | + # "http://10.1.1.255", |
| 90 | + # "http://224.1.1.1", |
| 91 | + # "http://1.1.1.1.1", |
| 92 | + # "http://123.123.123", |
| 93 | + "http://3628126748", |
| 94 | + "http://.www.foo.bar/", |
| 95 | + "http://.www.foo.bar./", |
| 96 | + # "http://10.1.1.1" |
| 97 | + ] |
| 98 | +} |
| 99 | + |
| 100 | + |
class TestUrlRegex(unittest.TestCase):
    """Exercise ``twitter_utils.is_url`` against the ``URLS`` fixtures.

    Also includes a standalone sanity check that ``re`` patterns compiled
    with ``re.U`` can be searched against text containing non-ASCII symbols.
    """

    def test_yes_urls(self):
        """Every string in ``URLS['is_url']`` must be reported as a URL."""
        for yes_url in URLS['is_url']:
            # The URL doubles as the failure message so the offending
            # fixture is visible in the test report.
            self.assertTrue(twitter_utils.is_url(yes_url), yes_url)

    def test_no_urls(self):
        """No string in ``URLS['is_not_url']`` may be reported as a URL."""
        for no_url in URLS['is_not_url']:
            self.assertFalse(twitter_utils.is_url(no_url), no_url)

    def test_regex_finds_unicode(self):
        """Unicode-aware patterns must find matches in both sample hosts."""
        string = "http://www.➡.ws"
        string2 = "http://www.example.com"
        pattern = re.compile(r'➡', re.U | re.I)
        # Optional scheme / "www." prefix followed by one host character and
        # a dot.  In a raw string a literal dot is written ``\.`` (``\\.``
        # would mean "backslash, then any character"); ``https?`` accepts
        # both schemes; and the hyphen is escaped so the class is the three
        # literal characters ``+``, ``-`` and ``_`` rather than the range
        # ``+`` .. ``_``.
        pattern2 = re.compile(
            r'(?:https?://|www\.)*(?:[\w+\-_][.])', re.I | re.U)
        self.assertTrue(re.findall(pattern, string))
        self.assertTrue(re.findall(pattern2, string2))
        self.assertTrue(re.findall(pattern2, string))
0 commit comments