Skip to content

Commit e8e631b

Browse files
committed
End anchor fix
1 parent e06ed9e commit e8e631b

5 files changed

Lines changed: 27 additions & 54 deletions

File tree

src/RobotsTxtParser/Parser/Directives/DisAllow.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,11 @@ public function check($url)
126126
*/
127127
protected function getPath($url)
128128
{
129+
// Encode
129130
$url = $this->urlEncode($url);
130131
if (mb_stripos($url, '/') === 0) {
131-
// URL already is a path
132+
// Strip fragments
133+
$url = mb_split('#', $url)[0];
132134
return $url;
133135
}
134136
if (!$this->urlValidate($url)) {

src/RobotsTxtParser/Parser/Toolbox.php

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,21 @@ protected function checkPath($path, $paths)
3838
* @link https://github.com/hafriedlander/php-peg
3939
*/
4040
try {
41-
$rule = str_replace('#', '\#', $rule);
42-
if (preg_match('#' . $rule . '#', $path)) {
43-
if (mb_stripos($rule, '$') !== false) {
44-
/**
45-
* Bug when not exact match
46-
* @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/63
47-
*/
48-
if (mb_strlen($rule) - 1 >= mb_strlen($path)) {
49-
return true;
50-
}
51-
} else {
41+
if (!preg_match('#' . $rule . '#', $path)) {
42+
// Rule does not match
43+
continue;
44+
} else if (mb_stripos($rule, '$') === false) {
45+
// No special parsing required
46+
return true;
47+
} else if (($wildcardPos = mb_strrpos($rule, '*')) !== false) {
48+
// Rule contains both an end anchor ($) and wildcard (*)
49+
$afterWildcard = mb_substr($rule, $wildcardPos + 1, mb_strlen($rule) - $wildcardPos - 2);
50+
if ($afterWildcard == mb_substr($path, -mb_strlen($afterWildcard))) {
5251
return true;
5352
}
53+
} else if (mb_substr($rule, 0, -1) == $path) {
54+
// Rule contains an end anchor ($) but no wildcard (*)
55+
return true;
5456
}
5557
} catch (\Exception $e) {
5658
// A preg_match bug has occurred

tests/EndAnchorTest.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ public function testEndAnchor($robotsTxtContent)
2727

2828
$this->assertTrue($parser->userAgent()->isDisallowed('/asd/'));
2929
$this->assertFalse($parser->userAgent()->isAllowed('/asd/'));
30+
31+
$this->assertTrue($parser->userAgent('DenyMe')->isDisallowed('http://example.com/deny_all/'));
32+
$this->assertFalse($parser->userAgent('DenyMe')->isAllowed('http://example.com/deny_all/'));
3033
}
3134

3235
/**
@@ -42,6 +45,11 @@ public function generateDataForTest()
4245
User-Agent: *
4346
Disallow: /*
4447
Allow: /$
48+
49+
User-Agent: DenyMe
50+
Disallow: /deny_all/$
51+
Disallow: *deny_all/$
52+
Disallow: deny_all/$
4553
ROBOTS
4654
]
4755
];

tests/EndAnchorWildcardTest.php

Lines changed: 0 additions & 42 deletions
This file was deleted.

tests/EscapingTest.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ public function testEscaping($robotsTxtContent)
2222
$this->assertTrue($parser->userAgent()->isAllowed("/%5C."));
2323
$this->assertFalse($parser->userAgent()->isDisallowed("/%5C."));
2424

25+
/**
26+
* Additional tests to enable in the future, currently disabled due to bugs
27+
*/
2528
//$this->assertTrue($parser->userAgent()->isDisallowed("/("));
2629
//$this->assertFalse($parser->userAgent()->isAllowed("/("));
2730
}

0 commit comments

Comments
 (0)