Skip to content

Commit 8f18f82

Browse files
committed
Format check + minor improvements + updated readme
1 parent 3403c4c commit 8f18f82

6 files changed

Lines changed: 129 additions & 38 deletions

File tree

README.md

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
[![Packagist](https://img.shields.io/packagist/v/vipnytt/useragentparser.svg)](https://packagist.org/packages/vipnytt/useragentparser)
77
[![Chat](https://badges.gitter.im/VIPnytt/UserAgentParser.svg)](https://gitter.im/VIPnytt/UserAgentParser)
88

9-
# User-Agent string parser
10-
PHP class to parse User-Agent strings sent by web-crawlers.
9+
# User-Agent parser for robot rule sets
10+
Parser and group determiner optimized for ``robots.txt``, ``X-Robots-Tag`` and ``Robots-meta-tag`` use cases.
1111

1212
[![SensioLabsInsight](https://insight.sensiolabs.com/projects/1386c14c-546c-4c42-ac55-91ea3a3a1ae1/big.png)](https://insight.sensiolabs.com/projects/1386c14c-546c-4c42-ac55-91ea3a3a1ae1)
1313

@@ -23,16 +23,17 @@ The library is available for install via [Composer](https://getcomposer.org). Ju
2323
Then run `composer update`.
2424

2525
## Features
26-
- Strip the version tag.
27-
- Find different groups the User-Agent belongs to.
28-
- Determine the correct group of records by finding the group with the most specific user-agent that still matches.
26+
- Stripping of the version tag.
27+
- List any _rule groups_ the User-Agent belongs to.
28+
- Determine the correct group of records by finding the group with the most specific User-agent that still matches.
2929

30-
### When do I need it?
31-
- Parsing of `robots.txt`, the rules for robots online.
32-
- Parsing of the _X-Robots-Tag_ HTTP-header.
33-
- Parsing of _Robots meta tags_ in HTML documents
30+
### When to use it?
31+
- When parsing `robots.txt` rule sets, the online rules for robots.
32+
- When parsing the ``X-Robots-Tag`` HTTP-header.
33+
- When parsing ``Robots meta tags`` in HTML documents.
3434

35-
Note: _The library is not compatible with User-Agent strings sent by eg. web-browsers. Contributions are of course welcome._
35+
Note: _Full User-agent strings, like those sent by e.g. web browsers or found in your log files, are not supported; this is by design._
36+
Supported User-agent string formats are ``UserAgentName/version``, with or without the version tag, e.g. ``MyWebCrawler/2.0`` or just ``MyWebCrawler``.
3637

3738

3839
## Getting Started
@@ -46,7 +47,7 @@ var_dump($parser->stripVersion());
4647
/* googlebot */
4748
```
4849

49-
### Find different groups the User-Agent belongs to
50+
### List different groups the User-agent belongs to
5051
```php
5152
use vipnytt\UserAgentParser;
5253

@@ -60,7 +61,7 @@ var_dump($parser->export());
6061
```
6162

6263
### Determine the correct group
63-
Determine the correct group of records by finding the group with the most specific user-agent that still matches
64+
Determine the correct group of records by finding the group with the most specific User-agent that still matches your rule sets.
6465
```php
6566
use vipnytt\UserAgentParser;
6667

composer.json

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"name": "vipnytt/useragentparser",
3-
"description": "User-Agent string parser class",
3+
"description": "User-Agent string parser",
4+
"type": "library",
45
"keywords": [
56
"UserAgent",
67
"User-Agent",
@@ -10,33 +11,37 @@
1011
"Robots-meta-tag",
1112
"Robot",
1213
"Spider",
13-
"Web-Crawler"
14+
"Crawler"
1415
],
1516
"homepage": "https://github.com/VIPnytt/UserAgentParser",
16-
"type": "library",
1717
"license": "MIT",
18-
"minimum-stability": "dev",
19-
"prefer-stable": true,
20-
"require": {
21-
"php": ">=5.4.0",
22-
"ext-mbstring": "*"
23-
},
24-
"require-dev": {
25-
"phpunit/phpunit": ">=4.0",
26-
"codeclimate/php-test-reporter": ">=0.2.0"
27-
},
2818
"authors": [
2919
{
3020
"name": "VIP nytt",
3121
"email": "vipnytt@gmail.com",
32-
"role": "creator"
22+
"role": "Creator"
3323
},
3424
{
3525
"name": "Jan-Petter Gundersen",
3626
"email": "europe.jpg@gmail.com",
37-
"role": "developer"
27+
"role": "Developer"
3828
}
3929
],
30+
"require": {
31+
"php": ">=5.4.0",
32+
"ext-mbstring": "*"
33+
},
34+
"require-dev": {
35+
"phpunit/phpunit": ">=4.0",
36+
"codeclimate/php-test-reporter": ">=0.2.0"
37+
},
38+
"support": {
39+
"email": "vipnytt@gmail.com",
40+
"issues": "https://github.com/VIPnytt/UserAgentParser/issues",
41+
"wiki": "https://github.com/VIPnytt/UserAgentParser/wiki",
42+
"source": "https://github.com/VIPnytt/UserAgentParser",
43+
"docs": "https://github.com/VIPnytt/UserAgentParser/wiki"
44+
},
4045
"autoload": {
4146
"psr-4": {
4247
"vipnytt\\": "src/"

src/UserAgentParser.php

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?php
22
namespace vipnytt;
33

4-
use Exception;
4+
use vipnytt\UserAgentParser\Exceptions\FormatException;
55

66
/**
77
* Class UserAgentParser
@@ -17,29 +17,46 @@ class UserAgentParser
1717
* Constructor
1818
*
1919
* @param string $userAgent
20-
* @throws Exception
2120
*/
2221
public function __construct($userAgent)
2322
{
2423
mb_detect_encoding($userAgent);
25-
$this->userAgent = mb_strtolower(trim($userAgent));
24+
$this->userAgent = trim($userAgent);
25+
$this->checkFormat();
2626
$this->explode();
2727
}
2828

29+
/**
30+
* Validate the UserAgent format
31+
*
32+
* @throws FormatException
33+
*/
34+
protected function checkFormat()
35+
{
36+
if (preg_match('/\s/', $this->userAgent)) {
37+
throw new FormatException("Format not supported. Please use `name/version` or just `name`, eg. `MyUserAgent/1.0` and `MyUserAgent`.");
38+
}
39+
}
40+
2941
/**
3042
* Parses all possible User-Agent groups to an array
3143
*
3244
* @return array
3345
*/
3446
private function explode()
3547
{
36-
$this->groups = [$this->userAgent];
37-
$this->groups[] = $this->stripVersion();
38-
while (mb_strpos(end($this->groups), '-') !== false) {
39-
$current = end($this->groups);
40-
$this->groups[] = mb_substr($current, 0, mb_strrpos($current, '-'));
48+
$groups = [$this->userAgent];
49+
50+
$groups[] = $this->stripVersion();
51+
while (mb_stripos(end($groups), '-') !== false) {
52+
$current = end($groups);
53+
$groups[] = mb_substr($current, 0, mb_strripos($current, '-'));
54+
}
55+
foreach ($groups as $group) {
56+
if (!in_array($group, $this->groups)) {
57+
$this->groups[] = $group;
58+
}
4159
}
42-
$this->groups = array_unique($this->groups);
4360
}
4461

4562
/**
@@ -49,7 +66,7 @@ private function explode()
4966
*/
5067
public function stripVersion()
5168
{
52-
if (mb_strpos($this->userAgent, '/') !== false) {
69+
if (mb_stripos($this->userAgent, '/') !== false) {
5370
return mb_split('/', $this->userAgent, 2)[0];
5471
}
5572
return $this->userAgent;
@@ -64,8 +81,9 @@ public function stripVersion()
6481
*/
6582
public function match($array)
6683
{
84+
$array = array_map('mb_strtolower', $array);
6785
foreach ($this->groups as $userAgent) {
68-
if (in_array($userAgent, array_map('mb_strtolower', $array))) {
86+
if (in_array(mb_strtolower($userAgent), $array)) {
6987
return $userAgent;
7088
}
7189
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php
2+
namespace vipnytt\UserAgentParser\Exceptions;
3+
4+
/**
5+
* Class FormatException
6+
*
7+
* @package vipnytt\UserAgentParser\Exceptions
8+
*/
9+
class FormatException extends UserAgentParserException
10+
{
11+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
namespace vipnytt\UserAgentParser\Exceptions;
3+
4+
use Exception;
5+
6+
/**
7+
* Class UserAgentParserException
8+
*
9+
* @package vipnytt\UserAgentParser\Exceptions
10+
*/
11+
class UserAgentParserException extends Exception
12+
{
13+
}

tests/FormatTest.php

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?php
2+
namespace vipnytt\UserAgentParser\Tests;
3+
4+
use vipnytt\UserAgentParser;
5+
use vipnytt\UserAgentParser\Exceptions\FormatException;
6+
7+
/**
8+
* Class FormatTest
9+
*
10+
* @package vipnytt\UserAgentParser\Tests
11+
*/
12+
class FormatTest extends \PHPUnit_Framework_TestCase
13+
{
14+
/**
15+
* @dataProvider generateDataForTest
16+
* @param string $userAgent
17+
*/
18+
public function testFormat($userAgent)
19+
{
20+
$this->expectException(FormatException::class);
21+
new UserAgentParser($userAgent);
22+
}
23+
24+
/**
25+
* Generate test data
26+
* @return array
27+
*/
28+
public
29+
function generateDataForTest()
30+
{
31+
return [
32+
[
33+
'mybot 2.0',
34+
],
35+
[
36+
'my crawler',
37+
],
38+
[
39+
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
40+
],
41+
];
42+
}
43+
}

0 commit comments

Comments
 (0)