Skip to content

Commit 041dd4d

Browse files
committed
Multi-byte character support
Updated README.md
1 parent ddc1755 commit 041dd4d

3 files changed

Lines changed: 18 additions & 8 deletions

File tree

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
[![Packagist](https://img.shields.io/packagist/v/vipnytt/useragentparser.svg)](https://packagist.org/packages/vipnytt/useragentparser)
77
[![Chat](https://badges.gitter.im/VIPnytt/UserAgentParser.svg)](https://gitter.im/VIPnytt/UserAgentParser)
88

9-
# User-Agent string parser class
10-
PHP class to parse User-Agent strings.
9+
# User-Agent string parser
10+
PHP class to parse User-Agent strings sent by web crawlers.
1111

1212
[![SensioLabsInsight](https://insight.sensiolabs.com/projects/1386c14c-546c-4c42-ac55-91ea3a3a1ae1/big.png)](https://insight.sensiolabs.com/projects/1386c14c-546c-4c42-ac55-91ea3a3a1ae1)
1313

@@ -27,11 +27,13 @@ Then run `composer update`.
2727
- Find different groups the User-Agent belongs to.
2828
- Determine the correct group of records by finding the group with the most specific user-agent that still matches.
2929

30-
## When do I need it?
30+
### When do I need it?
3131
- Parsing of `robots.txt`, the file that defines the crawling rules for web robots.
3232
- Parsing of the _X-Robots-Tag_ HTTP-header.
3333
- Parsing of _Robots meta tags_ in HTML documents.
3434

35+
Note: _The library is not compatible with User-Agent strings sent by other clients, e.g. web browsers. Contributions are, of course, welcome._
36+
3537

3638
## Getting Started
3739

build/.gitkeep

Whitespace-only changes.

src/UserAgentParser.php

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
<?php
22
namespace vipnytt;
33

4+
use Exception;
5+
46
/**
57
* Class UserAgentParser
68
*
@@ -15,9 +17,15 @@ class UserAgentParser
1517
* Constructor
1618
*
1719
* @param string $userAgent
20+
* @throws Exception
1821
*/
1922
public function __construct($userAgent)
2023
{
24+
if (!extension_loaded('mbstring')) {
25+
throw new Exception('The extension `mbstring` must be installed and loaded for this library');
26+
}
27+
mb_detect_encoding($userAgent);
28+
2129
$this->userAgent = mb_strtolower(trim($userAgent));
2230
$this->explode();
2331
}
@@ -31,9 +39,9 @@ private function explode()
3139
{
3240
$this->groups = [$this->userAgent];
3341
$this->groups[] = $this->stripVersion();
34-
while (strpos(end($this->groups), '-') !== false) {
42+
while (mb_strpos(end($this->groups), '-') !== false) {
3543
$current = end($this->groups);
36-
$this->groups[] = substr($current, 0, strrpos($current, '-'));
44+
$this->groups[] = mb_substr($current, 0, mb_strrpos($current, '-'));
3745
}
3846
$this->groups = array_unique($this->groups);
3947
}
@@ -45,8 +53,8 @@ private function explode()
4553
*/
4654
public function stripVersion()
4755
{
48-
if (strpos($this->userAgent, '/') !== false) {
49-
return explode('/', $this->userAgent, 2)[0];
56+
if (mb_strpos($this->userAgent, '/') !== false) {
57+
return mb_split('/', $this->userAgent, 2)[0];
5058
}
5159
return $this->userAgent;
5260
}
@@ -62,7 +70,7 @@ public function stripVersion()
6270
public function match($array, $fallback = null)
6371
{
6472
foreach ($this->groups as $userAgent) {
65-
if (in_array($userAgent, array_map('strtolower', $array))) {
73+
if (in_array($userAgent, array_map('mb_strtolower', $array))) {
6674
return $userAgent;
6775
}
6876
}

0 commit comments

Comments
 (0)