Skip to content

Commit 417138d

Browse files
authored
Try to fix bot detection (#3)
- Add some unit tests
1 parent d14c8a3 commit 417138d

5 files changed

Lines changed: 88 additions & 3 deletions

File tree

Neolution.OrchardCoreModules.PageViewStats.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ VisualStudioVersion = 17.1.32228.430
55
MinimumVisualStudioVersion = 10.0.40219.1
66
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Neolution.OrchardCoreModules.PageViewStats", "Neolution.OrchardCoreModules.PageViewStats\Neolution.OrchardCoreModules.PageViewStats.csproj", "{4C09E371-1884-491B-B9EA-2A09F85F5487}"
77
EndProject
8+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "PageViewStats.UnitTests", "PageViewStats.UnitTests\PageViewStats.UnitTests.csproj", "{83A256C0-100A-4DE5-B253-C78919062C92}"
9+
EndProject
810
Global
911
GlobalSection(SolutionConfigurationPlatforms) = preSolution
1012
Debug|Any CPU = Debug|Any CPU
@@ -15,6 +17,10 @@ Global
1517
{4C09E371-1884-491B-B9EA-2A09F85F5487}.Debug|Any CPU.Build.0 = Debug|Any CPU
1618
{4C09E371-1884-491B-B9EA-2A09F85F5487}.Release|Any CPU.ActiveCfg = Release|Any CPU
1719
{4C09E371-1884-491B-B9EA-2A09F85F5487}.Release|Any CPU.Build.0 = Release|Any CPU
20+
{83A256C0-100A-4DE5-B253-C78919062C92}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21+
{83A256C0-100A-4DE5-B253-C78919062C92}.Debug|Any CPU.Build.0 = Debug|Any CPU
22+
{83A256C0-100A-4DE5-B253-C78919062C92}.Release|Any CPU.ActiveCfg = Release|Any CPU
23+
{83A256C0-100A-4DE5-B253-C78919062C92}.Release|Any CPU.Build.0 = Release|Any CPU
1824
EndGlobalSection
1925
GlobalSection(SolutionProperties) = preSolution
2026
HideSolutionNode = FALSE

Neolution.OrchardCoreModules.PageViewStats/Services/BotDetector.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
using System.Text.RegularExpressions;
77
using Neolution.OrchardCoreModules.PageViewStats.Resources;
88

9-
internal class BotDetector : IBotDetector
9+
public class BotDetector : IBotDetector
1010
{
1111
private static IReadOnlyList<Regex> botList;
1212

@@ -16,7 +16,7 @@ public BotDetector()
1616
var text = EmbeddedResources.ReadAllText("Neolution.OrchardCoreModules.PageViewStats.Resources>COUNTER_Robots_list.txt").Trim('\uFEFF', '\u200B');
1717

1818
// Transform line by line into a list of regex expressions.
19-
var list = text.Split(new[] { "\r\n" }, StringSplitOptions.None).Select(x => x).Where(x => !string.IsNullOrWhiteSpace(x));
19+
var list = text.Split(new[] { Environment.NewLine }, StringSplitOptions.None).Select(x => x).Where(x => !string.IsNullOrWhiteSpace(x));
2020
botList = list.Select(bot => new Regex(bot, RegexOptions.IgnoreCase | RegexOptions.Compiled)).ToList();
2121
}
2222

@@ -27,6 +27,6 @@ public bool CheckUserAgentString(string userAgentString)
2727
return false;
2828
}
2929

30-
return botList.Any(regex => regex.IsMatch(userAgentString));
30+
return botList.Any(regex => regex.IsMatch(userAgentString.Trim()));
3131
}
3232
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
namespace Neolution.OrchardCoreModules.PageViewStats.UnitTests
{
    using Microsoft.Extensions.DependencyInjection;
    using Neolution.OrchardCoreModules.PageViewStats.Services;
    using Shouldly;

    /// <summary>
    /// Unit tests for the user agent classification performed by <see cref="BotDetector"/>.
    /// </summary>
    public class BotDetectorTests
    {
        /// <summary>
        /// Verifies that user agent strings sent by known crawlers are classified as robots.
        /// </summary>
        [Fact]
        public void GivenRobotUserAgents_WhenCheckingForRobot_ThenShouldIdentifiedAsRobots()
        {
            // Real-world sample user agent strings that should be identified as robots
            var robotUserAgents = new[]
            {
                "'DuckDuckBot-Https/1.1; (+https://duckduckgo.com/duckduckbot)'",
                "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.115 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/102.0.5005.115 Safari/537.36",
                "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)",
                "Mozilla/5.0 (compatible; ev-crawler/1.0; +https://headline.com/legal/crawler)",
            };

            var detector = GetBotDetectorService();

            foreach (var userAgent in robotUserAgents)
            {
                // The user agent is passed as the failure message so a failing sample can be identified.
                detector.CheckUserAgentString(userAgent).ShouldBeTrue(userAgent);
            }
        }

        /// <summary>
        /// Verifies that user agent strings of regular browsers are not classified as robots.
        /// </summary>
        [Fact]
        public void GivenHumanUserAgents_WhenCheckingForRobot_ThenShouldNotIdentifiedAsRobots()
        {
            // The top user agent strings from our websites database that look like human-operated browsers
            var humanUserAgents = new[]
            {
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0",
                "Mozilla/5.0 (iPhone; CPU iPhone OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Mobile/15E148 Safari/604.1",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
                "Mozilla/5.0 (X11; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0",
            };

            var detector = GetBotDetectorService();

            foreach (var userAgent in humanUserAgents)
            {
                // The user agent is passed as the failure message so a failing sample can be identified.
                detector.CheckUserAgentString(userAgent).ShouldBeFalse(userAgent);
            }
        }

        /// <summary>
        /// Resolves an <see cref="IBotDetector"/> from a fresh service container in which
        /// <see cref="BotDetector"/> is registered as a singleton.
        /// </summary>
        /// <returns>The resolved bot detector service.</returns>
        private static IBotDetector GetBotDetectorService()
        {
            return new ServiceCollection()
                .AddSingleton<IBotDetector, BotDetector>()
                .BuildServiceProvider()
                .GetRequiredService<IBotDetector>();
        }
    }
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net6.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
8+
<IsPackable>false</IsPackable>
9+
10+
<RootNamespace>Neolution.OrchardCoreModules.PageViewStats.UnitTests</RootNamespace>
11+
12+
<AssemblyName>Neolution.OrchardCoreModules.PageViewStats.UnitTests</AssemblyName>
13+
</PropertyGroup>
14+
15+
<ItemGroup>
16+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" />
17+
<PackageReference Include="Shouldly" Version="4.1.0" />
18+
<PackageReference Include="xunit" Version="2.4.2" />
19+
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
20+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
21+
<PrivateAssets>all</PrivateAssets>
22+
</PackageReference>
23+
<PackageReference Include="coverlet.collector" Version="3.1.2">
24+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
25+
<PrivateAssets>all</PrivateAssets>
26+
</PackageReference>
27+
</ItemGroup>
28+
29+
<ItemGroup>
30+
<ProjectReference Include="..\Neolution.OrchardCoreModules.PageViewStats\Neolution.OrchardCoreModules.PageViewStats.csproj" />
31+
</ItemGroup>
32+
33+
</Project>

PageViewStats.UnitTests/Usings.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global using Xunit;

0 commit comments

Comments
 (0)