Skip to content

Commit 7782d60

Browse files
authored
dev: relax config folder convention for subdomains to allow top-level (#274)
* dev: relax config folder convention for subdomains to allow top-level * fix: yard docs & add missing
1 parent ee6327c commit 7782d60

6 files changed

Lines changed: 93 additions & 7 deletions

File tree

Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ group :development do
1111
gem 'html2rss-generator', github: 'html2rss/generator', branch: :main
1212

1313
gem 'nokogiri'
14+
gem 'public_suffix'
1415
gem 'rspec', '~> 3.0'
1516
gem 'rubocop'
1617
gem 'rubocop-performance'

Gemfile.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,12 @@ GEM
111111
logger
112112
mime-types-data (~> 3.2025, >= 3.2025.0507)
113113
mime-types-data (3.2025.0924)
114+
mini_portile2 (2.8.9)
114115
net-http (0.9.1)
115116
uri (>= 0.11.1)
117+
nokogiri (1.18.8)
118+
mini_portile2 (~> 2.8.2)
119+
racc (~> 1.4)
116120
nokogiri (1.18.8-arm64-darwin)
117121
racc (~> 1.4)
118122
nokogiri (1.18.8-x86_64-darwin)
@@ -225,6 +229,7 @@ DEPENDENCIES
225229
html2rss-configs!
226230
html2rss-generator!
227231
nokogiri
232+
public_suffix
228233
rspec (~> 3.0)
229234
rubocop
230235
rubocop-performance

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ make test-domain DOMAIN=github.com
5757

5858
**Adding new configs**: Just create the YAML file and run tests. No spec file needed.
5959

60+
**Config folder convention**: Place each config in a folder named after the registrable domain of its `channel.url` (e.g., `example.com/` or `bbc.co.uk/`). Folders named after the full host (e.g., `news.example.com/`) are still accepted for legacy configs, but are not preferred.
61+
6062
## Documentation
6163

6264
- [Main Documentation](https://html2rss.github.io/html2rss-configs/)

spec/helper_spec.rb

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# frozen_string_literal: true
2+
3+
RSpec.describe Helper do
4+
describe '.url_to_registrable_domain' do
5+
it 'collapses subdomains to the registrable domain' do
6+
expect(described_class.url_to_registrable_domain('https://blog.example.com/posts')).to eq('example.com')
7+
end
8+
9+
it 'keeps multi-part TLDs intact for registrable domain' do
10+
expect(described_class.url_to_registrable_domain('https://news.bbc.co.uk/world')).to eq('bbc.co.uk')
11+
end
12+
13+
it 'preserves single-host domains' do
14+
expect(described_class.url_to_registrable_domain('https://example.com')).to eq('example.com')
15+
end
16+
17+
it 'returns nil for blank or invalid URLs', :aggregate_failures do
18+
expect(described_class.url_to_registrable_domain(nil)).to be_nil
19+
expect(described_class.url_to_registrable_domain('')).to be_nil
20+
expect(described_class.url_to_registrable_domain('not a url')).to be_nil
21+
end
22+
end
23+
24+
describe '.url_to_host_name' do
25+
it 'returns the full host' do
26+
expect(described_class.url_to_host_name('https://news.bbc.co.uk/world')).to eq('news.bbc.co.uk')
27+
end
28+
29+
it 'returns nil for blank or invalid URLs', :aggregate_failures do
30+
expect(described_class.url_to_host_name(nil)).to be_nil
31+
expect(described_class.url_to_host_name('')).to be_nil
32+
expect(described_class.url_to_host_name('not a url')).to be_nil
33+
end
34+
end
35+
36+
describe 'legacy naming guardrail' do
37+
it 'does not expose url_to_directory_name' do
38+
expect(described_class).not_to respond_to(:url_to_directory_name)
39+
end
40+
end
41+
42+
describe '.registrable_domain' do
43+
it 'falls back to host when PublicSuffix returns nil' do
44+
allow(PublicSuffix).to receive(:domain).with('example.local').and_return(nil)
45+
46+
expect(described_class.send(:registrable_domain, 'example.local')).to eq('example.local')
47+
end
48+
49+
it 'falls back to host when PublicSuffix raises DomainInvalid' do
50+
allow(PublicSuffix).to receive(:domain).with('invalid..host')
51+
.and_raise(PublicSuffix::DomainInvalid)
52+
53+
expect(described_class.send(:registrable_domain, 'invalid..host')).to eq('invalid..host')
54+
end
55+
end
56+
end

spec/support/helper.rb

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,37 @@
33
require 'json'
44
require 'nokogiri'
55
require 'yaml'
6-
require 'uri'
6+
require 'public_suffix'
77

88
##
99
# A collection of helper methods.
1010
module Helper
1111
##
1212
# @param url [String]
13+
# @return [String, nil]
14+
def self.url_to_registrable_domain(url)
15+
host = url_to_host_name(url)
16+
return host unless host
17+
18+
registrable_domain(host)
19+
end
20+
21+
##
22+
# @param url [String]
23+
# @return [String, nil]
24+
def self.url_to_host_name(url)
25+
Html2rss::Url.for_channel(url)&.host
26+
rescue ArgumentError
27+
nil
28+
end
29+
30+
##
31+
# @param host [String]
1332
# @return [String]
14-
def self.url_to_directory_name(url)
15-
URI(url.split('/')[0..2].join('/')).host.gsub(/^(api|www|webapp)\./, '')
33+
def self.registrable_domain(host)
34+
PublicSuffix.domain(host) || host
35+
rescue PublicSuffix::DomainInvalid
36+
host
1637
end
1738

1839
##

spec/support/shared_examples/config.yml_spec.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,17 @@
4141
config
4242
end
4343

44-
context 'with the file' do
45-
let(:host_name) { Helper.url_to_directory_name yaml['channel']['url'] }
44+
context 'with the file' do # rubocop:disable RSpec/MultipleMemoizedHelpers
45+
let(:host_name) { Helper.url_to_host_name yaml['channel']['url'] }
46+
let(:domain_name) { Helper.url_to_registrable_domain yaml['channel']['url'] }
4647
let(:dirname) { File.dirname(file_path).split(File::Separator).last }
4748

4849
it 'is parseable' do
4950
expect { yaml }.not_to raise_error
5051
end
5152

52-
it "resides in a folder named after channel.url's host" do
53-
expect(dirname).to eq(host_name)
53+
it "resides in a folder named after channel.url's host or domain" do
54+
expect([domain_name, host_name]).to include(dirname)
5455
end
5556
end
5657

0 commit comments

Comments
 (0)