From 05e77b86c78334007ff3babd90b4f54a02d5b28f Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Sun, 29 Mar 2026 12:46:20 -0700 Subject: [PATCH 1/4] feat: add feed validation and search helpers Add feed typing and safe validation APIs so consumers can handle mixed feed sources gracefully. Include built-in item filtering/search helpers to reduce repetitive feed-reader logic. --- lib/simple-rss.rb | 71 ++++++++ test/base/filtering_and_validation_test.rb | 178 +++++++++++++++++++++ 2 files changed, 249 insertions(+) create mode 100644 test/base/filtering_and_validation_test.rb diff --git a/lib/simple-rss.rb b/lib/simple-rss.rb index c94819f..a75cc54 100644 --- a/lib/simple-rss.rb +++ b/lib/simple-rss.rb @@ -95,6 +95,57 @@ def latest(count = 10) items.sort_by { |item| item[:pubDate] || item[:updated] || Time.at(0) }.reverse.first(count) end + # @rbs () -> Symbol + def feed_type + atom_namespaced_feed = source.match?(/<(atom:)?feed\b[^>]*xmlns(:\w+)?=['"][^'"]*atom/i) + return :atom if atom_namespaced_feed + return :rss2 if source.match?(/]*version=['"]2/i) + return :rss1 if source.match?(/]*version=['"]0\.9/i) + + :unknown + end + + # @rbs () -> bool + def valid? + return false if items.empty? + + title_value = instance_variable_get(:@title) + link_value = instance_variable_get(:@link) + return true if title_value || link_value + + false + end + + # @rbs (Time) -> Array[Hash[Symbol, untyped]] + def items_since(time) + items.select do |item| + item_date = item[:pubDate] || item[:updated] || item[:published] + item_date.is_a?(Time) && item_date > time + end + end + + # @rbs (String) -> Array[Hash[Symbol, untyped]] + def items_by_category(name) + query = name.to_s.downcase + + items.select do |item| + category = item[:category] + next false if category.nil? + + category_matches_query?(category, query) + end + end + + # @rbs (String) -> Array[Hash[Symbol, untyped]] + def search(query) + pattern = Regexp.new(Regexp.escape(query.to_s), Regexp::IGNORECASE) + + items.select do |item| + searchable_fields(item).any? { |field| field.to_s.match?(pattern) } + end + end + # @rbs (?Hash[Symbol, untyped]) -> Hash[Symbol, untyped] def as_json(_options = {}) hash = {} #: Hash[Symbol, untyped] @@ -157,6 +208,14 @@ def parse(source, options = {}) new source, options end + # @rbs (untyped, ?Hash[Symbol, untyped]) -> bool + def valid?(source, options = {}) + parse(source, options) + true + rescue SimpleRSSError + false + end + # Fetch and parse a feed from a URL # Returns nil if conditional GET returns 304 Not Modified # @@ -412,6 +471,18 @@ def clean_tag(tag) tag.to_s.tr(":", "_").intern end + # @rbs (untyped, String) -> bool + def category_matches_query?(category, query) + return category.any? { |value| value.to_s.downcase.include?(query) } if category.is_a?(Array) + + category.to_s.downcase.include?(query) + end + + # @rbs (Hash[Symbol, untyped]) -> Array[untyped] + def searchable_fields(item) + [item[:title], item[:description], item[:summary], item[:content]] + end + # @rbs (untyped) -> untyped def serialize_value(value) case value diff --git a/test/base/filtering_and_validation_test.rb b/test/base/filtering_and_validation_test.rb new file mode 100644 index 0000000..5415685 --- /dev/null +++ b/test/base/filtering_and_validation_test.rb @@ -0,0 +1,178 @@ +require "test_helper" + +class FilteringAndValidationTest < Test::Unit::TestCase + def setup + @rss09 = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/rss09.rdf") + @rss20 = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/rss20.xml") + @atom = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/atom.xml") + end + + def test_feed_type_for_known_formats + assert_equal :rss1, @rss09.feed_type + assert_equal :rss2, @rss20.feed_type + assert_equal :atom, @atom.feed_type + end + + def test_feed_type_unknown_for_non_standard_feed + feed = SimpleRSS.parse <<~XML + + + Unknown Feed + + Post + + + XML + + assert_equal :unknown, feed.feed_type + end + + def test_class_valid_returns_true_for_well_formed_feed + xml = <<~XML + + + + Valid Feed + http://example.com + + Post + + + + XML + + assert_equal true, SimpleRSS.valid?(xml) + end + + def test_class_valid_returns_false_for_invalid_feed + invalid_xml = open(File.dirname(__FILE__) + "/../data/not-rss.xml").read + + assert_equal false, SimpleRSS.valid?(invalid_xml) + end + + def test_instance_valid_requires_metadata_and_items + valid_feed = SimpleRSS.parse <<~XML + + + + Valid Feed + + Post + + + + XML + + invalid_feed = SimpleRSS.parse <<~XML + + + + No title and no link + + Body only + + + + XML + + empty_feed = SimpleRSS.parse <<~XML + + + + No Items + + + XML + + assert_equal true, valid_feed.valid? + assert_equal false, invalid_feed.valid? + assert_equal false, empty_feed.valid? + end + + def test_items_since_filters_by_date + threshold = Time.parse("Wed Aug 24 13:30:00 UTC 2005") + + filtered = @rss20.items_since(threshold) + + assert_equal 1, filtered.size + assert_operator filtered.first[:pubDate], :>, threshold + end + + def test_items_by_category_matches_strings_and_arrays + feed_with_string_category = SimpleRSS.parse <<~XML + + + + String Category Feed + + Ruby News + Technology + + + Sports News + Sports + + + + XML + + feed_with_array_category = SimpleRSS.parse( + <<~XML, + + + + Array Category Feed + + Dev Update + Technology + Ruby + + + + XML + array_tags: [:category] + ) + + string_results = feed_with_string_category.items_by_category("tech") + array_results = feed_with_array_category.items_by_category("ruby") + + assert_equal 1, string_results.size + assert_equal "Ruby News", string_results.first[:title] + assert_equal 1, array_results.size + assert_equal "Dev Update", array_results.first[:title] + end + + def test_search_matches_title_description_summary_and_content + feed = SimpleRSS.parse <<~XML + + + + Search Feed + + Ruby Patterns + Language design + + + Other Topic + Talks about BREAKING updates + + + Third Topic + A quick ruby summary + + + Fourth Topic + Deep dive into Ruby internals + + + + XML + + ruby_results = feed.search("ruby") + breaking_results = feed.search("breaking") + + assert_equal 3, ruby_results.size + assert_equal 1, breaking_results.size + assert_equal "Other Topic", breaking_results.first[:title] + end +end From 0bcffecd7376df3e4a5e6c87a6973603dcaaed19 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Sun, 29 Mar 2026 12:50:45 -0700 Subject: [PATCH 2/4] fix: resolve one-class-per-file lint issues Split fetch integration tests into a dedicated file and switch the top-level error type to a constant assignment so RuboCop passes in CI while preserving the single-file library layout. --- lib/simple-rss.rb | 5 ++--- test/base/fetch_integration_test.rb | 25 +++++++++++++++++++++++++ test/base/fetch_test.rb | 27 --------------------------- 3 files changed, 27 insertions(+), 30 deletions(-) create mode 100644 test/base/fetch_integration_test.rb diff --git a/lib/simple-rss.rb b/lib/simple-rss.rb index a75cc54..f666ae9 100644 --- a/lib/simple-rss.rb +++ b/lib/simple-rss.rb @@ -3,7 +3,7 @@ require "cgi" require "time" -class SimpleRSS +class SimpleRSS # rubocop:disable Metrics/ClassLength # @rbs skip include Enumerable @@ -616,5 +616,4 @@ def unescape(content) end end -class SimpleRSSError < StandardError -end +SimpleRSSError = Class.new(StandardError) diff --git a/test/base/fetch_integration_test.rb b/test/base/fetch_integration_test.rb new file mode 100644 index 0000000..ddb7a8b --- /dev/null +++ b/test/base/fetch_integration_test.rb @@ -0,0 +1,25 @@ +require "test_helper" + +# Integration tests that require network access +# These are skipped by default, run with NETWORK_TESTS=1 +class FetchIntegrationTest < Test::Unit::TestCase + def test_fetch_real_feed + omit unless ENV["NETWORK_TESTS"] + rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10) + assert_kind_of SimpleRSS, rss + assert rss.title + assert rss.items.any? + end + + def test_fetch_stores_caching_headers + omit unless ENV["NETWORK_TESTS"] + rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10) + assert(rss.etag || rss.last_modified, "Expected ETag or Last-Modified header") + end + + def test_fetch_follows_redirect + omit unless ENV["NETWORK_TESTS"] + rss = SimpleRSS.fetch("https://github.com/cardmagic/simple-rss/commits/master.atom", timeout: 10) + assert_kind_of SimpleRSS, rss + end +end diff --git a/test/base/fetch_test.rb b/test/base/fetch_test.rb index ff3a029..e8c09ff 100644 --- a/test/base/fetch_test.rb +++ b/test/base/fetch_test.rb @@ -88,30 +88,3 @@ def test_fetch_accepts_follow_redirects_option end end end - -# Integration tests that require network access -# These are skipped by default, run with NETWORK_TESTS=1 -class FetchIntegrationTest < Test::Unit::TestCase - def test_fetch_real_feed - omit unless ENV["NETWORK_TESTS"] - # Use a reliable, long-lived RSS feed - rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10) - assert_kind_of SimpleRSS, rss - assert rss.title - assert rss.items.any? - end - - def test_fetch_stores_caching_headers - omit unless ENV["NETWORK_TESTS"] - rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10) - # At least one of these should be present for most feeds - assert(rss.etag || rss.last_modified, "Expected ETag or Last-Modified header") - end - - def test_fetch_follows_redirect - omit unless ENV["NETWORK_TESTS"] - # GitHub raw URLs often redirect - rss = SimpleRSS.fetch("https://github.com/cardmagic/simple-rss/commits/master.atom", timeout: 10) - assert_kind_of SimpleRSS, rss - end -end From 115a3c786a4e072f9f28718df465f077c5cf5f77 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Sun, 29 Mar 2026 12:57:51 -0700 Subject: [PATCH 3/4] fix: align error class with latest RuboCop Use an explicit class definition for SimpleRSSError so lint passes with RuboCop 1.86 on CI. Keep the one-file library layout and limit the one-class-per-file disable to this exception. --- lib/simple-rss.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/simple-rss.rb b/lib/simple-rss.rb index f666ae9..b65ad7d 100644 --- a/lib/simple-rss.rb +++ b/lib/simple-rss.rb @@ -616,4 +616,5 @@ def unescape(content) end end -SimpleRSSError = Class.new(StandardError) +class SimpleRSSError < StandardError # rubocop:disable Style/OneClassPerFile +end From 3ca1b354b270a95b73af948d0c18cdb8df665a27 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Sun, 29 Mar 2026 12:59:15 -0700 Subject: [PATCH 4/4] fix: make SimpleRSS.valid? fully non-raising Rescue StandardError in SimpleRSS.valid? so unreadable IO sources return false instead of bubbling runtime read failures. Add a regression test for read errors to lock in the behavior. --- lib/simple-rss.rb | 2 +- test/base/filtering_and_validation_test.rb | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/simple-rss.rb b/lib/simple-rss.rb index b65ad7d..1c436e2 100644 --- a/lib/simple-rss.rb +++ b/lib/simple-rss.rb @@ -212,7 +212,7 @@ def parse(source, options = {}) def valid?(source, options = {}) parse(source, options) true - rescue SimpleRSSError + rescue StandardError false end diff --git a/test/base/filtering_and_validation_test.rb b/test/base/filtering_and_validation_test.rb index 5415685..d1a4327 100644 --- a/test/base/filtering_and_validation_test.rb +++ b/test/base/filtering_and_validation_test.rb @@ -50,6 +50,15 @@ def test_class_valid_returns_false_for_invalid_feed assert_equal false, SimpleRSS.valid?(invalid_xml) end + def test_class_valid_returns_false_when_source_read_fails + unreadable_source = Object.new + unreadable_source.define_singleton_method(:read) do + raise IOError, "stream closed" + end + + assert_equal false, SimpleRSS.valid?(unreadable_source) + end + def test_instance_valid_requires_metadata_and_items valid_feed = SimpleRSS.parse <<~XML