Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 73 additions & 2 deletions lib/simple-rss.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require "cgi"
require "time"

class SimpleRSS
class SimpleRSS # rubocop:disable Metrics/ClassLength
# @rbs skip
include Enumerable

Expand Down Expand Up @@ -95,6 +95,57 @@ def latest(count = 10)
items.sort_by { |item| item[:pubDate] || item[:updated] || Time.at(0) }.reverse.first(count)
end

# Classifies the raw feed text held in +source+ by pattern-matching its markup.
#
# Checks run in a fixed order and the first hit wins:
# * +:atom+  - a +<feed>+ (or +<atom:feed>+) tag carrying an xmlns attribute
#   whose value contains "atom"
# * +:rss2+  - an +<rss>+ tag whose version attribute starts with 2
# * +:rss1+  - an +<rdf:RDF+ tag
# * +:rss09+ - an +<rss>+ tag whose version attribute starts with 0.9
#
# Returns +:unknown+ when none of the patterns match.
#
# @rbs () -> Symbol
def feed_type
  if source.match?(/<(atom:)?feed\b[^>]*xmlns(:\w+)?=['"][^'"]*atom/i)
    :atom
  elsif source.match?(/<rss[^>]*version=['"]2/i)
    :rss2
  elsif source.match?(/<rdf:RDF/i)
    :rss1
  elsif source.match?(/<rss[^>]*version=['"]0\.9/i)
    :rss09
  else
    :unknown
  end
end

# Reports whether this parsed feed looks usable: it must contain at least one
# item and have a truthy @title or @link.
#
# The instance variables are read directly via instance_variable_get rather
# than through reader methods.
#
# @rbs () -> bool
def valid?
  return false if items.empty?

  %i[@title @link].any? { |ivar| instance_variable_get(ivar) }
end

# Selects the items dated strictly after +time+.
#
# An item's date is the first truthy value among :pubDate, :updated and
# :published. Items whose date is not a Time instance (missing, or any other
# type) are left out.
#
# @rbs (Time) -> Array[Hash[Symbol, untyped]]
def items_since(time)
  items.select do |entry|
    stamp = entry[:pubDate] || entry[:updated] || entry[:published]
    next false unless stamp.is_a?(Time)

    stamp > time
  end
end

# Selects the items whose :category matches +name+.
#
# +name+ is stringified and downcased before comparison; the comparison
# itself is delegated to category_matches_query?. Items with a nil :category
# never match.
#
# @rbs (String) -> Array[Hash[Symbol, untyped]]
def items_by_category(name)
  needle = name.to_s.downcase

  items.select do |entry|
    tags = entry[:category]
    !tags.nil? && category_matches_query?(tags, needle)
  end
end

# Selects the items in which any searchable field contains +query+.
#
# +query+ is stringified and regexp-escaped, so it is matched as literal text,
# case-insensitively. The fields examined for each item are whatever
# searchable_fields returns; each is stringified before matching.
#
# @rbs (String) -> Array[Hash[Symbol, untyped]]
def search(query)
  literal = Regexp.escape(query.to_s)
  matcher = Regexp.new(literal, Regexp::IGNORECASE)

  items.select do |entry|
    searchable_fields(entry).any? { |text| matcher.match?(text.to_s) }
  end
end

# @rbs (?Hash[Symbol, untyped]) -> Hash[Symbol, untyped]
def as_json(_options = {})
hash = {} #: Hash[Symbol, untyped]
Expand Down Expand Up @@ -157,6 +208,14 @@ def parse(source, options = {})
new source, options
end

# Reports whether +source+ can be parsed without error.
#
# Runs parse with the given +options+ and returns true when it completes.
# Any StandardError raised along the way is swallowed and reported as false;
# the parsed result itself is discarded.
#
# @rbs (untyped, ?Hash[Symbol, untyped]) -> bool
def valid?(source, options = {})
  begin
    parse(source, options)
  rescue StandardError
    return false
  end

  true
end

# Fetch and parse a feed from a URL
# Returns nil if conditional GET returns 304 Not Modified
#
Expand Down Expand Up @@ -412,6 +471,18 @@ def clean_tag(tag)
tag.to_s.tr(":", "_").intern
end

# Tests whether +category+ contains +query+ as a substring.
#
# +category+ may be a single value or an Array of values; each value is
# stringified and downcased before the substring check. +query+ is used
# as given, so callers are expected to pass it already downcased.
#
# @rbs (untyped, String) -> bool
def category_matches_query?(category, query)
  candidates = category.is_a?(Array) ? category : [category]

  candidates.any? { |label| label.to_s.downcase.include?(query) }
end

# Collects the values of an item's :title, :description, :summary and
# :content keys, in that order. Missing keys yield whatever item[key]
# returns for them (nil for a plain Hash).
#
# @rbs (Hash[Symbol, untyped]) -> Array[untyped]
def searchable_fields(item)
  %i[title description summary content].map { |key| item[key] }
end

# @rbs (untyped) -> untyped
def serialize_value(value)
case value
Expand Down Expand Up @@ -545,5 +616,5 @@ def unescape(content)
end
end

class SimpleRSSError < StandardError
class SimpleRSSError < StandardError # rubocop:disable Style/OneClassPerFile
end
25 changes: 25 additions & 0 deletions test/base/fetch_integration_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
require "test_helper"

# Live-network integration tests for SimpleRSS.fetch.
# Each test omits itself unless the NETWORK_TESTS environment variable is set
# (for example NETWORK_TESTS=1).
class FetchIntegrationTest < Test::Unit::TestCase
  BBC_NEWS_FEED = "https://feeds.bbci.co.uk/news/rss.xml"
  GITHUB_COMMITS_FEED = "https://github.com/cardmagic/simple-rss/commits/master.atom"
  FETCH_TIMEOUT = 10

  # A fetched feed should come back as a SimpleRSS with a title and items.
  def test_fetch_real_feed
    omit unless ENV["NETWORK_TESTS"]
    feed = fetch_live(BBC_NEWS_FEED)
    assert_kind_of SimpleRSS, feed
    assert feed.title
    assert feed.items.any?
  end

  # The fetched feed should expose an ETag or a Last-Modified value.
  def test_fetch_stores_caching_headers
    omit unless ENV["NETWORK_TESTS"]
    feed = fetch_live(BBC_NEWS_FEED)
    assert(feed.etag || feed.last_modified, "Expected ETag or Last-Modified header")
  end

  # Fetching the GitHub commits feed should still produce a SimpleRSS.
  def test_fetch_follows_redirect
    omit unless ENV["NETWORK_TESTS"]
    feed = fetch_live(GITHUB_COMMITS_FEED)
    assert_kind_of SimpleRSS, feed
  end

  private

  # Fetches +url+ with the timeout shared by every test in this class.
  def fetch_live(url)
    SimpleRSS.fetch(url, timeout: FETCH_TIMEOUT)
  end
end
27 changes: 0 additions & 27 deletions test/base/fetch_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,30 +88,3 @@ def test_fetch_accepts_follow_redirects_option
end
end
end

# Integration tests that require network access.
# Every test omits itself unless the NETWORK_TESTS environment variable is set
# (run with NETWORK_TESTS=1 to enable them).
class FetchIntegrationTest < Test::Unit::TestCase
# Fetches a live feed and checks it parses into a SimpleRSS with a title and
# at least one item.
def test_fetch_real_feed
omit unless ENV["NETWORK_TESTS"]
# Use a reliable, long-lived RSS feed
rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10)
assert_kind_of SimpleRSS, rss
assert rss.title
assert rss.items.any?
end

# Checks that the fetched feed exposes an ETag or a Last-Modified value.
def test_fetch_stores_caching_headers
omit unless ENV["NETWORK_TESTS"]
rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10)
# At least one of these should be present for most feeds
assert(rss.etag || rss.last_modified, "Expected ETag or Last-Modified header")
end

# Checks that fetching the GitHub commits Atom feed still yields a SimpleRSS.
def test_fetch_follows_redirect
omit unless ENV["NETWORK_TESTS"]
# NOTE(review): this is a github.com commits Atom URL, not a raw URL; it is
# presumably expected to answer with a redirect - confirm. The test only
# asserts the result type, not that a redirect actually happened.
rss = SimpleRSS.fetch("https://github.com/cardmagic/simple-rss/commits/master.atom", timeout: 10)
assert_kind_of SimpleRSS, rss
end
end
187 changes: 187 additions & 0 deletions test/base/filtering_and_validation_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
require "test_helper"

# Tests for the feed classification, validation and item-filtering helpers:
# feed_type, SimpleRSS.valid?, #valid?, items_since, items_by_category and
# search.
class FilteringAndValidationTest < Test::Unit::TestCase
# Parses the three fixture feeds from ../data (relative to this file) that the
# feed_type and items_since tests rely on.
# NOTE(review): open is called without a block and the returned handles are
# never closed in this method - consider File.read or a block form.
def setup
@rss09 = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/rss09.rdf")
@rss20 = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/rss20.xml")
@atom = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/atom.xml")
end

# Each fixture should be classified as its format.
# Note the rss09.rdf fixture is expected to report :rss1, not :rss09 -
# presumably because the file has an rdf:RDF root; verify against the fixture.
def test_feed_type_for_known_formats
assert_equal :rss1, @rss09.feed_type
assert_equal :rss2, @rss20.feed_type
assert_equal :atom, @atom.feed_type
end

# A <feed> root with no xmlns attribute should be classified as :unknown.
def test_feed_type_unknown_for_non_standard_feed
feed = SimpleRSS.parse <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<feed>
<title>Unknown Feed</title>
<entry>
<title>Post</title>
</entry>
</feed>
XML

assert_equal :unknown, feed.feed_type
end

# SimpleRSS.valid? should return true for an RSS 2.0 document with a title,
# a link and one item.
def test_class_valid_returns_true_for_well_formed_feed
xml = <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Valid Feed</title>
<link>http://example.com</link>
<item>
<title>Post</title>
</item>
</channel>
</rss>
XML

assert_equal true, SimpleRSS.valid?(xml)
end

# SimpleRSS.valid? should return false for the not-rss.xml fixture.
# NOTE(review): the handle returned by open is read but never closed.
def test_class_valid_returns_false_for_invalid_feed
invalid_xml = open(File.dirname(__FILE__) + "/../data/not-rss.xml").read

assert_equal false, SimpleRSS.valid?(invalid_xml)
end

# SimpleRSS.valid? should return false, rather than raise, when the source's
# #read raises an IOError.
def test_class_valid_returns_false_when_source_read_fails
unreadable_source = Object.new
unreadable_source.define_singleton_method(:read) do
raise IOError, "stream closed"
end

assert_equal false, SimpleRSS.valid?(unreadable_source)
end

# #valid? should be true only for a feed that has items and channel metadata:
# a feed with a title and an item is valid, a feed with neither title nor
# link is invalid, and a feed with a title but no items is invalid.
def test_instance_valid_requires_metadata_and_items
valid_feed = SimpleRSS.parse <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Valid Feed</title>
<item>
<title>Post</title>
</item>
</channel>
</rss>
XML

invalid_feed = SimpleRSS.parse <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<description>No title and no link</description>
<item>
<description>Body only</description>
</item>
</channel>
</rss>
XML

empty_feed = SimpleRSS.parse <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>No Items</title>
</channel>
</rss>
XML

assert_equal true, valid_feed.valid?
assert_equal false, invalid_feed.valid?
assert_equal false, empty_feed.valid?
end

# items_since should return only items dated after the threshold; the rss20
# fixture is expected to contain exactly one such item.
def test_items_since_filters_by_date
threshold = Time.parse("Wed Aug 24 13:30:00 UTC 2005")

filtered = @rss20.items_since(threshold)

assert_equal 1, filtered.size
assert_operator filtered.first[:pubDate], :>, threshold
end

# items_by_category should match case-insensitive substrings, both when an
# item has a single category and when the feed is parsed with
# array_tags: [:category] and an item has several category elements.
def test_items_by_category_matches_strings_and_arrays
feed_with_string_category = SimpleRSS.parse <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>String Category Feed</title>
<item>
<title>Ruby News</title>
<category>Technology</category>
</item>
<item>
<title>Sports News</title>
<category>Sports</category>
</item>
</channel>
</rss>
XML

feed_with_array_category = SimpleRSS.parse(
<<~XML,
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Array Category Feed</title>
<item>
<title>Dev Update</title>
<category>Technology</category>
<category>Ruby</category>
</item>
</channel>
</rss>
XML
array_tags: [:category]
)

# "tech" is a lowercase prefix of "Technology"; "ruby" differs in case from "Ruby".
string_results = feed_with_string_category.items_by_category("tech")
array_results = feed_with_array_category.items_by_category("ruby")

assert_equal 1, string_results.size
assert_equal "Ruby News", string_results.first[:title]
assert_equal 1, array_results.size
assert_equal "Dev Update", array_results.first[:title]
end

# search should match case-insensitively across an item's title, description,
# summary and content.
def test_search_matches_title_description_summary_and_content
feed = SimpleRSS.parse <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Search Feed</title>
<item>
<title>Ruby Patterns</title>
<description>Language design</description>
</item>
<item>
<title>Other Topic</title>
<description>Talks about BREAKING updates</description>
</item>
<item>
<title>Third Topic</title>
<summary>A quick ruby summary</summary>
</item>
<item>
<title>Fourth Topic</title>
<content>Deep dive into Ruby internals</content>
</item>
</channel>
</rss>
XML

ruby_results = feed.search("ruby")
breaking_results = feed.search("breaking")

# "ruby" appears in one title, one summary and one content element.
assert_equal 3, ruby_results.size
assert_equal 1, breaking_results.size
assert_equal "Other Topic", breaking_results.first[:title]
end
end
Loading