Skip to content

Commit 76570ba

Browse files
committed
Make empty extraction a 422 failure and harden bookmarklet routing
1 parent 74867f3 commit 76570ba

8 files changed

Lines changed: 310 additions & 8 deletions

File tree

app/web/feeds/responder.rb

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def emit_response_result(target_kind:, identifier:, feed_request:, resolved_sour
6161
# @param result [Html2rss::Web::Feeds::Contracts::RenderResult]
6262
# @return [String]
6363
def write_response(response:, representation:, result:)
64-
response.status = result.status == :error ? 500 : 200
64+
response.status = status_for(result.status)
6565
response['Content-Type'] = FeedResponseFormat.content_type(representation)
6666
apply_cache_headers(response, result)
6767
::Html2rss::Web::HttpCache.vary(response, 'Accept')
@@ -92,7 +92,8 @@ def render_result(result, representation)
9292
# @param result [Html2rss::Web::Feeds::Contracts::RenderResult]
9393
# @return [void]
9494
def emit_result(target_kind:, identifier:, resolved_source:, result:)
95-
return emit_success(target_kind:, identifier:, resolved_source:) unless result.status == :error
95+
return emit_success(target_kind:, identifier:, resolved_source:) if result.status == :ok
96+
return emit_empty(target_kind:, identifier:, resolved_source:) if result.status == :empty
9697

9798
emit_failure(
9899
target_kind:,
@@ -117,6 +118,21 @@ def emit_success(target_kind:, identifier:, resolved_source:)
117118
Observability.emit(event_name: 'feed.render', outcome: 'success', details:, level: :info)
118119
end
119120

121+
# @param target_kind [Symbol]
122+
# @param identifier [String]
123+
# @param resolved_source [Html2rss::Web::Feeds::Contracts::ResolvedSource]
124+
# @return [void]
125+
def emit_empty(target_kind:, identifier:, resolved_source:)
126+
details = {
127+
strategy: resolved_source.generator_input[:strategy],
128+
url: resolved_source.generator_input.dig(:channel, :url),
129+
reason: 'content_extraction_empty'
130+
}
131+
details[:feed_name] = identifier if target_kind == :static
132+
133+
Observability.emit(event_name: 'feed.render', outcome: 'failure', details:, level: :warn)
134+
end
135+
120136
# @param target_kind [Symbol]
121137
# @param identifier [String]
122138
# @param error [StandardError]
@@ -127,6 +143,15 @@ def emit_failure(target_kind:, identifier:, error:)
127143

128144
Observability.emit(event_name: 'feed.render', outcome: 'failure', details:, level: :warn)
129145
end
146+
147+
# @param status [Symbol]
148+
# @return [Integer]
149+
def status_for(status)
150+
return 200 if status == :ok
151+
return 422 if status == :empty
152+
153+
500
154+
end
130155
end
131156
end
132157
end

frontend/src/__tests__/App.test.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ describe('App', () => {
559559
render(<App />);
560560

561561
const bookmarklet = screen.getByRole('link', { name: 'Bookmarklet' });
562-
expect(bookmarklet.getAttribute('href')).toContain('/?url=');
562+
expect(bookmarklet.getAttribute('href')).toContain('/create?url=');
563563
expect(bookmarklet.getAttribute('href')).not.toContain('%27+encodeURIComponent');
564564
});
565565

frontend/src/__tests__/useFeedConversion.test.ts

Lines changed: 141 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,13 @@ describe('useFeedConversion', () => {
172172

173173
const { result } = renderHook(() => useFeedConversion());
174174
let thrownError:
175-
| (Error & { manualRetryStrategy?: string; retryAction?: string; kind?: string; code?: string; status?: number })
175+
| (Error & {
176+
manualRetryStrategy?: string;
177+
retryAction?: string;
178+
kind?: string;
179+
code?: string;
180+
status?: number;
181+
})
176182
| undefined;
177183

178184
await act(async () => {
@@ -509,6 +515,119 @@ describe('useFeedConversion', () => {
509515
});
510516
});
511517

518+
it('marks preview_unavailable for 422 extraction-warning responses', async () => {
519+
const createdFeed = {
520+
id: 'test-id',
521+
name: 'Test Feed',
522+
url: 'https://example.com',
523+
strategy: 'faraday',
524+
feed_token: 'test-token',
525+
public_url: 'https://example.com/feed',
526+
json_public_url: 'https://example.com/feed.json',
527+
created_at: '2024-01-01T00:00:00Z',
528+
updated_at: '2024-01-01T00:00:00Z',
529+
};
530+
531+
fetchMock
532+
.mockResolvedValueOnce(
533+
new Response(
534+
JSON.stringify({
535+
success: true,
536+
data: { feed: createdFeed },
537+
}),
538+
{
539+
status: 201,
540+
headers: { 'Content-Type': 'application/json' },
541+
}
542+
)
543+
)
544+
.mockResolvedValueOnce(
545+
new Response(
546+
JSON.stringify({
547+
version: 'https://jsonfeed.org/version/1.1',
548+
title: 'Content Extraction Issue',
549+
items: [],
550+
}),
551+
{
552+
status: 422,
553+
headers: { 'Content-Type': 'application/feed+json' },
554+
}
555+
)
556+
);
557+
558+
const { result } = renderHook(() => useFeedConversion());
559+
560+
await act(async () => {
561+
await result.current.convertFeed('https://example.com', 'faraday', 'testtoken');
562+
});
563+
564+
await waitFor(() => {
565+
expect(result.current.result?.readinessPhase).toBe('preview_unavailable');
566+
expect(result.current.result?.preview.error).toBe('Preview unavailable right now.');
567+
});
568+
});
569+
570+
it('does not classify extraction warning payloads as feed_ready even on 200 responses', async () => {
571+
const createdFeed = {
572+
id: 'test-id',
573+
name: 'Test Feed',
574+
url: 'https://example.com',
575+
strategy: 'faraday',
576+
feed_token: 'test-token',
577+
public_url: 'https://example.com/feed',
578+
json_public_url: 'https://example.com/feed.json',
579+
created_at: '2024-01-01T00:00:00Z',
580+
updated_at: '2024-01-01T00:00:00Z',
581+
};
582+
583+
fetchMock
584+
.mockResolvedValueOnce(
585+
new Response(
586+
JSON.stringify({
587+
success: true,
588+
data: { feed: createdFeed },
589+
}),
590+
{
591+
status: 201,
592+
headers: { 'Content-Type': 'application/json' },
593+
}
594+
)
595+
)
596+
.mockResolvedValueOnce(
597+
new Response(
598+
JSON.stringify({
599+
version: 'https://jsonfeed.org/version/1.1',
600+
title: 'Content Extraction Issue',
601+
description: 'We could not extract entries from https://example.com right now.',
602+
items: [
603+
{
604+
title: 'Preview unavailable for this source',
605+
content_text: 'No entries were extracted from https://example.com.',
606+
},
607+
],
608+
}),
609+
{
610+
status: 200,
611+
headers: { 'Content-Type': 'application/feed+json' },
612+
}
613+
)
614+
);
615+
616+
const { result } = renderHook(() => useFeedConversion());
617+
618+
await act(async () => {
619+
await result.current.convertFeed('https://example.com', 'faraday', 'testtoken');
620+
});
621+
622+
await waitFor(() => {
623+
expect(result.current.result?.readinessPhase).toBe('preview_unavailable');
624+
expect(result.current.result?.preview.error).toBe(
625+
'No entries could be extracted from this source right now.'
626+
);
627+
expect(result.current.result?.preview.items).toEqual([]);
628+
});
629+
});
630+
512631
it('normalizes hostname-only input before creating a feed', async () => {
513632
const createdFeed = {
514633
id: 'test-id',
@@ -643,7 +762,13 @@ describe('useFeedConversion', () => {
643762

644763
const { result } = renderHook(() => useFeedConversion());
645764
let thrownError:
646-
| (Error & { manualRetryStrategy?: string; retryAction?: string; kind?: string; code?: string; status?: number })
765+
| (Error & {
766+
manualRetryStrategy?: string;
767+
retryAction?: string;
768+
kind?: string;
769+
code?: string;
770+
status?: number;
771+
})
647772
| undefined;
648773

649774
await act(async () => {
@@ -687,7 +812,13 @@ describe('useFeedConversion', () => {
687812

688813
const { result } = renderHook(() => useFeedConversion());
689814
let thrownError:
690-
| (Error & { manualRetryStrategy?: string; retryAction?: string; kind?: string; code?: string; status?: number })
815+
| (Error & {
816+
manualRetryStrategy?: string;
817+
retryAction?: string;
818+
kind?: string;
819+
code?: string;
820+
status?: number;
821+
})
691822
| undefined;
692823

693824
await act(async () => {
@@ -813,7 +944,13 @@ describe('useFeedConversion', () => {
813944
const { result } = renderHook(() => useFeedConversion());
814945

815946
let thrownError:
816-
| (Error & { manualRetryStrategy?: string; retryAction?: string; kind?: string; code?: string; status?: number })
947+
| (Error & {
948+
manualRetryStrategy?: string;
949+
retryAction?: string;
950+
kind?: string;
951+
code?: string;
952+
status?: number;
953+
})
817954
| undefined;
818955
await act(async () => {
819956
try {

frontend/src/components/Bookmarklet.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ export function Bookmarklet() {
22
const bookmarkletHref = (() => {
33
if (globalThis.window === undefined) return '#';
44

5-
const targetPrefix = `${new URL('/', globalThis.location.href).toString()}?url=`;
5+
const targetPrefix = new URL('/create?url=', globalThis.location.href).toString();
66

77
return `javascript:window.location.assign(${JSON.stringify(targetPrefix)}+encodeURIComponent(window.location.href));`;
88
})();

frontend/src/hooks/useFeedConversion.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ interface JsonFeedItem {
1515
}
1616

1717
interface JsonFeedResponse {
18+
title?: string;
19+
description?: string;
1820
items?: JsonFeedItem[];
1921
}
2022

@@ -242,6 +244,18 @@ async function loadPreview(feed: FeedRecord, signal?: AbortSignal): Promise<Prev
242244

243245
try {
244246
const payload = (await response.json()) as JsonFeedResponse;
247+
if (isExtractionWarningPayload(payload)) {
248+
return {
249+
preview: {
250+
items: [],
251+
error: 'No entries could be extracted from this source right now.',
252+
isLoading: false,
253+
},
254+
readinessPhase: 'preview_unavailable',
255+
shouldRetry: false,
256+
};
257+
}
258+
245259
const items =
246260
payload.items
247261
?.map((item) => normalizePreviewItem(item))
@@ -746,3 +760,17 @@ function decodeHtmlEntities(value: string): string {
746760
textarea.innerHTML = value;
747761
return textarea.value;
748762
}
763+
764+
function isExtractionWarningPayload(payload: JsonFeedResponse): boolean {
765+
const title = normalizePreviewText(payload.title)?.toLowerCase();
766+
const description = normalizePreviewText(payload.description)?.toLowerCase();
767+
const firstItemText = normalizePreviewText(payload.items?.[0]?.content_text)?.toLowerCase();
768+
const firstItemTitle = normalizePreviewText(payload.items?.[0]?.title)?.toLowerCase();
769+
770+
return Boolean(
771+
title?.includes('content extraction issue') ||
772+
description?.includes('could not extract entries') ||
773+
firstItemText?.includes('no entries were extracted from') ||
774+
firstItemTitle === 'preview unavailable for this source'
775+
);
776+
}

spec/html2rss/web/api/v1_spec.rb

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,17 @@ def service_error_result
3232
)
3333
end
3434

35+
def empty_result
36+
Html2rss::Web::Feeds::Contracts::RenderResult.new(
37+
status: :empty,
38+
payload: nil,
39+
message: nil,
40+
ttl_seconds: 600,
41+
cache_key: 'feed_result:empty',
42+
error_message: nil
43+
)
44+
end
45+
3546
def json_feed_service_error_tuple(token)
3647
allow(Html2rss::Web::Feeds::Service).to receive(:call).and_return(service_error_result)
3748
get "/api/v1/feeds/#{token}.json"
@@ -433,6 +444,32 @@ def expected_featured_feeds
433444
expect([status, content_type, title]).to eq([500, 'application/feed+json', 'Error'])
434445
expect(cache_control).to include('no-store')
435446
end
447+
448+
it 'returns 422 for empty extraction feeds in xml representation', :aggregate_failures, openapi: false do
449+
token = Html2rss::Web::Auth.generate_feed_token('admin', "#{feed_url}/empty-xml", strategy: 'faraday')
450+
allow(Html2rss::Web::Feeds::Service).to receive(:call).and_return(empty_result)
451+
allow(Html2rss::Web::Feeds::RssRenderer).to receive(:call).and_return('<rss version="2.0"></rss>')
452+
453+
get "/api/v1/feeds/#{token}.xml"
454+
455+
expect(last_response.status).to eq(422)
456+
expect(last_response.content_type).to include('application/xml')
457+
expect(last_response.headers['Cache-Control']).to include('max-age=600')
458+
end
459+
460+
it 'returns 422 for empty extraction feeds in json feed representation', :aggregate_failures, openapi: false do
461+
token = Html2rss::Web::Auth.generate_feed_token('admin', "#{feed_url}/empty-json", strategy: 'faraday')
462+
allow(Html2rss::Web::Feeds::Service).to receive(:call).and_return(empty_result)
463+
allow(Html2rss::Web::Feeds::JsonRenderer).to receive(:call)
464+
.and_return('{"version":"https://jsonfeed.org/version/1.1","title":"Content Extraction Issue","items":[]}')
465+
466+
get "/api/v1/feeds/#{token}.json"
467+
468+
expect(last_response.status).to eq(422)
469+
expect(last_response.content_type).to eq('application/feed+json')
470+
expect(last_response.headers['Cache-Control']).to include('max-age=600')
471+
expect(JSON.parse(last_response.body).fetch('title')).to eq('Content Extraction Issue')
472+
end
436473
end
437474

438475
describe 'POST /api/v1/feeds', openapi: {

spec/html2rss/web/app_integration_spec.rb

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,18 @@
218218
)
219219
end
220220

221+
it 'returns 422 when extraction yields an empty feed warning', :aggregate_failures do
222+
unique_empty_url = "#{feed_url}/empty-warning"
223+
empty_token = Html2rss::Web::Auth.generate_feed_token(account[:username], unique_empty_url, strategy: 'faraday')
224+
stub_empty_feed_warning_result
225+
226+
get "/api/v1/feeds/#{empty_token}.json"
227+
228+
expect(last_response.status).to eq(422)
229+
expect(last_response.headers['Content-Type']).to eq('application/feed+json')
230+
expect(JSON.parse(last_response.body).fetch('title')).to eq('Content Extraction Issue')
231+
end
232+
221233
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
222234
def stub_escaped_feed_token(raw_token:, encoded_token:)
223235
escaped_token_payload = instance_double(
@@ -233,6 +245,23 @@ def stub_escaped_feed_token(raw_token:, encoded_token:)
233245
.to receive(:validate_and_decode).with(raw_token, feed_url, anything)
234246
.and_return(escaped_token_payload)
235247
end
248+
249+
# @return [void]
250+
def stub_empty_feed_warning_result
251+
Html2rss::Web::Feeds::Cache.clear!(reason: 'spec')
252+
allow(Html2rss::Web::Feeds::Service).to receive(:call).and_return(
253+
Html2rss::Web::Feeds::Contracts::RenderResult.new(
254+
status: :empty,
255+
payload: nil,
256+
message: nil,
257+
ttl_seconds: 600,
258+
cache_key: 'feed_result:empty',
259+
error_message: nil
260+
)
261+
)
262+
allow(Html2rss::Web::Feeds::JsonRenderer).to receive(:call)
263+
.and_return('{"version":"https://jsonfeed.org/version/1.1","title":"Content Extraction Issue","items":[]}')
264+
end
236265
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
237266
end
238267

0 commit comments

Comments
 (0)