diff --git a/CHANGELOG.md b/CHANGELOG.md index 7459ebd..0d290d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Failed job trend chart — "Failures — last 12 hours" sparkline added to the Solid Queue dashboard card below the throughput sparkline; bars render in the danger color to make failure spikes immediately visible - Error frequency report — `GET /failed_jobs/errors` groups all failed jobs by exception class and message prefix, showing count and an expandable sample backtrace per group; links through to a filtered failed jobs list via `?error_class=`; the failed jobs index gains an "Error Summary" button and shows an active-filter breadcrumb with a clear link ## [1.0.0] - 2026-05-27 diff --git a/app/assets/stylesheets/solid_stack_web/_07_dashboard.css b/app/assets/stylesheets/solid_stack_web/_07_dashboard.css index f37fa29..bc24208 100644 --- a/app/assets/stylesheets/solid_stack_web/_07_dashboard.css +++ b/app/assets/stylesheets/solid_stack_web/_07_dashboard.css @@ -86,6 +86,10 @@ a.sqw-inline-stat:hover { opacity: 0.7; text-decoration: none; } color: var(--primary); } +.sqw-sparkline-wrap--failures { + color: var(--danger); +} + .sqw-sparkline-label { display: block; font-size: 10px; diff --git a/app/controllers/solid_stack_web/dashboard_controller.rb b/app/controllers/solid_stack_web/dashboard_controller.rb index e91fc62..90e1947 100644 --- a/app/controllers/solid_stack_web/dashboard_controller.rb +++ b/app/controllers/solid_stack_web/dashboard_controller.rb @@ -5,6 +5,7 @@ def index @cache_stats = CacheStats.new.to_h @cable_stats = CableStats.new.to_h @throughput = ThroughputSparkline.new + @failures = FailedJobSparkline.new end end end diff --git a/app/helpers/solid_stack_web/application_helper.rb b/app/helpers/solid_stack_web/application_helper.rb index 20b7c36..48ecb99 100644 --- a/app/helpers/solid_stack_web/application_helper.rb +++ 
b/app/helpers/solid_stack_web/application_helper.rb
@@ -87,6 +87,22 @@ def queue_depth_sparkline_svg(sparkline)
       end
     end
 
+    # Builds the "failed jobs" sparkline SVG through build_sparkline_svg.
+    # The block supplies the tooltip for one bucket from its count and index;
+    # index 0 is the oldest hour, so HOURS - i is how many hours back it starts.
+    def failed_job_sparkline_svg(sparkline)
+      hours = SolidStackWeb::FailedJobSparkline::HOURS
+      build_sparkline_svg(sparkline, aria_label: "Failed jobs over the last #{hours} hours") do |count, i|
+        failures = "#{count} #{count == 1 ? "failure" : "failures"}"
+        hours_ago = hours - i
+        if hours_ago == 1
+          "#{failures} in the last hour"
+        else
+          "#{failures} (#{hours_ago}h–#{hours_ago - 1}h ago)"
+        end
+      end
+    end
+
     private
 
     def build_sparkline_svg(sparkline, css_class: "sqw-sparkline", aria_label: nil, &tooltip_text)
diff --git a/app/models/solid_stack_web/failed_job_sparkline.rb b/app/models/solid_stack_web/failed_job_sparkline.rb
new file mode 100644
index 0000000..617df57
--- /dev/null
+++ b/app/models/solid_stack_web/failed_job_sparkline.rb
@@ -0,0 +1,30 @@
+module SolidStackWeb
+  # Hourly counts of Solid Queue failed executions over the trailing HOURS
+  # hours, oldest bucket first, for the dashboard "Failures" sparkline.
+  class FailedJobSparkline
+    HOURS = 12
+
+    # Returns an Array of HOURS Integers; element i counts failures whose
+    # created_at lies in the i-th hour after (now - HOURS hours). Memoized,
+    # so the window is fixed at the first call on this instance.
+    def buckets
+      @buckets ||= begin
+        now = Time.current
+        origin = now - HOURS.hours
+        failed_at = ::SolidQueue::FailedExecution.where(created_at: origin..now).pluck(:created_at)
+
+        failed_at.each_with_object(Array.new(HOURS, 0)) do |t, counts|
+          # The query range includes `now`, which sits exactly on the end of
+          # the last bucket; clamp so that row is counted instead of dropped.
+          index = ((t - origin) / 1.hour.to_i).floor.clamp(0, HOURS - 1)
+          counts[index] += 1
+        end
+      end
+    end
+
+    # Largest bucket count (0 when nothing failed in the window).
+    def max
+      buckets.max || 0
+    end
+  end
+end
diff --git a/app/views/solid_stack_web/dashboard/index.html.erb b/app/views/solid_stack_web/dashboard/index.html.erb
index 79438fa..f6bbc99 100644
--- a/app/views/solid_stack_web/dashboard/index.html.erb
+++ b/app/views/solid_stack_web/dashboard/index.html.erb
@@ -68,6 +68,18 @@ now +
+ Failures — last 12 hours +
+ <%= failed_job_sparkline_svg(@failures) %> + +
+
+ 12h ago + 6h ago + now +
+
diff --git a/spec/models/solid_stack_web/failed_job_sparkline_spec.rb b/spec/models/solid_stack_web/failed_job_sparkline_spec.rb
new file mode 100644
index 0000000..bce6fa1
--- /dev/null
+++ b/spec/models/solid_stack_web/failed_job_sparkline_spec.rb
@@ -0,0 +1,60 @@
+require "rails_helper"
+
+RSpec.describe SolidStackWeb::FailedJobSparkline do
+  subject(:sparkline) { described_class.new }
+
+  # Persists a job and a failed execution stamped with +created_at+. The
+  # job's after-create :prepare_for_execution callback is switched off for the
+  # insert and re-registered afterwards, even if creation raises.
+  def create_failed(created_at:)
+    SolidQueue::Job.skip_callback(:create, :after, :prepare_for_execution)
+    failing_job = SolidQueue::Job.create!(
+      class_name: "FailingJob",
+      queue_name: "default",
+      priority: 0,
+      arguments: {}
+    )
+    error_payload = { exception_class: "RuntimeError", message: "boom", backtrace: [] }
+    SolidQueue::FailedExecution.create!(job: failing_job, error: error_payload, created_at: created_at)
+  ensure
+    SolidQueue::Job.set_callback(:create, :after, :prepare_for_execution)
+  end
+
+  describe "#buckets" do
+    it "returns 12 buckets" do
+      expect(sparkline.buckets.size).to eq(12)
+    end
+
+    it "returns all zeros when there are no failed executions" do
+      expect(sparkline.buckets).to all(eq(0))
+    end
+
+    it "counts a failure within the window in the correct bucket" do
+      create_failed(created_at: 2.hours.ago)
+
+      counts = sparkline.buckets
+
+      expect(counts[9]).to eq(1)
+      expect(counts.sum).to eq(1)
+    end
+
+    it "excludes failures outside the 12-hour window" do
+      create_failed(created_at: 13.hours.ago)
+
+      expect(sparkline.buckets).to all(eq(0))
+    end
+  end
+
+  describe "#max" do
+    it "returns 0 when there are no failures" do
+      expect(sparkline.max).to eq(0)
+    end
+
+    it "returns the highest bucket count" do
+      3.times { create_failed(created_at: 1.hour.ago) }
+      create_failed(created_at: 5.hours.ago)
+
+      expect(sparkline.max).to eq(3)
+    end
+  end
+end