diff --git a/internal/cli/commands.go b/internal/cli/commands.go index 1ab07f3..305e8e1 100644 --- a/internal/cli/commands.go +++ b/internal/cli/commands.go @@ -228,16 +228,10 @@ func newArticlesCommand() *cobra.Command { RunE: func(cmd *cobra.Command, args []string) error { showAll := viper.GetBool("all") - sinceStr := viper.GetString("since") - beforeStr := viper.GetString("before") - - since, err := parseDateFilter(sinceStr) - if err != nil { - return err - } - before, err := parseDateFilter(beforeStr) + since, before, err := parseDateRange(viper.GetString("since"), viper.GetString("before")) if err != nil { - return err + printError(err) + return markError(err) } return withDatabase(cmd, func(db *storage.Database) error { @@ -499,6 +493,21 @@ func parseDateFilter(dateStr string) (*time.Time, error) { return &parsed, nil } +func parseDateRange(sinceStr, beforeStr string) (*time.Time, *time.Time, error) { + since, err := parseDateFilter(sinceStr) + if err != nil { + return nil, nil, err + } + before, err := parseDateFilter(beforeStr) + if err != nil { + return nil, nil, err + } + if since != nil && before != nil && since.After(*before) { + return nil, nil, fmt.Errorf("--since (%s) must be on or before --before (%s)", sinceStr, beforeStr) + } + return since, before, nil +} + func confirm(prompt string) (bool, error) { reader := bufio.NewReader(os.Stdin) fmt.Printf("%s [y/N]: ", prompt) diff --git a/internal/cli/commands_test.go b/internal/cli/commands_test.go new file mode 100644 index 0000000..f625d4d --- /dev/null +++ b/internal/cli/commands_test.go @@ -0,0 +1,91 @@ +package cli + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseDateFilter(t *testing.T) { + t.Run("empty string returns nil", func(t *testing.T) { + got, err := parseDateFilter("") + require.NoError(t, err) + assert.Nil(t, got) + }) + + t.Run("valid date", func(t *testing.T) { + got, err := parseDateFilter("2024-01-15") + require.NoError(t, err) + require.NotNil(t, got) + assert.Equal(t, time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC), *got) + }) + + t.Run("invalid format rejected", func(t *testing.T) { + _, err := parseDateFilter("01/15/2024") + require.Error(t, err) + assert.Contains(t, err.Error(), "expected YYYY-MM-DD") + }) + + t.Run("invalid month rejected", func(t *testing.T) { + _, err := parseDateFilter("2024-13-01") + require.Error(t, err) + }) +} + +func TestParseDateRange(t *testing.T) { + t.Run("both empty returns nils", func(t *testing.T) { + since, before, err := parseDateRange("", "") + require.NoError(t, err) + assert.Nil(t, since) + assert.Nil(t, before) + }) + + t.Run("only since", func(t *testing.T) { + since, before, err := parseDateRange("2024-01-15", "") + require.NoError(t, err) + require.NotNil(t, since) + assert.Nil(t, before) + assert.Equal(t, time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC), *since) + }) + + t.Run("only before", func(t *testing.T) { + since, before, err := parseDateRange("", "2024-02-01") + require.NoError(t, err) + assert.Nil(t, since) + require.NotNil(t, before) + assert.Equal(t, time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC), *before) + }) + + t.Run("since equals before is allowed", func(t *testing.T) { + since, before, err := parseDateRange("2024-01-15", "2024-01-15") + require.NoError(t, err) + require.NotNil(t, since) + require.NotNil(t, before) + }) + + t.Run("since before before is allowed", func(t *testing.T) { + _, _, err := parseDateRange("2024-01-01", "2024-02-01") + require.NoError(t, err) + }) + + t.Run("since after before is rejected", func(t *testing.T) { + _, _, err := parseDateRange("2024-02-01", "2024-01-01") + require.Error(t, err) + assert.Contains(t, err.Error(), "--since") + assert.Contains(t, err.Error(), "--before") + }) + + t.Run("invalid since surfaces error", func(t *testing.T) { + _, _, err := parseDateRange("bogus", "2024-01-01") + require.Error(t, err) + assert.Contains(t, err.Error(), "expected YYYY-MM-DD") + }) + + t.Run("invalid before surfaces error", func(t *testing.T) { + _, _, err := parseDateRange("2024-01-01", "bogus") + require.Error(t, err) + assert.Contains(t, err.Error(), "expected YYYY-MM-DD") + }) +} diff --git a/internal/storage/database.go b/internal/storage/database.go index 3806033..9b3ab99 100644 --- a/internal/storage/database.go +++ b/internal/storage/database.go @@ -20,7 +20,18 @@ import ( "github.com/JulienTant/blogwatcher-cli/internal/storage/migrations" ) -const sqliteTimeLayout = time.RFC3339Nano +const ( + // sqliteTimeLayout is the layout used when parsing timestamps read from + // the database. RFC3339Nano accepts both fractional and non-fractional + // forms so legacy rows continue to round-trip. + sqliteTimeLayout = time.RFC3339Nano + + // sqliteWriteLayout is the canonical layout used when persisting + // timestamps. Fixed-width, UTC, no fractional seconds — required so the + // --since/--before lexicographic comparison in ListArticles is stable + // regardless of the source precision. + sqliteWriteLayout = "2006-01-02T15:04:05Z" +) func DefaultDBPath() (string, error) { home, err := os.UserHomeDir() @@ -187,7 +198,7 @@ func (db *Database) UpdateBlog(ctx context.Context, blog model.Blog) error { func (db *Database) UpdateBlogLastScanned(ctx context.Context, id int64, lastScanned time.Time) error { _, err := sq.Update("blogs"). - Set("last_scanned", lastScanned.Format(sqliteTimeLayout)). + Set("last_scanned", lastScanned.UTC().Format(sqliteWriteLayout)). Where(sq.Eq{"id": id}). RunWith(db.conn). ExecContext(ctx) @@ -377,10 +388,10 @@ func (db *Database) ListArticles(ctx context.Context, unreadOnly bool, blogID *i query = query.Where("EXISTS (SELECT 1 FROM json_each(categories) WHERE LOWER(json_each.value) = LOWER(?))", *category) } if since != nil { - query = query.Where(sq.GtOrEq{"published_date": since.Format(sqliteTimeLayout)}) + query = query.Where(sq.GtOrEq{"published_date": since.UTC().Format(sqliteWriteLayout)}) } if before != nil { - query = query.Where(sq.Lt{"published_date": before.Format(sqliteTimeLayout)}) + query = query.Where(sq.Lt{"published_date": before.UTC().Format(sqliteWriteLayout)}) } rows, err := query.RunWith(db.conn).QueryContext(ctx) @@ -522,7 +533,7 @@ func formatTimePtr(value *time.Time) *string { if value == nil { return nil } - formatted := value.Format(sqliteTimeLayout) + formatted := value.UTC().Format(sqliteWriteLayout) return &formatted } diff --git a/internal/storage/database_test.go b/internal/storage/database_test.go index 66faed5..d9877ad 100644 --- a/internal/storage/database_test.go +++ b/internal/storage/database_test.go @@ -7,8 +7,11 @@ import ( "testing" "time" - "github.com/JulienTant/blogwatcher-cli/internal/model" + sq "github.com/Masterminds/squirrel" "github.com/stretchr/testify/require" + + "github.com/JulienTant/blogwatcher-cli/internal/model" + "github.com/JulienTant/blogwatcher-cli/internal/storage/migrations" ) func TestDatabaseCreatesFileAndCRUD(t *testing.T) { @@ -121,7 +124,10 @@ func TestBlogTimeRoundTrip(t *testing.T) { require.NoError(t, err, "open database") defer func() { require.NoError(t, db.Close()) }() - now := time.Date(2025, 1, 2, 3, 4, 5, 6, time.UTC) + // Sub-second precision is dropped on write so that the lexicographic + // comparison in ListArticles is reliable. Use a second-precision instant + // for the round-trip assertion. + now := time.Date(2025, 1, 2, 3, 4, 5, 0, time.UTC) blog, err := db.AddBlog(ctx, model.Blog{ Name: "Test", URL: "https://example.com", @@ -147,7 +153,8 @@ func TestArticleTimeRoundTripAndNilDiscoveredDate(t *testing.T) { blog, err := db.AddBlog(ctx, model.Blog{Name: "Test", URL: "https://example.com"}) require.NoError(t, err, "add blog") - published := time.Date(2024, 12, 31, 23, 59, 59, 123, time.UTC) + // Stored at second precision (see TestBlogTimeRoundTrip for the rationale). + published := time.Date(2024, 12, 31, 23, 59, 59, 0, time.UTC) article, err := db.AddArticle(ctx, model.Article{ BlogID: blog.ID, Title: "Title", @@ -501,6 +508,168 @@ func TestListArticlesFilterByDate(t *testing.T) { }) } +func TestAddArticleStoresPublishedDateInUTC(t *testing.T) { + ctx := context.Background() + tmp := t.TempDir() + path := filepath.Join(tmp, "blogwatcher-cli.db") + db, err := OpenDatabase(ctx, path) + require.NoError(t, err, "open database") + defer func() { require.NoError(t, db.Close()) }() + + blog, err := db.AddBlog(ctx, model.Blog{Name: "TZ", URL: "https://example.com"}) + require.NoError(t, err, "add blog") + + jst := time.FixedZone("JST", 9*3600) + published := time.Date(2024, 1, 15, 10, 0, 0, 0, jst) // 2024-01-15 01:00 UTC + + _, err = db.AddArticle(ctx, model.Article{ + BlogID: blog.ID, + Title: "TZ article", + URL: "https://example.com/tz", + PublishedDate: &published, + }) + require.NoError(t, err, "add article") + + var stored string + err = sq.Select("published_date").From("articles").Where(sq.Eq{"url": "https://example.com/tz"}). + RunWith(db.conn).QueryRowContext(ctx).Scan(&stored) + require.NoError(t, err, "read stored published_date") + require.Contains(t, stored, "Z", "expected UTC marker in stored value, got %q", stored) + require.NotContains(t, stored, "+09:00", "expected offset to be stripped, got %q", stored) +} + +func TestUpdateBlogLastScannedStoresInUTC(t *testing.T) { + ctx := context.Background() + tmp := t.TempDir() + path := filepath.Join(tmp, "blogwatcher-cli.db") + db, err := OpenDatabase(ctx, path) + require.NoError(t, err, "open database") + defer func() { require.NoError(t, db.Close()) }() + + blog, err := db.AddBlog(ctx, model.Blog{Name: "TZ", URL: "https://example.com"}) + require.NoError(t, err, "add blog") + + jst := time.FixedZone("JST", 9*3600) + scanned := time.Date(2024, 5, 1, 12, 0, 0, 0, jst) // 2024-05-01 03:00 UTC + require.NoError(t, db.UpdateBlogLastScanned(ctx, blog.ID, scanned)) + + var stored string + err = sq.Select("last_scanned").From("blogs").Where(sq.Eq{"id": blog.ID}). + RunWith(db.conn).QueryRowContext(ctx).Scan(&stored) + require.NoError(t, err, "read stored last_scanned") + require.Contains(t, stored, "Z", "expected UTC marker, got %q", stored) + require.NotContains(t, stored, "+09:00", "expected offset to be stripped, got %q", stored) +} + +func TestDateFilterRespectsTimezoneEquivalence(t *testing.T) { + // An article published 2024-01-15 02:00 in JST is 2024-01-14 17:00 UTC. + // With --since 2024-01-15 (UTC), it should be excluded. + ctx := context.Background() + tmp := t.TempDir() + path := filepath.Join(tmp, "blogwatcher-cli.db") + db, err := OpenDatabase(ctx, path) + require.NoError(t, err, "open database") + defer func() { require.NoError(t, db.Close()) }() + + blog, err := db.AddBlog(ctx, model.Blog{Name: "TZ", URL: "https://example.com"}) + require.NoError(t, err, "add blog") + + jst := time.FixedZone("JST", 9*3600) + jan15JST := time.Date(2024, 1, 15, 2, 0, 0, 0, jst) // = 2024-01-14T17:00:00Z + _, err = db.AddArticle(ctx, model.Article{BlogID: blog.ID, Title: "JST", URL: "https://example.com/jst", PublishedDate: &jan15JST}) + require.NoError(t, err, "add article") + + since := time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC) + articles, err := db.ListArticles(ctx, false, nil, nil, &since, nil) + require.NoError(t, err, "list articles") + require.Empty(t, articles, "JST article published before UTC midnight Jan 15 should be excluded") +} + +func TestMigrationNormalizesLegacyTimestampsToUTC(t *testing.T) { + ctx := context.Background() + tmp := t.TempDir() + path := filepath.Join(tmp, "blogwatcher-cli.db") + db, err := OpenDatabase(ctx, path) + require.NoError(t, err, "open database") + defer func() { require.NoError(t, db.Close()) }() + + blog, err := db.AddBlog(ctx, model.Blog{Name: "Legacy", URL: "https://example.com"}) + require.NoError(t, err, "add blog") + + _, err = sq.Insert("articles"). + Columns("blog_id", "title", "url", "published_date", "discovered_date"). + Values(blog.ID, "Legacy", "https://example.com/legacy", + "2024-01-15T10:00:00+09:00", + "2024-01-15 09:00:00"). + RunWith(db.conn).ExecContext(ctx) + require.NoError(t, err, "insert legacy row") + + _, err = sq.Update("blogs"). + Set("last_scanned", "2024-01-15T10:00:00+09:00"). + Where(sq.Eq{"id": blog.ID}). + RunWith(db.conn).ExecContext(ctx) + require.NoError(t, err, "set legacy last_scanned") + + // Replay the actual migration SQL against the rows we just injected. + sqlBytes, err := migrations.FS.ReadFile("000003_normalize_dates_utc.up.sql") + require.NoError(t, err, "read migration file") + _, err = db.conn.ExecContext(ctx, string(sqlBytes)) + require.NoError(t, err, "replay migration") + + var published, discovered, lastScanned string + err = sq.Select("published_date", "discovered_date").From("articles"). + Where(sq.Eq{"url": "https://example.com/legacy"}). + RunWith(db.conn).QueryRowContext(ctx).Scan(&published, &discovered) + require.NoError(t, err) + err = sq.Select("last_scanned").From("blogs").Where(sq.Eq{"id": blog.ID}). + RunWith(db.conn).QueryRowContext(ctx).Scan(&lastScanned) + require.NoError(t, err) + + require.Equal(t, "2024-01-15T01:00:00Z", published, "JST 10:00 should normalize to UTC 01:00") + require.Equal(t, "2024-01-15T09:00:00Z", discovered, "space-separated form already UTC stays at same wall time") + require.Equal(t, "2024-01-15T01:00:00Z", lastScanned) + + // Article should be readable back through the normal API. + article, err := db.GetArticleByURL(ctx, "https://example.com/legacy") + require.NoError(t, err) + require.NotNil(t, article) + require.NotNil(t, article.PublishedDate) + require.Equal(t, time.Date(2024, 1, 15, 1, 0, 0, 0, time.UTC), article.PublishedDate.UTC()) +} + +func TestMigrationPreservesUnparseableTimestamps(t *testing.T) { + // strftime() returns NULL on inputs it can't parse. COALESCE keeps the + // original value so we surface bad data instead of silently destroying it. + ctx := context.Background() + tmp := t.TempDir() + path := filepath.Join(tmp, "blogwatcher-cli.db") + db, err := OpenDatabase(ctx, path) + require.NoError(t, err, "open database") + defer func() { require.NoError(t, db.Close()) }() + + blog, err := db.AddBlog(ctx, model.Blog{Name: "Bad", URL: "https://example.com"}) + require.NoError(t, err, "add blog") + + const garbage = "not-a-timestamp" + _, err = sq.Insert("articles"). + Columns("blog_id", "title", "url", "published_date"). + Values(blog.ID, "Bad", "https://example.com/bad", garbage). + RunWith(db.conn).ExecContext(ctx) + require.NoError(t, err, "insert bad row") + + sqlBytes, err := migrations.FS.ReadFile("000003_normalize_dates_utc.up.sql") + require.NoError(t, err, "read migration file") + _, err = db.conn.ExecContext(ctx, string(sqlBytes)) + require.NoError(t, err, "replay migration") + + var stored string + err = sq.Select("published_date").From("articles"). + Where(sq.Eq{"url": "https://example.com/bad"}). + RunWith(db.conn).QueryRowContext(ctx).Scan(&stored) + require.NoError(t, err) + require.Equal(t, garbage, stored, "unparseable timestamp should be preserved verbatim") +} + func openTestDB(t *testing.T) *Database { t.Helper() ctx := context.Background() diff --git a/internal/storage/migrations/000003_normalize_dates_utc.down.sql b/internal/storage/migrations/000003_normalize_dates_utc.down.sql new file mode 100644 index 0000000..f823660 --- /dev/null +++ b/internal/storage/migrations/000003_normalize_dates_utc.down.sql @@ -0,0 +1,2 @@ +-- No-op: original timezone offsets cannot be restored. +SELECT 1; diff --git a/internal/storage/migrations/000003_normalize_dates_utc.up.sql b/internal/storage/migrations/000003_normalize_dates_utc.up.sql new file mode 100644 index 0000000..51c4336 --- /dev/null +++ b/internal/storage/migrations/000003_normalize_dates_utc.up.sql @@ -0,0 +1,22 @@ +-- Normalize existing timestamps to UTC, fixed-width RFC3339 (no fractional). +-- +-- Pre-existing rows may carry timezone offsets (e.g. "+09:00") because feeds +-- supply timestamps in their local zone. The date filter compares strings +-- lexicographically, which only behaves correctly across a uniform zone and a +-- uniform width, so we rewrite all stored timestamps to UTC second-precision. +-- +-- COALESCE preserves the original value if strftime() returns NULL (which it +-- does for unparseable inputs) so we surface bad data instead of silently +-- destroying it. + +UPDATE articles +SET published_date = COALESCE(strftime('%Y-%m-%dT%H:%M:%SZ', published_date), published_date) +WHERE published_date IS NOT NULL; + +UPDATE articles +SET discovered_date = COALESCE(strftime('%Y-%m-%dT%H:%M:%SZ', discovered_date), discovered_date) +WHERE discovered_date IS NOT NULL; + +UPDATE blogs +SET last_scanned = COALESCE(strftime('%Y-%m-%dT%H:%M:%SZ', last_scanned), last_scanned) +WHERE last_scanned IS NOT NULL;