@@ -2,6 +2,7 @@ package backfill
22
33import (
44 "context"
5+ "encoding/json"
56 "fmt"
67 "log/slog"
78 "os"
@@ -111,10 +112,11 @@ func (s *Stats) Duration() time.Duration {
111112
112113// Backfiller coordinates historical data backfill.
113114type Backfiller struct {
114- config Config
115- client * Client
116- recordsRepo * repositories.RecordsRepository
117- actorsRepo * repositories.ActorsRepository
115+ config Config
116+ client * Client
117+ recordsRepo * repositories.RecordsRepository
118+ actorsRepo * repositories.ActorsRepository
119+ activityRepo * repositories.JetstreamActivityRepository
118120
119121 // httpSem is a global semaphore limiting concurrent HTTP requests.
120122 // This prevents overwhelming the network and running out of file descriptors.
@@ -132,6 +134,7 @@ func NewBackfiller(
132134 config Config ,
133135 recordsRepo * repositories.RecordsRepository ,
134136 actorsRepo * repositories.ActorsRepository ,
137+ activityRepo * repositories.JetstreamActivityRepository ,
135138) * Backfiller {
136139 // Create DID resolver with custom PLC URL
137140 didResolver := oauth .NewDIDResolver (
@@ -152,6 +155,7 @@ func NewBackfiller(
152155 client : NewClient (config .RelayURL , config .PLCURL , config .MaxHTTPConcurrent ),
153156 recordsRepo : recordsRepo ,
154157 actorsRepo : actorsRepo ,
158+ activityRepo : activityRepo ,
155159 httpSem : make (chan struct {}, config .MaxHTTPConcurrent ),
156160 didCache : didCache ,
157161 stopCacheCleanup : stopCleanup ,
@@ -530,6 +534,17 @@ func (b *Backfiller) processRepo(ctx context.Context, pdsURL string, data *Atpro
530534 } else {
531535 insertedCount = len (filteredRecords )
532536 atomic .AddInt64 (& b .stats .RecordsInserted , int64 (insertedCount ))
537+
538+ // Log activity for each inserted record (with 'success' status since already inserted)
539+ if b .activityRepo != nil {
540+ for _ , rec := range filteredRecords {
541+ timestamp := extractCreatedAt (rec .JSON )
542+ _ , err := b .activityRepo .LogActivityWithStatus (ctx , timestamp , "create" , rec .Collection , rec .DID , rec .JSON , "success" )
543+ if err != nil {
544+ slog .Debug ("[backfill] Failed to log activity" , "uri" , rec .URI , "error" , err )
545+ }
546+ }
547+ }
533548 }
534549 }
535550 insertMs := time .Since (insertStart ).Milliseconds ()
@@ -650,6 +665,14 @@ func (b *Backfiller) processRepoLegacy(ctx context.Context, pdsURL string, data
650665 if result == repositories .Inserted {
651666 totalInserted ++
652667 atomic .AddInt64 (& b .stats .RecordsInserted , 1 )
668+ // Log activity for the inserted record
669+ if b .activityRepo != nil {
670+ timestamp := extractCreatedAt (string (rec .Value ))
671+ _ , err := b .activityRepo .LogActivityWithStatus (ctx , timestamp , "create" , collection , data .DID , string (rec .Value ), "success" )
672+ if err != nil {
673+ slog .Debug ("[backfill] Failed to log activity" , "uri" , rec .URI , "error" , err )
674+ }
675+ }
653676 }
654677 }
655678 }
@@ -723,6 +746,17 @@ func (b *Backfiller) BackfillActor(ctx context.Context, did string) (int, error)
723746 if err := b .recordsRepo .BatchInsert (ctx , filteredRecords ); err != nil {
724747 return 0 , fmt .Errorf ("batch insert failed: %w" , err )
725748 }
749+
750+ // Log activity for each inserted record
751+ if b .activityRepo != nil {
752+ for _ , rec := range filteredRecords {
753+ timestamp := extractCreatedAt (rec .JSON )
754+ _ , err := b .activityRepo .LogActivityWithStatus (ctx , timestamp , "create" , rec .Collection , rec .DID , rec .JSON , "success" )
755+ if err != nil {
756+ slog .Debug ("[backfill] Failed to log activity" , "uri" , rec .URI , "error" , err )
757+ }
758+ }
759+ }
726760 }
727761
728762 slog .Info ("[backfill] Actor backfill complete (CAR)" ,
@@ -758,6 +792,14 @@ func (b *Backfiller) backfillActorLegacy(ctx context.Context, data *AtprotoData)
758792
759793 if result == repositories .Inserted {
760794 totalRecords ++
795+ // Log activity for the inserted record
796+ if b .activityRepo != nil {
797+ timestamp := extractCreatedAt (string (rec .Value ))
798+ _ , err := b .activityRepo .LogActivityWithStatus (ctx , timestamp , "create" , collection , data .DID , string (rec .Value ), "success" )
799+ if err != nil {
800+ slog .Debug ("[backfill] Failed to log activity" , "uri" , rec .URI , "error" , err )
801+ }
802+ }
761803 }
762804 }
763805 }
@@ -786,3 +828,28 @@ func ParseCollections(s string) []string {
786828 }
787829 return result
788830}
831+
// extractCreatedAt returns the instant stored in the top-level "createdAt"
// field of a record's JSON document.
//
// The value is parsed as RFC 3339 first and then, for values that carry no
// zone designator, with the layout "2006-01-02T15:04:05" (time.Parse yields
// UTC when the input has no zone). The current time is returned instead when
// recordJSON is not valid JSON, is not a JSON object, has no "createdAt" key,
// holds a non-string value under that key, or matches neither layout, so the
// caller always receives a usable timestamp.
func extractCreatedAt(recordJSON string) time.Time {
	// Keep the top-level values raw: only createdAt is needed, and decoding
	// into a generic map would allocate every nested value of the record.
	var fields map[string]json.RawMessage
	if err := json.Unmarshal([]byte(recordJSON), &fields); err != nil {
		return time.Now()
	}

	raw, ok := fields["createdAt"]
	if !ok {
		return time.Now()
	}

	// A JSON null decodes to "" without error and is rejected by the layout
	// loop below; any other non-string value fails here.
	var createdAt string
	if err := json.Unmarshal(raw, &createdAt); err != nil {
		return time.Now()
	}

	// Layouts are tried in order; the first successful parse wins.
	for _, layout := range []string{time.RFC3339, "2006-01-02T15:04:05"} {
		if t, err := time.Parse(layout, createdAt); err == nil {
			return t
		}
	}
	return time.Now()
}
0 commit comments