Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 81 additions & 2 deletions backend/internal/api/handlers/processing.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/nesposito/frfr/internal/services/extraction"
"github.com/nesposito/frfr/internal/services/pdf"
"github.com/nesposito/frfr/internal/services/session"
slackext "github.com/nesposito/frfr/internal/services/slack"
)

// ProcessingHandler handles processing-related API requests
Expand Down Expand Up @@ -249,9 +250,9 @@ func (h *ProcessingHandler) processDocuments(sessionID string, documents []strin
Progress: float64(i) / float64(totalDocs),
})

// Step 1: Extract text from document
// Step 1: Extract text based on document source
textFile := filepath.Join(sessionDir, "text", docName+".txt")
textContent, err := h.extractFileText(ctx, sessionID, docName, docInfo, textFile)
textContent, err := h.extractText(ctx, sessionID, docName, docInfo, textFile)
if err != nil {
continue
}
Expand Down Expand Up @@ -375,6 +376,84 @@ func (h *ProcessingHandler) processDocuments(sessionID string, documents []strin
})
}

// extractText returns the text content of a document by delegating to the
// extractor that matches docInfo.Source: Slack-sourced documents are handled by
// extractSlackText and every other source by extractFileText.
//
// The source-specific methods are expected to update the document status and
// broadcast an error event themselves when they fail, so callers can simply
// `continue` on error.
func (h *ProcessingHandler) extractText(ctx context.Context, sessionID, docName string, docInfo models.DocumentInfo, textFile string) (string, error) {
	switch docInfo.Source {
	case models.DocumentSourceSlack:
		return h.extractSlackText(ctx, sessionID, docName, docInfo, textFile)
	default:
		// Anything that is not explicitly Slack is treated as a local file.
		return h.extractFileText(ctx, sessionID, docName, docInfo, textFile)
	}
}

// extractSlackText fetches the messages of the Slack channel described by
// docInfo.SlackMeta, lets the Slack extractor write them to textFile, and
// returns the contents of that file.
//
// Progress is reported through "slack_extraction_start" and
// "slack_extraction_complete" events. On every failure path (missing metadata,
// extractor error, unreadable output file) the document is marked failed in the
// store and an error event is broadcast before the error is returned.
func (h *ProcessingHandler) extractSlackText(ctx context.Context, sessionID, docName string, docInfo models.DocumentInfo, textFile string) (string, error) {
	meta := docInfo.SlackMeta
	if meta == nil {
		h.failSlackExtraction(sessionID, docName, "missing slack metadata", "Slack document missing metadata")
		return "", fmt.Errorf("missing slack metadata")
	}

	// ChannelName is optional metadata and may be empty; fall back to the
	// channel ID so the progress message never reads "channel #...".
	channelLabel := meta.ChannelName
	if channelLabel == "" {
		channelLabel = meta.ChannelID
	}

	h.broadcast(sessionID, models.ProcessingEvent{
		Type:      "slack_extraction_start",
		Timestamp: time.Now(),
		Document:  docName,
		Message:   fmt.Sprintf("Fetching messages from Slack channel #%s...", channelLabel),
	})

	slackExtractor := slackext.NewExtractor(h.config.SlackBotToken, h.config.SlackMaxMessages, h.config.SlackLookbackDays)

	// Optional date bounds are "YYYY-MM-DD" strings. A value that fails to
	// parse is skipped, leaving that bound unset in the options.
	opts := slackext.ExtractOptions{IncludeThreads: true}
	if meta.Since != "" {
		if t, err := time.Parse("2006-01-02", meta.Since); err == nil {
			opts.Since = t
		}
	}
	if meta.Until != "" {
		if t, err := time.Parse("2006-01-02", meta.Until); err == nil {
			opts.Until = t
		}
	}

	result, err := slackExtractor.Extract(ctx, meta.ChannelID, textFile, opts)
	if err != nil {
		h.failSlackExtraction(sessionID, docName, err.Error(), fmt.Sprintf("Slack extraction failed: %v", err))
		return "", err
	}

	h.broadcast(sessionID, models.ProcessingEvent{
		Type:      "slack_extraction_complete",
		Timestamp: time.Now(),
		Document:  docName,
		Message: fmt.Sprintf("Extracted %d messages (%d threads), %d characters from #%s",
			result.MessageCount, result.ThreadCount, result.TotalChars, result.ChannelName),
	})

	// The extractor writes its output to textFile; read it back so the caller
	// receives the text in memory like it does for file-based documents.
	data, err := os.ReadFile(textFile)
	if err != nil {
		h.failSlackExtraction(sessionID, docName, err.Error(), fmt.Sprintf("Failed to read extracted text: %v", err))
		return "", err
	}
	return string(data), nil
}

// failSlackExtraction marks the document as failed in the store with
// statusMessage and broadcasts an error event carrying eventMessage.
func (h *ProcessingHandler) failSlackExtraction(sessionID, docName, statusMessage, eventMessage string) {
	h.store.UpdateDocumentStatus(sessionID, docName, models.DocumentStatusFailed, statusMessage)
	h.broadcast(sessionID, models.ProcessingEvent{
		Type:      models.EventTypeError,
		Timestamp: time.Now(),
		Document:  docName,
		Message:   eventMessage,
	})
}

// extractFileText extracts text from a PDF or reads a plain text/markdown file.
func (h *ProcessingHandler) extractFileText(ctx context.Context, sessionID, docName string, docInfo models.DocumentInfo, textFile string) (string, error) {
pdfPath := expandTilde(docInfo.OriginalPDFPath)
Expand Down
98 changes: 98 additions & 0 deletions backend/internal/api/handlers/slack.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package handlers

import (
"encoding/json"
"net/http"
"strings"
"time"

"github.com/nesposito/frfr/internal/config"
"github.com/nesposito/frfr/internal/domain/models"
"github.com/nesposito/frfr/internal/services/session"
)

// SlackHandler handles Slack-related API requests. It registers Slack channels
// as documents on a session; the messages themselves are fetched later, during
// processing.
type SlackHandler struct {
store *session.Store // session store used to load and persist the session being modified
config *config.Config // application config; supplies the fallback Slack bot token
}

// NewSlackHandler builds a SlackHandler that works against the given session
// store and application configuration.
func NewSlackHandler(store *session.Store, cfg *config.Config) *SlackHandler {
	handler := new(SlackHandler)
	handler.store = store
	handler.config = cfg
	return handler
}

// AddSlackChannelRequest is the JSON request body for importing a Slack channel
// into a session. Only ChannelID is required.
type AddSlackChannelRequest struct {
ChannelID string `json:"channel_id"` // Slack channel ID; also used to derive the document name ("slack-<id>")
Token string `json:"token,omitempty"` // Optional; falls back to SLACK_BOT_TOKEN env
Since string `json:"since,omitempty"` // Optional lower date bound, formatted YYYY-MM-DD, e.g. "2025-01-01"
Until string `json:"until,omitempty"` // Optional upper date bound, formatted YYYY-MM-DD, e.g. "2025-03-01"
}

// Add imports a Slack channel as a document source in a session.
//
// It reads the session ID from the {id} path value and an
// AddSlackChannelRequest from the JSON body, validates the request, and
// registers a pending document named "slack-<channel_id>" on the session. No
// Slack API call is made here; messages are fetched when the session is
// processed.
//
// Responses:
//   - 201 with the registered DocumentInfo on success
//   - 400 for a missing session ID, malformed body, missing or invalid
//     channel_id, malformed since/until dates, or when no Slack token is
//     available
//   - 404 when the session does not exist
//   - 500 when the session cannot be loaded or saved
func (h *SlackHandler) Add(w http.ResponseWriter, r *http.Request) {
	const dateLayout = "2006-01-02"

	sessionID := r.PathValue("id")
	if sessionID == "" {
		writeError(w, http.StatusBadRequest, "Session ID is required")
		return
	}

	var req AddSlackChannelRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeError(w, http.StatusBadRequest, "Invalid request body: "+err.Error())
		return
	}

	if req.ChannelID == "" {
		writeError(w, http.StatusBadRequest, "channel_id is required")
		return
	}
	// The channel ID becomes part of the document name, which is later joined
	// into file paths during processing. Reject anything that could act as a
	// path separator or parent-directory reference.
	if !isSafeSlackChannelID(req.ChannelID) {
		writeError(w, http.StatusBadRequest, "channel_id may only contain letters, digits, '-' and '_'")
		return
	}

	// Validate the optional date range up front so a typo is reported to the
	// caller instead of being silently dropped when the channel is processed.
	var since, until time.Time
	if req.Since != "" {
		parsed, err := time.Parse(dateLayout, req.Since)
		if err != nil {
			writeError(w, http.StatusBadRequest, "since must be a date in YYYY-MM-DD format")
			return
		}
		since = parsed
	}
	if req.Until != "" {
		parsed, err := time.Parse(dateLayout, req.Until)
		if err != nil {
			writeError(w, http.StatusBadRequest, "until must be a date in YYYY-MM-DD format")
			return
		}
		until = parsed
	}
	if req.Since != "" && req.Until != "" && since.After(until) {
		writeError(w, http.StatusBadRequest, "since must not be after until")
		return
	}

	// Check that we have a token somewhere.
	// NOTE(review): the request token is only used for this presence check; it
	// is not stored on the document, so a token supplied solely in the request
	// is not available when the channel is later processed. Confirm whether it
	// should be persisted or the field removed.
	token := req.Token
	if token == "" {
		token = h.config.SlackBotToken
	}
	if token == "" {
		writeError(w, http.StatusBadRequest, "No Slack token provided. Set SLACK_BOT_TOKEN env or pass 'token' in request.")
		return
	}

	// Get session
	sess, err := h.store.Get(sessionID)
	if err != nil {
		if strings.Contains(err.Error(), "not found") {
			writeError(w, http.StatusNotFound, err.Error())
		} else {
			writeError(w, http.StatusInternalServerError, "Failed to get session: "+err.Error())
		}
		return
	}

	// Create document name from channel ID
	docName := "slack-" + req.ChannelID

	// Register as a document in the session; the registry map may not have
	// been allocated yet.
	if sess.DocumentRegistry == nil {
		sess.DocumentRegistry = make(map[string]models.DocumentInfo)
	}

	doc := models.DocumentInfo{
		Status:  models.DocumentStatusPending,
		AddedAt: models.FlexibleTime{Time: time.Now()},
		Source:  models.DocumentSourceSlack,
		SlackMeta: &models.SlackDocumentMeta{
			ChannelID: req.ChannelID,
			Since:     req.Since,
			Until:     req.Until,
		},
	}
	sess.DocumentRegistry[docName] = doc

	if err := h.store.Update(sess); err != nil {
		writeError(w, http.StatusInternalServerError, "Failed to update session: "+err.Error())
		return
	}

	writeJSON(w, http.StatusCreated, doc)
}

// isSafeSlackChannelID reports whether id is non-empty and consists only of
// ASCII letters, digits, '-' and '_', so it is safe to embed in a file name.
func isSafeSlackChannelID(id string) bool {
	for _, r := range id {
		switch {
		case r >= 'A' && r <= 'Z':
		case r >= 'a' && r <= 'z':
		case r >= '0' && r <= '9':
		case r == '-' || r == '_':
		default:
			return false
		}
	}
	return id != ""
}
4 changes: 4 additions & 0 deletions backend/internal/api/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func (s *Server) registerRoutes() {
factsHandler := handlers.NewFactsHandler(s.sessionStore)
processingHandler := handlers.NewProcessingHandler(s.sessionStore, s.config)
queryHandler := handlers.NewQueryHandler(s.sessionStore, s.config)
slackHandler := handlers.NewSlackHandler(s.sessionStore, s.config)
filePickerHandler := handlers.NewFilePickerHandler()
claudeHandler := handlers.NewClaudeHandler(s.config)

Expand All @@ -60,6 +61,9 @@ func (s *Server) registerRoutes() {
s.mux.HandleFunc("POST /api/sessions/{id}/query/stream", queryHandler.SubmitStream)
s.mux.HandleFunc("GET /api/sessions/{id}/query/history", queryHandler.History)

// Slack
s.mux.HandleFunc("POST /api/sessions/{id}/slack", slackHandler.Add)

// Processing
s.mux.HandleFunc("POST /api/sessions/{id}/process", processingHandler.Start)
s.mux.HandleFunc("GET /api/sessions/{id}/process/events", processingHandler.Events)
Expand Down
10 changes: 10 additions & 0 deletions backend/internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ type Config struct {
AnthropicAPIKey string
MaxWorkers int
MaxRetries int

// Slack settings
SlackBotToken string
SlackMaxMessages int
SlackLookbackDays int
}

// DefaultConfig returns the default configuration
Expand Down Expand Up @@ -59,6 +64,11 @@ func DefaultConfig() *Config {
AnthropicAPIKey: getAnthropicAPIKey(),
MaxWorkers: getEnvInt("FRFR_MAX_WORKERS", 20),
MaxRetries: getEnvInt("FRFR_MAX_RETRIES", 3),

// Slack
SlackBotToken: os.Getenv("SLACK_BOT_TOKEN"),
SlackMaxMessages: getEnvInt("FRFR_SLACK_MAX_MESSAGES", 1000),
SlackLookbackDays: getEnvInt("FRFR_SLACK_LOOKBACK_DAYS", 90),
}
}

Expand Down
18 changes: 18 additions & 0 deletions backend/internal/domain/models/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@ const (
DocumentStatusFailed DocumentStatus = "failed"
)

// DocumentSource indicates where a document came from. It is serialized as the
// "source" field of DocumentInfo and omitted from JSON when empty.
type DocumentSource string

const (
// DocumentSourceFile is the empty string, i.e. the zero value of DocumentSource.
DocumentSourceFile DocumentSource = "" // Default: local file (PDF/Markdown)
// DocumentSourceSlack marks a document whose text is fetched from a Slack channel.
DocumentSourceSlack DocumentSource = "slack"
)

// SlackDocumentMeta contains Slack-specific metadata for a document. It is
// attached to DocumentInfo as the optional "slack_meta" field.
type SlackDocumentMeta struct {
ChannelID string `json:"channel_id"` // Slack channel ID handed to the extractor
ChannelName string `json:"channel_name,omitempty"` // Optional display name, used in progress messages
Since string `json:"since,omitempty"` // Optional lower date bound, formatted YYYY-MM-DD
Until string `json:"until,omitempty"` // Optional upper date bound, formatted YYYY-MM-DD
}

// DocumentInfo contains metadata about a document in a session
type DocumentInfo struct {
OriginalPDFPath string `json:"original_pdf_path"`
Expand All @@ -20,6 +36,8 @@ type DocumentInfo struct {
AddedAt FlexibleTime `json:"added_at"`
CompletedAt *FlexibleTime `json:"completed_at,omitempty"`
ErrorMessage string `json:"error_message,omitempty"`
Source DocumentSource `json:"source,omitempty"`
SlackMeta *SlackDocumentMeta `json:"slack_meta,omitempty"`
}

// DocumentSummary contains the LLM-generated summary of a document
Expand Down
Loading