Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ async fn generate_candidates(
api_key: Option<String>,
provider: Option<String>,
model_name: Option<String>,
target_seconds: Option<f64>,
_allow_demo: bool,
) -> Result<Vec<Candidate>, String> {
let db = state.db.clone();
Expand All @@ -442,6 +443,11 @@ async fn generate_candidates(
let normalized: NormalizedTranscript =
serde_json::from_str(&transcript.raw_json).map_err(to_command_error)?;

// Desired clip length from the UI, with a ±25% tolerance band for the model.
let target = target_seconds.unwrap_or(60.0).clamp(10.0, 600.0);
let min_seconds = (target * 0.75).max(5.0);
let max_seconds = target * 1.25;

let active_provider = provider
.or_else(|| std::env::var("LLM_PROVIDER").ok())
.unwrap_or_else(|| "deepseek".to_string())
Expand All @@ -452,23 +458,23 @@ async fn generate_candidates(
let key = api_key
.or_else(|| std::env::var("ANTHROPIC_API_KEY").ok())
.ok_or_else(|| "Set ANTHROPIC_API_KEY or supply Claude API Key to generate candidates.".to_string())?;
llm::detect_candidates_with_claude(&normalized, &key)
llm::detect_candidates_with_claude(&normalized, &key, min_seconds, max_seconds)
.await
.map_err(to_command_error)?
}
"local" | "ollama" => {
let model = model_name
.or_else(|| std::env::var("OLLAMA_MODEL").ok())
.unwrap_or_else(|| "llama3.2".to_string());
llm::detect_candidates_with_local_llm(&normalized, &model)
llm::detect_candidates_with_local_llm(&normalized, &model, min_seconds, max_seconds)
.await
.map_err(to_command_error)?
}
_ => {
let key = api_key
.or_else(|| std::env::var("DEEPSEEK_API_KEY").ok())
.ok_or_else(|| "Set DEEPSEEK_API_KEY or supply DeepSeek API Key to generate candidates.".to_string())?;
llm::detect_candidates_with_deepseek(&normalized, &key)
llm::detect_candidates_with_deepseek(&normalized, &key, min_seconds, max_seconds)
.await
.map_err(to_command_error)?
}
Expand Down Expand Up @@ -874,7 +880,10 @@ fn build_drawtext_filters(
let mut font_option = String::new();
for path in &font_paths {
if std::path::Path::new(path).exists() {
font_option = format!("fontfile='{}':", path);
// Escape the colon in Windows drive-letter paths (e.g. C:/...).
// Single quotes protect filtergraph-level separators (commas) but NOT
// drawtext's own option separator (':'), so the colon must be backslash-escaped.
font_option = format!("fontfile='{}':", path.replace(':', "\\:"));
break;
}
}
Expand Down
53 changes: 30 additions & 23 deletions src-tauri/src/llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,15 @@ struct DeepseekResponse {
pub async fn detect_candidates_with_deepseek(
transcript: &NormalizedTranscript,
api_key: &str,
min_seconds: f64,
max_seconds: f64,
) -> Result<Vec<CandidateDraft>> {
let segments = compact_segments(&transcript.segments);
let dur = format!("{:.0}-{:.0}", min_seconds, max_seconds);
let prompt = format!(
"You are identifying the most viral moments and strongest short-form clip candidates from a long-form transcript. \
For each candidate, the clip must be self-contained, starting with an extremely engaging hook within the first 3 seconds (to capture immediate attention on social feeds), \
30-90 seconds long, and cut at clean sentence/thought boundaries. Favor highly shareable content: concrete stories, \
{dur} seconds long, and cut at clean sentence/thought boundaries. Favor highly shareable content: concrete stories, \
strong opinions, emotional turns, surprising or counter-intuitive claims, clear payoffs, and high-energy/dramatic peaks. \
Avoid rambling setup, context-dependent references, and pure filler. Return up to 10 candidates as JSON matching this schema: \
{{\"candidates\":[{{\"start\":0.0,\"end\":0.0,\"score\":0.0,\"hook\":\"...\",\"rationale\":\"...\"}}]}}\n\nTranscript:\n{segments}"
Expand Down Expand Up @@ -78,11 +81,7 @@ Avoid rambling setup, context-dependent references, and pure filler. Return up t
.map(|c| c.message.content.clone())
.ok_or_else(|| anyhow!("DeepSeek response did not include choices content"))?;

let min_duration = if transcript.duration < 60.0 {
(transcript.duration * 0.5).max(5.0)
} else {
30.0
};
let min_duration = min_clip_duration(transcript.duration, min_seconds);
parse_candidate_json(&text, min_duration)
}

Expand All @@ -95,12 +94,15 @@ struct ClaudeMessage<'a> {
pub async fn detect_candidates_with_claude(
transcript: &NormalizedTranscript,
api_key: &str,
min_seconds: f64,
max_seconds: f64,
) -> Result<Vec<CandidateDraft>> {
let segments = compact_segments(&transcript.segments);
let dur = format!("{:.0}-{:.0}", min_seconds, max_seconds);
let prompt = format!(
"You are identifying the most viral moments and strongest short-form clip candidates from a long-form transcript. \
For each candidate, the clip must be self-contained, starting with an extremely engaging hook within the first 3 seconds (to capture immediate attention on social feeds), \
30-90 seconds long, and cut at clean sentence/thought boundaries. Favor highly shareable content: concrete stories, \
{dur} seconds long, and cut at clean sentence/thought boundaries. Favor highly shareable content: concrete stories, \
strong opinions, emotional turns, surprising or counter-intuitive claims, clear payoffs, and high-energy/dramatic peaks. \
Avoid rambling setup, context-dependent references, and pure filler. Return up to 10 candidates as JSON only: \
{{\"candidates\":[{{\"start\":0,\"end\":0,\"score\":0.0,\"hook\":\"...\",\"rationale\":\"...\"}}]}}\n\nTranscript:\n{segments}"
Expand Down Expand Up @@ -141,11 +143,7 @@ Avoid rambling setup, context-dependent references, and pure filler. Return up t
.find_map(|content| content.text)
.ok_or_else(|| anyhow!("Claude response did not include text content"))?;

let min_duration = if transcript.duration < 60.0 {
(transcript.duration * 0.5).max(5.0)
} else {
30.0
};
let min_duration = min_clip_duration(transcript.duration, min_seconds);
parse_candidate_json(&text, min_duration)
}

Expand All @@ -162,18 +160,20 @@ struct OllamaResponse {
pub async fn detect_candidates_with_local_llm(
transcript: &NormalizedTranscript,
model_name: &str,
min_seconds: f64,
max_seconds: f64,
) -> Result<Vec<CandidateDraft>> {
let segments = compact_segments(&transcript.segments);
let system_instructions = "You are identifying the most viral moments and strongest short-form clip candidates from a long-form transcript. \

let system_instructions = format!("You are identifying the most viral moments and strongest short-form clip candidates from a long-form transcript. \
For each candidate, the clip must be self-contained, starting with an extremely engaging hook within the first 3 seconds (to capture immediate attention on social feeds), \
30-90 seconds long, and cut at clean sentence/thought boundaries. \
CRITICAL: Each clip candidate MUST have a duration between 30 and 90 seconds (i.e. 'end' minus 'start' must be between 30.0 and 90.0). \
Do NOT return short clips of less than 30 seconds. Combine multiple adjacent sentences to build a meaningful segment of 30-90 seconds. \
{min_seconds:.0}-{max_seconds:.0} seconds long, and cut at clean sentence/thought boundaries. \
CRITICAL: Each clip candidate MUST have a duration between {min_seconds:.0} and {max_seconds:.0} seconds (i.e. 'end' minus 'start' must be between {min_seconds:.1} and {max_seconds:.1}). \
Do NOT return short clips of less than {min_seconds:.0} seconds. Combine multiple adjacent sentences to build a meaningful segment of {min_seconds:.0}-{max_seconds:.0} seconds. \
Comment on lines +170 to +172
Favor highly shareable content: concrete stories, strong opinions, emotional turns, surprising or counter-intuitive claims, clear payoffs, and high-energy/dramatic peaks. \
Avoid rambling setup, context-dependent references, and pure filler. \
You MUST identify and return at least 3-5 candidates (up to 10 candidates). Do not return an empty candidates list. \
Ensure the 'start' and 'end' values correspond to actual timestamps in the transcript. Do not output 0.0 for start and end times.";
Ensure the 'start' and 'end' values correspond to actual timestamps in the transcript. Do not output 0.0 for start and end times.");

let user_content = format!("Transcript:\n{}", segments);

Expand Down Expand Up @@ -227,14 +227,21 @@ Ensure the 'start' and 'end' values correspond to actual timestamps in the trans
}

let res_body: OllamaResponse = response.json().await.context("parsing local Ollama response")?;
let min_duration = if transcript.duration < 60.0 {
(transcript.duration * 0.5).max(5.0)
} else {
30.0
};
let min_duration = min_clip_duration(transcript.duration, min_seconds);
parse_candidate_json(&res_body.message.content, min_duration)
}

/// Returns the minimum acceptable clip length, in seconds, for a transcript.
///
/// When the transcript is at least twice as long as the requested minimum,
/// the requested `min_seconds` is returned unchanged. For shorter transcripts
/// the floor is relaxed to half the transcript duration (but not below 5
/// seconds) so that very short sources can still yield candidates.
///
/// The relaxed value is capped at `min_seconds`, so relaxing can only lower
/// the floor and never makes it stricter than what the caller asked for.
///
/// * `transcript_duration` - total length of the source transcript in seconds.
/// * `min_seconds` - lower edge of the requested clip-length band in seconds.
// NOTE: simple heuristic; revisit if users want a hard floor regardless of source length.
fn min_clip_duration(transcript_duration: f64, min_seconds: f64) -> f64 {
    if transcript_duration < min_seconds * 2.0 {
        // Half the source, floored at 5 s, but never above the requested minimum.
        (transcript_duration * 0.5).max(5.0).min(min_seconds)
    } else {
        min_seconds
    }
}

pub fn demo_candidates(transcript: &NormalizedTranscript) -> Vec<CandidateDraft> {
transcript
.segments
Expand Down
20 changes: 20 additions & 0 deletions src/main.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ function App() {
const [localLlmModel, setLocalLlmModel] = useState(() => {
return localStorage.getItem("autoshorts_local_llm_model") || "llama3.2";
});
const [clipLength, setClipLength] = useState(() => {
return Number(localStorage.getItem("autoshorts_clip_length")) || 60;
});
const [deepgramKey, setDeepgramKey] = useState(() => {
return localStorage.getItem("autoshorts_deepgram_key") || "";
});
Expand Down Expand Up @@ -201,6 +204,10 @@ function App() {
localStorage.setItem("autoshorts_local_llm_model", localLlmModel);
}, [localLlmModel]);

useEffect(() => {
localStorage.setItem("autoshorts_clip_length", String(clipLength));
}, [clipLength]);

useEffect(() => {
localStorage.setItem("autoshorts_deepgram_key", deepgramKey);
}, [deepgramKey]);
Expand Down Expand Up @@ -365,6 +372,7 @@ function App() {
apiKey: activeKey || null,
provider: llmEngine,
modelName: llmEngine === "local" ? localLlmModel.trim() : null,
targetSeconds: clipLength,
allowDemo: false,
});
await refresh(projectId);
Expand Down Expand Up @@ -447,6 +455,7 @@ function App() {
apiKey: activeKey || null,
provider: llmEngine,
modelName: llmEngine === "local" ? localLlmModel.trim() : null,
targetSeconds: clipLength,
allowDemo,
});
await refresh(detail.project.id);
Expand Down Expand Up @@ -634,6 +643,17 @@ function App() {
<option value="deepseek">DeepSeek (Cloud)</option>
</select>
</label>
<label>
<span>Clip Length (seconds)</span>
<input
value={clipLength}
onChange={(event) => setClipLength(Number(event.target.value) || 0)}
onBlur={() => setClipLength((v) => Math.min(600, Math.max(10, v || 60)))}
type="number"
Comment on lines +649 to +652
min={10}
max={600}
/>
</label>
{transcriptionEngine === "deepgram" && (
<label>
<span>Deepgram API Key</span>
Expand Down