Skip to content

Commit bca3f4b

Browse files
authored
Merge pull request #5 from follen99/LaTeX
Add LaTeX (.tex) support to config, parsers and TOON
2 parents 1e2f9e6 + 15489d0 commit bca3f4b

4 files changed

Lines changed: 128 additions & 102 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77
[project]
88
name = "deepbase"
99
# Increment the version to reflect changes
10-
version = "1.4.0"
10+
version = "1.5.0"
1111
authors = [
1212
{ name="Your Name", email="your@email.com" },
1313
]

src/deepbase/main.py

Lines changed: 59 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,19 @@
22

33
import os
44
import typer
5-
import fnmatch # Necessario per il pattern matching
5+
import fnmatch
66
from rich.console import Console
77
from rich.progress import Progress
88
import tomli
99
import chardet
10+
# Used by the --version flag to look up the installed package version
11+
from importlib.metadata import version as get_package_version, PackageNotFoundError
1012
from typing import List, Dict, Any, Set, Optional
1113

1214
from deepbase.toon import generate_toon_representation
1315
from deepbase.parsers import get_document_structure
1416

15-
# ... (LE CONFIGURAZIONI DEFAULT_CONFIG e HELPER RIMANGONO INVARIATE) ...
16-
# Assicurati di copiare le funzioni: load_config, is_significant_file,
17-
# generate_directory_tree, get_all_significant_files, read_file_content
18-
# dalla versione precedente.
19-
17+
# --- DEFAULT CONFIGURATION ---
2018
DEFAULT_CONFIG = {
2119
"ignore_dirs": {
2220
"__pycache__", ".git", ".idea", ".vscode", "venv", ".venv", "env",
@@ -25,10 +23,11 @@
2523
"site", "*.egg-info", "coverage"
2624
},
2725
"significant_extensions": {
28-
".py", ".java", ".js", ".jsx", ".ts", ".tsx", ".html", ".css", ".scss", ".sql", # Aggiunto jsx/tsx
26+
".py", ".java", ".js", ".jsx", ".ts", ".tsx", ".html", ".css", ".scss", ".sql",
2927
".md", ".json", ".xml", ".yml", ".yaml", ".sh", ".bat", "Dockerfile",
3028
".dockerignore", ".gitignore", "requirements.txt", "pom.xml", "gradlew",
31-
"pyproject.toml", "setup.py", "package.json", "tsconfig.json" # Aggiunto package/ts config
29+
"pyproject.toml", "setup.py", "package.json", "tsconfig.json",
30+
".tex", ".bib", ".sty", ".cls"
3231
}
3332
}
3433

@@ -39,11 +38,8 @@
3938
)
4039
console = Console()
4140

42-
# ... [INSERISCI QUI LE FUNZIONI HELPER: load_config, generate_directory_tree, etc.] ...
43-
# Per brevità non le ripeto se non sono cambiate, ma devono esserci nel file finale.
44-
41+
# --- HELPER FUNCTIONS ---
4542
def load_config(root_dir: str) -> Dict[str, Any]:
46-
"""Loads configuration from .deepbase.toml if available."""
4743
config_path = os.path.join(root_dir, ".deepbase.toml")
4844
config = DEFAULT_CONFIG.copy()
4945
if os.path.exists(config_path):
@@ -59,7 +55,7 @@ def load_config(root_dir: str) -> Dict[str, Any]:
5955
def is_significant_file(file_path: str, significant_extensions: Set[str]) -> bool:
    """Return True if *file_path* should be included in the generated context.

    A file is significant when either:

    * its base name is listed verbatim in *significant_extensions*
      (entries such as ``Dockerfile`` or ``pyproject.toml``), or
    * its extension is listed (entries such as ``.py``).

    The extension is compared as-is first and then lower-cased, so files such
    as ``README.MD`` or ``PAPER.TEX`` are matched by the lower-case entries,
    while a deliberately upper-case entry (e.g. ``.R``) still matches exactly.

    Args:
        file_path: Path (or bare name) of the file to test.
        significant_extensions: Set of extensions and/or exact file names.

    Returns:
        True when the file name or its extension is listed, False otherwise.
    """
    file_name = os.path.basename(file_path)
    if file_name in significant_extensions:
        return True

    _, ext = os.path.splitext(file_name)
    # Exact match first keeps existing configurations working unchanged;
    # the lower-cased fallback handles upper/mixed-case extensions on disk.
    return ext in significant_extensions or ext.lower() in significant_extensions
6460

6561
def generate_directory_tree(root_dir: str, config: Dict[str, Any]) -> str:
@@ -99,93 +95,99 @@ def read_file_content(file_path: str) -> str:
9995
except Exception as e:
10096
return f"!!! Error reading file: {e} !!!"
10197

102-
103-
# --- NEW HELPER FOR FOCUS ---
10498
def matches_focus(file_path: str, root_dir: str, focus_patterns: List[str]) -> bool:
    """Check whether *file_path* matches any of the focus patterns.

    The path is made relative to *root_dir* and normalised to forward slashes
    before matching, so the same patterns work regardless of ``os.sep``.
    A pattern matches when it is either an ``fnmatch`` glob for the relative
    path or a plain substring of it (e.g. ``src/`` selects everything whose
    relative path contains ``src/``).

    Empty or whitespace-only patterns are ignored: an empty string is a
    substring of every path and would otherwise mark every file as focused.

    Args:
        file_path: Path of the file being tested.
        root_dir: Directory the patterns are relative to.
        focus_patterns: Glob or substring patterns; may be empty.

    Returns:
        True if at least one non-blank pattern matches, False otherwise.
    """
    if not focus_patterns:
        return False

    rel_path_fwd = os.path.relpath(file_path, root_dir).replace(os.sep, '/')

    for pattern in focus_patterns:
        clean_pattern = pattern.replace(os.sep, '/')
        if not clean_pattern.strip():
            # Blank pattern: would match everything through the substring test.
            continue
        if fnmatch.fnmatch(rel_path_fwd, clean_pattern):
            return True
        if clean_pattern in rel_path_fwd:
            return True
    return False
125110

126-
# --- Legge il file di focus ---
127111
def load_focus_patterns_from_file(file_path: str) -> List[str]:
    """Read focus patterns from a text file, one pattern per line.

    Blank lines and lines starting with ``#`` are skipped; surrounding
    whitespace is stripped from every pattern. The file is decoded as
    ``utf-8-sig`` so that a leading UTF-8 byte-order mark is dropped instead
    of being glued to the first line, where it would corrupt the first
    pattern or hide a leading ``#`` comment.

    A missing or unreadable file is not fatal: a warning is printed on the
    console and an empty list is returned.

    Args:
        file_path: Path of the focus file.

    Returns:
        The patterns in file order, or an empty list on any problem.
    """
    if not os.path.exists(file_path):
        console.print(f"[bold yellow]Warning:[/bold yellow] Focus file '{file_path}' not found.")
        return []

    try:
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            # Read everything up front so a decode error never yields a
            # partially loaded pattern list.
            raw_lines = f.readlines()
    except (OSError, UnicodeError) as e:
        console.print(f"[bold yellow]Warning:[/bold yellow] Could not read focus file '{file_path}': {e}")
        return []

    stripped = (raw.strip() for raw in raw_lines)
    return [line for line in stripped if line and not line.startswith("#")]
126+
127+
128+
# --- FUNZIONE CALLBACK PER VERSIONE ---
129+
def version_callback(value: bool):
    """Print the installed DeepBase version and exit when the flag is set.

    Does nothing when *value* is falsy. Otherwise the version of the
    ``deepbase`` distribution is looked up from package metadata and printed;
    if no metadata is found a placeholder message is printed instead. In both
    cases ``typer.Exit`` is raised afterwards to stop further processing.
    """
    if not value:
        return

    try:
        installed = get_package_version("deepbase")
    except PackageNotFoundError:
        console.print("DeepBase version: [yellow]unknown (editable/dev mode)[/yellow]")
    else:
        console.print(f"DeepBase version: [bold cyan]{installed}[/bold cyan]")

    raise typer.Exit()
137+
144138
# --- MAIN COMMAND ---
145139

146140
@app.command()
147141
def create(
148-
target: str = typer.Argument(..., help="The file or directory to scan."),
149-
output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file."),
150-
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output."),
151-
include_all: bool = typer.Option(False, "--all", "-a", help="Include full content of ALL files."),
152-
toon_mode: bool = typer.Option(False, "--toon", "-t", help="Use 'Skeleton' mode for non-focused files."),
142+
# Nota: Ho reso 'target' facoltativo (Optional) nel type hint solo per evitare errori
143+
# statici se non viene passato quando si usa --version,
144+
# ma Typer lo gestirà comunque come richiesto se non eseguiamo la callback.
145+
target: str = typer.Argument(
146+
None, # Default a None per permettere a --version di funzionare senza target
147+
help="The file or directory to scan."
148+
),
153149

154-
# 1. Focus Flag (Manual)
155-
focus: Optional[List[str]] = typer.Option(
156-
None, "--focus", "-f",
157-
help="Pattern to focus on. Can be used multiple times."
150+
# --- FLAG VERSIONE ---
151+
version: Optional[bool] = typer.Option(
152+
None, "--version", "-v",
153+
callback=version_callback,
154+
is_eager=True, # IMPORTANTE: Processa questo flag prima di controllare gli argomenti required
155+
help="Show the application version and exit."
158156
),
157+
158+
output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file."),
159159

160-
# 2. Focus File (File based)
161-
focus_file: Optional[str] = typer.Option(
162-
None, "--focus-file", "-ff",
163-
help="Path to a text file containing a list of focus patterns (one per line)."
164-
)
160+
# NOTA: Ho cambiato lo short flag di verbose da -v a -V per lasciare -v alla version
161+
verbose: bool = typer.Option(False, "--verbose", "-V", help="Show detailed output."),
162+
163+
include_all: bool = typer.Option(False, "--all", "-a", help="Include full content of ALL files."),
164+
toon_mode: bool = typer.Option(False, "--toon", "-t", help="Use 'Skeleton' mode for non-focused files."),
165+
focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on."),
166+
focus_file: Optional[str] = typer.Option(None, "--focus-file", "-ff", help="Path to focus patterns file.")
165167
):
166168
"""
167169
Analyzes a directory OR a single file.
168170
Hybrid workflow with Context Skeleton + Focused Content.
169171
"""
172+
173+
# Se target è None (succede solo se uno lancia deepbase senza argomenti e senza --version)
174+
if target is None:
175+
# Mostra help ed esci
176+
ctx = typer.get_current_context()
177+
console.print("[red]Error: Missing argument 'TARGET'.[/red]")
178+
console.print(ctx.get_help())
179+
raise typer.Exit(code=1)
180+
170181
if not os.path.exists(target):
171182
console.print(f"[bold red]Error:[/bold red] Target not found: '{target}'")
172183
raise typer.Exit(code=1)
173184

174-
# --- LOGICA DI MERGE DEI FOCUS PATTERNS ---
185+
# --- LOGICA FOCUS MERGE ---
175186
active_focus_patterns = []
176-
177-
# Aggiungi quelli da CLI
178-
if focus:
179-
active_focus_patterns.extend(focus)
180-
181-
# Aggiungi quelli da FILE
187+
if focus: active_focus_patterns.extend(focus)
182188
if focus_file:
183189
file_patterns = load_focus_patterns_from_file(focus_file)
184-
if file_patterns:
185-
active_focus_patterns.extend(file_patterns)
186-
console.print(f"[green]Loaded {len(file_patterns)} patterns from '{focus_file}'[/green]")
187-
188-
# Pulizia duplicati (opzionale ma utile)
190+
if file_patterns: active_focus_patterns.extend(file_patterns)
189191
active_focus_patterns = list(set(active_focus_patterns))
190192

191193
console.print(f"[bold green]Analyzing '{target}'...[/bold green]")
@@ -212,7 +214,7 @@ def fmt_separator(): return "-" * 40 + "\n\n"
212214
try:
213215
with open(output, "w", encoding="utf-8") as outfile:
214216

215-
# CASE 1: SINGLE FILE (Minimally affected)
217+
# CASE 1: SINGLE FILE
216218
if os.path.isfile(target):
217219
filename = os.path.basename(target)
218220
outfile.write(f"# File Structure Analysis: {filename}\n\n")
@@ -241,24 +243,17 @@ def fmt_separator(): return "-" * 40 + "\n\n"
241243
outfile.write("\n\n")
242244

243245
# 2. Content Generation
244-
# Check based on MERGED active_focus_patterns
245246
if include_all or toon_mode or active_focus_patterns:
246-
247247
section_title = "FILE CONTENTS (HYBRID)" if (toon_mode and active_focus_patterns) else \
248248
("SEMANTIC SKELETONS (TOON)" if toon_mode else "FILE CONTENTS")
249-
250249
outfile.write(fmt_header(section_title))
251-
252250
files = get_all_significant_files(target, config)
253251

254252
with Progress(console=console) as progress:
255253
task = progress.add_task("[cyan]Processing...", total=len(files))
256254
for fpath in files:
257255
rel_path = os.path.relpath(fpath, target).replace('\\', '/')
258-
259-
# DECISION MATRIX based on active_focus_patterns
260256
is_in_focus = active_focus_patterns and matches_focus(fpath, target, active_focus_patterns)
261-
262257
should_write_full = include_all or is_in_focus
263258
should_write_toon = toon_mode and not should_write_full
264259

@@ -267,18 +262,13 @@ def fmt_separator(): return "-" * 40 + "\n\n"
267262
continue
268263

269264
progress.update(task, advance=1, description=f"[cyan]{rel_path}[/cyan]")
270-
271265
marker = ""
272266
if is_in_focus and toon_mode: marker = " [FOCUSED - FULL CONTENT]"
273267

274268
outfile.write(fmt_file_start(rel_path + marker))
275269
content = read_file_content(fpath)
276-
277-
if should_write_full:
278-
outfile.write(content)
279-
elif should_write_toon:
280-
outfile.write(generate_toon_representation(fpath, content))
281-
270+
if should_write_full: outfile.write(content)
271+
elif should_write_toon: outfile.write(generate_toon_representation(fpath, content))
282272
outfile.write(fmt_file_end(rel_path))
283273
outfile.write(fmt_separator())
284274
else:
@@ -290,6 +280,5 @@ def fmt_separator(): return "-" * 40 + "\n\n"
290280
console.print(f"\n[bold red]Error:[/bold red] {e}")
291281
raise typer.Exit(code=1)
292282

293-
294283
if __name__ == "__main__":
295284
app()

src/deepbase/parsers.py

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5,48 +5,47 @@
55
from typing import Optional
66

77
def extract_markdown_structure(content: str) -> str:
    """Return the Markdown headings found in *content*, one per line.

    Only ATX headings (``#`` to ``######`` followed by whitespace) are
    reported, in their raw form so the heading level stays visible.
    Lines inside fenced code blocks (``` or ~~~) are ignored, because a
    ``# comment`` in a shell or Python snippet is not a document heading.

    Args:
        content: Full text of the Markdown document.

    Returns:
        The stripped heading lines joined by newlines, or a placeholder
        message when the document has no headings.
    """
    header_pattern = re.compile(r'^\s*(#{1,6})\s+(.*)')
    fence_pattern = re.compile(r'^\s*(`{3,}|~{3,})(.*)$')

    headings = []
    open_fence = None  # (fence char, fence length) while inside a code block
    for line in content.splitlines():
        fence = fence_pattern.match(line)
        if fence:
            marker, rest = fence.group(1), fence.group(2)
            if open_fence is None:
                open_fence = (marker[0], len(marker))
            elif (marker[0] == open_fence[0]
                  and len(marker) >= open_fence[1]
                  and not rest.strip()):
                # A closing fence uses the same character, is at least as
                # long as the opener and carries no info string.
                open_fence = None
            continue

        if open_fence is None and header_pattern.match(line):
            headings.append(line.strip())

    if not headings:
        return "(Nessuna struttura Markdown rilevata)"
    return "\n".join(headings)
20+
21+
def extract_latex_structure(content: str) -> str:
    r"""Return the structural skeleton of a LaTeX document.

    The following lines are kept, stripped of surrounding whitespace:

    * sectioning commands (``\part`` ... ``\subparagraph``), starred or not,
      with or without the optional short title
      (``\section[Short]{Long title}``);
    * ``\documentclass``, with or without an options list;
    * ``\begin{document}`` and ``\end{document}``.

    Other environments (``itemize``, ``equation``, ``figure`` ...) are not
    reported, so the skeleton stays limited to document structure.

    Args:
        content: Full text of the ``.tex`` file.

    Returns:
        The matching lines joined by newlines, or a placeholder message when
        nothing structural is found.
    """
    # Sectioning commands: optional '*', optional [short title], then {title}.
    tex_pattern = re.compile(
        r'^\s*\\(part|chapter|section|subsection|subsubsection|paragraph|subparagraph)'
        r'\*?\s*(?:\[[^\]]*\])?\s*\{(.+?)\}'
    )
    # Document-level context only: class declaration and the document body
    # delimiters, not every \begin/\end pair.
    context_pattern = re.compile(
        r'^\s*\\(?:documentclass\s*(?:\[[^\]]*\])?\s*\{.+?\}'
        r'|(?:begin|end)\s*\{document\})'
    )

    lines = [
        line.strip()
        for line in content.splitlines()
        if tex_pattern.match(line) or context_pattern.match(line)
    ]

    if not lines:
        return "(Nessuna struttura LaTeX rilevata)"
    return "\n".join(lines)
3438

3539
def get_document_structure(file_path: str, content: str) -> Optional[str]:
    """Pick a structure extractor based on the file extension.

    The extension of *file_path* is compared case-insensitively. Markdown
    files (``.md``, ``.markdown``, ``.mdown``, ``.mkd``) go to the Markdown
    extractor and ``.tex`` files to the LaTeX extractor.

    Returns:
        The extracted structure as text, or None for any other extension.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix in ('.md', '.markdown', '.mdown', '.mkd'):
        return extract_markdown_structure(content)

    if suffix == '.tex':
        return extract_latex_structure(content)

    return None

0 commit comments

Comments
 (0)