@@ -24,7 +24,7 @@ import * as fs from 'node:fs/promises'
2424import * as path from 'node:path'
2525import { JSDOM } from 'jsdom'
2626import { log } from '../config/logger'
27-
27+ import type { TracingContext } from '@mastra/core/ai-tracing' ;
2828// Enhanced HTML processing with JSDOM
2929const DANGEROUS_TAGS = new Set ( [
3030 'script' ,
@@ -408,7 +408,7 @@ export const webScraperTool = createTool({
408408 'Extracts structured data from web pages using JSDOM and Cheerio with enhanced security and error handling.' ,
409409 inputSchema : webScraperInputSchema ,
410410 outputSchema : webScraperOutputSchema ,
411- execute : async ( { context, writer, tracingContext } ) => {
411+ execute : async ( { context, writer, tracingContext } : { context : { url : string ; selector ?: string ; extractAttributes ?: string [ ] ; saveMarkdown ?: boolean ; markdownFileName ?: string } , writer ?: any , tracingContext ?: TracingContext } ) => {
412412 await writer ?. write ( { type : 'progress' , data : { message : `🌐 Starting web scrape for ${ context . url } ` } } ) ;
413413 toolCallCounters . set ( 'web-scraper' , ( toolCallCounters . get ( 'web-scraper' ) ?? 0 ) + 1 )
414414 const scrapeSpan = tracingContext ?. currentSpan ?. createChildSpan ( {
@@ -741,7 +741,7 @@ export const batchWebScraperTool = createTool({
741741 'Scrape multiple web pages concurrently with enhanced JSDOM processing and rate limiting.' ,
742742 inputSchema : batchWebScraperInputSchema ,
743743 outputSchema : batchWebScraperOutputSchema ,
744- execute : async ( { context, writer, tracingContext } ) => {
744+ execute : async ( { context, writer, tracingContext } : { context : { urls : string [ ] ; selector ?: string ; maxConcurrent ?: number ; saveResults ?: boolean ; baseFileName ?: string } , writer ?: any , tracingContext ?: TracingContext } ) => {
745745 await writer ?. write ( { type : 'progress' , data : { message : `🌐 Batch scraping ${ context . urls . length } URLs` } } ) ;
746746 toolCallCounters . set ( 'batch-web-scraper' , ( toolCallCounters . get ( 'batch-web-scraper' ) ?? 0 ) + 1 )
747747 const batchSpan = tracingContext ?. currentSpan ?. createChildSpan ( {
@@ -1000,7 +1000,7 @@ export const siteMapExtractorTool = createTool({
10001000 'Extract a comprehensive site map by crawling internal links with enhanced JSDOM processing and rate limiting.' ,
10011001 inputSchema : siteMapExtractorInputSchema ,
10021002 outputSchema : siteMapExtractorOutputSchema ,
1003- execute : async ( { context, writer, tracingContext } ) => {
1003+ execute : async ( { context, writer, tracingContext } : { context : { url : string ; maxDepth ?: number ; maxPages ?: number ; includeExternal ?: boolean ; saveMap ?: boolean } , writer ?: any , tracingContext ?: TracingContext } ) => {
10041004 await writer ?. write ( { type : 'progress' , data : { message : `🗺️ Starting site map extraction for ${ context . url } ` } } ) ;
10051005 toolCallCounters . set ( 'site-map-extractor' , ( toolCallCounters . get ( 'site-map-extractor' ) ?? 0 ) + 1 )
10061006 const mapSpan = tracingContext ?. currentSpan ?. createChildSpan ( {
@@ -1259,7 +1259,7 @@ export const linkExtractorTool = createTool({
12591259 'Extract and analyze all links from a web page with enhanced JSDOM processing and filtering.' ,
12601260 inputSchema : linkExtractorInputSchema ,
12611261 outputSchema : linkExtractorOutputSchema ,
1262- execute : async ( { context, writer, tracingContext } ) => {
1262+ execute : async ( { context, writer, tracingContext } : { context : { url : string ; linkTypes ?: ( 'internal' | 'external' | 'all' ) [ ] ; includeAnchors ?: boolean ; filterPatterns ?: string [ ] } , writer ?: any , tracingContext ?: TracingContext } ) => {
12631263 await writer ?. write ( { type : 'progress' , data : { message : `🔗 Extracting links from ${ context . url } ` } } ) ;
12641264 toolCallCounters . set ( 'link-extractor' , ( toolCallCounters . get ( 'link-extractor' ) ?? 0 ) + 1 )
12651265 const linkSpan = tracingContext ?. currentSpan ?. createChildSpan ( {
@@ -1489,7 +1489,7 @@ export const htmlToMarkdownTool = createTool({
14891489 'Convert HTML content to well-formatted markdown with enhanced JSDOM parsing and security.' ,
14901490 inputSchema : htmlToMarkdownInputSchema ,
14911491 outputSchema : htmlToMarkdownOutputSchema ,
1492- execute : async ( { context, writer, tracingContext } ) => {
1492+ execute : async ( { context, writer, tracingContext } : { context : { html : string ; saveToFile ?: boolean ; fileName ?: string } , writer ?: any , tracingContext ?: TracingContext } ) => {
14931493 await writer ?. write ( { type : 'progress' , data : { message : '🔄 Converting HTML to markdown...' } } ) ;
14941494 toolCallCounters . set ( 'html-to-markdown' , ( toolCallCounters . get ( 'html-to-markdown' ) ?? 0 ) + 1 )
14951495 const convertSpan = tracingContext ?. currentSpan ?. createChildSpan ( {
@@ -1621,7 +1621,7 @@ export const listScrapedContentTool = createTool({
16211621 'List all scraped content files stored in the data directory with enhanced security.' ,
16221622 inputSchema : listScrapedContentInputSchema ,
16231623 outputSchema : listScrapedContentOutputSchema ,
1624- execute : async ( { context, writer, tracingContext } ) => {
1624+ execute : async ( { context, writer, tracingContext } : { context : { pattern ?: string ; includeMetadata ?: boolean } , writer ?: any , tracingContext ?: TracingContext } ) => {
16251625 await writer ?. write ( { type : 'progress' , data : { message : '📂 Listing scraped content files...' } } ) ;
16261626 toolCallCounters . set ( 'list-scraped-content' , ( toolCallCounters . get ( 'list-scraped-content' ) ?? 0 ) + 1 )
16271627 const listSpan = tracingContext ?. currentSpan ?. createChildSpan ( {
@@ -1812,7 +1812,7 @@ export const contentCleanerTool = createTool({
18121812 'Clean HTML content by removing unwanted elements with enhanced JSDOM processing and security.' ,
18131813 inputSchema : contentCleanerInputSchema ,
18141814 outputSchema : contentCleanerOutputSchema ,
1815- execute : async ( { context, writer, tracingContext } ) => {
1815+ execute : async ( { context, writer, tracingContext } : { context : { html : string ; removeScripts ?: boolean ; removeStyles ?: boolean ; removeComments ?: boolean ; preserveStructure ?: boolean } , writer ?: any , tracingContext ?: TracingContext } ) => {
18161816 await writer ?. write ( { type : 'progress' , data : { message : '🧹 Starting content cleaning...' } } ) ;
18171817 toolCallCounters . set ( 'content-cleaner' , ( toolCallCounters . get ( 'content-cleaner' ) ?? 0 ) + 1 )
18181818 const cleanSpan = tracingContext ?. currentSpan ?. createChildSpan ( {
0 commit comments