@@ -8,29 +8,30 @@ import (
88 "net/http"
99 "net/url"
1010 "reflect"
11+ "strings"
1112 "time"
1213
14+ htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2"
15+ "github.com/k3a/html2text"
16+
1317 "github.com/docker/cagent/pkg/tools"
1418)
1519
1620type FetchTool struct {
17- timeout time.Duration
18- client * http.Client
21+ handler * fetchHandler
1922}
2023
2124var _ tools.ToolSet = (* FetchTool )(nil )
2225
// fetchHandler implements the "fetch" tool call. It is created by
// NewFetchTool and configured through FetchToolOption values.
type fetchHandler struct {
	// timeout is the HTTP client timeout applied when a tool call does not
	// supply its own positive "timeout" argument.
	timeout time.Duration
}
2629
2730func (h * fetchHandler ) CallTool (ctx context.Context , toolCall tools.ToolCall ) (* tools.ToolCallResult , error ) {
2831 var params struct {
29- URLs []string `json:"urls"`
30- Headers map [string ]string `json:"headers,omitempty"`
31- Method string `json:"method,omitempty"`
32- Timeout int `json:"timeout,omitempty"`
33- UserAgent string `json:"userAgent,omitempty"`
32+ URLs []string `json:"urls"`
33+ Timeout int `json:"timeout,omitempty"`
34+ Format string `json:"format,omitempty"`
3435 }
3536
3637 if err := json .Unmarshal ([]byte (toolCall .Function .Arguments ), & params ); err != nil {
@@ -41,24 +42,18 @@ func (h *fetchHandler) CallTool(ctx context.Context, toolCall tools.ToolCall) (*
4142 return nil , fmt .Errorf ("at least one URL is required" )
4243 }
4344
44- // Set defaults
45- if params .Method == "" {
46- params .Method = "GET"
47- }
48- if params .UserAgent == "" {
49- params .UserAgent = "cagent-fetch/1.0"
50- }
51-
5245 // Set timeout if specified
53- client := h .tool .client
46+ client := & http.Client {
47+ Timeout : h .timeout ,
48+ }
5449 if params .Timeout > 0 {
5550 timeout := time .Duration (params .Timeout ) * time .Second
5651 client = & http.Client {Timeout : timeout }
5752 }
5853
5954 var results []FetchResult
6055 for _ , urlStr := range params .URLs {
61- result := h .fetchURL (ctx , client , urlStr , params .Method , params . Headers , params . UserAgent )
56+ result := h .fetchURL (ctx , client , urlStr , params .Format )
6257 results = append (results , result )
6358 }
6459
@@ -93,7 +88,7 @@ type FetchResult struct {
9388 Error string `json:"error,omitempty"`
9489}
9590
96- func (h * fetchHandler ) fetchURL (ctx context.Context , client * http.Client , urlStr , method string , headers map [ string ] string , userAgent string ) FetchResult {
91+ func (h * fetchHandler ) fetchURL (ctx context.Context , client * http.Client , urlStr , format string ) FetchResult {
9792 result := FetchResult {URL : urlStr }
9893
9994 // Validate URL
@@ -116,18 +111,24 @@ func (h *fetchHandler) fetchURL(ctx context.Context, client *http.Client, urlStr
116111 }
117112
118113 // Create request
119- req , err := http .NewRequestWithContext (ctx , method , urlStr , http .NoBody )
114+ req , err := http .NewRequestWithContext (ctx , http . MethodGet , urlStr , http .NoBody )
120115 if err != nil {
121116 result .Error = fmt .Sprintf ("failed to create request: %v" , err )
122117 return result
123118 }
124119
125120 // Set User-Agent
126- req .Header .Set ("User-Agent" , userAgent )
127-
128- // Set custom headers
129- for key , value := range headers {
130- req .Header .Set (key , value )
121+ req .Header .Set ("User-Agent" , "cagent/1.0" )
122+
123+ switch format {
124+ case "markdown" :
125+ req .Header .Set ("Accept" , "text/markdown;q=1.0, text/plain;q=0.9, text/html;q=0.7, */*;q=0.1" )
126+ case "html" :
127+ req .Header .Set ("Accept" , "text/html;q=1.0, text/plain;q=0.8, */*;q=0.1" )
128+ case "text" :
129+ req .Header .Set ("Accept" , "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1" )
130+ default :
131+ req .Header .Set ("Accept" , "text/plain;q=1.0, */*;q=0.1" )
131132 }
132133
133134 // Execute request
@@ -143,66 +144,87 @@ func (h *fetchHandler) fetchURL(ctx context.Context, client *http.Client, urlStr
143144 result .ContentType = resp .Header .Get ("Content-Type" )
144145
145146 // Read response body
146- body , err := io .ReadAll (resp .Body )
147+ maxSize := int64 (1 << 20 ) // 1MB
148+ body , err := io .ReadAll (io .LimitReader (resp .Body , maxSize ))
147149 if err != nil {
148150 result .Error = fmt .Sprintf ("failed to read response body: %v" , err )
149151 return result
150152 }
151153
152- result .ContentLength = len (body )
153- result .Body = string (body )
154+ contentType := resp .Header .Get ("Content-Type" )
155+
156+ switch format {
157+ case "markdown" :
158+ if strings .Contains (contentType , "text/html" ) {
159+ result .Body = htmlToMarkdown (string (body ))
160+ } else {
161+ result .Body = string (body )
162+ }
163+ case "html" :
164+ result .Body = string (body )
165+ case "text" :
166+ if strings .Contains (contentType , "text/html" ) {
167+ result .Body = htmlToText (string (body ))
168+ } else {
169+ result .Body = string (body )
170+ }
171+ default :
172+ result .Body = string (body )
173+ }
174+
175+ result .ContentLength = len (result .Body )
154176
155177 return result
156178}
157179
180+ func htmlToMarkdown (html string ) string {
181+ markdown , err := htmltomarkdown .ConvertString (html )
182+ if err != nil {
183+ return html
184+ }
185+ return markdown
186+ }
187+
188+ func htmlToText (html string ) string {
189+ return html2text .HTML2Text (html )
190+ }
191+
158192func NewFetchTool (options ... FetchToolOption ) * FetchTool {
159193 tool := & FetchTool {
160- timeout : 30 * time .Second ,
194+ handler : & fetchHandler {
195+ timeout : 30 * time .Second ,
196+ },
161197 }
162198
163- // Apply options
164199 for _ , opt := range options {
165200 opt (tool )
166201 }
167202
168- // Create HTTP client with timeout
169- tool .client = & http.Client {
170- Timeout : tool .timeout ,
171- }
172-
173203 return tool
174204}
175205
176206type FetchToolOption func (* FetchTool )
177207
178208func WithTimeout (timeout time.Duration ) FetchToolOption {
179209 return func (t * FetchTool ) {
180- t .timeout = timeout
210+ t .handler . timeout = timeout
181211 }
182212}
183213
184214func (t * FetchTool ) Instructions () string {
185- return `## Fetch Tool Instructions
215+ return `## "fetch" tool instructions
186216
187217This tool allows you to fetch content from HTTP and HTTPS URLs.
188218
189- ### Features
190- - Support for multiple URLs in a single call
191- - Customizable HTTP headers
192- - Configurable request method (GET, POST, etc.)
193- - Timeout control
194- - User-Agent customization
219+ FEATURES
195220
196- ### Security
197- - Only HTTP and HTTPS protocols are supported
198- - No local file access or other protocols
199- - Request timeouts prevent hanging requests
221+ - Support for multiple URLs in a single call
222+ - Returns response body and metadata (status code, content type, length)
223+ - Specify the output format (text, markdown, html)
200224
201- ### Usage Tips
225+ USAGE TIPS
202226- Use single URLs for simple content fetching
203- - Use multiple URLs for batch operations
204- - Set appropriate headers for APIs that require authentication
205- - Consider timeout values for slow or large responses`
227+ - Use multiple URLs for batch operations`
206228}
207229
208230func (t * FetchTool ) Tools (context.Context ) ([]tools.Tool , error ) {
@@ -226,35 +248,23 @@ func (t *FetchTool) Tools(context.Context) ([]tools.Tool, error) {
226248 "description" : "Array of URLs to fetch" ,
227249 "minItems" : 1 ,
228250 },
229- "method " : map [string ]any {
251+ "format " : map [string ]any {
230252 "type" : "string" ,
231- "description" : "HTTP method to use (default: GET)" ,
232- "default" : "GET" ,
233- "enum" : []string {"GET" , "POST" , "PUT" , "DELETE" , "HEAD" , "OPTIONS" , "PATCH" },
234- },
235- "headers" : map [string ]any {
236- "type" : "object" ,
237- "additionalProperties" : map [string ]any {
238- "type" : "string" ,
239- },
240- "description" : "Optional HTTP headers to send with the request" ,
253+ "description" : "The format to return the content in (text, markdown, or html)" ,
254+ "enum" : []string {"text" , "markdown" , "html" },
241255 },
242256 "timeout" : map [string ]any {
243257 "type" : "integer" ,
244258 "description" : "Request timeout in seconds (default: 30)" ,
245259 "minimum" : 1 ,
246260 "maximum" : 300 ,
247261 },
248- "userAgent" : map [string ]any {
249- "type" : "string" ,
250- "description" : "Custom User-Agent header (default: cagent-fetch/1.0)" ,
251- },
252262 },
253- Required : []string {"urls" },
263+ Required : []string {"urls" , "format" },
254264 },
255265 OutputSchema : tools .ToOutputSchemaSchema (reflect .TypeFor [string ]()),
256266 },
257- Handler : ( & fetchHandler { tool : t }) .CallTool ,
267+ Handler : t . handler .CallTool ,
258268 },
259269 }, nil
260270}
0 commit comments