Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crates/fetchkit/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub enum FetchError {
MissingUrl,

/// URL has invalid scheme
#[error("Invalid URL: must start with http:// or https://")]
#[error("Invalid URL: must be http://, https://, or a bare domain URL")]
InvalidUrlScheme,

/// Invalid HTTP method
Expand Down Expand Up @@ -110,7 +110,7 @@ mod tests {
);
assert_eq!(
FetchError::InvalidUrlScheme.to_string(),
"Invalid URL: must start with http:// or https://"
"Invalid URL: must be http://, https://, or a bare domain URL"
);
assert_eq!(
FetchError::InvalidMethod.to_string(),
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/arxiv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ impl Fetcher for ArXivFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let paper_id = Self::parse_url(&url)
Expand Down
6 changes: 4 additions & 2 deletions crates/fetchkit/src/fetchers/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ impl Fetcher for DefaultFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
if request.url.is_empty() {
return Err(FetchError::MissingUrl);
}
Expand All @@ -244,7 +245,7 @@ impl Fetcher for DefaultFetcher {
"*/*"
};

let headers = build_headers(options, accept, request);
let headers = build_headers(options, accept, &request);
let parsed_url = url::Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let reqwest_method = match method {
Expand Down Expand Up @@ -416,6 +417,7 @@ impl Fetcher for DefaultFetcher {
Some(path) => path.clone(),
None => return self.fetch(request, options).await,
};
let request = request.normalized_for_fetch()?;

if request.url.is_empty() {
return Err(FetchError::MissingUrl);
Expand All @@ -424,7 +426,7 @@ impl Fetcher for DefaultFetcher {
let method = request.effective_method();
let max_body_size = options.max_body_size.unwrap_or(DEFAULT_MAX_BODY_SIZE);

let headers = build_headers(options, "*/*", request);
let headers = build_headers(options, "*/*", &request);
let parsed_url = url::Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let reqwest_method = match method {
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/docs_site.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ impl Fetcher for DocsSiteFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;
let user_agent = options.user_agent.as_deref().unwrap_or(DEFAULT_USER_AGENT);
let ua_header = HeaderValue::from_str(user_agent)
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/github_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ impl Fetcher for GitHubCodeFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let parsed = Self::parse_url(&url)
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/github_issue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ impl Fetcher for GitHubIssueFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let (owner, repo, number) = Self::parse_url(&url).ok_or_else(|| {
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/github_repo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ impl Fetcher for GitHubRepoFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let (owner, repo) = Self::parse_github_url(&url).ok_or_else(|| {
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/hackernews.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ impl Fetcher for HackerNewsFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let item_id = Self::parse_url(&url)
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/package_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ impl Fetcher for PackageRegistryFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let registry = Self::parse_url(&url).ok_or_else(|| {
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/rss_feed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ impl Fetcher for RSSFeedFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let user_agent = options.user_agent.as_deref().unwrap_or(DEFAULT_USER_AGENT);
let mut headers = HeaderMap::new();
let ua_header = HeaderValue::from_str(user_agent)
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/stackoverflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ impl Fetcher for StackOverflowFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let (site, question_id) = Self::parse_url(&url).ok_or_else(|| {
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/twitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ impl Fetcher for TwitterFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let (_username, tweet_id) = Self::parse_tweet_url(&url).ok_or_else(|| {
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/wikipedia.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ impl Fetcher for WikipediaFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let (lang, title) = Self::parse_url(&url)
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/src/fetchers/youtube.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ impl Fetcher for YouTubeFetcher {
request: &FetchRequest,
options: &FetchOptions,
) -> Result<FetchResponse, FetchError> {
let request = request.normalized_for_fetch()?;
let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;

let video_id = Self::parse_video_id(&url)
Expand Down
14 changes: 7 additions & 7 deletions crates/fetchkit/src/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1020,25 +1020,25 @@ fn build_help(tool: &Tool) -> String {
let errors = if is_ukrainian(tool.locale()) {
if tool.enable_save_to_file {
"- `MissingUrl` — параметр `url` обов’язковий\n\
- `InvalidUrlScheme` — схема URL має бути `http` або `https`\n\
- `InvalidUrlScheme` — URL має бути `http://`, `https://` або доменним URL\n\
- `BlockedUrl` — URL заблокований політикою SSRF або allow/block правилами\n\
- `FirstByteTimeout` — сервер не відповів протягом 1 секунди\n\
- `SaverNotAvailable` — `save_to_file` потребує адаптер `FileSaver`\n"
} else {
"- `MissingUrl` — параметр `url` обов’язковий\n\
- `InvalidUrlScheme` — схема URL має бути `http` або `https`\n\
- `InvalidUrlScheme` — URL має бути `http://`, `https://` або доменним URL\n\
- `BlockedUrl` — URL заблокований політикою SSRF або allow/block правилами\n\
- `FirstByteTimeout` — сервер не відповів протягом 1 секунди\n"
}
} else if tool.enable_save_to_file {
"- `MissingUrl` — `url` is required\n\
- `InvalidUrlScheme` — URL scheme must be `http` or `https`\n\
- `InvalidUrlScheme` — URL must be `http://`, `https://`, or a bare domain URL\n\
- `BlockedUrl` — URL blocked by SSRF policy or allow/block rules\n\
- `FirstByteTimeout` — server did not respond within 1 second\n\
- `SaverNotAvailable` — `save_to_file` requires a `FileSaver` adapter\n"
} else {
"- `MissingUrl` — `url` is required\n\
- `InvalidUrlScheme` — URL scheme must be `http` or `https`\n\
- `InvalidUrlScheme` — URL must be `http://`, `https://`, or a bare domain URL\n\
- `BlockedUrl` — URL blocked by SSRF policy or allow/block rules\n\
- `FirstByteTimeout` — server did not respond within 1 second\n"
};
Expand Down Expand Up @@ -1139,7 +1139,7 @@ fn unknown_parameter_error(locale: &str, key: &str) -> ToolError {
fn user_text(locale: &str, key: &str) -> &'static str {
match (is_ukrainian(locale), key) {
(true, "missing_url") => "Параметр url обов’язковий.",
(true, "invalid_scheme") => "Схема URL має бути http або https.",
(true, "invalid_scheme") => "URL має бути http://, https:// або доменним URL.",
(true, "invalid_method") => "Метод має бути GET або HEAD.",
(true, "blocked_url") => "URL заблокований політикою безпеки.",
(true, "timeout") => {
Expand All @@ -1153,7 +1153,7 @@ fn user_text(locale: &str, key: &str) -> &'static str {
(true, "save_error") => "Не вдалося зберегти файл. Перевірте шлях призначення.",
(true, "saver_missing") => "save_to_file потребує адаптер FileSaver.",
(false, "missing_url") => "url is required.",
(false, "invalid_scheme") => "URL scheme must be http or https.",
(false, "invalid_scheme") => "URL must be http://, https://, or a bare domain URL.",
(false, "invalid_method") => "Method must be GET or HEAD.",
(false, "blocked_url") => "URL is blocked by security policy.",
(false, "timeout") => {
Expand Down Expand Up @@ -1357,7 +1357,7 @@ mod tests {
assert!(err
.unwrap_err()
.to_string()
.contains("URL scheme must be http or https"));
.contains("URL must be http://, https://, or a bare domain URL"));
}

#[test]
Expand Down
7 changes: 7 additions & 0 deletions crates/fetchkit/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ impl FetchRequest {
Ok(())
}

/// Return a normalized copy for APIs that accept `&FetchRequest`.
pub(crate) fn normalized_for_fetch(&self) -> Result<Self, FetchError> {
let mut request = self.clone();
request.normalize_url_for_fetch()?;
Ok(request)
}

/// Set the HTTP method
pub fn method(mut self, method: HttpMethod) -> Self {
self.method = Some(method);
Expand Down
2 changes: 1 addition & 1 deletion crates/fetchkit/tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ async fn test_invalid_url_scheme() {
assert!(result
.unwrap_err()
.to_string()
.contains("http:// or https://"));
.contains("http://, https://, or a bare domain URL"));
}

#[tokio::test]
Expand Down
87 changes: 85 additions & 2 deletions crates/fetchkit/tests/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
use async_trait::async_trait;
use bytes::Bytes;
use fetchkit::{
DnsPolicy, FetchError, FetchOptions, FetchRequest, Fetcher, HttpMethod, HttpTransport,
TransportError, TransportMethod, TransportRequest, TransportResponse,
ArXivFetcher, DefaultFetcher, DnsPolicy, DocsSiteFetcher, FetchError, FetchOptions,
FetchRequest, Fetcher, GitHubCodeFetcher, GitHubIssueFetcher, GitHubRepoFetcher,
HackerNewsFetcher, HttpMethod, HttpTransport, PackageRegistryFetcher, RSSFeedFetcher,
StackOverflowFetcher, TransportError, TransportMethod, TransportRequest, TransportResponse,
TwitterFetcher, WikipediaFetcher, YouTubeFetcher,
};
use serde_json::json;
use std::sync::{Arc, Mutex};
Expand Down Expand Up @@ -122,6 +125,86 @@ async fn default_fetcher_get_uses_injected_transport() {
assert!(calls[0].0.contains("example.test/page"));
}

#[tokio::test]
async fn direct_default_fetcher_defaults_bare_host_urls_to_https() {
let mock = Arc::new(MockTransport::new().route(
"example.test/direct",
200,
"text/plain",
b"direct default fetcher",
));
let options = options_with(mock.clone());

let fetcher = DefaultFetcher::new();
let request = FetchRequest::new("example.test/direct");
let response = fetcher.fetch(&request, &options).await.unwrap();

assert_eq!(response.status_code, 200);
assert_eq!(response.url, "https://example.test/direct");

let calls = mock.calls();
assert_eq!(calls.len(), 1);
assert_eq!(calls[0].0, "https://example.test/direct");
}

#[tokio::test]
async fn direct_specialized_fetchers_normalize_bare_host_urls_before_parsing() {
let cases: Vec<(Box<dyn Fetcher>, &str)> = vec![
(
Box::new(GitHubCodeFetcher::new()),
"github.com/owner/repo/blob/main/README.md",
),
(
Box::new(GitHubIssueFetcher::new()),
"github.com/owner/repo/issues/42",
),
(Box::new(GitHubRepoFetcher::new()), "github.com/owner/repo"),
(
Box::new(TwitterFetcher::new()),
"x.com/user/status/123456789",
),
(
Box::new(StackOverflowFetcher::new()),
"stackoverflow.com/questions/12345/title",
),
(
Box::new(PackageRegistryFetcher::new()),
"pypi.org/project/requests",
),
(
Box::new(WikipediaFetcher::new()),
"en.wikipedia.org/wiki/Rust",
),
(
Box::new(YouTubeFetcher::new()),
"youtube.com/watch?v=abc123",
),
(Box::new(ArXivFetcher::new()), "arxiv.org/abs/2301.07041"),
(
Box::new(HackerNewsFetcher::new()),
"news.ycombinator.com/item?id=123",
),
(Box::new(RSSFeedFetcher::new()), "example.com/feed"),
(
Box::new(DocsSiteFetcher::new()),
"docs.rs/tokio/latest/tokio",
),
];

for (fetcher, bare_url) in cases {
let mock = Arc::new(MockTransport::new());
let options = options_with(mock);
let request = FetchRequest::new(bare_url);
let result = fetcher.fetch(&request, &options).await;

assert!(
!matches!(result, Err(FetchError::InvalidUrlScheme)),
"{} rejected bare URL at scheme parsing",
fetcher.name()
);
}
}

#[tokio::test]
async fn default_fetcher_head_uses_injected_transport() {
let mock = Arc::new(MockTransport::new().route("example.test/head", 200, "text/html", b""));
Expand Down
5 changes: 5 additions & 0 deletions specs/fetchers.md
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,11 @@ pub async fn fetch_with_options(req: FetchRequest, options: FetchOptions)
-> Result<FetchResponse, FetchError>;
```

Built-in fetchers normalize `FetchRequest::url` before parsing, so direct calls to
`Fetcher::fetch` accept the same URL forms as the registry and tool surfaces:
explicit `http://`, explicit `https://`, or bare domain URLs normalized to
`https://`.

## Testing

### Unit Tests
Expand Down
7 changes: 4 additions & 3 deletions specs/initial.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@ Provide a builder to configure tool options, including:
### Request Validation

- `url` is required.
- Only `http://` and `https://` URLs allowed.
- Invalid URL: `Invalid URL: must start with http:// or https://`.
- Explicit `http://` and `https://` URLs are allowed. Bare domain URLs are
accepted and normalized to `https://` before validation and fetch.
- Invalid URL: `Invalid URL: must be http://, https://, or a bare domain URL`.
- Invalid method: `Invalid method: must be GET or HEAD`.
- Allow/block list prefixes (if configured) are applied before fetch.
- If allow list is non-empty, URL must match at least one allow prefix.
Expand Down Expand Up @@ -321,7 +322,7 @@ Content is HTML if:
### Error Handling

- Missing url -> tool error string "Missing required parameter: url".
- Invalid URL -> tool error string "Invalid URL: must start with http:// or https://".
- Invalid URL -> tool error string "Invalid URL: must be http://, https://, or a bare domain URL".
- Invalid method -> tool error string "Invalid method: must be GET or HEAD".
- Blocked URL (prefix or DNS policy) -> tool error string "Blocked URL: not allowed by policy".
- First-byte timeout -> "Request timed out: server did not respond within 1 second".
Expand Down
Loading