Skip to content

Commit d5d22bc

Browse files
authored
Support multiple config file formats and refactor config module (#2104)
Resolves #1930 by adding support for loading configuration from pyproject.toml and Cargo.toml. Support for package.json has also been added, as it is a common way to configure tools in the JavaScript ecosystem. During implementation, the config module was split into logical submodules to keep the core high-level structs clean. To handle the different config formats, a ConfigLoader trait was introduced. After a few iterations, a simple imperative approach -- where the entire file is strictly deserialized and an option returned -- was ruled out: because the config uses strict validation (denying unknown fields), a single typo would cause parsing to fail silently rather than surfacing the error to the user. To fix this, the trait was split into is_match and load. The is_match method does a lightweight check to see if the lychee section exists in the file. If it does, load enforces strict schema deserialization and properly bubbles up validation errors. Heavyweight dependencies like the cargo_toml crate (which builds massive ASTs for entire manifests) were intentionally avoided. Instead, minimal custom envelope structs with serde extract only the lychee blocks, keeping binary size small and compilation fast. Cargo package metadata takes precedence over workspace metadata if both are present. Tests cover each loader to verify that valid blocks are mapped correctly, missing keys are ignored, precedence works as intended, and malformed schemas produce the expected errors.
1 parent 1a903e4 commit d5d22bc

19 files changed

Lines changed: 1107 additions & 305 deletions

File tree

lychee-bin/src/client.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::options::{Config, HeaderMapExt};
1+
use crate::config::{Config, HeaderMapExt};
22
use crate::parse::parse_remaps;
33
use anyhow::{Context, Result};
44
use http::{HeaderMap, StatusCode};

lychee-bin/src/commands/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::io::{self, Write};
1212
use std::path::PathBuf;
1313

1414
use crate::cache::Cache;
15-
use crate::options::Config;
15+
use crate::config::Config;
1616
use lychee_lib::RequestError;
1717
use lychee_lib::{Client, Request};
1818

lychee-bin/src/config/header.rs

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
//! Parse and handle custom HTTP headers.
2+
//!
3+
//! Provides utilities for taking user-provided HTTP header strings
4+
//! (e.g. from the CLI or config files) and converting them into strongly
5+
//! typed `reqwest` headers.
6+
7+
use anyhow::{Error, Result, anyhow};
8+
use clap::builder::TypedValueParser;
9+
use http::{
10+
HeaderMap,
11+
header::{HeaderName, HeaderValue},
12+
};
13+
use std::{collections::HashMap, str::FromStr};
14+
15+
/// Parse a single header into a [`HeaderName`] and [`HeaderValue`]
16+
///
17+
/// Headers are expected to be in format `Header-Name: Header-Value`.
18+
/// The header name and value are trimmed of whitespace.
19+
///
20+
/// If the header contains multiple colons, the part after the first colon is
21+
/// considered the value.
22+
///
23+
/// # Errors
24+
///
25+
/// This fails if the header does not contain exactly one `:` character or
26+
/// if the header name contains non-ASCII characters.
27+
pub(crate) fn parse_single_header(header: &str) -> Result<(HeaderName, HeaderValue)> {
28+
let parts: Vec<&str> = header.splitn(2, ':').collect();
29+
match parts.as_slice() {
30+
[name, value] => {
31+
let name = name.trim();
32+
let name = HeaderName::from_str(name)
33+
.map_err(|e| anyhow!("Unable to convert header name '{name}': {e}"))?;
34+
let value = HeaderValue::from_str(value.trim())
35+
.map_err(|e| anyhow!("Unable to read value of header with name '{name}': {e}"))?;
36+
Ok((name, value))
37+
}
38+
_ => Err(anyhow!(
39+
"Invalid header format. Expected colon-separated string in the format 'HeaderName: HeaderValue'"
40+
)),
41+
}
42+
}
43+
44+
/// Parses a single HTTP header into a tuple of (String, String)
45+
///
46+
/// This does NOT merge multiple headers into one.
47+
#[derive(Clone, Debug)]
48+
pub(crate) struct HeaderParser;
49+
50+
impl TypedValueParser for HeaderParser {
51+
type Value = (String, String);
52+
53+
fn parse_ref(
54+
&self,
55+
_cmd: &clap::Command,
56+
_arg: Option<&clap::Arg>,
57+
value: &std::ffi::OsStr,
58+
) -> Result<Self::Value, clap::Error> {
59+
let header_str = value.to_str().ok_or_else(|| {
60+
clap::Error::raw(
61+
clap::error::ErrorKind::InvalidValue,
62+
"Header value contains invalid UTF-8",
63+
)
64+
})?;
65+
66+
match parse_single_header(header_str) {
67+
Ok((name, value)) => {
68+
let Ok(value) = value.to_str() else {
69+
return Err(clap::Error::raw(
70+
clap::error::ErrorKind::InvalidValue,
71+
"Header value contains invalid UTF-8",
72+
));
73+
};
74+
75+
Ok((name.to_string(), value.to_string()))
76+
}
77+
Err(e) => Err(clap::Error::raw(
78+
clap::error::ErrorKind::InvalidValue,
79+
e.to_string(),
80+
)),
81+
}
82+
}
83+
}
84+
85+
/// Declares [`HeaderParser`] as its own parser type for
/// `clap::builder::ValueParserFactory`.
impl clap::builder::ValueParserFactory for HeaderParser {
    type Parser = Self;

    /// Return a fresh parser instance (the type carries no state).
    fn value_parser() -> Self::Parser {
        Self
    }
}
91+
92+
/// Extension trait for converting a map of header pairs to a `HeaderMap`
93+
pub(crate) trait HeaderMapExt {
94+
/// Convert a collection of header key-value pairs to a `HeaderMap`
95+
///
96+
/// # Errors
97+
///
98+
/// This fails if any header name or value cannot be parsed into a valid
99+
/// `HeaderName` or `HeaderValue` respectively.
100+
fn from_header_pairs(headers: &HashMap<String, String>) -> Result<HeaderMap, Error>;
101+
}
102+
103+
impl HeaderMapExt for HeaderMap {
104+
fn from_header_pairs(headers: &HashMap<String, String>) -> Result<HeaderMap, Error> {
105+
let mut header_map = HeaderMap::new();
106+
for (name, value) in headers {
107+
let header_name = HeaderName::from_bytes(name.as_bytes())
108+
.map_err(|e| anyhow!("Invalid header name '{name}': {e}"))?;
109+
let header_value = HeaderValue::from_str(value)
110+
.map_err(|e| anyhow!("Invalid header value '{value}': {e}"))?;
111+
header_map.insert(header_name, header_value);
112+
}
113+
Ok(header_map)
114+
}
115+
}
116+
117+
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the expected `(name, value)` pair from static strings.
    fn header(name: &'static str, value: &'static str) -> (HeaderName, HeaderValue) {
        (
            HeaderName::from_static(name),
            HeaderValue::from_static(value),
        )
    }

    #[test]
    fn test_parse_custom_headers() {
        let parsed = parse_single_header("accept:text/html").unwrap();
        assert_eq!(parsed, header("accept", "text/html"));
    }

    #[test]
    fn test_parse_custom_header_multiple_colons() {
        // Only the first colon separates name from value.
        let parsed = parse_single_header("key:x-test:check=this").unwrap();
        assert_eq!(parsed, header("key", "x-test:check=this"));
    }

    #[test]
    fn test_parse_custom_headers_with_equals() {
        let parsed = parse_single_header("key:x-test=check=this").unwrap();
        assert_eq!(parsed, header("key", "x-test=check=this"));
    }

    #[test]
    /// We should not reveal potentially sensitive data contained in the headers.
    /// See: [#1297](https://github.com/lycheeverse/lychee/issues/1297)
    fn test_does_not_echo_sensitive_data() {
        let failing_inputs = [
            ("My-Header💣: secret", "Should not allow unicode as key"),
            ("secret", "Should fail when no `:` given"),
        ];

        for (input, reason) in failing_inputs {
            let error = parse_single_header(input).expect_err(reason);
            assert!(!error.to_string().contains("secret"));
        }
    }
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//! `Cargo.toml` configuration loader.
2+
//!
3+
//! This module allows configuring lychee via a `[package.metadata.lychee]` or
4+
//! `[workspace.metadata.lychee]` section in a `Cargo.toml` file.
5+
//!
6+
//! # Tradeoffs
7+
//!
8+
//! While there are crates like `cargo_toml` available for parsing `Cargo.toml`
9+
//! files, we deliberately avoid using them. Such crates bring in heavy dependencies
10+
//! and extensive struct hierarchies to represent the entire Cargo schema (dependencies,
11+
//! targets, profiles, etc.).
12+
//!
13+
//! Instead, we define a lightweight, custom set of structs to extract exactly
14+
//! what we need using `serde` and `toml`.
15+
//!
16+
//! # Example
17+
//!
18+
//! ```toml
19+
//! [package.metadata.lychee]
20+
//! exclude = ["foo", "bar"]
21+
//! max_redirects = 5
22+
//! ```
23+
24+
use super::{ConfigLoader, ConfigMatch};
25+
use crate::config::Config;
26+
use anyhow::{Context, Result};
27+
use serde::Deserialize;
28+
29+
pub(crate) const CARGO_CONFIG_FILE: &str = "Cargo.toml";
30+
31+
/// The lychee config can be defined in either
32+
/// `[package.metadata.lychee]` or `[workspace.metadata.lychee]`.
33+
#[derive(Deserialize)]
34+
struct CargoToml {
35+
package: Option<CargoSection>,
36+
workspace: Option<CargoSection>,
37+
}
38+
39+
#[derive(Deserialize)]
40+
struct CargoSection {
41+
metadata: Option<Metadata>,
42+
}
43+
44+
#[derive(Deserialize)]
45+
struct Metadata {
46+
lychee: Option<Config>,
47+
}
48+
49+
pub(crate) struct CargoTomlLoader;
50+
51+
impl ConfigLoader for CargoTomlLoader {
52+
fn filename(&self) -> &str {
53+
CARGO_CONFIG_FILE
54+
}
55+
56+
fn load(&self, contents: &str) -> Result<ConfigMatch> {
57+
let cargo = toml::from_str::<CargoToml>(contents)
58+
.with_context(|| "Failed to parse lychee config from Cargo.toml")?;
59+
60+
// Package metadata fully replaces workspace metadata (instead of merging).
61+
// That's useful, because it allows users to define a workspace-wide
62+
// default config and then override it in specific packages.
63+
let config = [cargo.package, cargo.workspace]
64+
.into_iter()
65+
.flatten()
66+
.find_map(|s| s.metadata.and_then(|m| m.lychee));
67+
68+
match config {
69+
Some(config) => Ok(ConfigMatch::Found(Box::new(config))),
70+
None => Ok(ConfigMatch::NotFound),
71+
}
72+
}
73+
}
74+
75+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_load_package_config() {
        let manifest = r#"
            [package.metadata.lychee]
            exclude = ["foo"]
        "#;

        let ConfigMatch::Found(config) = CargoTomlLoader.load(manifest).unwrap() else {
            panic!("Expected config to be found");
        };
        assert_eq!(config.exclude, vec!["foo".to_string()]);
    }

    #[test]
    fn test_load_workspace_config() {
        let manifest = r#"
            [workspace.metadata.lychee]
            exclude = ["bar"]
        "#;

        let ConfigMatch::Found(config) = CargoTomlLoader.load(manifest).unwrap() else {
            panic!("Expected config to be found");
        };
        assert_eq!(config.exclude, vec!["bar".to_string()]);
    }

    #[test]
    fn test_load_package_takes_precedence() {
        // Both tables are present; the package one must be returned.
        let manifest = r#"
            [workspace.metadata.lychee]
            exclude = ["bar"]

            [package.metadata.lychee]
            exclude = ["foo"]
        "#;

        let ConfigMatch::Found(config) = CargoTomlLoader.load(manifest).unwrap() else {
            panic!("Expected config to be found");
        };
        assert_eq!(config.exclude, vec!["foo".to_string()]);
    }

    #[test]
    fn test_load_no_lychee_config() {
        let manifest = r#"
            [package]
            name = "lychee"
            version = "1.0.0"

            [workspace]
            members = ["lychee-bin"]
        "#;

        let outcome = CargoTomlLoader.load(manifest).unwrap();
        if let ConfigMatch::Found(_) = outcome {
            panic!("Expected no config to be found");
        }
    }
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//! `lychee.toml` configuration loader.
2+
//!
3+
//! This module allows configuring lychee via a standard `lychee.toml` file.
4+
//! This is the default configuration file format for lychee.
5+
//!
6+
//! Unlike `Cargo.toml` and `pyproject.toml` which require the configuration
7+
//! to be scoped under a specific section (like `[package.metadata.lychee]`),
8+
//! `lychee.toml` defines the configuration at the root level of the document.
9+
//!
10+
//! # Example
11+
//!
12+
//! ```toml
13+
//! exclude = ["foo", "bar"]
14+
//! timeout = 10
15+
//! ```
16+
17+
use super::{ConfigLoader, ConfigMatch};
18+
use anyhow::{Context, Result};
19+
20+
pub(crate) const LYCHEE_CONFIG_FILE: &str = "lychee.toml";
21+
22+
pub(crate) struct LycheeTomlLoader;
23+
24+
impl ConfigLoader for LycheeTomlLoader {
25+
fn filename(&self) -> &str {
26+
LYCHEE_CONFIG_FILE
27+
}
28+
29+
/// We strictly deserialize the entire file directly into our `Config` struct.
30+
/// Any failure here is a genuine configuration error that we want to bubble up.
31+
/// A dedicated `lychee.toml` file is assumed to always contain lychee configuration.
32+
fn load(&self, contents: &str) -> Result<ConfigMatch> {
33+
let config =
34+
toml::from_str(contents).with_context(|| "Failed to parse configuration file")?;
35+
Ok(ConfigMatch::Found(Box::new(config)))
36+
}
37+
}
38+
39+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_load_config() {
        let document = r#"
            exclude = ["foo"]
        "#;

        let ConfigMatch::Found(config) = LycheeTomlLoader.load(document).unwrap() else {
            panic!("Expected config to be found");
        };
        assert_eq!(config.exclude, vec!["foo".to_string()]);
    }

    #[test]
    fn test_load_invalid_config() {
        let document = r#"
            exclude = "foo" # should be an array
        "#;

        let outcome = LycheeTomlLoader.load(document);
        assert!(outcome.is_err());
    }
}

0 commit comments

Comments
 (0)