From e6a6bd69b26e5e670da373210410b97542ccf878 Mon Sep 17 00:00:00 2001 From: klacointe Date: Wed, 17 Jun 2026 19:36:20 +0200 Subject: [PATCH] feat: introduce pluggable HTTP client behaviour - Add Waffle.HTTPClient behaviour with a single get/3 callback - Add Waffle.HTTPClient.Hackney as the default implementation - Refactor Waffle.File to dispatch through the configured :http_client instead of calling hackney directly; retry logic stays client-agnostic - Document how to implement a custom HTTP client adapter --- CHANGELOG.md | 8 ++ README.md | 10 ++ lib/waffle/behaviors/http_client_behaviour.ex | 55 +++++++++++ lib/waffle/file.ex | 82 ++++------------ lib/waffle/http_client/hackney.ex | 87 +++++++++++++++++ test/actions/store_test.exs | 18 +++- test/file_test.exs | 77 ++++++++++++++- test/http_client/hackney_test.exs | 96 +++++++++++++++++++ 8 files changed, 361 insertions(+), 72 deletions(-) create mode 100644 lib/waffle/behaviors/http_client_behaviour.ex create mode 100644 lib/waffle/http_client/hackney.ex create mode 100644 test/http_client/hackney_test.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index 2672bca..bcb7da7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## Unreleased + +- Add pluggable HTTP client behaviour (`Waffle.HTTPClient`) with `Waffle.HTTPClient.Hackney` + as the default implementation (#150) + - `:timeout` and `:recv_timeout` error atoms are **unchanged** from previous behaviour + - `{:error, :service_unavailable}` replaces `{:error, {:waffle_hackney_error, {:ok, 503, ...}}}` on 503 out of retries + - Non-2xx errors now return `{:error, {:http_error, status_code}}` (e.g. `{:error, {:http_error, 404}}`), replacing `{:error, {:waffle_hackney_error, {:ok, status, headers, ref}}}` + ## v1.1.10 (2025-12-28) - Handle http errors from fetching remote files (#134) diff --git a/README.md b/README.md index cabafbc..5663642 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,16 @@ config :ex_aws, # any configurations provided by https://github.com/ex-aws/ex_aws ``` +### Setup an HTTP client + +Waffle uses `:hackney` by default to download remote files. You can configure it explicitly: + +```elixir +config :waffle, :http_client, Waffle.HTTPClient.Hackney +``` + +You can also implement your own client by adopting the `Waffle.HTTPClient` behaviour. + ### Define a definition module Waffle requires a **definition module** which contains the relevant diff --git a/lib/waffle/behaviors/http_client_behaviour.ex b/lib/waffle/behaviors/http_client_behaviour.ex new file mode 100644 index 0000000..4b9ae07 --- /dev/null +++ b/lib/waffle/behaviors/http_client_behaviour.ex @@ -0,0 +1,55 @@ +defmodule Waffle.HTTPClient do + @moduledoc """ + Behaviour for pluggable HTTP clients used when downloading remote files. + + ## Built-in implementations + + - `Waffle.HTTPClient.Hackney` — default, uses `:hackney`. Add `{:hackney, "~> 1.9"}` to + your deps. + + ## Configuration + + config :waffle, :http_client, Waffle.HTTPClient.Hackney + + ## Writing a custom client + + Implement the `c:get/3` callback and configure Waffle to use your module: + + config :waffle, :http_client, MyApp.HTTPClient + + ### Options + + Waffle passes the following options (all values come from application config): + + | Option | Type | Default | Description | + |--------------------|-------------------------|------------|------------------------------------------| + | `:recv_timeout` | `non_neg_integer()` | `5_000` | Timeout for receiving a response (ms) | + | `:connect_timeout` | `non_neg_integer()` | `10_000` | Timeout for establishing a connection (ms) | + | `:max_body_length` | `non_neg_integer() \| :infinity` | `:infinity` | Maximum allowed response body size (bytes) | + | `:follow_redirect` | `boolean()` | `true` | Whether to follow HTTP redirects | + + ### Return values + + | Pattern | Meaning | + |----------------------------------------|--------------------------------------------------| + | `{:ok, body}` | Successful response, no filename in headers | + | `{:ok, body, filename}` | Successful response with `content-disposition` filename | + | `{:error, :timeout}` | Connect timed out — Waffle will retry | + | `{:error, :recv_timeout}` | Receive timed out — Waffle will retry | + | `{:error, :service_unavailable}` | Server returned 503 — Waffle will retry | + | `{:error, {:http_error, reason}}` | Non-retryable error; `reason` is the HTTP status integer for unexpected status codes, or an error term for connection/protocol errors | + """ + + @type body :: binary() + @type filename :: String.t() + @type option :: + {:recv_timeout, non_neg_integer()} + | {:connect_timeout, non_neg_integer()} + | {:max_body_length, non_neg_integer() | :infinity} + | {:follow_redirect, boolean()} + + @callback get(url :: String.t(), headers :: list(), options :: [option()]) :: + {:ok, body()} + | {:ok, body(), filename()} + | {:error, :timeout | :recv_timeout | :service_unavailable | {:http_error, any()}} +end diff --git a/lib/waffle/file.ex b/lib/waffle/file.ex index eccb0e9..8ea77d9 100644 --- a/lib/waffle/file.ex +++ b/lib/waffle/file.ex @@ -185,11 +185,6 @@ defmodule Waffle.File do end end - # hackney :connect_timeout - timeout used when establishing a connection, in milliseconds - # hackney :recv_timeout - timeout used when receiving from a connection, in milliseconds - # hackney :max_body_length - maximum size of the file to download, in bytes. Defaults to :infinity - # :backoff_max - maximum backoff time, in milliseconds - # :backoff_factor - a backoff factor to apply between attempts, in milliseconds defp get_remote_path(remote_path, definition) do headers = definition.remote_file_headers(remote_path) @@ -197,6 +192,7 @@ defmodule Waffle.File do follow_redirect: true, recv_timeout: Application.get_env(:waffle, :recv_timeout, 5_000), connect_timeout: Application.get_env(:waffle, :connect_timeout, 10_000), + max_body_length: Application.get_env(:waffle, :max_body_length, :infinity), max_retries: Application.get_env(:waffle, :max_retries, 3), backoff_factor: Application.get_env(:waffle, :backoff_factor, 1000), backoff_max: Application.get_env(:waffle, :backoff_max, 30_000) @@ -206,80 +202,34 @@ defmodule Waffle.File do end defp request(remote_path, headers, options, tries \\ 0) do - with {:ok, 200, response_headers, client_ref} <- - :hackney.get(URI.to_string(remote_path), headers, "", options), - res when elem(res, 0) == :ok <- body(client_ref, response_headers) do - res - else - {:error, %{reason: :timeout}} -> - case retry(tries, options) do - {:ok, :retry} -> request(remote_path, headers, options, tries + 1) - {:error, :out_of_tries} -> {:error, :timeout} - end - - {:error, :timeout} -> - case retry(tries, options) do - {:ok, :retry} -> request(remote_path, headers, options, tries + 1) - {:error, :out_of_tries} -> {:error, :recv_timeout} - end - - {:ok, 503, _headers, client_ref} = response -> - case retry(tries, options) do - {:ok, :retry} -> - request(remote_path, headers, options, tries + 1) - - {:error, :out_of_tries} -> - :hackney.close(client_ref) - {:error, {:waffle_hackney_error, response}} - end - - {:ok, _, _, client_ref} = response -> - :hackney.close(client_ref) - {:error, {:waffle_hackney_error, response}} - - _err -> - {:error, :waffle_hackney_error} - end - end - - defp body(client_ref, response_headers) do - max_body_length = Application.get_env(:waffle, :max_body_length, :infinity) + http_client = Application.get_env(:waffle, :http_client, Waffle.HTTPClient.Hackney) + url = URI.to_string(remote_path) - case :hackney.body(client_ref, max_body_length) do + case http_client.get(url, headers, options) do {:ok, body} -> - response_headers = :hackney_headers.new(response_headers) - filename = content_disposition(response_headers) + {:ok, body} - if is_nil(filename) do - {:ok, body} - else - {:ok, body, filename} - end + {:ok, body, filename} -> + {:ok, body, filename} - err -> + {:error, reason} when reason in [:timeout, :recv_timeout, :service_unavailable] -> + retry_or_error(remote_path, headers, options, tries, reason) + + {:error, _} = err -> err end end - defp content_disposition(headers) do - case :hackney_headers.get_value("content-disposition", headers) do - :undefined -> - nil - - value -> - case :hackney_headers.content_disposition(value) do - {_, [{"filename", filename} | _]} -> - filename - - _ -> - nil - end + defp retry_or_error(remote_path, headers, options, tries, reason) do + case retry(tries, options) do + {:ok, :retry} -> request(remote_path, headers, options, tries + 1) + {:error, :out_of_tries} -> {:error, reason} end end defp retry(tries, options) do if tries < options[:max_retries] do - backoff = round(options[:backoff_factor] * :math.pow(2, tries - 1)) + backoff = round(options[:backoff_factor] * :math.pow(2, tries)) backoff = :erlang.min(backoff, options[:backoff_max]) :timer.sleep(backoff) {:ok, :retry} diff --git a/lib/waffle/http_client/hackney.ex b/lib/waffle/http_client/hackney.ex new file mode 100644 index 0000000..adb649d --- /dev/null +++ b/lib/waffle/http_client/hackney.ex @@ -0,0 +1,87 @@ +defmodule Waffle.HTTPClient.Hackney do + @moduledoc """ + Default HTTP client implementation using `:hackney`. + + Add `:hackney` to your dependencies: + + {:hackney, "~> 1.9"} + + ## Configuration + + config :waffle, :http_client, Waffle.HTTPClient.Hackney + + ## Options + + | Option | Default | Description | + |--------------------|--------------|--------------------------------------------------------| + | `:recv_timeout` | `5_000` | Timeout for receiving a response, in milliseconds | + | `:connect_timeout` | `10_000` | Timeout for establishing a connection, in milliseconds | + | `:max_body_length` | `:infinity` | Maximum response body size, in bytes | + | `:follow_redirect` | `true` | Whether to follow HTTP redirects automatically | + """ + + @behaviour Waffle.HTTPClient + + @impl Waffle.HTTPClient + def get(url, headers, options) do + hackney_options = [ + follow_redirect: Keyword.get(options, :follow_redirect, true), + recv_timeout: Keyword.get(options, :recv_timeout, 5_000), + connect_timeout: Keyword.get(options, :connect_timeout, 10_000) + ] + + max_body_length = Keyword.get(options, :max_body_length, :infinity) + + case :hackney.get(url, headers, "", hackney_options) do + {:ok, 200, response_headers, client_ref} -> + read_body(client_ref, response_headers, max_body_length) + + {:ok, 503, _headers, client_ref} -> + :hackney.close(client_ref) + {:error, :service_unavailable} + + {:ok, status, _headers, client_ref} -> + :hackney.close(client_ref) + {:error, {:http_error, status}} + + # connect timeout: hackney returns %{reason: :timeout}, not a bare atom + {:error, %{reason: :timeout}} -> + {:error, :timeout} + + # recv timeout: hackney returns a bare :timeout atom + {:error, :timeout} -> + {:error, :recv_timeout} + + {:error, reason} -> + {:error, {:http_error, reason}} + end + end + + defp read_body(client_ref, response_headers, max_body_length) do + case :hackney.body(client_ref, max_body_length) do + {:ok, body} -> + filename = + :hackney_headers.new(response_headers) + |> get_content_disposition_filename() + + if filename, do: {:ok, body, filename}, else: {:ok, body} + + {:error, reason} -> + :hackney.close(client_ref) + {:error, {:http_error, reason}} + end + end + + defp get_content_disposition_filename(headers) do + case :hackney_headers.get_value("content-disposition", headers) do + :undefined -> + nil + + value -> + case :hackney_headers.content_disposition(value) do + {_, [{"filename", filename} | _]} -> filename + _ -> nil + end + end + end +end diff --git a/test/actions/store_test.exs b/test/actions/store_test.exs index 416276d..dd8133f 100644 --- a/test/actions/store_test.exs +++ b/test/actions/store_test.exs @@ -140,8 +140,15 @@ defmodule WaffleTest.Actions.Store do end test "recv_timeout" do + original = Application.get_env(:waffle, :recv_timeout) Application.put_env(:waffle, :recv_timeout, 1) + on_exit(fn -> + if original, + do: Application.put_env(:waffle, :recv_timeout, original), + else: Application.delete_env(:waffle, :recv_timeout) + end) + with_mock Waffle.Storage.S3, put: fn DummyDefinition, _, {%{file_name: "favicon.ico", path: _}, nil} -> {:ok, "favicon.ico"} @@ -149,13 +156,18 @@ defmodule WaffleTest.Actions.Store do assert DummyDefinition.store("https://www.google.com/favicon.ico") == {:error, :recv_timeout} end - - Application.put_env(:waffle, :recv_timeout, 5_000) end test "recv_timeout with a filename" do + original = Application.get_env(:waffle, :recv_timeout) Application.put_env(:waffle, :recv_timeout, 1) + on_exit(fn -> + if original, + do: Application.put_env(:waffle, :recv_timeout, original), + else: Application.delete_env(:waffle, :recv_timeout) + end) + with_mock Waffle.Storage.S3, put: fn DummyDefinition, _, {%{file_name: "newfavicon.ico", path: _}, nil} -> {:ok, "newfavicon.ico"} @@ -166,8 +178,6 @@ defmodule WaffleTest.Actions.Store do }) == {:error, :recv_timeout} end - - Application.put_env(:waffle, :recv_timeout, 5_000) end test "accepts remote files" do diff --git a/test/file_test.exs b/test/file_test.exs index b0194a2..12707f5 100644 --- a/test/file_test.exs +++ b/test/file_test.exs @@ -1,22 +1,95 @@ defmodule WaffleTest.File do use ExUnit.Case, async: false + import Mock + alias Waffle.HTTPClient.Hackney @custom_tmp_dir System.tmp_dir() <> "/waffle_test_custom" + defmodule DummyDefinition do + use Waffle.Definition.Storage + def transform(_, _), do: :noaction + def __versions, do: [:original] + end + describe "generate_temporary_path/1" do test "uses configured tmp_dir" do File.mkdir_p!(@custom_tmp_dir) Application.put_env(:waffle, :tmp_dir, @custom_tmp_dir) assert Waffle.File.generate_temporary_path() |> String.starts_with?(@custom_tmp_dir) - on_exit fn -> + + on_exit(fn -> Application.delete_env(:waffle, :tmp_dir) File.rm_rf!(@custom_tmp_dir) - end + end) end test "uses system tmp_dir" do assert Waffle.File.generate_temporary_path() |> String.starts_with?(System.tmp_dir()) end end + + describe "new/2 with remote URL — retry behavior" do + setup do + Application.put_env(:waffle, :http_client, Hackney) + Application.put_env(:waffle, :max_retries, 2) + # Zero-out backoff so tests don't sleep + Application.put_env(:waffle, :backoff_factor, 0) + Application.put_env(:waffle, :backoff_max, 0) + + on_exit(fn -> + Application.delete_env(:waffle, :http_client) + Application.delete_env(:waffle, :max_retries) + Application.delete_env(:waffle, :backoff_factor) + Application.delete_env(:waffle, :backoff_max) + end) + end + + test "retries on timeout and returns {:error, :timeout} after exhausting retries" do + with_mock Hackney, + get: fn _url, _headers, _opts -> + {:error, :timeout} + end do + result = Waffle.File.new("http://example.com/image.jpg", DummyDefinition) + assert result == {:error, :timeout} + # initial attempt + 2 retries + assert called(Hackney.get(:_, :_, :_)) + end + end + + test "retries on service_unavailable and returns {:error, :service_unavailable} after exhausting retries" do + with_mock Hackney, + get: fn _url, _headers, _opts -> + {:error, :service_unavailable} + end do + result = Waffle.File.new("http://example.com/image.jpg", DummyDefinition) + assert result == {:error, :service_unavailable} + assert called(Hackney.get(:_, :_, :_)) + end + end + + test "does not retry on non-retryable errors" do + with_mock Hackney, + get: fn _url, _headers, _opts -> + {:error, {:http_error, :unexpected_status}} + end do + result = Waffle.File.new("http://example.com/image.jpg", DummyDefinition) + assert result == {:error, {:http_error, :unexpected_status}} + # exactly 1 call — no retry + assert :meck.num_calls(Hackney, :get, :_) == 1 + end + end + + test "uses the module configured as :http_client" do + Application.put_env(:waffle, :http_client, Hackney) + + with_mock Hackney, + get: fn _url, _headers, _opts -> + {:ok, "image data"} + end do + Waffle.File.new("http://example.com/image.jpg", DummyDefinition) + assert called(Hackney.get(:_, :_, :_)) + end + end + end end diff --git a/test/http_client/hackney_test.exs b/test/http_client/hackney_test.exs new file mode 100644 index 0000000..1560708 --- /dev/null +++ b/test/http_client/hackney_test.exs @@ -0,0 +1,96 @@ +defmodule WaffleTest.HTTPClient.Hackney do + use ExUnit.Case, async: false + import Mock + + alias Waffle.HTTPClient.Hackney + + describe "get/3" do + test "returns {:ok, body} on 200 with no content-disposition header" do + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:ok, 200, [], :client_ref} end, + body: fn :client_ref, :infinity -> {:ok, "file content"} end do + result = Hackney.get("http://example.com/file.jpg", [], []) + assert result == {:ok, "file content"} + end + end + + test "returns {:ok, body, filename} when content-disposition header is present" do + response_headers = [{"content-disposition", ~s(attachment; filename="photo.jpg")}] + + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:ok, 200, response_headers, :client_ref} end, + body: fn :client_ref, :infinity -> {:ok, "file content"} end do + result = Hackney.get("http://example.com/file", [], []) + assert result == {:ok, "file content", "photo.jpg"} + end + end + + test "returns {:error, :service_unavailable} on 503" do + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:ok, 503, [], :client_ref} end, + close: fn :client_ref -> :ok end do + result = Hackney.get("http://example.com/file.jpg", [], []) + assert result == {:error, :service_unavailable} + end + end + + test "returns {:error, {:http_error, status}} with the actual HTTP status on non-200/503" do + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:ok, 404, [], :client_ref} end, + close: fn :client_ref -> :ok end do + result = Hackney.get("http://example.com/file.jpg", [], []) + assert result == {:error, {:http_error, 404}} + end + end + + test "returns {:error, :timeout} on hackney timeout map" do + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:error, %{reason: :timeout}} end do + result = Hackney.get("http://example.com/file.jpg", [], []) + assert result == {:error, :timeout} + end + end + + test "returns {:error, :recv_timeout} on :timeout atom" do + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:error, :timeout} end do + result = Hackney.get("http://example.com/file.jpg", [], []) + assert result == {:error, :recv_timeout} + end + end + + test "returns {:error, {:http_error, reason}} on other errors" do + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:error, :econnrefused} end do + result = Hackney.get("http://example.com/file.jpg", [], []) + assert result == {:error, {:http_error, :econnrefused}} + end + end + + test "passes recv_timeout and connect_timeout to hackney" do + with_mock :hackney, + get: fn _url, _headers, "", opts -> + assert Keyword.get(opts, :recv_timeout) == 3_000 + assert Keyword.get(opts, :connect_timeout) == 8_000 + {:ok, 200, [], :client_ref} + end, + body: fn :client_ref, :infinity -> {:ok, "body"} end do + Hackney.get("http://example.com/file.jpg", [], + recv_timeout: 3_000, + connect_timeout: 8_000 + ) + end + end + + test "passes max_body_length to hackney body read" do + with_mock :hackney, + get: fn _url, _headers, "", _opts -> {:ok, 200, [], :client_ref} end, + body: fn :client_ref, max -> + assert max == 1024 + {:ok, "body"} + end do + Hackney.get("http://example.com/file.jpg", [], max_body_length: 1024) + end + end + end +end