diff --git a/_config.yml b/_config.yml
index a151fe72..d654f273 100644
--- a/_config.yml
+++ b/_config.yml
@@ -73,7 +73,7 @@ organization-logo-nav: https://www.lib.uidaho.edu/collectionbuilder/cdil-white.p
# provide a relative path in this repository or full url
lib-assets: /assets/lib
# ignore stuff
-exclude: [docs/, Rakefile, rakelib/, README.md, LICENSE, CITATION.cff, CODE_OF_CONDUCT.md, CONTRIBUTING.md, SECURITY.md]
+exclude: [docs/, Rakefile, rakelib/, README.md, LICENSE, CITATION.cff, CODE_OF_CONDUCT.md, CONTRIBUTING.md, SECURITY.md, offline_site/]
# compress CSS output
sass:
style: compressed
diff --git a/_includes/head/head.html b/_includes/head/head.html
index 5fd397ac..e8cbfd58 100644
--- a/_includes/head/head.html
+++ b/_includes/head/head.html
@@ -29,8 +29,10 @@
{% comment %}
Meta tags and analytics are added during production build ONLY
{%- endcomment -%}
-{% if jekyll.environment == "production" %}
+{% if jekyll.environment == "production" or jekyll.environment == "offline" %}
{% if layout.item-meta %}{% include head/item-meta.html %}{% else %}{% include head/page-meta.html %}{% endif %}
+{% endif %}
+{% if jekyll.environment == "production" %}
{% include head/analytics.html %}
{%- endif -%}
{% if site.noindex == true or page.noindex == true or layout.noindex == true %}{% endif %}
diff --git a/_includes/js/table-js.html b/_includes/js/table-js.html
index 1ed1ecc4..f5574203 100644
--- a/_includes/js/table-js.html
+++ b/_includes/js/table-js.html
@@ -1,6 +1,15 @@
+{% comment %}
+
+ Javascript for DataTables interactive table feature, https://datatables.net/
+ Used with the "data.html" layout.
+ By default it loads table data from "assets/js/metadata.min.json" for better performance with large collections.
+ For offline build, it provides an alternative basic table version.
+
+{%- endcomment -%}
{% assign fcount = site.data.config-table | size %}
-
+
+{% unless jekyll.environment == "offline" or page.simple-table == true %}
+{% else %}
+
+{% endunless %}
diff --git a/_layouts/data.html b/_layouts/data.html
index 2d833c7d..47f6a904 100644
--- a/_layouts/data.html
+++ b/_layouts/data.html
@@ -15,9 +15,22 @@
{% for f in fields %}
{{ f }} |
{% endfor %}
- Link |
+ {% unless jekyll.environment == "offline" %}Link | {% endunless %}
+ {% if jekyll.environment == "offline" or page.simple-table == true %}
+
+ {% if site.data.theme.data-child-objects == true %}
+ {%- assign items = site.data[site.metadata] | where_exp: 'item','item.objectid' -%}
+ {% else %}
+ {%- assign items = site.data[site.metadata] | where_exp: 'item','item.objectid and item.parentid == nil' -%}
+ {% endif %}
+ {%- assign fields = site.data.config-table | map: 'field' -%}
+ {%- for item in items -%}
+ {% for f in fields %}| {% if forloop.first %}{{ item[f] }}{% else %}{{ item[f] }}{% endif %} | {% endfor %}
+ {%- endfor -%}
+
+ {% endif %}
\ No newline at end of file
diff --git a/docs/rake_tasks/build_offline.md b/docs/rake_tasks/build_offline.md
new file mode 100644
index 00000000..f84e38c9
--- /dev/null
+++ b/docs/rake_tasks/build_offline.md
@@ -0,0 +1,35 @@
+# build_offline
+
+`rake build_offline` generates your CB site, downloads all external media, and then rewrites all internal links to create a copy of your project that can be used fully offline in the local filesystem.
+
+The fully static offline file version is intended to serve as an artifact for digital preservation that maintains the functionality of a project in a minimal environment.
+It can also be useful for sharing the site (via a thumb drive) in a location with no internet, or for content that needs to remain offline for security or privacy reasons.
+
+The task will:
+
+1. Complete a fresh build of the site (using the "offline" JEKYLL_ENV rather than "production", which allows us to swap out some parts of the site specific to building offline)
+2. Copy the build to the output directory
+3. Download external media (images, PDFs, audio) referenced in your metadata CSV to the "objects" folder.
+4. Rewrite all links in the files to relative file paths so that pages can load and link correctly from the local filesystem.
+
+When the task completes, you can browse the offline version by clicking "offline_site/index.html" to open it in your web browser.
+
+| option | description | default value |
+| --- | --- | --- |
+| download_external | attempt to download all external media linked in the project including items, true/false | true |
+| output_dir | directory name for output offline version | "offline_site" |
+| skip_rewrite | local path of directory to skip rewriting, useful for external libraries that should not be modified. | "assets/lib" |
+
+Pass options as rake arguments:
+
+`rake build_offline[false,"my_offline_copy","assets/lib"]`
+
+Note: if you change the default output_dir, remember to add it to the `exclude` option in "_config.yml" to avoid including it in your future site builds!
+
+## Limitations
+
+- Streaming video (YouTube, Vimeo, etc.) is not downloaded and will not play offline. Item pages for video objects will display without the video.
+- Map tiles (Leaflet/OpenStreetMap) require internet access. The map page will show markers but no background tiles when offline.
+- External images not in metadata (e.g., organization logos in the banner) remain as external links and require internet to display. Please manually adjust those images in your project.
+- The rewrite script is somewhat idiosyncratic to CB projects, so it may not work correctly for other websites or highly customized projects.
+
diff --git a/rakelib/build_offline.rake b/rakelib/build_offline.rake
new file mode 100644
index 00000000..bb70d63a
--- /dev/null
+++ b/rakelib/build_offline.rake
@@ -0,0 +1,264 @@
+###############################################################################
+# TASK: build_offline
+#
+# build a CollectionBuilder site and replace links for offline use
+#
+# generates the Jekyll site, downloads external media (images, PDFs, audio),
+# and rewrites all internal links so the site works from the local filesystem without a server.
+#
+# options (passed as rake arguments, e.g. rake build_offline[true,offline_site,assets/lib]):
+# download_external - download external media linked in metadata, true/false (default: true)
+# output_dir - directory name for the offline output (default: "offline_site")
+# skip_rewrite - local path of directory to skip rewriting, useful for external libraries that should not be modified (default: "assets/lib")
+#
+# see docs/rake_tasks/build_offline.md for full documentation
+###############################################################################
+
+require 'net/http'
+require 'open-uri'
+require 'pathname'
+require 'uri'
+require 'yaml'
+
+# file types to download for offline use (images, PDFs, and audio; streaming video is skipped)
+OFFLINE_MEDIA_EXTENSIONS = %w[.jpg .jpeg .png .gif .tif .tiff .pdf .mp3 .wav .ogg .m4a].freeze
+
+# streaming media platforms (video and audio) to skip when downloading external media
+OFFLINE_SKIP_DOMAINS = %w[youtube.com youtu.be vimeo.com soundcloud.com].freeze
+
+# check if a URL is from a platform that should be skipped for downloading
+def offline_skip_url?(url)
+ OFFLINE_SKIP_DOMAINS.any? { |domain| url.include?(domain) }
+end
+
+# check whether the URL points to a file type eligible for offline download
+def offline_downloadable?(url)
+ ext = File.extname(URI.parse(url).path).downcase
+ OFFLINE_MEDIA_EXTENSIONS.include?(ext)
+rescue URI::InvalidURIError
+ false
+end
+
+# download url to dest_path; returns true on success, or false after logging the error and removing any partial file
+def offline_download(url, dest_path)
+  puts "Downloading: #{url}"
+  URI.open(url, 'rb', open_timeout: 30, read_timeout: 60) { |remote| IO.copy_stream(remote, dest_path) }
+  puts " -> #{dest_path}"
+  true
+# per-URL failures only: one bad or unreachable address (malformed/non-ASCII URL, TLS error, reset) must not abort the build
+rescue OpenURI::HTTPError, SocketError, EOFError, OpenSSL::SSL::SSLError, URI::InvalidURIError, RuntimeError,
+       Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EHOSTUNREACH, Errno::ENETUNREACH, Errno::ETIMEDOUT,
+       Net::OpenTimeout, Net::ReadTimeout => e
+  puts " -> download failed: #{e.message}"
+  FileUtils.rm_f(dest_path)
+  false
+end
+
+# rewrite all internal links in a file's content for local filesystem use.
+# depth - number of directory levels below the offline root (0 = root-level files)
+# site_url - absolute URL prefix from Jekyll config (url + baseurl), used in data files
+# url_map - hash of { external_url => root_relative_local_path } for downloaded media
+def offline_rewrite_links(content, depth, site_url, url_map)
+ prefix = '../' * depth
+
+ # 1. replace downloaded external media URLs with relative local paths
+ url_map.each do |external_url, local_path|
+ content = content.gsub(external_url, "#{prefix}#{local_path.delete_prefix('/')}")
+ end
+
+ # 2. replace absolute site URLs (Jekyll url + baseurl, or localhost:4000 when url is unset)
+ # these appear in generated data files and occasionally in HTML meta tags
+ unless site_url.empty?
+ escaped = Regexp.escape(site_url)
+ content = content.gsub(%r{#{escaped}(/[^\s"'<>()\[\]]+)}) do
+ "#{prefix}#{$1.delete_prefix('/')}"
+ end
+ # bare site root URL with no following path
+ content = content.gsub(%r{#{escaped}/?(?=[\s"'<>()\[\]])}) do
+ "#{prefix}index.html"
+ end
+ end
+
+ # 3. rewrite root-relative paths in HTML attribute values
+ # covers href, src, action, content (meta), xlink:href (SVG), data-src (lazy-load)
+ # negative lookahead (?!\/) prevents rewriting protocol-relative URLs (//)
+ content = content.gsub(/((?:href|src|action|content|xlink:href|data-src)=["'])(\/(?!\/)[^"']*)/) do
+ local = $2.delete_prefix('/')
+ local = 'index.html' if local.empty?
+ "#{$1}#{prefix}#{local}"
+ end
+
+ # 4. rewrite root-relative paths in CSS url() references (inline styles and