diff --git a/_config.yml b/_config.yml index a151fe72..d654f273 100644 --- a/_config.yml +++ b/_config.yml @@ -73,7 +73,7 @@ organization-logo-nav: https://www.lib.uidaho.edu/collectionbuilder/cdil-white.p # provide a relative path in this repository or full url lib-assets: /assets/lib # ignore stuff -exclude: [docs/, Rakefile, rakelib/, README.md, LICENSE, CITATION.cff, CODE_OF_CONDUCT.md, CONTRIBUTING.md, SECURITY.md] +exclude: [docs/, Rakefile, rakelib/, README.md, LICENSE, CITATION.cff, CODE_OF_CONDUCT.md, CONTRIBUTING.md, SECURITY.md, offline_site/] # compress CSS output sass: style: compressed diff --git a/_includes/head/head.html b/_includes/head/head.html index 5fd397ac..e8cbfd58 100644 --- a/_includes/head/head.html +++ b/_includes/head/head.html @@ -29,8 +29,10 @@ {% comment %} Meta tags and analytics are added during production build ONLY {%- endcomment -%} -{% if jekyll.environment == "production" %} +{% if jekyll.environment == "production" or jekyll.environment == "offline" %} {% if layout.item-meta %}{% include head/item-meta.html %}{% else %}{% include head/page-meta.html %}{% endif %} +{% endif %} +{% if jekyll.environment == "production" %} {% include head/analytics.html %} {%- endif -%} {% if site.noindex == true or page.noindex == true or layout.noindex == true %}{% endif %} diff --git a/_includes/js/table-js.html b/_includes/js/table-js.html index 1ed1ecc4..f5574203 100644 --- a/_includes/js/table-js.html +++ b/_includes/js/table-js.html @@ -1,6 +1,15 @@ +{% comment %} + + Javascript for DataTables interactive table feature, https://datatables.net/ + Used with the "data.html" layout. + By default it loads table data from "assets/js/metadata.min.json" for better performance with large collections. + For offline build, it provides an alternative basic table version. 
+ +{%- endcomment -%} {% assign fcount = site.data.config-table | size %} - + +{% unless jekyll.environment == "offline" or page.simple-table == true %} +{% else %} + +{% endunless %} diff --git a/_layouts/data.html b/_layouts/data.html index 2d833c7d..47f6a904 100644 --- a/_layouts/data.html +++ b/_layouts/data.html @@ -15,9 +15,22 @@ {% for f in fields %} {{ f }} {% endfor %} - Link + {% unless jekyll.environment == "offline" %}Link{% endunless %} + {% if jekyll.environment == "offline" or page.simple-table == true %} + + {% if site.data.theme.data-child-objects == true %} + {%- assign items = site.data[site.metadata] | where_exp: 'item','item.objectid' -%} + {% else %} + {%- assign items = site.data[site.metadata] | where_exp: 'item','item.objectid and item.parentid == nil' -%} + {% endif %} + {%- assign fields = site.data.config-table | map: 'field' -%} + {%- for item in items -%} + {% for f in fields %}{% if forloop.first %}{{ item[f] }}{% else %}{{ item[f] }}{% endif %}{% endfor %} + {%- endfor -%} + + {% endif %} \ No newline at end of file diff --git a/docs/rake_tasks/build_offline.md b/docs/rake_tasks/build_offline.md new file mode 100644 index 00000000..f84e38c9 --- /dev/null +++ b/docs/rake_tasks/build_offline.md @@ -0,0 +1,35 @@ +# build_offline + +`rake build_offline` generates your CB site, downloads all external media, and then rewrites all internal links to create a copy of your project that can be used fully offline in the local filesystem. + +The fully static offline file version is intended to serve as an artifact for digital preservation that maintains the functionality of a project in a minimal environment. +It can also be useful for sharing the site (via a thumb drive) in a location with no internet, or for content that needs to remain offline for security or privacy reasons. + +The task will: + +1. 
Complete a fresh build of the site (using the "offline" JEKYLL_ENV rather than "production", which allows us to swap out some parts of the site specific to building offline) +2. Copy the build to the output directory +3. Download external media (images, PDFs, audio) referenced in your metadata CSV to the "objects" folder. +4. Rewrite all links in the files to relative file paths so that pages can load and link correctly from the local filesystem. + +When the task completes, you can browse the offline version by clicking "offline_site/index.html" to open it in your web browser. + +| option | description | default value | +| --- | --- | --- | +| download_external | attempt to download all external media linked in the project including items, true/false | true | +| output_dir | directory name for output offline version | "offline_site" | +| skip_rewrite | local path of directory to skip rewriting, useful for external libraries that should not be modified. | "assets/lib" | + +Pass options as rake arguments: + +`rake build_offline[false,"my_offline_copy","assets/lib"]` + +Note: if you change the default output_dir, remember to add it to the `exclude` option in "_config.yml" to avoid including it in your future site builds! + +## Limitations + +- Streaming video (YouTube, Vimeo, etc.) is not downloaded and will not play offline. Item pages for video objects will display without the video. +- Map tiles (Leaflet/OpenStreetMap) require internet access. The map page will show markers but no background tiles when offline. +- External images not in metadata (e.g., organization logos in the banner) remain as external links and require internet to display. Please manually adjust those images in your project. +- The rewrite script is somewhat idiosyncratic to CB projects, so it may not work correctly for other websites or highly customized projects. 
+ diff --git a/rakelib/build_offline.rake b/rakelib/build_offline.rake new file mode 100644 index 00000000..bb70d63a --- /dev/null +++ b/rakelib/build_offline.rake @@ -0,0 +1,264 @@ +############################################################################### +# TASK: build_offline +# +# build a CollectionBuilder site and replace links for offline use +# +# generates the Jekyll site, downloads external media (images, PDFs, audio), +# and rewrites all internal links so the site works from the local filesystem without a server. +# +# options (passed as rake arguments, e.g. rake build_offline[true,offline_site,assets/lib]): +# download_external - download external media linked in metadata, true/false (default: true) +# output_dir - directory name for the offline output (default: "offline_site") +# skip_rewrite - local path of directory to skip rewriting, useful for external libraries that should not be modified (default: "assets/lib") +# +# see docs/rake_tasks/build_offline.md for full documentation +############################################################################### + +require 'net/http' +require 'open-uri' +require 'pathname' +require 'uri' +require 'yaml' + +# file types to download for offline use (images, PDFs, and audio; streaming video is skipped) +OFFLINE_MEDIA_EXTENSIONS = %w[.jpg .jpeg .png .gif .tif .tiff .pdf .mp3 .wav .ogg .m4a].freeze + +# streaming/video platforms to skip when downloading external media +OFFLINE_SKIP_DOMAINS = %w[youtube.com youtu.be vimeo.com soundcloud.com].freeze + +# check if a URL is from a platform that should be skipped for downloading +def offline_skip_url?(url) + OFFLINE_SKIP_DOMAINS.any? 
{ |domain| url.include?(domain) } +end + +# check whether the URL points to a file type eligible for offline download +def offline_downloadable?(url) + ext = File.extname(URI.parse(url).path).downcase + OFFLINE_MEDIA_EXTENSIONS.include?(ext) +rescue URI::InvalidURIError + false +end + +# download a file from url and save to dest_path; returns true on success +def offline_download(url, dest_path) + puts "Downloading: #{url}" + URI.open(url, 'rb', open_timeout: 30, read_timeout: 60) do |remote| + IO.copy_stream(remote, dest_path) + end + puts " -> #{dest_path}" + true +rescue OpenURI::HTTPError, SocketError, Errno::ECONNREFUSED, Errno::ETIMEDOUT, + Net::OpenTimeout, Net::ReadTimeout, RuntimeError => e + puts " -> download failed: #{e.message}" + FileUtils.rm_f(dest_path) + false +end + +# rewrite all internal links in a file's content for local filesystem use. +# depth - number of directory levels below the offline root (0 = root-level files) +# site_url - absolute URL prefix from Jekyll config (url + baseurl), used in data files +# url_map - hash of { external_url => root_relative_local_path } for downloaded media +def offline_rewrite_links(content, depth, site_url, url_map) + prefix = '../' * depth + + # 1. replace downloaded external media URLs with relative local paths + url_map.each do |external_url, local_path| + content = content.gsub(external_url, "#{prefix}#{local_path.delete_prefix('/')}") + end + + # 2. replace absolute site URLs (Jekyll url + baseurl, or localhost:4000 when url is unset) + # these appear in generated data files and occasionally in HTML meta tags + unless site_url.empty? + escaped = Regexp.escape(site_url) + content = content.gsub(%r{#{escaped}(/[^\s"'<>()\[\]]+)}) do + "#{prefix}#{$1.delete_prefix('/')}" + end + # bare site root URL with no following path + content = content.gsub(%r{#{escaped}/?(?=[\s"'<>()\[\]])}) do + "#{prefix}index.html" + end + end + + # 3. 
rewrite root-relative paths in HTML attribute values + # covers href, src, action, content (meta), xlink:href (SVG), data-src (lazy-load) + # negative lookahead (?!\/) prevents rewriting protocol-relative URLs (//) + content = content.gsub(/((?:href|src|action|content|xlink:href|data-src)=["'])(\/(?!\/)[^"']*)/) do + local = $2.delete_prefix('/') + local = 'index.html' if local.empty? + "#{$1}#{prefix}#{local}" + end + + # 4. rewrite root-relative paths in CSS url() references (inline styles and