From 74d7d2e85a8b3b2de3111ff7d832b96511269d4a Mon Sep 17 00:00:00 2001
From: Max Voit
Date: Sat, 8 Jan 2022 10:24:52 +0100
Subject: [PATCH] new feature: preserve document time as file mtime

---
 docdl/cli.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/docdl/cli.py b/docdl/cli.py
index fceac33..54faaa5 100644
--- a/docdl/cli.py
+++ b/docdl/cli.py
@@ -1,5 +1,6 @@
 """download documents from web portals"""
 
+import os
 import pkg_resources
 import click
 import click_plugins
@@ -85,6 +86,16 @@
     help="webdriver to use for selenium based plugins",
     show_default=True
 )
+@click.option(
+    "-T",
+    "--time-preserve",
+    type=bool,
+    default=True,
+    envvar="DOCDL_TIME_PRESERVE",
+    show_envvar=True,
+    help="preserve the document creation time for the downloaded file",
+    show_default=True
+)
 @click.option(
     "-t",
     "--timeout",
@@ -131,7 +142,7 @@
 # pylint: disable=W0613,C0103,R0913
 def documentdl(
     ctx, username, password, string_matches, regex_matches, jq_matches,
-    headless, browser, timeout, image_loading, action, output_format
+    headless, browser, time_preserve, timeout, image_loading, action, output_format
 ):
     """download documents from web portals"""
     # set browser that SeleniumWebPortal plugins should use
@@ -179,7 +190,10 @@ def run(ctx, plugin_class):
                 continue
             # download ?
             if root_params['action'] == "download":
-                portal.download(document)
+                filename = portal.download(document)
+                if root_params['time_preserve']:
+                    timest = document.attributes['date'].timestamp()
+                    os.utime(filename, (timest, timest))
             # line buffered dict output?
             if root_params['output_format'] == "dicts":
                 # always output as json dict