diff --git a/.github/workflows/almalinux-8-build.yml b/.github/workflows/almalinux-8-build.yml index 48de328a3..87b34f974 100644 --- a/.github/workflows/almalinux-8-build.yml +++ b/.github/workflows/almalinux-8-build.yml @@ -48,7 +48,8 @@ jobs: attr libattr-devel acl libacl-devel \ zstd libzstd-devel \ lz4 lz4-devel \ - xxhash xxhash-devel + xxhash xxhash-devel \ + libidn2 libidn2-devel alternatives --set python3 /usr/bin/python3.9 pip3 install commonmark - name: configure diff --git a/.github/workflows/android-static-build.yml b/.github/workflows/android-static-build.yml index e91542550..237ee33e6 100644 --- a/.github/workflows/android-static-build.yml +++ b/.github/workflows/android-static-build.yml @@ -76,7 +76,7 @@ jobs: # checksums and its bundled zlib. ./configure --host=${{ matrix.triple }} --build=x86_64-pc-linux-gnu \ --enable-ipv6 \ - --disable-zstd --disable-lz4 --disable-xxhash --disable-openssl \ + --disable-zstd --disable-lz4 --disable-xxhash --disable-openssl --disable-idn \ --disable-iconv --disable-iconv-open \ --disable-acl-support --disable-xattr-support \ --disable-md2man --disable-roll-simd \ diff --git a/.github/workflows/asan-build.yml b/.github/workflows/asan-build.yml index 9acc8d8f6..584f03553 100644 --- a/.github/workflows/asan-build.yml +++ b/.github/workflows/asan-build.yml @@ -41,7 +41,7 @@ jobs: - name: prep run: | sudo apt-get update - sudo apt-get install -y clang acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev openssl + sudo apt-get install -y clang acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev libidn2-dev openssl echo "/usr/local/bin" >>"$GITHUB_PATH" - name: configure # -DNDEBUG builds as a shipped release does (assert() compiled out), so diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 078fda4ff..77e648fbb 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -26,7 +26,7 @@ jobs: - name: prep run: | sudo apt-get update 
- sudo apt-get install -y acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl gcovr + sudo apt-get install -y acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev libidn2-dev python3-cmarkgfm openssl gcovr echo "/usr/local/bin" >>"$GITHUB_PATH" - name: configure run: ./configure --enable-coverage --with-rrsync diff --git a/.github/workflows/cygwin-build.yml b/.github/workflows/cygwin-build.yml index f8feebcd6..4a187ff5e 100644 --- a/.github/workflows/cygwin-build.yml +++ b/.github/workflows/cygwin-build.yml @@ -26,7 +26,7 @@ jobs: run: choco install -y --no-progress cygwin cyg-get - name: prep run: | - cyg-get make autoconf automake gcc-core attr libattr-devel python39 python39-pip libzstd-devel liblz4-devel libssl-devel libxxhash0 libxxhash-devel + cyg-get make autoconf automake gcc-core attr libattr-devel python39 python39-pip libzstd-devel liblz4-devel libssl-devel libxxhash0 libxxhash-devel libidn2-devel echo "C:/tools/cygwin/bin" >>$Env:GITHUB_PATH - name: commonmark run: bash -c 'python3 -mpip install --user commonmark' diff --git a/.github/workflows/fleettest.yml b/.github/workflows/fleettest.yml index c3f66c771..3a8986158 100644 --- a/.github/workflows/fleettest.yml +++ b/.github/workflows/fleettest.yml @@ -33,7 +33,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y gcc g++ gawk autoconf automake \ - acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev \ + acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev libidn2-dev \ python3-cmarkgfm openssl rsync openssh-server - name: set up ssh to localhost run: | diff --git a/.github/workflows/freebsd-build.yml b/.github/workflows/freebsd-build.yml index 4012366d7..d52cf07a8 100644 --- a/.github/workflows/freebsd-build.yml +++ b/.github/workflows/freebsd-build.yml @@ -31,7 +31,7 @@ jobs: pkg install -y bash autotools m4 devel/xxhash zstd liblz4 python3 archivers/liblz4 git run: | freebsd-version - 
./configure --with-rrsync -disable-zstd --disable-md2man --disable-xxhash --disable-lz4 + ./configure --with-rrsync -disable-zstd --disable-md2man --disable-xxhash --disable-lz4 --disable-idn make ./rsync --version make check diff --git a/.github/workflows/macos-build.yml b/.github/workflows/macos-build.yml index 697cc86af..954d8463e 100644 --- a/.github/workflows/macos-build.yml +++ b/.github/workflows/macos-build.yml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: prep run: | - brew install automake openssl xxhash zstd lz4 + brew install automake openssl xxhash zstd lz4 libidn2 pip3 install --user --break-system-packages commonmark echo "$(brew --prefix)/bin" >>"$GITHUB_PATH" - name: configure diff --git a/.github/workflows/netbsd-build.yml b/.github/workflows/netbsd-build.yml index a80fb11b4..00da9440e 100644 --- a/.github/workflows/netbsd-build.yml +++ b/.github/workflows/netbsd-build.yml @@ -32,7 +32,7 @@ jobs: ln -sf /usr/pkg/bin/python3.12 /usr/pkg/bin/python3 run: | uname -a - ./configure --with-rrsync --disable-zstd --disable-md2man --disable-xxhash --disable-lz4 + ./configure --with-rrsync --disable-zstd --disable-md2man --disable-xxhash --disable-lz4 --disable-idn make ./rsync --version make check diff --git a/.github/workflows/openbsd-build.yml b/.github/workflows/openbsd-build.yml index ec38eac5e..21cd2885e 100644 --- a/.github/workflows/openbsd-build.yml +++ b/.github/workflows/openbsd-build.yml @@ -33,7 +33,7 @@ jobs: uname -a export AUTOCONF_VERSION=2.71 export AUTOMAKE_VERSION=1.16 - ./configure --with-rrsync --disable-zstd --disable-md2man --disable-xxhash --disable-lz4 + ./configure --with-rrsync --disable-zstd --disable-md2man --disable-xxhash --disable-lz4 --disable-idn make ./rsync --version make check diff --git a/.github/workflows/scan-build.yml b/.github/workflows/scan-build.yml index 23c1b73c8..d3517d6ba 100644 --- a/.github/workflows/scan-build.yml +++ b/.github/workflows/scan-build.yml @@ -24,7 +24,7 @@ jobs: - name: prep run: | sudo 
apt-get update - sudo apt-get install -y clang clang-tools acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev openssl + sudo apt-get install -y clang clang-tools acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev libidn2-dev openssl - name: configure (under scan-build) # Run configure under scan-build so its analyzer compiler-wrapper is baked # into the Makefile's $(CC); --disable-md2man avoids the doc toolchain. diff --git a/.github/workflows/solaris-build.yml b/.github/workflows/solaris-build.yml index 82aa12692..0ccaeac7c 100644 --- a/.github/workflows/solaris-build.yml +++ b/.github/workflows/solaris-build.yml @@ -31,7 +31,7 @@ jobs: pkg install bash automake gnu-m4 pkg://solaris/runtime/python-35 autoconf gcc git run: | uname -a - ./configure --with-rrsync -disable-zstd --disable-md2man --disable-xxhash --disable-lz4 + ./configure --with-rrsync -disable-zstd --disable-md2man --disable-xxhash --disable-lz4 --disable-idn make ./rsync --version make check diff --git a/.github/workflows/ubuntu-22.04-build.yml b/.github/workflows/ubuntu-22.04-build.yml index 5546ff496..3324a94f3 100644 --- a/.github/workflows/ubuntu-22.04-build.yml +++ b/.github/workflows/ubuntu-22.04-build.yml @@ -28,7 +28,7 @@ jobs: fetch-depth: 0 - name: prep run: | - sudo apt-get install acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + sudo apt-get install acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev libidn2-dev python3-cmarkgfm openssl echo "/usr/local/bin" >>"$GITHUB_PATH" - name: configure run: ./configure --with-rrsync diff --git a/.github/workflows/ubuntu-build.yml b/.github/workflows/ubuntu-build.yml index 1cec98942..4fefcd71d 100644 --- a/.github/workflows/ubuntu-build.yml +++ b/.github/workflows/ubuntu-build.yml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: prep run: | - sudo apt-get install acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev 
python3-cmarkgfm openssl + sudo apt-get install acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev libidn2-dev python3-cmarkgfm openssl echo "/usr/local/bin" >>"$GITHUB_PATH" - name: configure run: ./configure --with-rrsync diff --git a/.github/workflows/ubuntu-version-mix.yml b/.github/workflows/ubuntu-version-mix.yml index 16fd32884..a080a92f1 100644 --- a/.github/workflows/ubuntu-version-mix.yml +++ b/.github/workflows/ubuntu-version-mix.yml @@ -45,7 +45,7 @@ jobs: fetch-depth: 0 - name: prep run: | - sudo apt-get install acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + sudo apt-get install acl libacl1-dev attr libattr1-dev liblz4-dev libzstd-dev libxxhash-dev libidn2-dev python3-cmarkgfm openssl echo "/usr/local/bin" >>"$GITHUB_PATH" - name: configure run: ./configure --with-rrsync diff --git a/access.c b/access.c index b924e0a34..48a06c16a 100644 --- a/access.c +++ b/access.c @@ -23,6 +23,9 @@ #ifdef HAVE_NETGROUP_H #include <netgroup.h> #endif +#ifdef SUPPORT_IDN +#include <idn2.h> +#endif static int allow_forward_dns; @@ -33,6 +36,9 @@ static int match_hostname(const char **host_ptr, const char *addr, const char *t struct hostent *hp; unsigned int i; const char *host = *host_ptr; +#ifdef SUPPORT_IDN + char idn_tok[1024], *idn; +#endif if (!host || !*host) return 0; @@ -42,6 +48,14 @@ static int match_hostname(const char **host_ptr, const char *addr, const char *t return innetgr(tok + 1, host, NULL, NULL); #endif +#ifdef SUPPORT_IDN + if (idn2_to_ascii_8z(tok, &idn, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL) == IDN2_OK) { + strlcpy(idn_tok, idn, sizeof idn_tok); + idn2_free(idn); + tok = idn_tok; + } +#endif + /* First check if the reverse-DNS-determined hostname matches.
*/ if (iwildmatch(tok, host)) return 1; diff --git a/configure.ac b/configure.ac index cda60405b..276954fad 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ AC_CHECK_HEADERS(sys/fcntl.h sys/select.h fcntl.h sys/time.h sys/unistd.h \ sys/acl.h acl/libacl.h attr/xattr.h sys/xattr.h sys/extattr.h dl.h \ popt.h popt/popt.h linux/falloc.h netinet/in_systm.h netgroup.h \ zlib.h xxhash.h openssl/md4.h openssl/md5.h zstd.h lz4.h sys/file.h \ - bsd/string.h) + bsd/string.h idn2.h) AC_CHECK_HEADERS([netinet/ip.h], [], [], [[#include ]]) AC_HEADER_MAJOR_FIXED @@ -626,6 +626,27 @@ else AC_MSG_RESULT(no) fi +AC_MSG_CHECKING([whether to enable IDN support]) +AC_ARG_ENABLE([idn], + AS_HELP_STRING([--disable-idn], [disable to omit IDN (Internationalized Domain Name) support])) +AH_TEMPLATE([SUPPORT_IDN], +[Undefine if you do not want IDN support. By default this is defined.]) +if test x"$enable_idn" != x"no"; then + if test x"$ac_cv_header_idn2_h" = x"yes"; then + AC_MSG_RESULT(yes) + AC_SEARCH_LIBS(idn2_lookup_ul, idn2, + [AC_DEFINE(SUPPORT_IDN)], + [err_msg="$err_msg$nl- Failed to find idn2_lookup_ul function in idn2 lib."; + no_lib="$no_lib idn"]) + else + AC_MSG_RESULT(no) + err_msg="$err_msg$nl- Failed to find idn2.h for IDN support." 
+ no_lib="$no_lib idn" + fi +else + AC_MSG_RESULT(no) +fi + if test x"$no_lib" != x; then echo "" echo "Configure found the following issues:" diff --git a/main.c b/main.c index 9b52bbe6a..31c283abf 100644 --- a/main.c +++ b/main.c @@ -31,6 +31,9 @@ #ifdef __TANDEM #include #endif +#ifdef SUPPORT_IDN +#include <idn2.h> +#endif extern int dry_run; extern int list_only; @@ -517,6 +520,18 @@ static pid_t do_cmd(char *cmd, char *machine, char *user, char **remote_argv, in char *args[MAX_ARGS], *need_to_free = NULL; pid_t pid; int dash_l_set = 0; +#ifdef SUPPORT_IDN + char idn_machine[1024]; + + if (machine && daemon_connection > 0) { + char *idn; + if (idn2_lookup_ul(machine, &idn, IDN2_NONTRANSITIONAL) == IDN2_OK) { + strlcpy(idn_machine, idn, sizeof idn_machine); + idn2_free(idn); + machine = idn_machine; + } + } +#endif if (!read_batch && !local_server) { char *t, *f, in_quote = '\0'; diff --git a/socket.c b/socket.c index d5aa0cb71..5b6475be3 100644 --- a/socket.c +++ b/socket.c @@ -34,6 +34,9 @@ #include #endif #include +#ifdef SUPPORT_IDN +#include <idn2.h> +#endif extern char *bind_address; extern char *sockopts; @@ -196,6 +199,15 @@ int open_socket_out(char *host, int port, const char *bind_addr, int af_hint) int proxied = 0; char buffer[1024]; char *proxy_user = NULL, *proxy_pass = NULL; +#ifdef SUPPORT_IDN + char *idn, idn_host[1024]; + + if (idn2_lookup_ul(host, &idn, IDN2_NONTRANSITIONAL) == IDN2_OK) { + strlcpy(idn_host, idn, sizeof idn_host); + idn2_free(idn); + host = idn_host; + } +#endif /* if we have a RSYNC_PROXY env variable then redirect our * connection via a web proxy at the given address. */ diff --git a/testsuite/idn_test.py b/testsuite/idn_test.py new file mode 100644 index 000000000..2a9246073 --- /dev/null +++ b/testsuite/idn_test.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +# Verify that rsync converts an IDN (internationalized domain name) host to +# its IDNA A-label (Punycode) form.
+# +# Two daemon connection methods carry the host name out of rsync, so both are +# checked: +# * daemon over a remote shell (what rsync-ssl does): the host is handed to +# the --rsh helper. +# * direct daemon socket: observed through a dummy HTTP proxy (RSYNC_PROXY) on +# loopback, so this part only runs under --use-tcp. +# A plain remote-shell transfer (host:path) is intentionally left alone, since +# that name belongs to the user's ssh. + +import os +import shlex +import socket +import subprocess +import sys +import threading + +from rsyncfns import ( + RSYNC, SCRATCHDIR, USE_TCP, claim_ports, run_rsync, + test_fail, test_skipped, +) + + +if '"IDN": true' not in run_rsync('-VV', check=True, capture_output=True).stdout: + test_skipped("rsync built without IDN support") + + +def find_utf8_locale(): + try: + out = subprocess.check_output(['locale', '-a'], text=True, + stderr=subprocess.DEVNULL) + except (OSError, subprocess.CalledProcessError): + return None + avail = out.split() + for want in ('C.UTF-8', 'C.utf8', 'en_US.UTF-8', 'en_US.utf8'): + if want in avail: + return want + for loc in avail: + if loc.lower().replace('-', '').endswith('utf8'): + return loc + return None + + +utf8_locale = find_utf8_locale() +if not utf8_locale: + test_skipped("no UTF-8 locale available to encode the IDN host") + +idn_host = "\u010ci\u010dku.example" +ascii_host = "xn--iku-eqab.example" + +env = os.environ.copy() +env['LC_ALL'] = utf8_locale +out_dir = (str(SCRATCHDIR / 'out') + '/').encode() + + +def run_idn(url, *extra, extra_env=None): + # A bytes argv keeps the UTF-8 host intact regardless of Python's + # filesystem encoding. 
+ e = dict(env) + if extra_env: + e.update(extra_env) + argv = [a.encode() for a in shlex.split(RSYNC)] + argv += [a.encode() for a in extra] + argv += [url.encode('utf-8'), out_dir] + return subprocess.run(argv, capture_output=True, env=e, timeout=30) + + +# --- daemon over a remote shell (the rsync-ssl mechanism) ------------------ +helper = SCRATCHDIR / 'idn-rsh.sh' +helper.write_text('#!/bin/sh\nprintf %s "$1" > "$IDN_RSH_OUT"\nexit 1\n') +helper.chmod(0o755) + +hostfile = SCRATCHDIR / 'idn-rsh-host' +if hostfile.exists(): + hostfile.unlink() + +run_idn(f"rsync://{idn_host}/module/", f"--rsh={helper}", + extra_env={'IDN_RSH_OUT': str(hostfile)}) + +got = hostfile.read_text() if hostfile.exists() else '' +if got != ascii_host: + test_fail(f"daemon-over-rsh sent host {got!r}, expected A-label {ascii_host!r}") +print(f"OK: daemon-over-rsh (rsync-ssl style) host sent as {ascii_host}") + + +# --- direct daemon socket, observed via a dummy proxy ----------------------- +if not USE_TCP: + print("direct-socket proxy check needs --use-tcp; skipping that part") + sys.exit(0) + +PROXY_PORT = 13335 +claim_ports(PROXY_PORT) + +listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +listener.bind(('127.0.0.1', PROXY_PORT)) +listener.listen(1) + +captured = {} + + +def serve_one(): + conn, _ = listener.accept() + conn.settimeout(5) + data = b"" + try: + while b"\r\n\r\n" not in data and len(data) < 65536: + chunk = conn.recv(8192) + if not chunk: + break + data += chunk + except socket.timeout: + pass + captured['request'] = data + try: + conn.sendall(b"HTTP/1.0 403 Forbidden\r\n\r\n") + conn.shutdown(socket.SHUT_RDWR) + except OSError: + pass + conn.close() + + +t = threading.Thread(target=serve_one) +t.daemon = True +t.start() + +proc = run_idn(f"rsync://{idn_host}:873/whatever/", + extra_env={'RSYNC_PROXY': f'127.0.0.1:{PROXY_PORT}'}) + +t.join(timeout=15) +listener.close() + +if proc.returncode >= 128: 
+ sys.stderr.write(proc.stderr.decode('latin1')) + test_fail(f"rsync killed by signal (status={proc.returncode})") + +request = captured.get('request', b'') +if not request: + test_fail("dummy proxy received no CONNECT request from rsync") + +if ascii_host.encode() not in request: + sys.stderr.write("proxy received: %r\n" % request.split(b"\r\n", 1)[0]) + test_fail(f"expected A-label {ascii_host} in the proxy CONNECT request") + +print(f"OK: direct-socket CONNECT host sent as {ascii_host}") diff --git a/usage.c b/usage.c index f346385f4..0b3e7fcf5 100644 --- a/usage.c +++ b/usage.c @@ -138,6 +138,11 @@ static void print_info_flags(enum logcode f) #endif "crtimes", +#ifndef SUPPORT_IDN + "no " +#endif + "IDN", + "*Optimizations", #ifndef USE_ROLL_SIMD