From 5804900de225321f42aae2e9b8f7a66e0452e59b Mon Sep 17 00:00:00 2001 From: Kirk Wolak Date: Tue, 16 Jun 2026 01:07:00 -0400 Subject: [PATCH 1/2] Add --create-empty-files-for-excluded-data for directory-format pg_dump. When used with --exclude-table-data in -Fd output, still emit TABLE DATA TOC entries and placeholder .dat files containing a COPY end marker for excluded tables. Restrict the option to directory format and COPY output; document it and add a TAP test covering validation and dump contents. Co-authored-by: Cursor --- doc/src/sgml/ref/pg_dump.sgml | 34 +++++ src/bin/pg_dump/meson.build | 1 + src/bin/pg_dump/pg_backup.h | 1 + src/bin/pg_dump/pg_dump.c | 64 ++++++++- src/bin/pg_dump/pg_dump.h | 1 + .../t/012_pg_dump_empty_excluded_data.pl | 121 ++++++++++++++++++ 6 files changed, 218 insertions(+), 4 deletions(-) create mode 100644 src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index ae1bc14d2f26f..6ee7cb131b6bf 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -720,6 +720,40 @@ PostgreSQL documentation + + --create-empty-files-for-excluded-data + + + When used together with --exclude-table-data or + --exclude-table-data-and-children in directory output + format (-Fd or --format=directory), + still create a TABLE DATA archive entry (including + the usual COPY statement) for each excluded table, + but do not dump the table's rows. A data file named after the table's + dump ID (for example 3541.dat) is created + containing only a \. COPY end marker as a + placeholder. + + + This option is intended for workflows where excluded table data is + loaded separately after the dump is taken, for example by replacing + the placeholder data file with externally produced data before + restore. + + + --create-empty-files-for-excluded-data cannot be used + without --exclude-table-data or + --exclude-table-data-and-children. It is only + supported when directory output format is selected + (-Fd or --format=directory) and + data is being dumped as COPY (the default). + It cannot be used with --inserts, + --column-inserts, or + --rows-per-insert. 
+ + + + diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build index 7c9a475963b5c..9d76332264082 100644 --- a/src/bin/pg_dump/meson.build +++ b/src/bin/pg_dump/meson.build @@ -105,6 +105,7 @@ tests += { 't/006_pg_dump_compress.pl', 't/007_pg_dumpall.pl', 't/010_dump_connstr.pl', + 't/012_pg_dump_empty_excluded_data.pl', ], }, } diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 28e7ff6fa1636..15c9b987fcf46 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -198,6 +198,7 @@ typedef struct _dumpOptions int use_setsessauth; int enable_row_security; int load_via_partition_root; + bool create_empty_files_for_excluded_data; /* default, if no "inclusion" switches appear, is to dump everything */ bool include_everything; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index a0f7f8e216803..187090d0920b8 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -68,6 +68,7 @@ #include "getopt_long.h" #include "libpq/libpq-fs.h" #include "parallel.h" +#include "pg_backup_archiver.h" #include "pg_backup_db.h" #include "pg_backup_utils.h" #include "pg_dump.h" @@ -493,6 +494,7 @@ main(int argc, char **argv) {"attribute-inserts", no_argument, &dopt.column_inserts, 1}, {"binary-upgrade", no_argument, &dopt.binary_upgrade, 1}, {"column-inserts", no_argument, &dopt.column_inserts, 1}, + {"create-empty-files-for-excluded-data", no_argument, NULL, 26}, {"disable-dollar-quoting", no_argument, &dopt.disable_dollar_quoting, 1}, {"disable-triggers", no_argument, &dopt.disable_triggers, 1}, {"enable-row-security", no_argument, &dopt.enable_row_security, 1}, @@ -799,6 +801,10 @@ main(int argc, char **argv) dopt.restrict_key = pg_strdup(optarg); break; + case 26: + dopt.create_empty_files_for_excluded_data = true; + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -886,9 +892,27 @@ main(int argc, 
char **argv) "--on-conflict-do-nothing", "--inserts", "--rows-per-insert", "--column-inserts"); + if (dopt.create_empty_files_for_excluded_data && + tabledata_exclude_patterns.head == NULL && + tabledata_exclude_patterns_and_children.head == NULL) + pg_fatal("option %s requires option %s or %s", + "--create-empty-files-for-excluded-data", + "--exclude-table-data", "--exclude-table-data-and-children"); + + if (dopt.create_empty_files_for_excluded_data && + dopt.dump_inserts != 0) + pg_fatal("option %s cannot be used with %s, %s, or %s", + "--create-empty-files-for-excluded-data", + "--inserts", "--column-inserts", "--rows-per-insert"); + /* Identify archive format to emit */ archiveFormat = parseArchiveFormat(format, &archiveMode); + if (dopt.create_empty_files_for_excluded_data && + archiveFormat != archDirectory) + pg_fatal("option %s is only supported by the directory format", + "--create-empty-files-for-excluded-data"); + /* archiveFormat specific setup */ if (archiveFormat == archNull) { @@ -1329,6 +1353,10 @@ help(const char *progname) printf(_(" -x, --no-privileges do not dump privileges (grant/revoke)\n")); printf(_(" --binary-upgrade for use by upgrade utilities only\n")); printf(_(" --column-inserts dump data as INSERT commands with column names\n")); + printf(_(" --create-empty-files-for-excluded-data\n" + " create empty data files for tables excluded\n" + " with --exclude-table-data (directory\n" + " format and COPY data only)\n")); printf(_(" --disable-dollar-quoting disable dollar quoting, use SQL standard quoting\n")); printf(_(" --disable-triggers disable triggers during data-only restore\n")); printf(_(" --enable-row-security enable row security (dump only content user has\n" @@ -2355,6 +2383,29 @@ selectDumpableObject(DumpableObject *dobj, Archive *fout) DUMP_COMPONENT_ALL : DUMP_COMPONENT_NONE; } +/* + * Dump an empty data file for a table whose data was excluded with + * --exclude-table-data but --create-empty-files-for-excluded-data was set. 
+ */ +static int +dumpTableData_empty(Archive *fout, const void *dcontext) +{ + const TableDataInfo *tdinfo = dcontext; + const TableInfo *tbinfo = tdinfo->tdtable; + + pg_log_info("creating empty data file for excluded table \"%s.%s\"", + tbinfo->dobj.namespace->dobj.name, tbinfo->dobj.name); + + /* + * Emit the COPY end marker, as dumpTableData_copy() does for an empty + * table. Archive formats store raw COPY data in separate blobs/files. + */ + if (fout->dopt->dump_inserts == 0) + archprintf(fout, "\\.\n\n\n"); + + return 1; +} + /* * Dump a table's contents for loading using the COPY command * - this routine is called by the Archiver when it wants the table @@ -2895,7 +2946,8 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo) if (dopt->dump_inserts == 0) { /* Dump/restore using COPY */ - dumpFn = dumpTableData_copy; + dumpFn = tdinfo->emptyExcludedData ? + dumpTableData_empty : dumpTableData_copy; /* must use 2 steps here 'cause fmtId is nonreentrant */ printfPQExpBuffer(copyBuf, "COPY %s ", copyFrom); @@ -2906,7 +2958,8 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo) else { /* Restore using INSERT */ - dumpFn = dumpTableData_insert; + dumpFn = tdinfo->emptyExcludedData ? + dumpTableData_empty : dumpTableData_insert; copyStmt = NULL; } @@ -3026,6 +3079,7 @@ static void makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) { TableDataInfo *tdinfo; + bool data_excluded; /* * Nothing to do if we already decided to dump the table. 
This will @@ -3056,8 +3110,9 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) return; /* Check that the data is not explicitly excluded */ - if (simple_oid_list_member(&tabledata_exclude_oids, - tbinfo->dobj.catId.oid)) + data_excluded = simple_oid_list_member(&tabledata_exclude_oids, + tbinfo->dobj.catId.oid); + if (data_excluded && !dopt->create_empty_files_for_excluded_data) return; /* OK, let's dump it */ @@ -3081,6 +3136,7 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) tdinfo->dobj.namespace = tbinfo->dobj.namespace; tdinfo->tdtable = tbinfo; tdinfo->filtercond = NULL; /* might get set later */ + tdinfo->emptyExcludedData = data_excluded; addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId); /* A TableDataInfo contains data, of course */ diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 5a6726d8b12e2..d563c3ed22489 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -413,6 +413,7 @@ typedef struct _tableDataInfo DumpableObject dobj; TableInfo *tdtable; /* link to table to dump */ char *filtercond; /* WHERE condition to limit rows dumped */ + bool emptyExcludedData; /* excluded by --exclude-table-data */ } TableDataInfo; typedef struct _indxInfo diff --git a/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl b/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl new file mode 100644 index 0000000000000..530e08a4ee872 --- /dev/null +++ b/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl @@ -0,0 +1,121 @@ + +# Copyright (c) 2026, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $tempdir = PostgreSQL::Test::Utils::tempdir; + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init; +$node->start; + +my $src_db = 'empty_excl_src'; +my $dumpdir = "$tempdir/empty_excl_dump"; + +$node->safe_psql( + 'postgres', + qq{CREATE DATABASE $src_db; + \\c $src_db + CREATE TABLE 
keep_data(id int); + CREATE TABLE skip_data(id int); + INSERT INTO keep_data VALUES (1), (2); + INSERT INTO skip_data VALUES (10), (20), (30);}); + +# Flag without --exclude-table-data must fail. +$node->command_fails( + [ + 'pg_dump', + '--no-sync', + '--format' => 'directory', + '--file' => "$tempdir/bad_dump", + '--create-empty-files-for-excluded-data', + $node->connstr($src_db), + ], + 'create-empty-files-for-excluded-data requires exclude-table-data'); + +# Flag requires directory output format. +$node->command_fails_like( + [ + 'pg_dump', + '--no-sync', + '--format' => 'custom', + '--file' => "$tempdir/bad_custom.dump", + '--exclude-table-data' => 'skip_data', + '--create-empty-files-for-excluded-data', + $node->connstr($src_db), + ], + qr/create-empty-files-for-excluded-data.*only supported by the directory format/, + 'create-empty-files-for-excluded-data requires directory format'); + +# Flag requires COPY-format data, not INSERT output. +my @incompatible_opts = ( + { label => 'inserts', extra => [ '--inserts' ] }, + { label => 'column-inserts', extra => [ '--column-inserts' ] }, + { label => 'rows-per-insert', extra => [ '--rows-per-insert' => 10 ] }, +); +for my $case (@incompatible_opts) +{ + $node->command_fails_like( + [ + 'pg_dump', + '--no-sync', + '--format' => 'directory', + '--file' => "$tempdir/bad_$case->{label}", + '--exclude-table-data' => 'skip_data', + '--create-empty-files-for-excluded-data', + @{ $case->{extra} }, + $node->connstr($src_db), + ], + qr/create-empty-files-for-excluded-data.*cannot be used with/, + "create-empty-files-for-excluded-data rejects $case->{label}"); +} + +$node->command_ok( + [ + 'pg_dump', + '--no-sync', + '--format' => 'directory', + '--compress' => 'none', + '--file' => $dumpdir, + '--exclude-table-data' => 'skip_data', + '--create-empty-files-for-excluded-data', + $node->connstr($src_db), + ], + 'directory dump with empty excluded table data files'); + +$node->command_like( + [ 'pg_restore', '--list', $dumpdir 
], + qr/TABLE DATA public skip_data/, + 'TOC lists TABLE DATA for excluded table'); + +my ($stdout, $stderr) = run_command([ 'pg_restore', '--list', $dumpdir ]); +my $skip_dumpid; +foreach my $line (split /\n/, $stdout) +{ + if ($line =~ /TABLE DATA public skip_data/ && $line =~ /^(\d+);/) + { + $skip_dumpid = $1; + last; + } +} +ok(defined $skip_dumpid, 'found dump ID for excluded table'); +like( + slurp_file("$dumpdir/${skip_dumpid}.dat"), + qr/^\\\.\n/, + 'excluded table data file contains COPY end marker only') + if defined $skip_dumpid; + +my @datfiles = grep { $_ !~ /\/toc\.dat$/ } glob("$dumpdir/*.dat"); +cmp_ok(scalar(@datfiles), '==', 2, 'two table data files in dump'); + +my ($keep_dat) = grep { $_ ne "$dumpdir/${skip_dumpid}.dat" } @datfiles; +ok(defined $keep_dat && -s $keep_dat > 0, + 'included table has a non-empty data file') + if defined $skip_dumpid; + +done_testing(); From dceb101583cd6748a0627af1e714f9dc70087e61 Mon Sep 17 00:00:00 2001 From: Kirk Wolak Date: Tue, 16 Jun 2026 09:54:19 -0400 Subject: [PATCH 2/2] Restore pg_restore coverage in pg_dump-only TAP test. Stock pg_restore handles placeholder COPY end-marker files correctly; the earlier restore failure was from mixed build artifacts after branch switching without make clean. 
Co-authored-by: Cursor --- .../t/012_pg_dump_empty_excluded_data.pl | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl b/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl index 530e08a4ee872..845900a191844 100644 --- a/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl +++ b/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl @@ -15,6 +15,7 @@ $node->start; my $src_db = 'empty_excl_src'; +my $dst_db = 'empty_excl_dst'; my $dumpdir = "$tempdir/empty_excl_dump"; $node->safe_psql( @@ -118,4 +119,23 @@ 'included table has a non-empty data file') if defined $skip_dumpid; +$node->safe_psql('postgres', "CREATE DATABASE $dst_db"); + +$node->command_ok( + [ + 'pg_restore', + '--dbname' => $node->connstr($dst_db), + $dumpdir, + ], + 'restore dump with empty excluded data file'); + +is( + $node->safe_psql($dst_db, 'SELECT count(*) FROM keep_data'), + '2', + 'included table data restored'); +is( + $node->safe_psql($dst_db, 'SELECT count(*) FROM skip_data'), + '0', + 'excluded table restored with no rows'); + done_testing();