Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 84adc8e

Browse files
committed
pg_dump: Add support for zstd compression
Allow pg_dump to use the zstd compression, in addition to gzip/lz4. Bulk of the new compression method is implemented in compress_zstd.{c,h}, covering the pg_dump compression APIs. The rest of the patch adds tests and makes various places aware of the new compression method.

The zstd library (which this patch relies on) supports multithreaded compression since version 1.5. We however disallow that feature for now, as it might interfere with parallel backups on platforms that rely on threads (e.g. Windows). This can be improved / relaxed in the future.

This also fixes a minor issue in InitDiscoverCompressFileHandle(), which was not updated to check if the file already has the .lz4 extension.

Adding zstd compression was originally proposed in 2020 (see the second thread), but then was reworked to use the new compression API introduced in e996073. I've considered both threads when compiling the list of reviewers.

Author: Justin Pryzby
Reviewed-by: Tomas Vondra, Jacob Champion, Andreas Karlsson
Discussion: https://postgr.es/m/20230224191840.GD1653@telsasoft.com
Discussion: https://postgr.es/m/20201221194924.GI30237@telsasoft.com
1 parent 794f259 commit 84adc8e
Copy full SHA for 84adc8e

File tree

12 files changed

+713
-54
lines changed
Filter options

12 files changed

+713
-54
lines changed

‎doc/src/sgml/ref/pg_dump.sgml

Copy file name to clipboardExpand all lines: doc/src/sgml/ref/pg_dump.sgml
+8-5
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,9 @@ PostgreSQL documentation
330330
machine-readable format that <application>pg_restore</application>
331331
can read. A directory format archive can be manipulated with
332332
standard Unix tools; for example, files in an uncompressed archive
333-
can be compressed with the <application>gzip</application> or
334-
<application>lz4</application> tools.
333+
can be compressed with the <application>gzip</application>,
334+
<application>lz4</application>, or
335+
<application>zstd</application> tools.
335336
This format is compressed by default using <literal>gzip</literal>
336337
and also supports parallel dumps.
337338
</para>
@@ -655,7 +656,8 @@ PostgreSQL documentation
655656
<para>
656657
Specify the compression method and/or the compression level to use.
657658
The compression method can be set to <literal>gzip</literal>,
658-
<literal>lz4</literal>, or <literal>none</literal> for no compression.
659+
<literal>lz4</literal>, <literal>zstd</literal>,
660+
or <literal>none</literal> for no compression.
659661
A compression detail string can optionally be specified. If the
660662
detail string is an integer, it specifies the compression level.
661663
Otherwise, it should be a comma-separated list of items, each of the
@@ -676,8 +678,9 @@ PostgreSQL documentation
676678
individual table-data segments, and the default is to compress using
677679
<literal>gzip</literal> at a moderate level. For plain text output,
678680
setting a nonzero compression level causes the entire output file to be compressed,
679-
as though it had been fed through <application>gzip</application> or
680-
<application>lz4</application>; but the default is not to compress.
681+
as though it had been fed through <application>gzip</application>,
682+
<application>lz4</application>, or <application>zstd</application>;
683+
but the default is not to compress.
681684
</para>
682685
<para>
683686
The tar archive format currently does not support compression at all.

‎src/bin/pg_dump/Makefile

Copy file name to clipboardExpand all lines: src/bin/pg_dump/Makefile
+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ include $(top_builddir)/src/Makefile.global
1818

1919
export GZIP_PROGRAM=$(GZIP)
2020
export LZ4
21+
export ZSTD
2122
export with_icu
2223

2324
override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
@@ -29,6 +30,7 @@ OBJS = \
2930
compress_io.o \
3031
compress_lz4.o \
3132
compress_none.o \
33+
compress_zstd.o \
3234
dumputils.o \
3335
parallel.o \
3436
pg_backup_archiver.o \

‎src/bin/pg_dump/compress_io.c

Copy file name to clipboardExpand all lines: src/bin/pg_dump/compress_io.c
+40-32
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@
5252
*
5353
* InitDiscoverCompressFileHandle tries to infer the compression by the
5454
* filename suffix. If the suffix is not yet known then it tries to simply
55-
* open the file and if it fails, it tries to open the same file with the .gz
56-
* suffix, and then again with the .lz4 suffix.
55+
* open the file and if it fails, it tries to open the same file with
56+
* compressed suffixes (.gz, .lz4 and .zst, in this order).
5757
*
5858
* IDENTIFICATION
5959
* src/bin/pg_dump/compress_io.c
@@ -69,6 +69,7 @@
6969
#include "compress_io.h"
7070
#include "compress_lz4.h"
7171
#include "compress_none.h"
72+
#include "compress_zstd.h"
7273
#include "pg_backup_utils.h"
7374

7475
/*----------------------
@@ -77,7 +78,8 @@
7778
*/
7879

7980
/*
80-
* Checks whether a compression algorithm is supported.
81+
* Checks whether support for a compression algorithm is implemented in
82+
* pg_dump/restore.
8183
*
8284
* On success returns NULL, otherwise returns a malloc'ed string which can be
8385
* used by the caller in an error message.
@@ -98,6 +100,10 @@ supports_compression(const pg_compress_specification compression_spec)
98100
if (algorithm == PG_COMPRESSION_LZ4)
99101
supported = true;
100102
#endif
103+
#ifdef USE_ZSTD
104+
if (algorithm == PG_COMPRESSION_ZSTD)
105+
supported = true;
106+
#endif
101107

102108
if (!supported)
103109
return psprintf("this build does not support compression with %s",
@@ -130,6 +136,8 @@ AllocateCompressor(const pg_compress_specification compression_spec,
130136
InitCompressorGzip(cs, compression_spec);
131137
else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
132138
InitCompressorLZ4(cs, compression_spec);
139+
else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
140+
InitCompressorZstd(cs, compression_spec);
133141

134142
return cs;
135143
}
@@ -196,20 +204,36 @@ InitCompressFileHandle(const pg_compress_specification compression_spec)
196204
InitCompressFileHandleGzip(CFH, compression_spec);
197205
else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
198206
InitCompressFileHandleLZ4(CFH, compression_spec);
207+
else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
208+
InitCompressFileHandleZstd(CFH, compression_spec);
199209

200210
return CFH;
201211
}
202212

213+
/*
214+
* Checks if a compressed file (with the specified extension) exists.
215+
*
216+
* The filename of the tested file is stored to fname buffer (the existing
217+
* buffer is freed, new buffer is allocated and returned through the pointer).
218+
*/
219+
static bool
220+
check_compressed_file(const char *path, char **fname, char *ext)
221+
{
222+
free_keep_errno(*fname);
223+
*fname = psprintf("%s.%s", path, ext);
224+
return (access(*fname, F_OK) == 0);
225+
}
226+
203227
/*
204228
* Open a file for reading. 'path' is the file to open, and 'mode' should
205229
* be either "r" or "rb".
206230
*
207231
* If the file at 'path' contains the suffix of a supported compression method,
208-
* currently this includes ".gz" and ".lz4", then this compression will be used
232+
* currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
209233
* throughout. Otherwise the compression will be inferred by iteratively trying
210234
* to open the file at 'path', first as is, then by appending known compression
211235
* suffixes. So if you pass "foo" as 'path', this will open either "foo" or
212-
* "foo.gz" or "foo.lz4", trying in that order.
236+
* "foo.{gz,lz4,zst}", trying in that order.
213237
*
214238
* On failure, return NULL with an error code in errno.
215239
*/
@@ -229,36 +253,20 @@ InitDiscoverCompressFileHandle(const char *path, const char *mode)
229253

230254
if (hasSuffix(fname, ".gz"))
231255
compression_spec.algorithm = PG_COMPRESSION_GZIP;
256+
else if (hasSuffix(fname, ".lz4"))
257+
compression_spec.algorithm = PG_COMPRESSION_LZ4;
258+
else if (hasSuffix(fname, ".zst"))
259+
compression_spec.algorithm = PG_COMPRESSION_ZSTD;
232260
else
233261
{
234-
bool exists;
235-
236-
exists = (stat(path, &st) == 0);
237-
/* avoid unused warning if it is not built with compression */
238-
if (exists)
262+
if (stat(path, &st) == 0)
239263
compression_spec.algorithm = PG_COMPRESSION_NONE;
240-
#ifdef HAVE_LIBZ
241-
if (!exists)
242-
{
243-
free_keep_errno(fname);
244-
fname = psprintf("%s.gz", path);
245-
exists = (stat(fname, &st) == 0);
246-
247-
if (exists)
248-
compression_spec.algorithm = PG_COMPRESSION_GZIP;
249-
}
250-
#endif
251-
#ifdef USE_LZ4
252-
if (!exists)
253-
{
254-
free_keep_errno(fname);
255-
fname = psprintf("%s.lz4", path);
256-
exists = (stat(fname, &st) == 0);
257-
258-
if (exists)
259-
compression_spec.algorithm = PG_COMPRESSION_LZ4;
260-
}
261-
#endif
264+
else if (check_compressed_file(path, &fname, "gz"))
265+
compression_spec.algorithm = PG_COMPRESSION_GZIP;
266+
else if (check_compressed_file(path, &fname, "lz4"))
267+
compression_spec.algorithm = PG_COMPRESSION_LZ4;
268+
else if (check_compressed_file(path, &fname, "zst"))
269+
compression_spec.algorithm = PG_COMPRESSION_ZSTD;
262270
}
263271

264272
CFH = InitCompressFileHandle(compression_spec);

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.