Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 52e47fe

Browse filesBrowse files
author
Oleg Gurev
committed
[PBCKP-218] Incremental restore and missing pg_control (issue #304)
- pg_control file backup after all other files in backup - pg_control file restore last in full restore - rename pg_control to pg_control.pbk.bak at start of non-full restore - remove pg_control.pbk.bak at the end of a successful non-full restore - use pg_control.pbk.bak after failed non-full restore - added tests for full and incremental restore Tags: backup, catchup, restore
1 parent d26df12 commit 52e47fe
Copy full SHA for 52e47fe

File tree

9 files changed

+354
-83
lines changed
Filter options

9 files changed

+354
-83
lines changed

‎src/backup.c

Copy file name to clipboardExpand all lines: src/backup.c
+48-10Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
122122
char pretty_time[20];
123123
char pretty_bytes[20];
124124

125+
pgFile *src_pg_control_file = NULL;
126+
125127
elog(INFO, "Database backup start");
126128
if(current.external_dir_str)
127129
{
@@ -424,6 +426,24 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
424426

425427
}
426428

429+
/*
430+
* find pg_control file
431+
* We'll copy it last
432+
*/
433+
{
434+
int control_file_elem_index;
435+
pgFile search_key;
436+
MemSet(&search_key, 0, sizeof(pgFile));
437+
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
438+
search_key.rel_path = XLOG_CONTROL_FILE;
439+
search_key.external_dir_num = 0;
440+
control_file_elem_index = parray_bsearch_index(backup_files_list, &search_key, pgFileCompareRelPathWithExternal);
441+
442+
if (control_file_elem_index < 0)
443+
elog(ERROR, "File \"%s\" not found in PGDATA %s", XLOG_CONTROL_FILE, current.database_dir);
444+
src_pg_control_file = (pgFile *)parray_get(backup_files_list, control_file_elem_index);
445+
}
446+
427447
/* setup thread locks */
428448
pfilearray_clear_locks(backup_files_list);
429449

@@ -483,6 +503,26 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
483503
backup_isok = false;
484504
}
485505

506+
/* copy pg_control at very end */
507+
if (backup_isok)
508+
{
509+
510+
elog(progress ? INFO : LOG, "Progress: Backup file \"%s\"",
511+
src_pg_control_file->rel_path);
512+
513+
char from_fullpath[MAXPGPATH];
514+
char to_fullpath[MAXPGPATH];
515+
join_path_components(from_fullpath, instance_config.pgdata, src_pg_control_file->rel_path);
516+
join_path_components(to_fullpath, current.database_dir, src_pg_control_file->rel_path);
517+
518+
backup_non_data_file(src_pg_control_file, NULL,
519+
from_fullpath, to_fullpath,
520+
current.backup_mode, current.parent_backup,
521+
true);
522+
}
523+
524+
525+
486526
time(&end_time);
487527
pretty_time_interval(difftime(end_time, start_time),
488528
pretty_time, lengthof(pretty_time));
@@ -510,17 +550,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
510550
{
511551
pgFile *pg_control = NULL;
512552

513-
for (i = 0; i < parray_num(backup_files_list); i++)
514-
{
515-
pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i);
553+
pg_control = src_pg_control_file;
516554

517-
if (tmp_file->external_dir_num == 0 &&
518-
(strcmp(tmp_file->rel_path, XLOG_CONTROL_FILE) == 0))
519-
{
520-
pg_control = tmp_file;
521-
break;
522-
}
523-
}
524555

525556
if (!pg_control)
526557
elog(ERROR, "Failed to find file \"%s\" in backup filelist.",
@@ -2076,6 +2107,13 @@ backup_files(void *arg)
20762107
/* We have already copied all directories */
20772108
if (S_ISDIR(file->mode))
20782109
continue;
2110+
/*
2111+
* Don't copy the pg_control file now, we'll copy it last
2112+
*/
2113+
if(file->external_dir_num == 0 && pg_strcasecmp(file->rel_path, XLOG_CONTROL_FILE) == 0)
2114+
{
2115+
continue;
2116+
}
20792117

20802118
if (arguments->thread_num == 1)
20812119
{

‎src/catchup.c

Copy file name to clipboardExpand all lines: src/catchup.c
+43-3Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,13 @@ catchup_preflight_checks(PGNodeInfo *source_node_info, PGconn *source_conn,
171171

172172
if (current.backup_mode != BACKUP_MODE_FULL)
173173
{
174-
dest_id = get_system_identifier(dest_pgdata, FIO_LOCAL_HOST, false);
174+
ControlFileData dst_control;
175+
get_control_file_or_back_file(dest_pgdata, FIO_LOCAL_HOST, &dst_control);
176+
dest_id = dst_control.system_identifier;
177+
175178
if (source_conn_id != dest_id)
176-
elog(ERROR, "Database identifiers mismatch: we connected to DB id %lu, but in \"%s\" we found id %lu",
177-
source_conn_id, dest_pgdata, dest_id);
179+
elog(ERROR, "Database identifiers mismatch: we connected to DB id %llu, but in \"%s\" we found id %llu",
180+
(long long)source_conn_id, dest_pgdata, (long long)dest_id);
178181
}
179182
}
180183

@@ -640,6 +643,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
640643
ssize_t transfered_walfiles_bytes = 0;
641644
char pretty_source_bytes[20];
642645

646+
char dest_pg_control_fullpath[MAXPGPATH];
647+
char dest_pg_control_bak_fullpath[MAXPGPATH];
648+
643649
source_conn = catchup_init_state(&source_node_info, source_pgdata, dest_pgdata);
644650
catchup_preflight_checks(&source_node_info, source_conn, source_pgdata, dest_pgdata);
645651

@@ -935,6 +941,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
935941
Assert(file->external_dir_num == 0);
936942
if (pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
937943
redundant = true;
944+
/* global/pg_control.pbk.bak is always kept, because it is needed to restart a failed incremental restore */
945+
if (pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
946+
redundant = false;
938947

939948
/* if file does not exists in destination list, then we can safely unlink it */
940949
if (redundant)
@@ -966,6 +975,28 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
966975
if (dest_filelist)
967976
parray_qsort(dest_filelist, pgFileCompareRelPathWithExternal);
968977

978+
join_path_components(dest_pg_control_fullpath, dest_pgdata, XLOG_CONTROL_FILE);
979+
join_path_components(dest_pg_control_bak_fullpath, dest_pgdata, XLOG_CONTROL_BAK_FILE);
980+
/*
981+
* rename (if it exists) the dest control file before restoring;
982+
* if it doesn't exist, that means we are restoring into a previously failed
983+
* pgdata, where XLOG_CONTROL_BAK_FILE already exists
984+
*/
985+
if (current.backup_mode != BACKUP_MODE_FULL && !dry_run)
986+
{
987+
if (!fio_access(dest_pg_control_fullpath, F_OK, FIO_LOCAL_HOST))
988+
{
989+
pgFile *dst_control;
990+
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
991+
true,0, FIO_BACKUP_HOST);
992+
993+
if(!fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST))
994+
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
995+
fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
996+
pgFileFree(dst_control);
997+
}
998+
}
999+
9691000
/* run copy threads */
9701001
elog(INFO, "Start transferring data files");
9711002
time(&start_time);
@@ -985,6 +1016,15 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
9851016
copy_pgcontrol_file(from_fullpath, FIO_DB_HOST,
9861017
to_fullpath, FIO_LOCAL_HOST, source_pg_control_file);
9871018
transfered_datafiles_bytes += source_pg_control_file->size;
1019+
1020+
/* Now the backup control file can be deleted */
1021+
if (current.backup_mode != BACKUP_MODE_FULL && !fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST)){
1022+
pgFile *dst_control;
1023+
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
1024+
true,0, FIO_BACKUP_HOST);
1025+
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
1026+
pgFileFree(dst_control);
1027+
}
9881028
}
9891029

9901030
if (!catchup_isok && !dry_run)

‎src/dir.c

Copy file name to clipboardExpand all lines: src/dir.c
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1867,4 +1867,4 @@ set_forkname(pgFile *file)
18671867
file->segno = segno;
18681868
file->is_datafile = file->forkName == none;
18691869
return true;
1870-
}
1870+
}

‎src/pg_probackup.h

Copy file name to clipboardExpand all lines: src/pg_probackup.h
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ extern const char *PROGRAM_EMAIL;
9191
#define DATABASE_MAP "database_map"
9292
#define HEADER_MAP "page_header_map"
9393
#define HEADER_MAP_TMP "page_header_map_tmp"
94+
#define XLOG_CONTROL_BAK_FILE XLOG_CONTROL_FILE".pbk.bak"
9495

9596
/* default replication slot names */
9697
#define DEFAULT_TEMP_SLOT_NAME "pg_probackup_slot";
@@ -1209,6 +1210,8 @@ extern uint32 get_xlog_seg_size(const char *pgdata_path);
12091210
extern void get_redo(const char *pgdata_path, fio_location pgdata_location, RedoParams *redo);
12101211
extern void set_min_recovery_point(pgFile *file, const char *backup_path,
12111212
XLogRecPtr stop_backup_lsn);
1213+
extern void get_control_file_or_back_file(const char *pgdata_path, fio_location location,
1214+
ControlFileData *control);
12121215
extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location,
12131216
const char *to_fullpath, fio_location to_location, pgFile *file);
12141217

‎src/restore.c

Copy file name to clipboardExpand all lines: src/restore.c
+82-1Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ typedef struct
3939
int ret;
4040
} restore_files_arg;
4141

42+
static bool control_downloaded = false;
43+
static ControlFileData instance_control;
4244

4345
static void
4446
print_recovery_settings(InstanceState *instanceState, FILE *fp, pgBackup *backup,
@@ -501,6 +503,9 @@ do_restore_or_validate(InstanceState *instanceState, time_t target_backup_id, pg
501503
if (redo.checksum_version == 0)
502504
elog(ERROR, "Incremental restore in 'lsn' mode require "
503505
"data_checksums to be enabled in destination data directory");
506+
if (!control_downloaded)
507+
get_control_file_or_back_file(instance_config.pgdata, FIO_DB_HOST,
508+
&instance_control);
504509

505510
timelines = read_timeline_history(instanceState->instance_wal_subdir_path,
506511
redo.tli, false);
@@ -719,6 +724,10 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
719724
parray *pgdata_files = NULL;
720725
parray *dest_files = NULL;
721726
parray *external_dirs = NULL;
727+
pgFile *dest_pg_control_file = NULL;
728+
char dest_pg_control_fullpath[MAXPGPATH];
729+
char dest_pg_control_bak_fullpath[MAXPGPATH];
730+
722731
/* arrays with meta info for multi threaded backup */
723732
pthread_t *threads;
724733
restore_files_arg *threads_args;
@@ -922,6 +931,11 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
922931
pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
923932
redundant = true;
924933

934+
/* global/pg_control.pbk.bak is always kept, because it is needed to restart a failed incremental restore */
935+
if (file->external_dir_num == 0 &&
936+
pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
937+
redundant = false;
938+
925939
/* do not delete the useful internal directories */
926940
if (S_ISDIR(file->mode) && !redundant)
927941
continue;
@@ -974,6 +988,42 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
974988
dest_bytes = dest_backup->pgdata_bytes;
975989

976990
pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes));
991+
/*
992+
* [Issue #313]
993+
* find pg_control file (in already sorted earlier dest_files, see parray_qsort(backup->files...))
994+
* and exclude it from list for future special processing
995+
*/
996+
{
997+
int control_file_elem_index;
998+
pgFile search_key;
999+
MemSet(&search_key, 0, sizeof(pgFile));
1000+
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
1001+
search_key.rel_path = XLOG_CONTROL_FILE;
1002+
search_key.external_dir_num = 0;
1003+
control_file_elem_index = parray_bsearch_index(dest_files, &search_key, pgFileCompareRelPathWithExternal);
1004+
1005+
if (control_file_elem_index < 0)
1006+
elog(ERROR, "File \"%s\" not found in backup %s", XLOG_CONTROL_FILE, base36enc(dest_backup->start_time));
1007+
dest_pg_control_file = (pgFile *) parray_get(dest_files, control_file_elem_index);
1008+
parray_remove(dest_files, control_file_elem_index);
1009+
1010+
join_path_components(dest_pg_control_fullpath, pgdata_path, XLOG_CONTROL_FILE);
1011+
join_path_components(dest_pg_control_bak_fullpath, pgdata_path, XLOG_CONTROL_BAK_FILE);
1012+
/*
1013+
* rename (if it exists) the dest control file before restoring;
1014+
* if it doesn't exist, that means we are restoring into a previously failed
1015+
* pgdata, where XLOG_CONTROL_BAK_FILE already exists
1016+
*/
1017+
if (params->incremental_mode != INCR_NONE)
1018+
{
1019+
if (fio_access(dest_pg_control_fullpath,F_OK,FIO_DB_HOST) == 0){
1020+
if (fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_DB_HOST) < 0)
1021+
elog(WARNING, "Cannot rename file \"%s\" to \"%s\": %s",
1022+
dest_pg_control_fullpath, dest_pg_control_bak_fullpath, strerror(errno));
1023+
}
1024+
}
1025+
}
1026+
9771027
elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes);
9781028
time(&start_time);
9791029
thread_interrupted = false;
@@ -1014,6 +1064,32 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
10141064
total_bytes += threads_args[i].restored_bytes;
10151065
}
10161066

1067+
/* [Issue #313] copy pg_control at very end */
1068+
if (restore_isok)
1069+
{
1070+
FILE *out = NULL;
1071+
elog(progress ? INFO : LOG, "Progress: Restore file \"%s\"",
1072+
dest_pg_control_file->rel_path);
1073+
1074+
out = fio_fopen(dest_pg_control_fullpath, PG_BINARY_R "+", FIO_DB_HOST);
1075+
1076+
total_bytes += restore_non_data_file(parent_chain,
1077+
dest_backup,
1078+
dest_pg_control_file,
1079+
out,
1080+
dest_pg_control_fullpath, false);
1081+
fio_fclose(out);
1082+
/* Now backup control file can be deleted */
1083+
if (params->incremental_mode != INCR_NONE)
1084+
{
1085+
pgFile *dst_control;
1086+
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
1087+
true,0, FIO_BACKUP_HOST);
1088+
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
1089+
pgFileFree(dst_control);
1090+
}
1091+
}
1092+
10171093
time(&end_time);
10181094
pretty_time_interval(difftime(end_time, start_time),
10191095
pretty_time, lengthof(pretty_time));
@@ -1098,6 +1174,8 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
10981174
parray_free(pgdata_files);
10991175
}
11001176

1177+
if(dest_pg_control_file) pgFileFree(dest_pg_control_file);
1178+
11011179
for (i = parray_num(parent_chain) - 1; i >= 0; i--)
11021180
{
11031181
pgBackup *backup = (pgBackup *) parray_get(parent_chain, i);
@@ -2230,7 +2308,10 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier,
22302308
*/
22312309
elog(LOG, "Trying to read pg_control file in destination directory");
22322310

2233-
system_id_pgdata = get_system_identifier(pgdata, FIO_DB_HOST, false);
2311+
get_control_file_or_back_file(pgdata, FIO_DB_HOST, &instance_control);
2312+
control_downloaded = true;
2313+
2314+
system_id_pgdata = instance_control.system_identifier;
22342315

22352316
if (system_id_pgdata == instance_config.system_identifier)
22362317
system_id_match = true;

‎src/util.c

Copy file name to clipboardExpand all lines: src/util.c
+20Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,26 @@ get_current_timeline_from_control(const char *pgdata_path, fio_location location
190190
return ControlFile.checkPointCopy.ThisTimeLineID;
191191
}
192192

193+
void
194+
get_control_file_or_back_file(const char *pgdata_path, fio_location location, ControlFileData *control)
195+
{
196+
char *buffer;
197+
size_t size;
198+
199+
/* First fetch file... */
200+
buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, true, location);
201+
202+
if (!buffer || size == 0){
203+
/* Error read XLOG_CONTROL_FILE or file is truncated, trying read backup */
204+
buffer = slurpFile(pgdata_path, XLOG_CONTROL_BAK_FILE, &size, true, location);
205+
if (!buffer)
206+
elog(ERROR, "Could not read %s and %s files\n", XLOG_CONTROL_FILE, XLOG_CONTROL_BAK_FILE); /* Maybe it should be PANIC? */
207+
}
208+
digestControlFile(control, buffer, size);
209+
pg_free(buffer);
210+
}
211+
212+
193213
/*
194214
* Get last check point record ptr from pg_control.
195215
*/

‎tests/helpers/ptrack_helpers.py

Copy file name to clipboardExpand all lines: tests/helpers/ptrack_helpers.py
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1783,7 +1783,7 @@ def pgdata_content(self, pgdata, ignore_ptrack=True, exclude_dirs=None):
17831783
'ptrack_control', 'ptrack_init', 'pg_control',
17841784
'probackup_recovery.conf', 'recovery.signal',
17851785
'standby.signal', 'ptrack.map', 'ptrack.map.mmap',
1786-
'ptrack.map.tmp'
1786+
'ptrack.map.tmp', 'recovery.done','backup_label.old'
17871787
]
17881788

17891789
if exclude_dirs:

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.