Commit f21929f

Nikolay Devxx authored and agneum committed
feat(engine): add data source: pgBackRest (#265)
1 parent 78c503b commit f21929f

File tree: 4 files changed, +473 -0 lines changed
New file: 375 additions & 0 deletions
@@ -0,0 +1,375 @@
# Copy the following to: ~/.dblab/engine/configs/server.yml

# Database Lab API server. This API is used to work with clones
# (list them, create, delete, see how to connect to a clone).
# Normally, it is supposed to listen on 127.0.0.1:2345 (default)
# and to be running inside a Docker container,
# with port mapping, to allow users to connect from outside
# to port 2345 using a private or public IP address of the machine
# where the container is running. See https://postgres.ai/docs/database-lab/how-to-manage-database-lab
server:
  # The main token that is used to work with the Database Lab API.
  # Note that only one token is supported.
  # However, if the integration with the Postgres.ai Platform is configured
  # (see below, "platform: ..." configuration), then users may use
  # their personal tokens generated on the Platform. In this case,
  # it is recommended to keep "verificationToken" secret, known
  # only to the administrator of the Database Lab instance.
  #
  # Database Lab Engine can run with an empty verification token, which is not recommended.
  # In this case, the DLE API and the UI application will not require any credentials.
  verificationToken: "secret_token"

  # HTTP server port. Default: 2345.
  port: 2345

# Embedded UI. Controls the application that provides a user interface to the DLE API.
embeddedUI:
  enabled: true

  # Docker image of the UI application.
  dockerImage: "postgresai/ce-ui:latest"

  # Host or IP address from which the embedded UI container accepts HTTP connections.
  # By default, use a loopback address to accept only local connections.
  # The empty string means "all available addresses".
  host: "127.0.0.1"

  # HTTP port of the UI application. Default: 2346.
  port: 2346

global:
  # Database engine. Currently, the only supported option: "postgres".
  engine: postgres

  # Debugging, when enabled, allows seeing more in the Database Lab logs
  # (not PostgreSQL logs). Enable in the case of troubleshooting.
  debug: false

  # Contains default configuration options of the restored database.
  database:
    # Default database username that will be used for Postgres management connections.
    # This user must exist.
    username: postgres

    # Default database name.
    dbname: postgres

  # Telemetry: anonymous statistics sent to Postgres.ai.
  # Used to analyze DLE usage; it helps the DLE maintainers make decisions on product development.
  # Please leave it enabled if possible – this will contribute to DLE development.
  # The full list of data points being collected: https://postgres.ai/docs/database-lab/telemetry
  telemetry:
    enabled: true
    # Telemetry API URL. To send anonymous telemetry data, keep it default ("https://postgres.ai/api/general").
    url: "https://postgres.ai/api/general"

# Manages filesystem pools (in the case of ZFS) or volume groups.
poolManager:
  # The full path which contains the pool mount directories. mountDir can contain multiple pool directories.
  mountDir: /var/lib/dblab

  # Subdir where PGDATA is located, relative to the pool mount directory.
  # This directory must already exist before launching the Database Lab instance. It may be empty if
  # data initialization is configured (see below).
  # Note, it is a relative path. Default: "data".
  # For example, for the PostgreSQL data directory "/var/lib/dblab/dblab_pool/data" (`dblab_pool` is a pool mount directory) set:
  #   mountDir: /var/lib/dblab
  #   dataSubDir: data
  # In this case, we assume that the mount point is: /var/lib/dblab/dblab_pool
  dataSubDir: data

  # Directory that will be used to mount clones. Subdirectories in this directory
  # will be used as mount points for clones. Subdirectory names will
  # correspond to ports. E.g., subdirectory "dblab_clone_6000" for the clone running on port 6000.
  clonesMountSubDir: clones

  # Unix domain socket directory used to establish local connections to cloned databases.
  socketSubDir: sockets

  # Directory that will be used to store observability artifacts. The directory will be created inside PGDATA.
  observerSubDir: observer

  # Snapshots with this suffix are considered preliminary. They are not supposed to be accessible to end-users.
  preSnapshotSuffix: "_pre"

  # Force selection of a working pool inside the `mountDir`.
  # It is an empty string by default, which means that the standard selection and rotation mechanism will be applied.
  selectedPool: ""

# Configure PostgreSQL containers
databaseContainer: &db_container
  # Database Lab provisions thin clones using Docker containers and uses auxiliary containers.
  # We need to specify which Postgres Docker image is to be used for that.
  # The default is the extended Postgres image built on top of the official Postgres image
  # (see https://postgres.ai/docs/database-lab/supported_databases).
  # Any custom or official Docker image that runs Postgres can be used. Our Dockerfile
  # (see https://gitlab.com/postgres-ai/custom-images/-/tree/master/extended)
  # is recommended if customization is needed.
  dockerImage: "postgresai/extended-postgres:14"

  # Custom parameters for containers with PostgreSQL, see
  # https://docs.docker.com/engine/reference/run/#runtime-constraints-on-resources
  containerConfig:
    "shm-size": 1gb

# Adjust PostgreSQL configuration
databaseConfigs: &db_configs
  configs:
    # In order to match production plans with Database Lab plans, set the Query Planning parameters to the same values as in production.
    shared_buffers: 1GB
    # shared_preload_libraries – copy the value from the source.
    # When adding shared preload libraries, make sure that "pg_stat_statements, auto_explain, logerrors" are in the list.
    # They are necessary for query and DB migration analysis.
    # Note: if you are using PostgreSQL 9.6 or older, remove the logerrors extension from the list since it is not supported.
    shared_preload_libraries: "pg_stat_statements, auto_explain, logerrors"
    # work_mem and all the Query Planning parameters – copy the values from the source.
    # To do it, use this query:
    #   select format($$%s = '%s'$$, name, setting)
    #   from pg_settings
    #   where
    #     name ~ '(work_mem$|^enable_|_cost$|scan_size$|effective_cache_size|^jit)'
    #     or name ~ '(^geqo|default_statistics_target|constraint_exclusion|cursor_tuple_fraction)'
    #     or name ~ '(collapse_limit$|parallel|plan_cache_mode)';
    work_mem: "100MB"
    # ... put Query Planning parameters here

# Details of provisioning – where data is located,
# thin cloning method, etc.
provision:
  <<: *db_container
  # Pool of ports for Postgres clones. Ports will be allocated sequentially,
  # starting from the lowest value. The "from" value must be less than "to".
  portPool:
    from: 6000
    to: 6100

  # Use sudo for ZFS/LVM and Docker commands if the Database Lab server is running
  # outside a container. Keep it "false" (default) when running in a container.
  useSudo: false

  # Avoid default password resetting in clones and have the ability for
  # existing users to log in with old passwords.
  keepUserPasswords: false

# Data retrieval flow. This section defines both initial retrieval and rules
# to keep the data directory in a synchronized state with the source. Both are optional:
# you may already have the data directory, so neither initial retrieval nor
# synchronization is needed.
#
# Data retrieval can also be considered as "thick" cloning. Once it's done, users
# can use "thin" cloning to get independent full-size clones of the database in
# seconds, for testing and development. Normally, retrieval (thick cloning) is
# a slow operation (1 TiB/h is a good speed). Optionally, the process of keeping
# the Database Lab data directory in sync with the source (being continuously
# updated) can be configured.
#
# There are two basic ways to organize data retrieval:
#   - "logical": use dump/restore processes, obtaining a logical copy of the initial
#     database (a sequence of SQL commands), and then loading it to
#     the target Database Lab data directory. This is the only option
#     for managed cloud PostgreSQL services such as Amazon RDS. Physically,
#     the copy of the database created using this method differs from
#     the original one (data blocks are stored differently). However,
#     row counts are the same, as well as internal database statistics,
#     allowing various kinds of development and testing, including
#     running the EXPLAIN command to optimize SQL queries.
#   - "physical": physically copy the data directory from the source (or from the
#     archive if a physical backup tool such as WAL-G, pgBackRest, or Barman
#     is used). This approach allows having a copy of the original database
#     which is physically identical, including the existing bloat and data
#     block locations. Not supported for managed cloud Postgres services
#     such as Amazon RDS.
retrieval:
  # The jobs section must not contain physical and logical restore jobs simultaneously.
  jobs:
    - physicalRestore
    - physicalSnapshot

  spec:
    # Restores database data from a physical backup.
    physicalRestore:
      options:
        <<: *db_container
        # Defines the tool to restore data.
        tool: pgbackrest

        # Sync instance options.
        sync:
          # Enable running of a sync instance.
          enabled: true

          # Custom health check options for a sync instance container.
          healthCheck:
            # Health check interval for a sync instance container (in seconds).
            interval: 5

            # Maximum number of health check retries.
            maxRetries: 200

          # Add PostgreSQL configuration parameters to the sync container.
          configs:
            shared_buffers: 2GB

          # Add PostgreSQL recovery configuration parameters to the sync container.
          recovery:
            # Uncomment this only if you are on Postgres version 11 or older.
            # standby_mode: on

        # Passes custom environment variables to the Docker container with the restoring tool.
        envs:
          PGBACKREST_LOG_LEVEL_CONSOLE: detail
          PGBACKREST_PROCESS_MAX: 2
          PGBACKREST_REPO: 1
          # SSH example
          PGBACKREST_REPO1_TYPE: posix
          PGBACKREST_REPO1_HOST: repo.hostname
          PGBACKREST_REPO1_HOST_USER: postgres
          # S3 example
          #PGBACKREST_REPO1_TYPE: s3
          #PGBACKREST_REPO1_PATH: "/pgbackrest"
          #PGBACKREST_REPO1_S3_BUCKET: my_bucket
          #PGBACKREST_REPO1_S3_ENDPOINT: s3.amazonaws.com
          #PGBACKREST_REPO1_S3_KEY: "XXXXXXXXXXXXXXXXXX"
          #PGBACKREST_REPO1_S3_KEY_SECRET: "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
          #PGBACKREST_REPO1_S3_REGION: us_east_1

        # Defines pgBackRest configuration options.
        pgbackrest:
          stanza: stanzaName
          forceInit: false

    physicalSnapshot:
      options:
        # Skip taking a snapshot while the retrieval starts.
        skipStartSnapshot: false

        # Adjust PostgreSQL configuration of the snapshot.
        <<: *db_configs

        # Promote PGDATA after data fetching.
        promotion:
          <<: *db_container
          # Enable PGDATA promotion.
          enabled: true

          # Custom health check options for a data promotion container.
          healthCheck:
            # Health check interval for a data promotion container (in seconds).
            interval: 5

            # Maximum number of health check retries.
            maxRetries: 200

          # It is possible to define pre-processing SQL queries. For example, "/tmp/scripts/sql".
          # Default: empty string (no pre-processing defined).
          queryPreprocessing:
            # Path to SQL pre-processing queries.
            queryPath: ""

            # Worker limit for parallel queries.
            maxParallelWorkers: 2

          # Add PostgreSQL configuration parameters to the promotion container.
          configs:
            shared_buffers: 2GB

          # Add PostgreSQL recovery configuration parameters to the promotion container.
          recovery:
            # Uncomment this only if you are on Postgres version 11 or older.
            # standby_mode: on

        # It is possible to define a pre-processing script. For example, "/tmp/scripts/custom.sh".
        # Default: empty string (no pre-processing defined).
        # This can be used for scrubbing / eliminating PII data, defining data masking, etc.
        preprocessingScript: ""

        # Scheduler contains tasks that run on a schedule.
        scheduler:
          # Snapshot scheduler creates a new snapshot on a schedule.
          snapshot:
            # Timetable is defined in crontab format: https://en.wikipedia.org/wiki/Cron#Overview
            timetable: "0 */6 * * *"
          # Retention scheduler cleans up old snapshots on a schedule.
          retention:
            # Timetable is defined in crontab format: https://en.wikipedia.org/wiki/Cron#Overview
            timetable: "0 * * * *"
            # Limit defines how many snapshots should be kept.
            limit: 4

        # Passes custom environment variables to the promotion Docker container.
        envs:
          PGBACKREST_LOG_LEVEL_CONSOLE: detail
          PGBACKREST_PROCESS_MAX: 2
          PGBACKREST_REPO: 1
          # SSH example
          PGBACKREST_REPO1_TYPE: posix
          PGBACKREST_REPO1_HOST: repo.hostname
          PGBACKREST_REPO1_HOST_USER: postgres
          # S3 example
          #PGBACKREST_REPO1_TYPE: s3
          #PGBACKREST_REPO1_PATH: "/pgbackrest"
          #PGBACKREST_REPO1_S3_BUCKET: my_bucket
          #PGBACKREST_REPO1_S3_ENDPOINT: s3.amazonaws.com
          #PGBACKREST_REPO1_S3_KEY: "XXXXXXXXXXXXXXXXXX"
          #PGBACKREST_REPO1_S3_KEY_SECRET: "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
          #PGBACKREST_REPO1_S3_REGION: us_east_1

cloning:
  # Host that will be specified in the database connection info for all clones.
  # Use a public IP address if database connections are allowed from outside.
  # This value is only used to inform users how to connect to database clones.
  accessHost: "localhost"

  # Automatically delete clones after the specified minutes of inactivity.
  # 0 - disable automatic deletion.
  # Inactivity means:
  #   - no active sessions (queries being processed right now)
  #   - no recently logged queries in the query log
  maxIdleMinutes: 120


### INTEGRATION ###

# Postgres.ai Platform integration (provides GUI) – extends the open source offering.
# Uncomment the following lines if you need GUI, personal tokens, audit logs, and more.
#
#platform:
#  # Platform API URL. To work with Postgres.ai SaaS, keep it default
#  # ("https://postgres.ai/api/general").
#  url: "https://postgres.ai/api/general"
#
#  # Token for authorization in the Platform API. This token can be obtained on
#  # the Postgres.ai Console: https://postgres.ai/console/YOUR_ORG_NAME/tokens
#  # This token needs to be kept secret, known only to the administrator.
#  accessToken: "platform_access_token"
#
#  # Enable authorization with personal tokens of the organization's members.
#  # If false: all users must use the "accessToken" value for any API request.
#  # If true: "accessToken" is known only to the admin, users use their own tokens,
#  # and any token can be revoked without affecting others.
#  enablePersonalTokens: true
#
# CI Observer configuration.
#observer:
#  # Set up regexp rules for Postgres logs.
#  # These rules are applied before sending the logs to the Platform, to ensure that personal data is masked properly.
#  # Check the syntax of regular expressions: https://github.com/google/re2/wiki/Syntax
#  replacementRules:
#    "regexp": "replace"
#    "select \\d+": "***"
#    "[a-z0-9._%+\\-]+(@[a-z0-9.\\-]+\\.[a-z]{2,4})": "***$1"
#
# Tool to calculate the timing difference between Database Lab and production environments.
#estimator:
#  # The ratio evaluating the timing difference for operations involving IO Read between Database Lab and production environments.
#  readRatio: 1
#
#  # The ratio evaluating the timing difference for operations involving IO Write between Database Lab and production environments.
#  writeRatio: 1
#
#  # Time interval of samples taken by the profiler.
#  profilingInterval: 10ms
#
#  # The minimum number of samples sufficient to display the estimation results.
#  sampleThreshold: 20
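For orientation only: the "pgbackrest:" options (stanza, forceInit) and the PGBACKREST_* environment variables above are what parameterize pgBackRest inside the sync container. Below is a minimal, hypothetical Go sketch of how such an invocation could be composed. The --stanza, --pg1-path, --type=standby, and --delta flags and the restore/archive-get commands are standard pgBackRest CLI, but the function names and the mapping of forceInit to --delta are assumptions for illustration, not necessarily what the engine does.

package main

import "fmt"

// buildRestoreCommand sketches how a pgBackRest restore invocation could be
// assembled from the config values shown above (PGDATA path and stanza name).
func buildRestoreCommand(pgDataDir, stanza string, forceInit bool) string {
	cmd := fmt.Sprintf("pgbackrest --type=standby --pg1-path=%s --stanza=%s restore", pgDataDir, stanza)
	if forceInit {
		// Assumption: forceInit maps to a delta restore over an existing, non-empty PGDATA.
		cmd += " --delta"
	}
	return cmd
}

// buildWALRestoreSetting sketches a restore_command for continuous WAL replay on
// the sync instance; archive-get is the standard pgBackRest command for fetching WAL.
func buildWALRestoreSetting(pgDataDir, stanza string) string {
	return fmt.Sprintf("pgbackrest --pg1-path=%s --stanza=%s archive-get %%f %%p", pgDataDir, stanza)
}

func main() {
	fmt.Println(buildRestoreCommand("/var/lib/dblab/dblab_pool/data", "stanzaName", false))
	fmt.Println(buildWALRestoreSetting("/var/lib/dblab/dblab_pool/data", "stanzaName"))
}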

‎engine/internal/retrieval/engine/postgres/physical/physical.go

+4 lines changed: 4 additions & 0 deletions
@@ -72,6 +72,7 @@ type CopyOptions struct {
 	ContainerConfig map[string]interface{} `yaml:"containerConfig"`
 	Envs map[string]string `yaml:"envs"`
 	WALG walgOptions `yaml:"walg"`
+	PgBackRest pgbackrestOptions `yaml:"pgbackrest"`
 	CustomTool customOptions `yaml:"customTool"`
 	Sync Sync `yaml:"sync"`
 }
@@ -130,6 +131,9 @@ func (r *RestoreJob) getRestorer(tool string) (restorer, error) {
 	case walgTool:
 		return newWALG(r.fsPool.DataDir(), r.WALG), nil

+	case pgbackrestTool:
+		return newPgBackRest(r.fsPool.DataDir(), r.PgBackRest), nil
+
 	case customTool:
 		return newCustomTool(r.CustomTool), nil
 	}
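The diff above only shows the wiring: a new PgBackRest field in CopyOptions and a new case in getRestorer. The restorer implementation itself (pgbackrest.go) is among the four changed files but is not shown on this page. As a rough sketch of what it plausibly contains, based solely on the call site and the YAML keys in the config example (everything beyond the names visible above is an assumption):

package physical

// Hypothetical sketch, not the committed pgbackrest.go. The options struct
// mirrors the "pgbackrest:" section of the config example, and the constructor
// signature matches the call newPgBackRest(r.fsPool.DataDir(), r.PgBackRest)
// seen in getRestorer above.

const pgbackrestTool = "pgbackrest"

// pgbackrestOptions maps the YAML keys shown in the config example.
type pgbackrestOptions struct {
	Stanza    string `yaml:"stanza"`
	ForceInit bool   `yaml:"forceInit"`
}

// pgbackrest restores physical backups using the pgBackRest tool.
type pgbackrest struct {
	pgDataDir string
	options   pgbackrestOptions
}

// newPgBackRest creates a pgBackRest restorer for the given data directory.
func newPgBackRest(pgDataDir string, options pgbackrestOptions) *pgbackrest {
	return &pgbackrest{pgDataDir: pgDataDir, options: options}
}

This mirrors how the existing WAL-G case is wired: adding another data source amounts to a new options struct, a constructor, and one more case in getRestorer.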
