diff --git a/addons/elasticsearch/dataprotection/es-dump-backup.sh b/addons/elasticsearch/dataprotection/es-dump-backup.sh
new file mode 100644
index 000000000..0edf5afcd
--- /dev/null
+++ b/addons/elasticsearch/dataprotection/es-dump-backup.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+# This script performs a full logical backup of Elasticsearch using elasticsearch-dump (multielasticdump).
+# It dumps all indices' data, mappings, analyzers, aliases, settings, and templates,
+# packages them into a tar archive, and pushes to backup storage via datasafed.
+#
+# Environment read: DP_DATASAFED_BIN_PATH, DP_BACKUP_BASE_PATH, DP_DB_HOST, KB_NAMESPACE,
+# DP_BACKUP_INFO_FILE, DP_BACKUP_NAME; optional: ELASTIC_USER_PASSWORD, MATCH, SCROLL_TIME, LIMIT.
+#
+# NOTE: the ActionSet runs this script through "sh -c", so it sticks to POSIX sh
+# constructs (no arrays) despite the shebang.
+
+# Command tracing (set -x) is deliberately not enabled: it would print the
+# password-bearing endpoint URL into the job logs.
+set -e
+# Where the shell supports it, let a failing tar fail the whole tar | datasafed pipeline.
+if (set -o pipefail) 2>/dev/null; then
+  set -o pipefail
+fi
+
+export PATH="$PATH:$DP_DATASAFED_BIN_PATH"
+export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+
+ES_ENDPOINT="http://${DP_DB_HOST}.${KB_NAMESPACE}.svc.cluster.local:9200"
+
+# Exit handler: write backup info on success, or touch exit file on failure
+handle_exit() {
+  exit_code=$?
+  if [ "$exit_code" -ne 0 ]; then
+    echo "failed with exit code $exit_code"
+    touch "${DP_BACKUP_INFO_FILE}.exit"
+    exit 1
+  else
+    echo "{}" >"${DP_BACKUP_INFO_FILE}"
+    exit 0
+  fi
+}
+trap handle_exit EXIT
+
+# Build authenticated endpoint URL for elasticdump
+# NOTE(review): the password is embedded verbatim; confirm it cannot contain
+# URL-reserved characters such as '@' or '/', which would need percent-encoding.
+if [ -n "${ELASTIC_USER_PASSWORD}" ]; then
+  ES_AUTH_ENDPOINT="http://elastic:${ELASTIC_USER_PASSWORD}@${DP_DB_HOST}.${KB_NAMESPACE}.svc.cluster.local:9200"
+else
+  ES_AUTH_ENDPOINT="${ES_ENDPOINT}"
+fi
+
+# Create temporary backup directory
+BACKUP_DIR=/tmp/es-dump-backup
+rm -rf "${BACKUP_DIR}"
+mkdir -p "${BACKUP_DIR}"
+
+echo "INFO: Starting elasticsearch-dump full backup"
+echo "INFO: Elasticsearch endpoint: ${ES_ENDPOINT}"
+
+# Default match pattern: only backup user indices (exclude system indices starting with ".")
+# System indices (.kibana, .kibana_task_manager, .security, .tasks, .apm, etc.)
+# are managed internally by Elasticsearch and Kibana. Restoring them from a backup
+# will overwrite their internal migration/state tracking and cause errors (e.g. Kibana
+# migration lock). Override with the MATCH env variable if needed.
+MATCH_PATTERN="${MATCH:-^[^\.]}"
+echo "INFO: Index match pattern: ${MATCH_PATTERN}"
+
+# Collect optional elasticdump flags as positional parameters so that each flag
+# is passed as its own argument and nothing is passed when none are set.
+set --
+if [ -n "${SCROLL_TIME}" ]; then
+  set -- "$@" "--scrollTime=${SCROLL_TIME}"
+fi
+if [ -n "${LIMIT}" ]; then
+  set -- "$@" "--limit=${LIMIT}"
+fi
+
+# Use multielasticdump to dump all matched indices
+# Types: data (documents), mapping (index mappings), analyzer (custom analyzers),
+# alias (index aliases), settings (index settings), template (index templates)
+multielasticdump \
+  --direction=dump \
+  --input="${ES_AUTH_ENDPOINT}" \
+  --output="${BACKUP_DIR}" \
+  --match="${MATCH_PATTERN}" \
+  --includeType=data,mapping,analyzer,alias,settings,template \
+  "$@"
+
+echo "INFO: elasticsearch-dump completed, packaging backup data"
+
+# Tar and push to backup storage via datasafed
+cd "${BACKUP_DIR}"
+tar -cf - . | datasafed push -z zstd-fastest - "/${DP_BACKUP_NAME}.tar.zst"
+
+echo "INFO: Backup data pushed to storage successfully"
+
+# Cleanup
+rm -rf "${BACKUP_DIR}"
diff --git a/addons/elasticsearch/dataprotection/es-dump-restore.sh b/addons/elasticsearch/dataprotection/es-dump-restore.sh
new file mode 100644
index 000000000..38ff4d82e
--- /dev/null
+++ b/addons/elasticsearch/dataprotection/es-dump-restore.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+
+# This script performs a full logical restore of Elasticsearch using elasticsearch-dump (multielasticdump).
+# It pulls the backup archive from storage, extracts it, and restores all indices'
+# data, mappings, analyzers, aliases, settings, and templates.
+#
+# Environment read: DP_DATASAFED_BIN_PATH, DP_BACKUP_BASE_PATH, DP_DB_HOST, KB_NAMESPACE,
+# DP_BACKUP_INFO_FILE, DP_BACKUP_NAME; optional: ELASTIC_USER_PASSWORD, SCROLL_TIME, LIMIT.
+#
+# NOTE: the ActionSet runs this script through "sh -c", so it sticks to POSIX sh
+# constructs (no arrays) despite the shebang.
+
+# Command tracing (set -x) is deliberately not enabled: it would print the
+# password-bearing endpoint URL into the job logs.
+set -e
+# Where the shell supports it, let a failed download fail the whole pull | tar pipeline.
+if (set -o pipefail) 2>/dev/null; then
+  set -o pipefail
+fi
+
+export PATH="$PATH:$DP_DATASAFED_BIN_PATH"
+export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+
+ES_ENDPOINT="http://${DP_DB_HOST}.${KB_NAMESPACE}.svc.cluster.local:9200"
+
+# Exit handler: on failure, log the exit code and touch the exit marker file
+handle_exit() {
+  exit_code=$?
+  if [ "$exit_code" -ne 0 ]; then
+    echo "failed with exit code $exit_code"
+    touch "${DP_BACKUP_INFO_FILE}.exit"
+    exit 1
+  fi
+}
+trap handle_exit EXIT
+
+# Build authenticated endpoint URL for elasticdump
+# NOTE(review): the password is embedded verbatim; confirm it cannot contain
+# URL-reserved characters such as '@' or '/', which would need percent-encoding.
+if [ -n "${ELASTIC_USER_PASSWORD}" ]; then
+  ES_AUTH_ENDPOINT="http://elastic:${ELASTIC_USER_PASSWORD}@${DP_DB_HOST}.${KB_NAMESPACE}.svc.cluster.local:9200"
+else
+  ES_AUTH_ENDPOINT="${ES_ENDPOINT}"
+fi
+
+# Create temporary restore directory
+RESTORE_DIR=/tmp/es-dump-restore
+rm -rf "${RESTORE_DIR}"
+mkdir -p "${RESTORE_DIR}"
+
+echo "INFO: Pulling backup data from storage"
+
+# Pull and extract backup data
+datasafed pull -d zstd-fastest "${DP_BACKUP_NAME}.tar.zst" - | tar -xf - -C "${RESTORE_DIR}"
+
+echo "INFO: Backup data extracted to ${RESTORE_DIR}"
+ls -la "${RESTORE_DIR}"
+
+# Safety measure: remove any system index dump files (starting with ".") from restore directory.
+# System indices (.kibana, .kibana_task_manager, .security, .tasks, etc.) are managed internally
+# by Elasticsearch and Kibana. Restoring them overwrites their migration/state tracking and causes
+# errors such as Kibana migration lock loops.
+echo "INFO: Removing system index dump files (starting with '.') from restore directory"
+for f in "${RESTORE_DIR}"/.*; do
+  case "$(basename "$f")" in
+    .|..) continue ;;
+    *)
+      echo "INFO: Removing system index dump file: $f"
+      rm -f "$f"
+      ;;
+  esac
+done
+
+echo "INFO: Starting elasticsearch-dump restore"
+
+# Collect optional elasticdump flags as positional parameters so that each flag
+# is passed as its own argument and nothing is passed when none are set.
+set --
+if [ -n "${SCROLL_TIME}" ]; then
+  set -- "$@" "--scrollTime=${SCROLL_TIME}"
+fi
+if [ -n "${LIMIT}" ]; then
+  set -- "$@" "--limit=${LIMIT}"
+fi
+
+# Use multielasticdump to restore all indices
+multielasticdump \
+  --direction=load \
+  --input="${RESTORE_DIR}" \
+  --output="${ES_AUTH_ENDPOINT}" \
+  "$@"
+
+echo "INFO: elasticsearch-dump restore completed"
+
+# Cleanup
+rm -rf "${RESTORE_DIR}"
+
+echo "INFO: Elasticsearch restore finished successfully"
diff --git a/addons/elasticsearch/templates/actionset-es-dump.yaml b/addons/elasticsearch/templates/actionset-es-dump.yaml
new file mode 100644
index 000000000..75bc45bec
--- /dev/null
+++ b/addons/elasticsearch/templates/actionset-es-dump.yaml
@@ -0,0 +1,35 @@
+apiVersion: dataprotection.kubeblocks.io/v1alpha1
+kind: ActionSet
+metadata:
+  name: elasticsearch-es-dump
+  labels:
+    {{- include "elasticsearch.labels" . | nindent 4 }}
+spec:
+  backupType: Full
+  env:
+    - name: ES_DUMP_IMAGE_TAG
+      value: {{ .Values.image.esDump.tag | default "latest" }}
+  backup:
+    preBackup: []
+    postBackup: []
+    backupData:
+      image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.esDump.repository }}:{{ .Values.image.esDump.tag }}
+      runOnTargetPodNode: false
+      command:
+      - sh
+      - -c
+      - |
+        {{- .Files.Get "dataprotection/es-dump-backup.sh" | nindent 8 }}
+      syncProgress:
+        enabled: true
+        intervalSeconds: 5
+  restore:
+    postReady:
+    - job:
+        image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.esDump.repository }}:{{ .Values.image.esDump.tag }}
+        runOnTargetPodNode: false
+        command:
+        - sh
+        - -c
+        - |
+          {{- .Files.Get "dataprotection/es-dump-restore.sh" | nindent 10 }}
diff --git a/addons/elasticsearch/templates/backuppolicytemplate.yaml b/addons/elasticsearch/templates/backuppolicytemplate.yaml
index 969854f65..07542f42d 100644
--- a/addons/elasticsearch/templates/backuppolicytemplate.yaml
+++ b/addons/elasticsearch/templates/backuppolicytemplate.yaml
@@ -16,8 +16,18 @@ spec:
         strategy: Any
       snapshotVolumes: false
       actionSetName: elasticsearch-physical-br
+    - name: es-dump
+      target:
+        role: ""
+        strategy: Any
+      snapshotVolumes: false
+      actionSetName: elasticsearch-es-dump
   schedules:
     - backupMethod: full-backup
       enabled: false
       cronExpression: "0 18 * * *"
       retentionPeriod: 7d
+    - backupMethod: es-dump
+      enabled: false
+      cronExpression: "0 18 * * *"
+      retentionPeriod: 7d
diff --git a/addons/elasticsearch/values.yaml b/addons/elasticsearch/values.yaml
index 10f815e51..38ad94ff4 100644
--- a/addons/elasticsearch/values.yaml
+++ b/addons/elasticsearch/values.yaml
@@ -34,6 +34,10 @@ image:
   agent:
     repository: apecloud/elasticsearch-agent
     tag: "0.1.0"
+  # elasticsearch-dump: https://github.com/elasticsearch-dump/elasticsearch-dump
+  esDump:
+    repository: elasticdump/elasticsearch-dump
+    tag: "v6.124.2"
 
 ## @param supported elasticsearch versions with detailed information
 esVersions: