@@ -43,10 +43,9 @@ function keeper_run() {
4343 --query " $query "
4444 )
4545 if [[ " ${TLS_ENABLED:- false} " == " true" ]]; then
46- keeper_args+=(--secure -- tls-ca-file " $CLICKHOUSE_TLS_CA " --tls-cert-file " $CLICKHOUSE_TLS_CERT " --tls-key-file " $CLICKHOUSE_TLS_KEY " )
46+ keeper_args+=(--tls-ca-file " $CLICKHOUSE_TLS_CA " --tls-cert-file " $CLICKHOUSE_TLS_CERT " --tls-key-file " $CLICKHOUSE_TLS_KEY " )
4747 fi
4848 if output=$( clickhouse-keeper-client " ${keeper_args[@]} " 2>&1 ) ; then
49-
5049 if [[ " $output " != * " Coordination error" * ]] &&
5150 [[ " $output " != * " Connection refused" * ]] &&
5251 [[ " $output " != * " Timeout" * ]]; then
@@ -130,15 +129,29 @@ function get_mode_by_keeper() {
130129 echo " $mode " | awk ' {print $2}'
131130}
132131
133- # Find leader node from member addresses
# Get mode with retry to tolerate some network failures.
# Arguments:
#   $1 - host to query; passed unchanged to get_mode
#   $2 - (optional) maximum number of attempts, default 5
#   $3 - (optional) seconds to wait between attempts, default 6
# Outputs:
#   The first non-empty mode reported by get_mode, on stdout.
# Returns:
#   0 as soon as get_mode succeeds with non-empty output,
#   1 if every attempt failed or produced empty output.
function get_mode_with_retry() {
  local host="$1"
  local max_attempts="${2:-5}"
  local retry_interval="${3:-6}"
  local attempt mode

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # Declaration is kept separate from the assignment so the exit status
    # of get_mode is what the `if` actually tests.
    if mode=$(get_mode "$host") && [[ -n "$mode" ]]; then
      echo "$mode"
      return 0
    fi
    # Wait only when another attempt follows; sleeping after the final
    # failure would just delay the error return.
    if ((attempt < max_attempts)); then
      sleep "$retry_interval"
    fi
  done
  return 1
}
145+
146+ # Find leader node from member addresses with retry mechanism
134147function find_leader() {
135148 local member_addresses=" $1 "
136149 [[ -z " $member_addresses " ]] && return 1
137150
138151 while IFS=' ,' read -ra members; do
139152 for member_addr in " ${members[@]} " ; do
140153 local member_fqdn=" ${member_addr%:* } "
141- mode=$( get_mode " $member_fqdn " )
154+ local mode=$( get_mode_with_retry " $member_fqdn " )
142155 if [[ " $mode " == " leader" || " $mode " == " standalone" ]]; then
143156 echo " $member_fqdn "
144157 return 0
0 commit comments