Skip to content

Commit 411292a

Browse files
committed
feat(dns): add dual-stack DNS resolution with dnsdualstack+ scheme
Add dnsdualstack+ DNS scheme that resolves both A and AAAA records, enabling IPv4/IPv6 failover for store connections. Failover is handled automatically by gRPC's built-in health checking and round_robin balancer. Usage: --store=dnsdualstack+store.example.com:10901 Signed-off-by: Kevin Glasson <kglasson@cloudflare.com> Signed-off-by: Kevin Glasson <kevinglasson@gmail.com>
1 parent f452443 commit 411292a

File tree

7 files changed

+357
-3
lines changed

7 files changed

+357
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
2020

2121
### Added
2222

23+
- [#XXXX](https://github.com/thanos-io/thanos/pull/XXXX) Query/Ruler: Add dual-stack DNS resolution with `dnsdualstack+` scheme for resolving both IPv4 and IPv6 addresses with automatic failover via gRPC health checking.
2324
- [#](https://github.com/thanos-io/thanos/pull/8623): Query: support sending a batch of Series per SeriesResponse with `--query.series-response-batch-size` flag.
2425
- [#](https://github.com/thanos-io/thanos/pull/8582): Sidecar: support --storage.tsdb.delay-compact-file.path Prometheus flag.
2526
- [#](https://github.com/thanos-io/thanos/pull/8595): *: add --shipper.upload-compacted flag for controlling upload concurrency in components that use shipper

docs/service-discovery.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ This configuration will instruct Thanos to discover all endpoints within the `th
9696
--endpoint=dnssrvnoa+_thanosstores._tcp.mycompany.org
9797
```
9898
99+
* `dnsdualstack+` - the domain name after this prefix will be looked up with both an AAAA and an A query, and the de-duplicated union of all resolved addresses is returned. This provides dual-stack resilience by resolving both IPv4 and IPv6 addresses, with automatic failover handled by gRPC's built-in health checking. *A port is required for this query type*. For example:
100+
101+
```
102+
--endpoint=dnsdualstack+stores.thanos.mycompany.org:9090
103+
```
104+
99105
The default interval between DNS lookups is 30s. This interval can be changed using the `store.sd-dns-interval` flag for `StoreAPI` configuration in `Thanos Querier`, or `query.sd-dns-interval` for `QueryAPI` configuration in `Thanos Ruler`.
100106
101107
## Other

pkg/discovery/dns/godns/resolver.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package godns
55

66
import (
7+
"context"
78
"net"
89

910
"github.com/pkg/errors"
@@ -14,6 +15,44 @@ type Resolver struct {
1415
*net.Resolver
1516
}
1617

18+
func (r *Resolver) LookupIPAddrDualStack(ctx context.Context, host string) ([]net.IPAddr, error) {
19+
seen := make(map[string]struct{})
20+
var result []net.IPAddr
21+
22+
for _, network := range []string{"ip6", "ip4"} {
23+
select {
24+
case <-ctx.Done():
25+
if len(result) > 0 {
26+
return result, nil
27+
}
28+
return nil, ctx.Err()
29+
default:
30+
}
31+
32+
ips, err := r.Resolver.LookupIP(ctx, network, host)
33+
if err != nil {
34+
continue
35+
}
36+
37+
for _, ip := range ips {
38+
ipStr := ip.String()
39+
if _, ok := seen[ipStr]; !ok {
40+
seen[ipStr] = struct{}{}
41+
result = append(result, net.IPAddr{IP: ip})
42+
}
43+
}
44+
}
45+
46+
if len(result) == 0 {
47+
return nil, &net.DNSError{
48+
Err: "no such host",
49+
Name: host,
50+
IsNotFound: true,
51+
}
52+
}
53+
return result, nil
54+
}
55+
1756
// IsNotFound checks if DNS record is not found.
1857
func (r *Resolver) IsNotFound(err error) bool {
1958
if err == nil {

pkg/discovery/dns/miekgdns/resolver.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,69 @@ func (r *Resolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAddr, e
6868
return r.lookupIPAddr(host, 1, 8)
6969
}
7070

71+
func (r *Resolver) LookupIPAddrDualStack(ctx context.Context, host string) ([]net.IPAddr, error) {
72+
return r.lookupIPAddrDualStack(ctx, host, 1, 8)
73+
}
74+
75+
func (r *Resolver) lookupIPAddrDualStack(ctx context.Context, host string, currIteration, maxIterations int) ([]net.IPAddr, error) {
76+
if currIteration > maxIterations {
77+
return nil, errors.Errorf("maximum number of recursive iterations reached (%d)", maxIterations)
78+
}
79+
80+
seen := make(map[string]struct{})
81+
var result []net.IPAddr
82+
83+
for _, qtype := range []uint16{dns.TypeAAAA, dns.TypeA} {
84+
select {
85+
case <-ctx.Done():
86+
if len(result) > 0 {
87+
return result, nil
88+
}
89+
return nil, ctx.Err()
90+
default:
91+
}
92+
93+
response, err := r.lookupWithSearchPath(host, dns.Type(qtype))
94+
if err != nil {
95+
continue
96+
}
97+
98+
for _, record := range response.Answer {
99+
switch addr := record.(type) {
100+
case *dns.A:
101+
ipStr := addr.A.String()
102+
if _, ok := seen[ipStr]; !ok {
103+
seen[ipStr] = struct{}{}
104+
result = append(result, net.IPAddr{IP: addr.A})
105+
}
106+
case *dns.AAAA:
107+
ipStr := addr.AAAA.String()
108+
if _, ok := seen[ipStr]; !ok {
109+
seen[ipStr] = struct{}{}
110+
result = append(result, net.IPAddr{IP: addr.AAAA})
111+
}
112+
case *dns.CNAME:
113+
addrs, err := r.lookupIPAddrDualStack(ctx, addr.Target, currIteration+1, maxIterations)
114+
if err != nil {
115+
continue
116+
}
117+
for _, a := range addrs {
118+
ipStr := a.IP.String()
119+
if _, ok := seen[ipStr]; !ok {
120+
seen[ipStr] = struct{}{}
121+
result = append(result, a)
122+
}
123+
}
124+
}
125+
}
126+
}
127+
128+
if len(result) == 0 {
129+
return nil, ErrNoSuchHost
130+
}
131+
return result, nil
132+
}
133+
71134
func (r *Resolver) lookupIPAddr(host string, currIteration, maxIterations int) ([]net.IPAddr, error) {
72135
// We want to protect from infinite loops when resolving DNS records recursively.
73136
if currIteration > maxIterations {

pkg/discovery/dns/provider.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,19 @@ import (
2121
"github.com/thanos-io/thanos/pkg/extprom"
2222
)
2323

24+
// isIPv6Addr reports whether addr is an IPv6 literal. Accepted forms are a
// bare literal ("::1"), a bracketed literal ("[::1]") and a bracketed literal
// with a port ("[::1]:9090"); a zone identifier ("fe80::1%eth0") is allowed in
// any of them. Host names, IPv4 addresses and IPv4-mapped IPv6 addresses
// yield false, even when they are wrapped in brackets.
func isIPv6Addr(addr string) bool {
	host := addr
	if h, _, err := net.SplitHostPort(addr); err == nil {
		// SplitHostPort already removes the brackets around the host.
		host = h
	} else if strings.HasPrefix(addr, "[") && strings.HasSuffix(addr, "]") {
		// Bracketed literal without a port.
		host = addr[1 : len(addr)-1]
	}

	// net.ParseIP does not understand zone identifiers, so drop "%zone".
	if i := strings.IndexByte(host, '%'); i >= 0 {
		host = host[:i]
	}

	ip := net.ParseIP(host)
	return ip != nil && ip.To4() == nil
}
36+
2437
// Provider is a stateful cache for asynchronous DNS resolutions. It provides a way to resolve addresses and obtain them.
2538
type Provider struct {
2639
sync.RWMutex
@@ -108,8 +121,14 @@ func GetQTypeName(addr string) (qtype, name string) {
108121
return qtypeAndName[0], qtypeAndName[1]
109122
}
110123

124+
// IsDualStackNode returns true if the address uses dual-stack DNS resolution (dnsdualstack+).
125+
func IsDualStackNode(addr string) bool {
126+
return strings.HasPrefix(addr, string(ADualStack)+"+")
127+
}
128+
111129
// Resolve stores a list of provided addresses or their DNS records if requested.
112130
// Addresses prefixed with `dns+` or `dnssrv+` will be resolved through respective DNS lookup (A/AAAA or SRV).
131+
// Addresses prefixed with `dnsdualstack+` will resolve both A and AAAA records.
113132
// For non-SRV records, it will return an error if a port is not supplied.
114133
func (p *Provider) Resolve(ctx context.Context, addrs []string, flushOld bool) error {
115134
resolvedAddrs := map[string][]string{}

pkg/discovery/dns/resolver.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package dns
66
import (
77
"context"
88
"net"
9+
"sort"
910
"strconv"
1011
"strings"
1112

@@ -24,6 +25,8 @@ const (
2425
SRV = QType("dnssrv")
2526
// SRVNoA qtype performs SRV lookup without any A/AAAA lookup for each SRV result.
2627
SRVNoA = QType("dnssrvnoa")
28+
// ADualStack qtype performs both A and AAAA lookup, returning all addresses.
29+
ADualStack = QType("dnsdualstack")
2730
)
2831

2932
type Resolver interface {
@@ -36,6 +39,7 @@ type Resolver interface {
3639

3740
// ipLookupResolver is the set of DNS lookup primitives a resolver backend has to provide.
type ipLookupResolver interface {
	// LookupIPAddr returns the IP addresses found for host.
	LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error)
	// LookupIPAddrDualStack returns the IP addresses found for host across both
	// address families (AAAA and A records). It backs the ADualStack query type.
	LookupIPAddrDualStack(ctx context.Context, host string) ([]net.IPAddr, error)
	// LookupSRV returns the SRV records found for the given service, protocol and name.
	LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error)
	// IsNotFound reports whether err means that the looked-up record does not exist.
	// Callers use it to tell a missing record apart from a failed lookup.
	IsNotFound(err error) bool
}
@@ -127,6 +131,37 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string
127131
res = append(res, appendScheme(scheme, net.JoinHostPort(resIP.String(), resPort)))
128132
}
129133
}
134+
case ADualStack:
135+
if port == "" {
136+
return nil, errors.Errorf("missing port in address given for dnsdualstack lookup: %v", name)
137+
}
138+
ips, err := s.resolver.LookupIPAddrDualStack(ctx, host)
139+
if err != nil {
140+
if !s.resolver.IsNotFound(err) {
141+
return nil, errors.Wrapf(err, "lookup IP addresses (dual-stack) %q", host)
142+
}
143+
if ips == nil {
144+
level.Error(s.logger).Log("msg", "failed to lookup IP addresses (dual-stack)", "host", host, "err", err)
145+
}
146+
}
147+
sort.Slice(ips, func(i, j int) bool {
148+
iIs6 := ips[i].IP.To4() == nil
149+
jIs6 := ips[j].IP.To4() == nil
150+
if iIs6 != jIs6 {
151+
return iIs6
152+
}
153+
return ips[i].IP.String() < ips[j].IP.String()
154+
})
155+
var ipv4Count, ipv6Count int
156+
for _, ip := range ips {
157+
if ip.IP.To4() == nil {
158+
ipv6Count++
159+
} else {
160+
ipv4Count++
161+
}
162+
res = append(res, appendScheme(scheme, net.JoinHostPort(ip.String(), port)))
163+
}
164+
level.Debug(s.logger).Log("msg", "dual-stack DNS lookup", "host", host, "ipv6_count", ipv6Count, "ipv4_count", ipv4Count)
130165
default:
131166
return nil, errors.Errorf("invalid lookup scheme %q", qtype)
132167
}

0 commit comments

Comments
 (0)