Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ It is recommend to upgrade the storage components first (Receive, Store, etc.) a

### Added

- [#8651](https://github.com/thanos-io/thanos/pull/8651) Query/Ruler: Add dual-stack DNS resolution with `dnsdualstack+` scheme for resolving both IPv4 and IPv6 addresses with automatic failover via gRPC health checking.
- [#8623](https://github.com/thanos-io/thanos/pull/8623): Query: Enable batching of Series per SeriesResponse.
- [#8582](https://github.com/thanos-io/thanos/pull/8582): Sidecar: support --storage.tsdb.delay-compact-file.path Prometheus flag.
- [#8595](https://github.com/thanos-io/thanos/pull/8595): *: add --shipper.upload-compacted flag for controlling upload concurrency in components that use shipper
Expand Down
6 changes: 6 additions & 0 deletions docs/service-discovery.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ This configuration will instruct Thanos to discover all endpoints within the `th
--endpoint=dnssrvnoa+_thanosstores._tcp.mycompany.org
```

* `dnsdualstack+` - the domain name after this prefix will be looked up as both A and AAAA queries, returning all resolved addresses. This provides dual-stack resilience by resolving both IPv4 and IPv6 addresses, with automatic failover handled by gRPC's built-in health checking. *A port is required for this query type*. This is most useful with `--endpoint-group`, which allows gRPC to manage all resolved addresses as a single logical group with automatic failover. For example:

```
--endpoint-group=dnsdualstack+stores.thanos.mycompany.org:9090
```

The default interval between DNS lookups is 30s. This interval can be changed using the `store.sd-dns-interval` flag for `StoreAPI` configuration in `Thanos Querier`, or `query.sd-dns-interval` for `QueryAPI` configuration in `Thanos Ruler`.

## Other
Expand Down
13 changes: 13 additions & 0 deletions pkg/discovery/dns/godns/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package godns

import (
"context"
"net"

"github.com/pkg/errors"
Expand All @@ -14,6 +15,18 @@ type Resolver struct {
*net.Resolver
}

// LookupIPAddrByNetwork resolves host for the given network by delegating to
// the LookupIP method promoted from the embedded *net.Resolver, and returns
// every resulting IP wrapped in a net.IPAddr.
// A lookup error is returned unchanged, together with a nil slice.
func (r *Resolver) LookupIPAddrByNetwork(ctx context.Context, network, host string) ([]net.IPAddr, error) {
	resolved, err := r.LookupIP(ctx, network, host)
	if err != nil {
		return nil, err
	}

	addrs := make([]net.IPAddr, 0, len(resolved))
	for _, ip := range resolved {
		addrs = append(addrs, net.IPAddr{IP: ip})
	}
	return addrs, nil
}

// IsNotFound checks whether the DNS record was not found.
func (r *Resolver) IsNotFound(err error) bool {
if err == nil {
Expand Down
51 changes: 51 additions & 0 deletions pkg/discovery/dns/miekgdns/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,57 @@ func (r *Resolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAddr, e
return r.lookupIPAddr(host, 1, 8)
}

// LookupIPAddrByNetwork resolves host using exactly one record type selected by
// network: "ip4" issues an A query and "ip6" an AAAA query. Any other network
// value is rejected with an error.
// CNAME answers are followed recursively, starting at iteration 1 with a limit
// of 8 iterations.
func (r *Resolver) LookupIPAddrByNetwork(ctx context.Context, network, host string) ([]net.IPAddr, error) {
	switch network {
	case "ip4":
		return r.lookupIPAddrByNetwork(ctx, host, dns.TypeA, 1, 8)
	case "ip6":
		return r.lookupIPAddrByNetwork(ctx, host, dns.TypeAAAA, 1, 8)
	}
	return nil, errors.Errorf("unsupported network %q", network)
}

// lookupIPAddrByNetwork resolves host to IP addresses using a single DNS
// record type (qtype) and follows CNAME answers recursively.
//
// currIteration and maxIterations bound the recursion depth so that a CNAME
// loop cannot recurse forever; exceeding the bound is an error.
//
// Resolution is best-effort: a CNAME target that fails to resolve does not
// discard addresses collected from the other answer records. When no address
// could be collected at all, the returned error reflects the cause:
//   - the last CNAME failure that was not a "not found" (cancelled context,
//     recursion limit, lookup failure), so that callers do not mistake a
//     broken lookup for a host that simply has no records, or
//   - ErrNoSuchHost when there was nothing to resolve.
func (r *Resolver) lookupIPAddrByNetwork(ctx context.Context, host string, qtype uint16, currIteration, maxIterations int) ([]net.IPAddr, error) {
	if currIteration > maxIterations {
		return nil, errors.Errorf("maximum number of recursive iterations reached (%d)", maxIterations)
	}

	// Non-blocking cancellation check before issuing another query.
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	response, err := r.lookupWithSearchPath(host, dns.Type(qtype))
	if err != nil {
		return nil, err
	}

	var (
		result   []net.IPAddr
		cnameErr error // last CNAME failure that was not a plain "not found"
	)
	for _, record := range response.Answer {
		switch addr := record.(type) {
		case *dns.A:
			result = append(result, net.IPAddr{IP: addr.A})
		case *dns.AAAA:
			result = append(result, net.IPAddr{IP: addr.AAAA})
		case *dns.CNAME:
			addrs, err := r.lookupIPAddrByNetwork(ctx, addr.Target, qtype, currIteration+1, maxIterations)
			if err != nil {
				// Keep going so other records can still contribute, but
				// remember real failures instead of silently dropping them.
				if !r.IsNotFound(err) {
					cnameErr = errors.Wrapf(err, "%v: resolution of CNAME %s failed", host, addr.Target)
				}
				continue
			}
			result = append(result, addrs...)
		}
	}

	if len(result) > 0 {
		return result, nil
	}
	if cnameErr != nil {
		return nil, cnameErr
	}
	return nil, ErrNoSuchHost
}

func (r *Resolver) lookupIPAddr(host string, currIteration, maxIterations int) ([]net.IPAddr, error) {
// We want to protect from infinite loops when resolving DNS records recursively.
if currIteration > maxIterations {
Expand Down
1 change: 1 addition & 0 deletions pkg/discovery/dns/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ func GetQTypeName(addr string) (qtype, name string) {

// Resolve stores a list of provided addresses or their DNS records if requested.
// Addresses prefixed with `dns+` or `dnssrv+` will be resolved through respective DNS lookup (A/AAAA or SRV).
// Addresses prefixed with `dnsdualstack+` will resolve both A and AAAA records.
// For non-SRV records, it will return an error if a port is not supplied.
func (p *Provider) Resolve(ctx context.Context, addrs []string, flushOld bool) error {
resolvedAddrs := map[string][]string{}
Expand Down
34 changes: 33 additions & 1 deletion pkg/discovery/dns/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,22 @@ const (
SRV = QType("dnssrv")
// SRVNoA qtype performs SRV lookup without any A/AAAA lookup for each SRV result.
SRVNoA = QType("dnssrvnoa")
// ADualStack qtype performs both A and AAAA lookup, returning all addresses.
ADualStack = QType("dnsdualstack")
)

// Resolver resolves an address name into a list of addresses according to a query type.
type Resolver interface {
// Resolve performs a DNS lookup and returns a list of records.
// name is the domain name to be resolved.
// qtype is the query type. Accepted values are `dns` for A/AAAA lookup, `dnssrv` for SRV lookup,
// `dnssrvnoa` for SRV lookup without A/AAAA, and `dnsdualstack` for combined A and AAAA lookup.
// If scheme is passed through name, it is preserved on IP results.
Resolve(ctx context.Context, name string, qtype QType) ([]string, error)
}

// ipLookupResolver is the set of low-level DNS lookups a resolver backend provides.
type ipLookupResolver interface {
// LookupIPAddr returns the IP addresses found for host.
LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error)
// LookupIPAddrByNetwork returns the IP addresses found for host on a single
// network. The dual-stack lookup calls it once with "ip6" and once with "ip4".
LookupIPAddrByNetwork(ctx context.Context, network, host string) ([]net.IPAddr, error)
// LookupSRV returns the SRV records found for the given service, proto and name.
LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error)
// IsNotFound reports whether err means that no record was found. The dual-stack
// lookup ignores such errors instead of reporting them as lookup failures.
IsNotFound(err error) bool
}
Expand Down Expand Up @@ -127,6 +131,34 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string
res = append(res, appendScheme(scheme, net.JoinHostPort(resIP.String(), resPort)))
}
}
case ADualStack:
if port == "" {
return nil, errors.Errorf("missing port in address given for dnsdualstack lookup: %v", name)
}
var ips []net.IPAddr
var lastErr error

for _, network := range []string{"ip6", "ip4"} {
addrs, err := s.resolver.LookupIPAddrByNetwork(ctx, network, host)
if err != nil {
if !s.resolver.IsNotFound(err) {
lastErr = err
}
continue
}
ips = append(ips, addrs...)
}

if len(ips) == 0 && lastErr != nil {
return nil, errors.Wrapf(lastErr, "lookup IP addresses (dual-stack) %q", host)
}

for _, ip := range ips {
res = append(res, appendScheme(scheme, net.JoinHostPort(ip.String(), port)))
}
if len(ips) == 0 {
level.Error(s.logger).Log("msg", "found no IP addresses (dual-stack)", "host", host)
}
default:
return nil, errors.Errorf("invalid lookup scheme %q", qtype)
}
Expand Down
179 changes: 178 additions & 1 deletion pkg/discovery/dns/resolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ type mockHostnameResolver struct {
resultIPs map[string][]net.IPAddr
resultSRVs map[string][]*net.SRV
err error

resultByNetwork map[string][]net.IPAddr
errByNetwork map[string]error
isNotFound func(error) bool
}

// lookupKey builds the key under which the mock resolver stores per-network
// results and errors: the network and the host joined by a colon.
func lookupKey(network, host string) string {
	const sep = ":"
	return network + sep + host
}

func (m mockHostnameResolver) LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error) {
Expand All @@ -29,14 +37,30 @@ func (m mockHostnameResolver) LookupIPAddr(ctx context.Context, host string) ([]
return m.resultIPs[host], nil
}

func (m mockHostnameResolver) LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error) {
// LookupIPAddrByNetwork returns the canned answer registered for the
// (network, host) pair. An entry in errByNetwork takes precedence over any
// entry in resultByNetwork; a pair without any entry yields (nil, nil).
func (m mockHostnameResolver) LookupIPAddrByNetwork(_ context.Context, network, host string) ([]net.IPAddr, error) {
	k := lookupKey(network, host)
	// Reading from a nil map is safe and reports "missing", so no nil guards are needed.
	if failure, found := m.errByNetwork[k]; found {
		return nil, failure
	}
	return m.resultByNetwork[k], nil
}

// LookupSRV returns the canned SRV records registered for name with an empty
// cname, or the mock's configured error when one is set.
func (m mockHostnameResolver) LookupSRV(_ context.Context, _, _, name string) (cname string, addrs []*net.SRV, err error) {
	if failure := m.err; failure != nil {
		return "", nil, failure
	}
	records := m.resultSRVs[name]
	return "", records, nil
}

// IsNotFound delegates to the test-supplied isNotFound predicate. Without a
// predicate, no error is ever classified as "not found".
func (m mockHostnameResolver) IsNotFound(err error) bool {
	if m.isNotFound == nil {
		return false
	}
	return m.isNotFound(err)
}

Expand Down Expand Up @@ -203,3 +227,156 @@ func testDnsSd(t *testing.T, tt DNSSDTest) {
sort.Strings(result)
testutil.Equals(t, tt.expectedResult, result)
}

func TestDnsSD_ResolveDualStack(t *testing.T) {
ipv6 := net.ParseIP("2001:db8::1")
ipv4 := net.ParseIP("192.168.1.1")
host := "test.mycompany.com"

tests := []struct {
name string
addr string
resolver *mockHostnameResolver
expectedResult []string
expectedErr error
}{
{
name: "both families resolve",
addr: host + ":8080",
resolver: &mockHostnameResolver{
resultByNetwork: map[string][]net.IPAddr{
lookupKey("ip6", host): {{IP: ipv6}},
lookupKey("ip4", host): {{IP: ipv4}},
},
},
expectedResult: []string{"[2001:db8::1]:8080", "192.168.1.1:8080"},
},
{
name: "IPv6 only",
addr: host + ":8080",
resolver: &mockHostnameResolver{
resultByNetwork: map[string][]net.IPAddr{
lookupKey("ip6", host): {{IP: ipv6}},
},
},
expectedResult: []string{"[2001:db8::1]:8080"},
},
{
name: "IPv4 only",
addr: host + ":8080",
resolver: &mockHostnameResolver{
resultByNetwork: map[string][]net.IPAddr{
lookupKey("ip4", host): {{IP: ipv4}},
},
},
expectedResult: []string{"192.168.1.1:8080"},
},
{
name: "requires port",
addr: host,
resolver: &mockHostnameResolver{},
expectedResult: nil,
expectedErr: errors.New("missing port in address given for dnsdualstack lookup: " + host),
},
{
name: "IPv6 fails, IPv4 succeeds",
addr: host + ":8080",
resolver: &mockHostnameResolver{
resultByNetwork: map[string][]net.IPAddr{
lookupKey("ip4", host): {{IP: ipv4}},
},
errByNetwork: map[string]error{
lookupKey("ip6", host): errors.New("network unreachable"),
},
},
expectedResult: []string{"192.168.1.1:8080"},
},
{
name: "IPv4 fails, IPv6 succeeds",
addr: host + ":8080",
resolver: &mockHostnameResolver{
resultByNetwork: map[string][]net.IPAddr{
lookupKey("ip6", host): {{IP: ipv6}},
},
errByNetwork: map[string]error{
lookupKey("ip4", host): errors.New("network unreachable"),
},
},
expectedResult: []string{"[2001:db8::1]:8080"},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx := context.TODO()
dnsSD := dnsSD{tt.resolver, log.NewNopLogger()}

result, err := dnsSD.Resolve(ctx, tt.addr, ADualStack)
if tt.expectedErr != nil {
testutil.NotOk(t, err)
testutil.Assert(t, tt.expectedErr.Error() == err.Error(), "expected error '%v', but got '%v'", tt.expectedErr.Error(), err.Error())
} else {
testutil.Ok(t, err)
}
sort.Strings(result)
sort.Strings(tt.expectedResult)
testutil.Equals(t, tt.expectedResult, result)
})
}
}

func TestDnsSD_ResolveDualStack_Errors(t *testing.T) {
host := "test.mycompany.com"
addr := host + ":8080"

t.Run("both families fail propagates error", func(t *testing.T) {
resolver := &mockHostnameResolver{
errByNetwork: map[string]error{
lookupKey("ip6", host): errors.New("network unreachable"),
lookupKey("ip4", host): errors.New("network unreachable"),
},
}
dnsSD := dnsSD{resolver, log.NewNopLogger()}

_, err := dnsSD.Resolve(context.TODO(), addr, ADualStack)
testutil.NotOk(t, err)
})

t.Run("not-found on both families returns empty", func(t *testing.T) {
notFound := &net.DNSError{Err: "no such host", Name: host, IsNotFound: true}
resolver := &mockHostnameResolver{
errByNetwork: map[string]error{
lookupKey("ip6", host): notFound,
lookupKey("ip4", host): notFound,
},
isNotFound: func(err error) bool {
dnsErr, ok := err.(*net.DNSError)
return ok && dnsErr.IsNotFound
},
}
dnsSD := dnsSD{resolver, log.NewNopLogger()}

result, err := dnsSD.Resolve(context.TODO(), addr, ADualStack)
testutil.Ok(t, err)
testutil.Equals(t, 0, len(result))
})

t.Run("not-found on one family real error on other propagates", func(t *testing.T) {
notFound := &net.DNSError{Err: "no such host", Name: host, IsNotFound: true}
resolver := &mockHostnameResolver{
errByNetwork: map[string]error{
lookupKey("ip6", host): notFound,
lookupKey("ip4", host): errors.New("server misbehaving"),
},
isNotFound: func(err error) bool {
dnsErr, ok := err.(*net.DNSError)
return ok && dnsErr.IsNotFound
},
}
dnsSD := dnsSD{resolver, log.NewNopLogger()}

_, err := dnsSD.Resolve(context.TODO(), addr, ADualStack)
testutil.NotOk(t, err)
})

}
Loading