Skip to content

Commit a390d0f

Browse files
authored
Account for fast mono clock when polling in device flow (#111)
* fix(poller): introduce wait interval multiplier Signed-off-by: Babak K. Shandiz <babakks@github.com> * refactor: collapse repeated post args Signed-off-by: Babak K. Shandiz <babakks@github.com> * fix: tune clock drift for slow-mono case Signed-off-by: Babak K. Shandiz <babakks@github.com> * fix: bail out on the 3rd slow_down Signed-off-by: Babak K. Shandiz <babakks@github.com> * docs: fix comment Signed-off-by: Babak K. Shandiz <babakks@github.com> * fix: bail out on the 2nd slow_down Signed-off-by: Babak K. Shandiz <babakks@github.com> --------- Signed-off-by: Babak K. Shandiz <babakks@github.com>
1 parent d28c578 commit a390d0f

File tree

3 files changed

+139
-222
lines changed

3 files changed

+139
-222
lines changed

device/device_flow.go

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,44 @@ type WaitOptions struct {
143143
// GrantType overrides the default value specified by OAuth 2.0 Device Code. Optional.
144144
GrantType string
145145

146-
newPoller pollerFactory
146+
newPoller pollerFactory
147+
calculateTimeDriftRatioF func(tstart, tstop time.Time) float64
147148
}
148149

150+
const (
151+
primaryIntervalMultiplier = 1.2
152+
secondaryIntervalMultiplier = 1.4
153+
)
154+
149155
// Wait polls the server at uri until authorization completes.
150156
func Wait(ctx context.Context, c httpClient, uri string, opts WaitOptions) (*api.AccessToken, error) {
157+
// We know that in virtualised environments (e.g. WSL or VMs), the monotonic
158+
// clock, which is the source of time measurements in Go, can run faster than
159+
// real time. So, polling intervals should be adjusted to avoid falling into
160+
// an endless loop of "slow_down" errors. See the following issue in cli/cli
161+
// for more context (especially what's after this particular comment):
162+
// - https://github.com/cli/cli/issues/9370#issuecomment-3759706125
163+
//
164+
// We've observed ~10% faster ticking, thanks to community, but a chat with
165+
// AI suggests it's typically between 5-15% on WSL, and can get up to 30% in
166+
// worst cases. There are issues reported on the WSL repo, but I couldn't
167+
// find any documented/conclusive data about this.
168+
//
169+
// See more:
170+
// - https://github.com/microsoft/WSL/issues/12583
171+
//
172+
// What we're doing here is to play on the safe side by applying a default
173+
// 20% increase to the polling interval from the start. That is, instead of
174+
// 5s, we begin with 6s waits. This should resolve most cases without any
175+
// "slow_down" errors. However, upon receiving a "slow_down" from the OAuth
176+
// server, we will bump the safety margin to 40%. This will eliminate further
177+
// "slow_down"s in most cases.
178+
//
179+
// We also bail out if we receive more than two "slow_down" errors, as that's
180+
// probably an indication of severe clock drift. In such cases, we'll check
181+
// the time drift between the monotonic and the wall clocks and report it in
182+
// the error message to hint the user at the root cause.
183+
151184
baseCheckInterval := time.Duration(opts.DeviceCode.Interval) * time.Second
152185
expiresIn := time.Duration(opts.DeviceCode.ExpiresIn) * time.Second
153186
grantType := opts.GrantType
@@ -161,11 +194,23 @@ func Wait(ctx context.Context, c httpClient, uri string, opts WaitOptions) (*api
161194
}
162195
_, poll := makePoller(ctx, baseCheckInterval, expiresIn)
163196

197+
calculateTimeDriftRatioF := opts.calculateTimeDriftRatioF
198+
if calculateTimeDriftRatioF == nil {
199+
calculateTimeDriftRatioF = calculateTimeDriftRatio
200+
}
201+
202+
multiplier := primaryIntervalMultiplier
203+
204+
var slowDowns int
164205
for {
165-
if err := poll.Wait(); err != nil {
206+
tstart := time.Now()
207+
208+
if err := poll.Wait(multiplier); err != nil {
166209
return nil, err
167210
}
168211

212+
tstop := time.Now()
213+
169214
values := url.Values{
170215
"client_id": {opts.ClientID},
171216
"device_code": {opts.DeviceCode.DeviceCode},
@@ -199,6 +244,18 @@ func Wait(ctx context.Context, c httpClient, uri string, opts WaitOptions) (*api
199244
}
200245

201246
if apiError.Code == "slow_down" {
247+
slowDowns++
248+
249+
// Since we have already added the secondary safety multiplier upon
250+
// receiving the first slow_down, getting a second one is a strong
251+
// indication of a huge clock drift (+40% faster mono). More polling
252+
// is just futile unless we apply some unreasonably large multiplier.
253+
// So, we bail out and inform the user about the potential cause.
254+
if slowDowns > 1 {
255+
driftRatio := calculateTimeDriftRatioF(tstart, tstop)
256+
return nil, fmt.Errorf("too many slow_down responses; detected clock drift of roughly %.0f%% between monotonic and wall clocks; please ensure your system clock is accurate", driftRatio*100)
257+
}
258+
202259
// Based on the RFC spec, we must add 5 seconds to our current polling interval.
203260
// (See https://www.rfc-editor.org/rfc/rfc8628#section-3.5)
204261
newInterval := poll.GetInterval() + 5*time.Second
@@ -213,9 +270,17 @@ func Wait(ctx context.Context, c httpClient, uri string, opts WaitOptions) (*api
213270
}
214271

215272
poll.SetInterval(newInterval)
273+
multiplier = secondaryIntervalMultiplier
216274
continue
217275
}
218276

219277
return nil, err
220278
}
221279
}
280+
281+
func calculateTimeDriftRatio(tstart, tstop time.Time) float64 {
282+
elapsedWall := tstop.UnixNano() - tstart.UnixNano()
283+
elapsedMono := tstop.Sub(tstart).Nanoseconds()
284+
drift := elapsedMono - elapsedWall
285+
return float64(drift) / float64(elapsedWall)
286+
}

0 commit comments

Comments
 (0)