1- // The design comes from https://www.notion.so/thegraph/Indexer-Selection-Algorithm-fc95c7a0b11c436288c82094002d4c3d
2- // The design boils down to these steps:
3- // Model a curve that represent the expected time for a query
4- // Transform the "expected time" into a notion of "performance" (the inverse of expected time)
5- // Multiply this performance curve with the utility curve
6- // Integrate over a range to get the expected utility.
7- //
8- // The observation in the design below is that you get the same result
9- // by just averaging the utility of all queries in the relevant window.
1+ // Converts latency to utility.
102//
113// The simplest possible thing would be to convert all queries to utility,
124// aggregate utilities and count, then use that to calculate the average utility.
135// The problem with that approach is that it doesn't allow us to parameterize
146// the utility function.
7+ // Note: avg(utility(latencies)) != utility(avg(latencies))
158//
169// So instead we take only a slightly more complicated approach.
17- // First, we convert all durations into a quantized performance.
18- // The quantized performance serves as a key to a bucket of queries
19- // with nearly the same performance. This limits the number of
20- // performance->utility conversions that we need to calculate the
21- // expected performance which enables making the utility function
22- // a parameter without obscene loss to performance.
23-
24- use crate :: {
25- indexer_selection:: { decay:: Decay , utility:: concave_utility} ,
26- prelude:: * ,
27- } ;
28- use ordered_float:: NotNan ;
10+ // First, we bucket queries keyed by a quantized latency. Each
11+ // bucket gets a utility based on the consumer preferences,
12+ // and then we get the count-weighted average of utilities.
13+ // This trades off precision for query-time work.
14+
15+ use crate :: { indexer_selection:: decay:: Decay , prelude:: * } ;
2916
3017use super :: decay:: DecayUtility ;
3118
#[derive(Clone, Debug, Default)]
pub struct Performance {
    /// Quantized latency keys (milliseconds), kept sorted ascending.
    /// Parallel to `count`.
    latency: Vec<u32>,
    /// Decayed query count per latency bucket. Parallel to `latency`.
    count: Vec<f64>,
}
3724
3825impl Decay for Performance {
3926 fn shift ( & mut self , mut next : Option < & mut Self > , fraction : f64 , keep : f64 ) {
4027 // For each quantized bucket, find the corresponding quantized bucket in
4128 // the next frame, and shift information into it.
42- for ( count, performance ) in self . count . iter_mut ( ) . zip ( self . performance . iter ( ) . cloned ( ) ) {
43- let next_performance = next. as_deref_mut ( ) . map ( |n| n. bucket_mut ( performance ) ) ;
44- count. shift ( next_performance , fraction, keep) ;
29+ for ( count, latency ) in self . count . iter_mut ( ) . zip ( self . latency . iter ( ) . copied ( ) ) {
30+ let next_latency = next. as_deref_mut ( ) . map ( |n| n. bucket_mut ( latency ) ) ;
31+ count. shift ( next_latency , fraction, keep) ;
4532 }
4633 }
4734
4835 fn clear ( & mut self ) {
49- self . performance . clear ( ) ;
36+ self . latency . clear ( ) ;
5037 self . count . clear ( ) ;
5138 }
5239}
5340
/// Maps a latency (in milliseconds) to a utility score in (0.0, 1.0], where
/// lower latency yields higher utility. `pow` parameterizes consumer
/// preference: larger values penalize high latencies more sharply.
/// Curve visualization: https://www.desmos.com/calculator/w6pxajuuve
fn latency_to_utility(latency: f64, pow: f64) -> f64 {
    // Shifted sigmoid; 400.0 positions the inflection point and 300.0 sets
    // the steepness of the falloff.
    let sigmoid = |x: f64| 1.0 + ((x.powf(pow) - 400.0) / 300.0).exp();
    // Normalize so a zero-latency query has utility exactly 1.0.
    // (Original cast `latency as f64` removed: the parameter is already f64.)
    sigmoid(0.0) / sigmoid(latency)
}
46+
5447impl DecayUtility for Performance {
55- fn expected_utility ( & self , u_a : f64 ) -> f64 {
48+ fn expected_utility ( & self , pow : f64 ) -> f64 {
5649 let mut agg_count = 0.0 ;
5750 let mut agg_utility = 0.0 ;
58- for ( count, performance ) in self . iter ( ) {
51+ for ( count, latency ) in self . iter ( ) {
5952 agg_count += count;
60- agg_utility += count * concave_utility ( performance , u_a ) ;
53+ agg_utility += count * latency_to_utility ( latency as f64 , pow ) ;
6154 }
6255 if agg_count == 0.0 {
6356 return 0.0 ;
@@ -70,75 +63,34 @@ impl DecayUtility for Performance {
7063}
7164
7265impl Performance {
73- fn quantized_performance ( duration : Duration ) -> f64 {
74- // Get nearest triangle number of quantized duration, then convert to performance.
75- // The idea here is to quantize with variable precision to limit bucket count,
76- // but also have a reasonable number of buckets no matter what scale you care about.
77- //
78- // 6 buckets between 200ms - 300ms (a 100ms delta)
79- // 6 buckets between 17s - 18s (a 1000ms delta for the same number of buckets as above)
80- // 244 buckets between 0s-30s... or about the most we'll ever have to keep.
81- //
82- // There's probably a smarter way, but this seems reasonable if you squint.
83- // The tests seemed in practice to create around 100 buckets after 100,000 queries,
84- // which seems about what you would hope for to approximate a curve. There was variation
85- // because of different values for std_dev
86- let duration_ms = duration. as_secs_f64 ( ) * 1000.0 ;
87- // Subtracting a small amount of here moves the inflection of the curve so
88- // that changes after this point are more noticeable. This makes a big
89- // difference to how often a 2000ms indexer is selected compared to a 200ms
90- // indexer (for the better).
91- let duration_ms = duration_ms - 35.0 ;
92- let duration_ms = nearest_triangle_number ( duration_ms. max ( 1.0 ) ) ;
93- let performance = 1000.0 / duration_ms;
94-
95- // SAFETY: We force performance to be NotNan here. duration_ms is >=
96- // 1.0 and NotNan because of the clamp (above) and implementation of Duration.
97- // Therefore performance is <= 1000.0 and NonNan.
98- // See also 47632ed6-4dcc-4b39-b064-c0ca01560626
99- // I think I would be comfortable changing this assert
100- // to a debug_assert.
101- assert ! ( !performance. is_nan( ) ) ;
102-
103- performance
104- }
105-
106- pub fn add_query ( & mut self , mut duration : Duration , result : Result < ( ) , ( ) > ) {
66+ pub fn add_query ( & mut self , mut duration : Duration , status : Result < ( ) , ( ) > ) {
10767 // If the query is failed, the user will experience it as increased latency.
10868 // Furthermore, most errors are expected to resolve quicker than successful queries.
10969 // Penalizing failed queries in performance will make it so that we account
11070 // for the additional latency incurred by retry. Ideally we could do this by
11171 // accounting for the likelyhood that the utility curve would be moved down and how
11272 // much later when selecting, but this is a much simpler LOC.
113- if result . is_err ( ) {
73+ if status . is_err ( ) {
11474 duration *= 2 ;
11575 }
116- * self . bucket_mut ( Self :: quantized_performance ( duration) ) += 1.0 ;
76+ let quantized = nearest_triangle_number ( duration. as_secs_f64 ( ) * 1000.0 ) as u32 ;
77+ * self . bucket_mut ( quantized) += 1.0 ;
11778 }
11879
119- fn bucket_mut ( & mut self , quantized_performance : f64 ) -> & mut f64 {
120- // Safety: Performance is NotNan. See also 47632ed6-4dcc-4b39-b064-c0ca01560626
121- let index = match unsafe {
122- self . performance
123- . binary_search_by_key ( & NotNan :: new_unchecked ( quantized_performance) , |a| {
124- NotNan :: new_unchecked ( * a)
125- } )
126- } {
80+ fn bucket_mut ( & mut self , key : u32 ) -> & mut f64 {
81+ let index = match self . latency . binary_search ( & key) {
12782 Ok ( index) => index,
12883 Err ( index) => {
129- self . performance . insert ( index, quantized_performance ) ;
84+ self . latency . insert ( index, key ) ;
13085 self . count . insert ( index, 0.0 ) ;
13186 index
13287 }
13388 } ;
13489 & mut self . count [ index]
13590 }
13691
137- fn iter < ' a > ( & ' a self ) -> impl ' a + Iterator < Item = ( f64 , f64 ) > {
138- self . count
139- . iter ( )
140- . cloned ( )
141- . zip ( self . performance . iter ( ) . cloned ( ) )
92+ fn iter < ' a > ( & ' a self ) -> impl ' a + Iterator < Item = ( f64 , u32 ) > {
93+ self . count . iter ( ) . cloned ( ) . zip ( self . latency . iter ( ) . cloned ( ) )
14294 }
14395}
14496
@@ -156,33 +108,8 @@ fn nearest_triangle_number(n: f64) -> f64 {
156108#[ cfg( test) ]
157109mod tests {
158110 use super :: * ;
159- use crate :: indexer_selection :: test_utils :: * ;
111+ use ordered_float :: NotNan ;
160112 use rand:: { thread_rng, Rng as _} ;
161- use rand_distr:: Normal ;
162-
163- // "Web utility" - based on percentage of users that will leave a site after an amount of time.
164- const WEB_UTIL : f64 = 0.01 ;
165-
166- // For the use case of analytics where requests may be larger, parallel, data processing
167- // needs to occur, and we are prioritizing other features like price efficiency instead.
168- // I pretty much just made this number up though after checking a couple of values to see what fit.
169- const ANALYTICS_UTIL : f64 = 4.0 ;
170-
171- /// Add a bunch of queries with randomized duration according to a performance profile.
172- /// Check the utility, given some A.
173- /// Verify that the expected utility is within a range.
174- fn uniform_test ( mean : f64 , std_dev : f64 , a_u : f64 , expected : f64 , tolerance : f64 ) {
175- let mut tracker = Performance :: default ( ) ;
176- let dist = Normal :: new ( mean, std_dev) . unwrap ( ) ;
177- for _ in 0 ..10000 {
178- let duration = thread_rng ( ) . sample ( dist) ;
179- let duration = Duration :: from_millis ( duration. max ( 0.0 ) as u64 ) ;
180- tracker. add_query ( duration, Ok ( ( ) ) )
181- }
182-
183- let utility = tracker. expected_utility ( a_u) ;
184- assert_within ( utility, expected, tolerance) ;
185- }
186113
187114 #[ test]
188115 fn nearest_triangle_numbers ( ) {
@@ -192,62 +119,56 @@ mod tests {
192119 }
193120 }
194121
195- #[ test]
196- fn poor_performance_web ( ) {
197- uniform_test ( 10000.0 , 2000.0 , WEB_UTIL , 0.001 , 0.01 ) ;
198- }
199-
200- #[ test]
201- fn mediocre_performance_web ( ) {
202- uniform_test ( 2000.0 , 250.0 , WEB_UTIL , 0.005 , 0.01 ) ;
203- }
204-
205- #[ test]
206- fn good_performance_web ( ) {
207- uniform_test ( 200.0 , 50.0 , WEB_UTIL , 0.067 , 0.01 ) ;
208- }
209-
210- #[ test]
211- fn great_performance_web ( ) {
212- uniform_test ( 50.0 , 10.0 , WEB_UTIL , 0.57 , 0.01 ) ;
213- }
214-
215- #[ test]
216- fn bad_performance_analytics ( ) {
217- uniform_test ( 100000.0 , 10000.0 , ANALYTICS_UTIL , 0.04 , 0.01 ) ;
218- }
219-
220- #[ test]
221- fn good_performance_analytics ( ) {
222- uniform_test ( 5000.0 , 1000.0 , ANALYTICS_UTIL , 0.56 , 0.02 ) ;
223- }
224-
225- #[ test]
226- fn great_performance_analytics ( ) {
227- uniform_test ( 1000.0 , 1000.0 , ANALYTICS_UTIL , 0.94 , 0.02 ) ;
228- }
229-
230122 #[ test]
231123 fn debug_ratios ( ) {
124+ const WEB_POW : f64 = 1.1 ;
232125 fn web_utility ( ms : u64 ) -> f64 {
233126 let mut tracker = Performance :: default ( ) ;
234- tracker. add_query ( Duration :: from_millis ( ms) , Ok ( ( ) ) ) ;
235- tracker. expected_utility ( WEB_UTIL )
127+ for _ in 0 ..1000 {
128+ tracker. add_query ( Duration :: from_millis ( ms) , Ok ( ( ) ) ) ;
129+ }
130+ tracker. expected_utility ( WEB_POW )
236131 }
237132
238- // What we want to see is a high preference for values
239- // that are low, but a strong preference between low values.
240- // This doesn't really achieve that as well as I might prefer.
241- // I don't think it's going to get where I might like just by
242- // tweaking a few numbers.
133+ const UTILITY_PREFERENCE : f64 = 1.1 ;
134+
135+ let a = web_utility ( 50 ) ; // 70%
136+ let b = web_utility ( 250 ) ; // 29%
137+ let c = web_utility ( 750 ) ; // 01%
243138
244- let a = web_utility ( 50 ) ;
245- let b = web_utility ( 250 ) ;
246- let c = web_utility ( 2000 ) ;
247139 let s = a + b + c;
248140
249141 println ! ( "A: {}\n B: {}\n C: {}" , a / s, b / s, c / s) ;
250142 println ! ( "A / B: {}" , a / b) ;
251143 println ! ( "A / C: {}" , a / c) ;
144+
145+ let scores = vec ! [ a, b, c] ;
146+
147+ let max_utility = scores
148+ . iter ( )
149+ . map ( |v| NotNan :: new ( * v) . unwrap ( ) )
150+ . max ( )
151+ . unwrap ( )
152+ . into_inner ( ) ;
153+
154+ use std:: collections:: HashMap ;
155+ let mut selections = HashMap :: < usize , usize > :: new ( ) ;
156+ for _ in 0 ..100000 {
157+ let mut utility_cutoff: f64 = thread_rng ( ) . gen ( ) ;
158+ utility_cutoff = max_utility * ( 1.0 - utility_cutoff. powf ( UTILITY_PREFERENCE ) ) ;
159+ use crate :: indexer_selection:: WeightedSample ;
160+ let mut selected = WeightedSample :: new ( ) ;
161+ let scores = scores. iter ( ) . filter ( |score| * * score >= utility_cutoff) ;
162+ for ( i, _) in scores. enumerate ( ) {
163+ selected. add ( i, 1.0 ) ;
164+ }
165+ let selected = selected. take ( ) . unwrap ( ) ;
166+ * selections. entry ( selected) . or_default ( ) += 1 ;
167+ }
168+
169+ let mut selections: Vec < _ > = selections. into_iter ( ) . collect ( ) ;
170+ selections. sort ( ) ;
171+ let selections: Vec < _ > = selections. iter ( ) . map ( |s| s. 1 as f64 / 100000.0 ) . collect ( ) ;
172+ dbg ! ( selections) ;
252173 }
253174}
0 commit comments