Skip to content

Commit 3db0213

Browse files
authored
Utility: Overhaul performance utility (#151)
1 parent 88070e4 commit 3db0213

File tree

3 files changed

+78
-167
lines changed

3 files changed

+78
-167
lines changed

src/indexer_selection/mod.rs

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -647,14 +647,14 @@ const UTILITY_CONFIGS_ECONOMIC_SECURITY: (UtilityParameters, UtilityParameters)
647647
weight: 1.5,
648648
},
649649
);
650-
// https://www.desmos.com/calculator/reykkamaje
650+
// https://www.desmos.com/calculator/w6pxajuuve
651651
const UTILITY_CONFIGS_PERFORMANCE: (UtilityParameters, UtilityParameters) = (
652652
UtilityParameters {
653-
a: 0.0032,
653+
a: 1.1,
654654
weight: 1.0,
655655
},
656656
UtilityParameters {
657-
a: 0.0016,
657+
a: 1.2,
658658
weight: 1.5,
659659
},
660660
);
@@ -673,22 +673,17 @@ const UTILITY_CONFIGS_DATA_FRESHNESS: (UtilityParameters, UtilityParameters) = (
673673
// Note: This is only a weight.
674674
const UTILITY_CONFIGS_PRICE_EFFICIENCY: (f64, f64) = (0.5, 1.0);
675675

676-
// TODO: For the user experience we should turn these into 0-1 values,
677-
// and from those calculate both a utility parameter and a weight
678-
// which should be used when combining utilities.
679676
impl UtilityConfig {
680677
pub fn from_preferences(preferences: &IndexerPreferences) -> Self {
681-
fn interp(min: f64, max: f64, mut x: f64) -> f64 {
682-
debug_assert!(max > min);
683-
x = x.max(0.0).min(1.0);
684-
min + ((max - min) * x)
678+
fn interp(a: f64, b: f64, x: f64) -> f64 {
679+
a + ((b - a) * x)
685680
}
686681
fn interp_utility(
687682
bounds: (UtilityParameters, UtilityParameters),
688683
x: f64,
689684
) -> UtilityParameters {
690685
UtilityParameters {
691-
a: interp(bounds.1.a, bounds.0.a, 1.0 - x),
686+
a: interp(bounds.0.a, bounds.1.a, x),
692687
weight: interp(bounds.0.weight, bounds.1.weight, x),
693688
}
694689
}
Lines changed: 71 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,56 @@
1-
// The design comes from https://www.notion.so/thegraph/Indexer-Selection-Algorithm-fc95c7a0b11c436288c82094002d4c3d
2-
// The design boils down to these steps:
3-
// Model a curve that represent the expected time for a query
4-
// Transform the "expected time" into a notion of "performance" (the inverse of expected time)
5-
// Multiply this performance curve with the utility curve
6-
// Integrate over a range to get the expected utility.
7-
//
8-
// The observation in the design below is that you get the same result
9-
// by just averaging the utility of all queries in the relevant window.
1+
// Converts latency to utility.
102
//
113
// The simplest possible thing would be to convert all queries to utility,
124
// aggregate utilities and count, then use that to calculate the average utility.
135
// The problem with that approach is that it doesn't allow us to parameterize
146
// the utility function.
7+
// Note: avg(utility(latencies)) != utility(avg(latencies))
158
//
169
// So instead we take only a slightly more complicated approach.
17-
// First, we convert all durations into a quantized performance.
18-
// The quantized performance serves as a key to a bucket of queries
19-
// with nearly the same performance. This limits the number of
20-
// performance->utility conversions that we need to calculate the
21-
// expected performance which enables making the utility function
22-
// a parameter without obscene loss to performance.
23-
24-
use crate::{
25-
indexer_selection::{decay::Decay, utility::concave_utility},
26-
prelude::*,
27-
};
28-
use ordered_float::NotNan;
10+
// First, we bucket queries keyed by a quantized latency. Each
11+
// bucket gets a utility based on the consumer preferences,
12+
// and then we get the count-weighted average of utilities.
13+
// This trades off precision for query-time work.
14+
15+
use crate::{indexer_selection::decay::Decay, prelude::*};
2916

3017
use super::decay::DecayUtility;
3118

3219
#[derive(Clone, Debug, Default)]
3320
pub struct Performance {
34-
performance: Vec<f64>,
21+
latency: Vec<u32>,
3522
count: Vec<f64>,
3623
}
3724

3825
impl Decay for Performance {
3926
fn shift(&mut self, mut next: Option<&mut Self>, fraction: f64, keep: f64) {
4027
// For each quantized bucket, find the corresponding quantized bucket in
4128
// the next frame, and shift information into it.
42-
for (count, performance) in self.count.iter_mut().zip(self.performance.iter().cloned()) {
43-
let next_performance = next.as_deref_mut().map(|n| n.bucket_mut(performance));
44-
count.shift(next_performance, fraction, keep);
29+
for (count, latency) in self.count.iter_mut().zip(self.latency.iter().copied()) {
30+
let next_latency = next.as_deref_mut().map(|n| n.bucket_mut(latency));
31+
count.shift(next_latency, fraction, keep);
4532
}
4633
}
4734

4835
fn clear(&mut self) {
49-
self.performance.clear();
36+
self.latency.clear();
5037
self.count.clear();
5138
}
5239
}
5340

41+
// https://www.desmos.com/calculator/w6pxajuuve
42+
fn latency_to_utility(latency: f64, pow: f64) -> f64 {
43+
let sigmoid = |x: f64| 1.0 + std::f64::consts::E.powf((x.powf(pow) - 400.0) / 300.0);
44+
sigmoid(0.0) / sigmoid(latency as f64)
45+
}
46+
5447
impl DecayUtility for Performance {
55-
fn expected_utility(&self, u_a: f64) -> f64 {
48+
fn expected_utility(&self, pow: f64) -> f64 {
5649
let mut agg_count = 0.0;
5750
let mut agg_utility = 0.0;
58-
for (count, performance) in self.iter() {
51+
for (count, latency) in self.iter() {
5952
agg_count += count;
60-
agg_utility += count * concave_utility(performance, u_a);
53+
agg_utility += count * latency_to_utility(latency as f64, pow);
6154
}
6255
if agg_count == 0.0 {
6356
return 0.0;
@@ -70,75 +63,34 @@ impl DecayUtility for Performance {
7063
}
7164

7265
impl Performance {
73-
fn quantized_performance(duration: Duration) -> f64 {
74-
// Get nearest triangle number of quantized duration, then convert to performance.
75-
// The idea here is to quantize with variable precision to limit bucket count,
76-
// but also have a reasonable number of buckets no matter what scale you care about.
77-
//
78-
// 6 buckets between 200ms - 300ms (a 100ms delta)
79-
// 6 buckets between 17s - 18s (a 1000ms delta for the same number of buckets as above)
80-
// 244 buckets between 0s-30s... or about the most we'll ever have to keep.
81-
//
82-
// There's probably a smarter way, but this seems reasonable if you squint.
83-
// The tests seemed in practice to create around 100 buckets after 100,000 queries,
84-
// which seems about what you would hope for to approximate a curve. There was variation
85-
// because of different values for std_dev
86-
let duration_ms = duration.as_secs_f64() * 1000.0;
87-
// Subtracting a small amount of here moves the inflection of the curve so
88-
// that changes after this point are more noticeable. This makes a big
89-
// difference to how often a 2000ms indexer is selected compared to a 200ms
90-
// indexer (for the better).
91-
let duration_ms = duration_ms - 35.0;
92-
let duration_ms = nearest_triangle_number(duration_ms.max(1.0));
93-
let performance = 1000.0 / duration_ms;
94-
95-
// SAFETY: We force performance to be NotNan here. duration_ms is >=
96-
// 1.0 and NotNan because of the clamp (above) and implementation of Duration.
97-
// Therefore performance is <= 1000.0 and NonNan.
98-
// See also 47632ed6-4dcc-4b39-b064-c0ca01560626
99-
// I think I would be comfortable changing this assert
100-
// to a debug_assert.
101-
assert!(!performance.is_nan());
102-
103-
performance
104-
}
105-
106-
pub fn add_query(&mut self, mut duration: Duration, result: Result<(), ()>) {
66+
pub fn add_query(&mut self, mut duration: Duration, status: Result<(), ()>) {
10767
// If the query is failed, the user will experience it as increased latency.
10868
// Furthermore, most errors are expected to resolve quicker than successful queries.
10969
// Penalizing failed queries in performance will make it so that we account
11070
// for the additional latency incurred by retry. Ideally we could do this by
11171
// accounting for the likelihood that the utility curve would be moved down and how
11272
// much later when selecting, but this is a much simpler LOC.
113-
if result.is_err() {
73+
if status.is_err() {
11474
duration *= 2;
11575
}
116-
*self.bucket_mut(Self::quantized_performance(duration)) += 1.0;
76+
let quantized = nearest_triangle_number(duration.as_secs_f64() * 1000.0) as u32;
77+
*self.bucket_mut(quantized) += 1.0;
11778
}
11879

119-
fn bucket_mut(&mut self, quantized_performance: f64) -> &mut f64 {
120-
// Safety: Performance is NotNan. See also 47632ed6-4dcc-4b39-b064-c0ca01560626
121-
let index = match unsafe {
122-
self.performance
123-
.binary_search_by_key(&NotNan::new_unchecked(quantized_performance), |a| {
124-
NotNan::new_unchecked(*a)
125-
})
126-
} {
80+
fn bucket_mut(&mut self, key: u32) -> &mut f64 {
81+
let index = match self.latency.binary_search(&key) {
12782
Ok(index) => index,
12883
Err(index) => {
129-
self.performance.insert(index, quantized_performance);
84+
self.latency.insert(index, key);
13085
self.count.insert(index, 0.0);
13186
index
13287
}
13388
};
13489
&mut self.count[index]
13590
}
13691

137-
fn iter<'a>(&'a self) -> impl 'a + Iterator<Item = (f64, f64)> {
138-
self.count
139-
.iter()
140-
.cloned()
141-
.zip(self.performance.iter().cloned())
92+
fn iter<'a>(&'a self) -> impl 'a + Iterator<Item = (f64, u32)> {
93+
self.count.iter().cloned().zip(self.latency.iter().cloned())
14294
}
14395
}
14496

@@ -156,33 +108,8 @@ fn nearest_triangle_number(n: f64) -> f64 {
156108
#[cfg(test)]
157109
mod tests {
158110
use super::*;
159-
use crate::indexer_selection::test_utils::*;
111+
use ordered_float::NotNan;
160112
use rand::{thread_rng, Rng as _};
161-
use rand_distr::Normal;
162-
163-
// "Web utility" - based on percentage of users that will leave a site after an amount of time.
164-
const WEB_UTIL: f64 = 0.01;
165-
166-
// For the use case of analytics where requests may be larger, parallel, data processing
167-
// needs to occur, and we are prioritizing other features like price efficiency instead.
168-
// I pretty much just made this number up though after checking a couple of values to see what fit.
169-
const ANALYTICS_UTIL: f64 = 4.0;
170-
171-
/// Add a bunch of queries with randomized duration according to a performance profile.
172-
/// Check the utility, given some A.
173-
/// Verify that the expected utility is within a range.
174-
fn uniform_test(mean: f64, std_dev: f64, a_u: f64, expected: f64, tolerance: f64) {
175-
let mut tracker = Performance::default();
176-
let dist = Normal::new(mean, std_dev).unwrap();
177-
for _ in 0..10000 {
178-
let duration = thread_rng().sample(dist);
179-
let duration = Duration::from_millis(duration.max(0.0) as u64);
180-
tracker.add_query(duration, Ok(()))
181-
}
182-
183-
let utility = tracker.expected_utility(a_u);
184-
assert_within(utility, expected, tolerance);
185-
}
186113

187114
#[test]
188115
fn nearest_triangle_numbers() {
@@ -192,62 +119,56 @@ mod tests {
192119
}
193120
}
194121

195-
#[test]
196-
fn poor_performance_web() {
197-
uniform_test(10000.0, 2000.0, WEB_UTIL, 0.001, 0.01);
198-
}
199-
200-
#[test]
201-
fn mediocre_performance_web() {
202-
uniform_test(2000.0, 250.0, WEB_UTIL, 0.005, 0.01);
203-
}
204-
205-
#[test]
206-
fn good_performance_web() {
207-
uniform_test(200.0, 50.0, WEB_UTIL, 0.067, 0.01);
208-
}
209-
210-
#[test]
211-
fn great_performance_web() {
212-
uniform_test(50.0, 10.0, WEB_UTIL, 0.57, 0.01);
213-
}
214-
215-
#[test]
216-
fn bad_performance_analytics() {
217-
uniform_test(100000.0, 10000.0, ANALYTICS_UTIL, 0.04, 0.01);
218-
}
219-
220-
#[test]
221-
fn good_performance_analytics() {
222-
uniform_test(5000.0, 1000.0, ANALYTICS_UTIL, 0.56, 0.02);
223-
}
224-
225-
#[test]
226-
fn great_performance_analytics() {
227-
uniform_test(1000.0, 1000.0, ANALYTICS_UTIL, 0.94, 0.02);
228-
}
229-
230122
#[test]
231123
fn debug_ratios() {
124+
const WEB_POW: f64 = 1.1;
232125
fn web_utility(ms: u64) -> f64 {
233126
let mut tracker = Performance::default();
234-
tracker.add_query(Duration::from_millis(ms), Ok(()));
235-
tracker.expected_utility(WEB_UTIL)
127+
for _ in 0..1000 {
128+
tracker.add_query(Duration::from_millis(ms), Ok(()));
129+
}
130+
tracker.expected_utility(WEB_POW)
236131
}
237132

238-
// What we want to see is a high preference for values
239-
// that are low, but a strong preference between low values.
240-
// This doesn't really achieve that as well as I might prefer.
241-
// I don't think it's going to get where I might like just by
242-
// tweaking a few numbers.
133+
const UTILITY_PREFERENCE: f64 = 1.1;
134+
135+
let a = web_utility(50); // 70%
136+
let b = web_utility(250); // 29%
137+
let c = web_utility(750); // 01%
243138

244-
let a = web_utility(50);
245-
let b = web_utility(250);
246-
let c = web_utility(2000);
247139
let s = a + b + c;
248140

249141
println!("A: {}\nB: {}\nC: {}", a / s, b / s, c / s);
250142
println!("A / B: {}", a / b);
251143
println!("A / C: {}", a / c);
144+
145+
let scores = vec![a, b, c];
146+
147+
let max_utility = scores
148+
.iter()
149+
.map(|v| NotNan::new(*v).unwrap())
150+
.max()
151+
.unwrap()
152+
.into_inner();
153+
154+
use std::collections::HashMap;
155+
let mut selections = HashMap::<usize, usize>::new();
156+
for _ in 0..100000 {
157+
let mut utility_cutoff: f64 = thread_rng().gen();
158+
utility_cutoff = max_utility * (1.0 - utility_cutoff.powf(UTILITY_PREFERENCE));
159+
use crate::indexer_selection::WeightedSample;
160+
let mut selected = WeightedSample::new();
161+
let scores = scores.iter().filter(|score| **score >= utility_cutoff);
162+
for (i, _) in scores.enumerate() {
163+
selected.add(i, 1.0);
164+
}
165+
let selected = selected.take().unwrap();
166+
*selections.entry(selected).or_default() += 1;
167+
}
168+
169+
let mut selections: Vec<_> = selections.into_iter().collect();
170+
selections.sort();
171+
let selections: Vec<_> = selections.iter().map(|s| s.1 as f64 / 100000.0).collect();
172+
dbg!(selections);
252173
}
253174
}

src/indexer_selection/price_efficiency.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,7 @@ mod test {
193193
eventuals::idle().await;
194194
let mut context = Context::new(BASIC_QUERY, "").unwrap();
195195
// Expected values based on https://www.desmos.com/calculator/kxd4kpjxi5
196-
let tests = [
197-
(0.01, 0.0),
198-
(0.02, 0.487960),
199-
(0.1, 0.64419),
200-
(1.0, 0.68216),
201-
];
196+
let tests = [(0.01, 0.0), (0.02, 0.27304), (0.1, 0.50615), (1.0, 0.55769)];
202197
for (budget, expected_utility) in tests {
203198
let (fee, utility) = efficiency
204199
.get_price(

0 commit comments

Comments
 (0)