To inspect the tradeoffs between recall and throughput, select a dataset and a value of \(k\). Then, using the legend below, select the algorithms you are interested in.
Hovering with the mouse on the central pane will “slice” through the plot, showing in the right pane the ranking of the algorithms at the recall highlighted by the red line.
db = DuckDBClient.of({
summary: FileAttachment("results/summary.parquet"),
algorithm_basics: {file: FileAttachment("algorithms_basics.csv"), header: true}
});
alldata = db.sql`select * from summary;`
algorithms = db.sql`
select distinct algorithm
from summary natural join algorithm_basics
where (not is_gpu or ${include_gpu})
order by all;
`
datasets = db.sql`select distinct dataset from summary where dataset not like '%-id-%';`
colors = Array.from(d3["schemeTableau10"]).toReversed();
ncolors = colors.length;
mutable_color_map = new Mutable(new Map([
["lorann", colors.pop()],
["glass", colors.pop()],
["symphonyqg", colors.pop()],
]));
color_map = mutable_color_map.generator
viewof selected_dataset = Inputs.select(
datasets.map(d => d.dataset),
{value: "landmark-nomic-768-normalized", label: "select dataset"}
);
k_values = db.sql`select distinct k from summary;`
viewof k_value = Inputs.select(k_values.map(d => d.k), {value: 10, label: "value of k"});
d3.select("#legend")
.selectAll("div")
.data(algorithms)
.join("div")
.style("border-bottom", d => `solid 6px ${(color_map.has(d.algorithm))? color_map.get(d.algorithm) : "lightgray"}`)
.style("border-top", "solid 1px lightgray")
.style("border-left", "solid 1px lightgray")
.style("border-right", "solid 1px lightgray")
.style("border-radius", "4px")
.style("padding-right", "5pt")
.style("padding-left", "5pt")
.on("click", (event, d)=> {
const key = d.algorithm;
let cm = new Map(color_map);
console.log("Algorithm " + key);
if (cm.has(key)) {
const color = cm.get(key);
console.log("Returning color to the pool " + color);
cm.delete(key);
colors.push(color);
} else {
const color = colors.pop();
console.log("Got color from the pool " + color);
if (color !== undefined) {
cm.set(key, color);
}
}
mutable_color_map.value = cm;
console.log(color_map);
console.log(colors);
console.log(event.target);
})
.text(d => d.algorithm);
pareto = db.sql`WITH
filtered_summary AS (
SELECT *
FROM summary natural left join algorithm_basics
WHERE (NOT is_gpu or ${include_gpu})
),
ranked_points AS (
SELECT
algorithm, dataset, params, qps, recall,
ROW_NUMBER() OVER (PARTITION BY algorithm, dataset ORDER BY qps) AS rank_qps,
ROW_NUMBER() OVER (PARTITION BY algorithm, dataset ORDER BY recall) AS rank_recall
FROM filtered_summary
where dataset = ${selected_dataset} and k = ${k_value}
),
non_dominated AS (
SELECT
r1.algorithm, r1.dataset, r1.params, r1.qps, r1.recall
FROM ranked_points r1
LEFT JOIN ranked_points r2
ON r1.algorithm = r2.algorithm
AND r1.dataset = r2.dataset
AND ((r1.rank_qps < r2.rank_qps AND r1.rank_recall <= r2.rank_recall) OR
(r1.rank_qps <= r2.rank_qps AND r1.rank_recall < r2.rank_recall))
WHERE r2.recall IS NULL -- no dominating point
)
SELECT * FROM non_dominated;
`
highlighted = pareto.filter(d => color_map.has(d.algorithm));
background = pareto.filter(d => !color_map.has(d.algorithm));
viewof paretoplot = Plot.plot({
style: {fontSize: "10pt"},
x: {domain: [0, 1], grid: true},
y: {type: "log", grid: true},
marks: [
Plot.ruleY([1]),
Plot.ruleX([0]),
Plot.line(background, {
x: "recall",
y: "qps",
stroke: "lightgray",
z: "algorithm",
marker: "circle-stroke",
tip: false
}),
Plot.line(highlighted, {
x: "recall",
y: "qps",
stroke: (d) => color_map.get(d.algorithm),
z: "algorithm",
marker: "circle-stroke",
tip: false
}),
Plot.ruleX(background, Plot.pointerX({x: "recall", py: "qps", stroke: "red"}))
]
})
dynamic_recall_threshold = (paretoplot)? paretoplot.recall : null;
rankdata = db.sql`
SELECT dataset, algorithm, k, max(qps) as qps
FROM summary natural left join algorithm_basics
WHERE recall > ${dynamic_recall_threshold}
AND dataset = ${selected_dataset}
AND (not is_gpu or ${include_gpu})
GROUP BY dataset, algorithm, k
`
half = d3.max(rankdata, d => d.qps) / 2
console.log(half)
Plot.plot({
style: {fontSize: "12pt"},
marginLeft: 180,
marks: [
Plot.ruleY([0]),
Plot.barX(rankdata, {
y: "algorithm",
x: "qps",
fill: d => color_map.has(d.algorithm)? color_map.get(d.algorithm) : "gray",
sort: {y: "-x"}
}),
Plot.text(rankdata.filter(d => d.qps < half), {
y: "algorithm",
x: "qps",
text: d => d3.format(".0f")(d.qps) + " qps",
dx: 10,
textAnchor: "start",
fill: "black"
}),
Plot.text(rankdata.filter(d => d.qps >= half), {
y: "algorithm",
x: "qps",
text: d => d3.format(".0f")(d.qps) + " qps",
dx: -10,
textAnchor: "end",
fill: "white"
})
]
})