This page lets you inspect every configuration of any supported algorithm on any supported dataset.
// Shared DuckDB client. The two parquet attachments are exposed to SQL under
// the names `summary` and `querydetail`, which the queries below refer to.
db = DuckDBClient.of({
  summary: FileAttachment("results/summary.parquet"),
  querydetail: FileAttachment("results/stats.parquet"),
});
// Every row of the summary table.
alldata = db.sql`select * from summary;`
// Distinct algorithm names. Sorted explicitly: without ORDER BY the row order of
// a DISTINCT query is unspecified, so the selector options could reshuffle.
algorithms = db.sql`select distinct algorithm from summary order by algorithm;`
// Distinct dataset names, skipping those whose name contains "-id-"; sorted for
// the same reason as above.
datasets = db.sql`select distinct dataset from summary where dataset not like '%-id-%' order by dataset;`
// Tableau10 palette in reverse order (copied first, so d3's own array is left
// untouched), together with the number of colors it holds.
colors = [...d3.schemeTableau10].reverse();
ncolors = colors.length;
viewof selected_dataset = Inputs.select(
datasets.map(d => d.dataset),
{value: "landmark-nomic-768-normalized", label: "select dataset"}
);
viewof selected_algorithm = Inputs.select(
algorithms.map(d => d.algorithm),
{value: "lorann", label: "select algorithm"}
);
k_values = db.sql`select distinct k from summary;`
viewof k_value = Inputs.select(k_values.map(d => d.k), {value: 10, label: "value of k"});
// Pareto frontier (recall vs. qps) of the selected algorithm on the selected
// dataset and k.
//
// A configuration is kept unless some other configuration is at least as good
// on both qps and recall and strictly better on one of them. The comparison is
// done on the raw values: ranking with ROW_NUMBER() breaks ties arbitrarily, so
// of two configurations with equal recall the slower one could survive.
//
// Rows are returned by increasing recall because the frontier is drawn with
// Plot.line, which connects points in input order.
pareto = db.sql`WITH
candidates AS (
  SELECT algorithm, dataset, params, qps, recall
  FROM summary
  WHERE dataset = ${selected_dataset} AND k = ${k_value} AND algorithm = ${selected_algorithm}
)
SELECT c.algorithm, c.dataset, c.params, c.qps, c.recall
FROM candidates c
WHERE NOT EXISTS (
  -- a configuration that dominates c
  SELECT 1
  FROM candidates o
  WHERE o.qps >= c.qps
    AND o.recall >= c.recall
    AND (o.qps > c.qps OR o.recall > c.recall)
)
ORDER BY c.recall, c.qps DESC;
`
// All configurations (not only the Pareto-optimal ones) of the selected
// algorithm on the selected dataset and k; drawn as gray dots in the plot and
// used as the data source for its tooltips.
other_configurations = db.sql`
SELECT algorithm, dataset, params, qps, recall
FROM summary
WHERE dataset = ${selected_dataset} and k = ${k_value} and algorithm = ${selected_algorithm};
`
viewof paretoplot = Plot.plot({
style: {fontSize: "10pt"},
x: {domain: [d3.min(other_configurations, d => d.recall), 1], grid: true},
y: {type: "log", grid: true},
marks: [
Plot.ruleY([1]),
Plot.ruleX([0]),
Plot.dot(other_configurations, {
x: "recall",
y: "qps",
stroke: "gray",
z: "algorithm",
marker: "circle-stroke",
tip: false
}),
Plot.line(pareto, {
x: "recall",
y: "qps",
z: "algorithm",
marker: "circle-stroke",
tip: false
}),
Plot.tip(other_configurations, Plot.pointer({
x: "recall",
y: "qps",
title: (d) => `${d.algorithm}\n${d.params}\nrecall: ${d.recall}\nqps:${d.qps}}`
})),
]
})
// Use the plot's current value, or an all-empty selection when it is
// null/undefined, so the detail query below still gets its three parameters.
pareto_selected = paretoplot == null
  ? {algorithm: '', dataset: '', params: ''}
  : paretoplot;
// Absolute URL of the selected dataset's detail parquet, resolved relative to
// the current page. Using URL resolution instead of slicing a fixed
// "algorithm_focus.html" suffix off the pathname keeps this correct if the page
// is served under a different file name; query string and hash are dropped.
focus_url = new URL(
  `results/${selected_dataset}__detail.parquet`,
  window.location.href
).href;
console.log(focus_url);
// Detail rows for the configuration currently selected in the Pareto plot:
// reads the parquet file at focus_url, NATURAL JOINs it with querydetail, and
// keeps the rows matching the three positional parameters
// (?1 = algorithm, ?2 = dataset, ?3 = params).
// NOTE(review): focus_url is spliced into the SQL text rather than bound as a
// parameter, so a single quote in it would break the statement — confirm that
// dataset names can never contain one.
focusdata = db.query(`
SELECT *
FROM (SELECT * FROM '${focus_url}')
NATURAL JOIN querydetail
WHERE algorithm = ?1
AND dataset = ?2
AND params = ?3
`, [
pareto_selected.algorithm,
pareto_selected.dataset,
pareto_selected.params,
]);
// Debug output: logs the page origin to the browser console.
console.log(window.location.origin);
// Distribution of the recall values in focusdata: one lollipop (stem + dot) per
// distinct recall value, its height being the proportion of rows with that
// value, plus a red vertical rule at the mean recall.
Plot.plot({
  x: {grid: false, domain: [-0.01, 1.01]},
  y: {grid: true, domain: [0, 1]},
  marks: [
    Plot.ruleY([0]),
    Plot.ruleX([d3.mean(focusdata, (row) => row.recall)], {stroke: "red"}),
    Plot.dot(
      focusdata,
      Plot.groupX({y: "proportion"}, {x: "recall", r: 6, fill: "black"})
    ),
    Plot.ruleX(
      focusdata,
      Plot.groupX({y: "proportion"}, {x: "recall", strokeWidth: 3})
    ),
  ]
});