<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Lablog on CuriousCoding</title><link>https://curiouscoding.nl/categories/lablog/</link><description>Recent content in Lablog on CuriousCoding</description><generator>Hugo</generator><language>en</language><lastBuildDate>Mon, 02 Mar 2026 00:00:00 +0100</lastBuildDate><atom:link href="https://curiouscoding.nl/categories/lablog/index.xml" rel="self" type="application/rss+xml"/><item><title>Route Planning using Customizable Contraction Hierarchies</title><link>https://curiouscoding.nl/posts/cch/</link><pubDate>Mon, 02 Mar 2026 00:00:00 +0100</pubDate><guid>https://curiouscoding.nl/posts/cch/</guid><description>&lt;div class="ox-hugo-toc toc has-section-numbers"&gt;
&lt;div class="heading"&gt;Table of Contents&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;1&lt;/span&gt; &lt;a href="#problem-statement-customizable-route-planning--crp" &gt;Problem Statement: Customizable Route Planning (CRP)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2&lt;/span&gt; &lt;a href="#contraction-hierarchies--ch" &gt;Contraction Hierarchies (CH)&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.1&lt;/span&gt; &lt;a href="#classic-contraction-hierarchies" &gt;&lt;em&gt;Classic&lt;/em&gt; Contraction Hierarchies&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.2&lt;/span&gt; &lt;a href="#customizable-contraction-hierarchies--cch" &gt;&lt;em&gt;Customizable&lt;/em&gt; Contraction Hierarchies (CCH)&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;3&lt;/span&gt; &lt;a href="#analogy-with-trees" &gt;Analogy with trees&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4&lt;/span&gt; &lt;a href="#shortest-paths-in-chs" &gt;Shortest Paths in CHs&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;5&lt;/span&gt; &lt;a href="#parents-faster-shortest-paths-in-cchs" &gt;Parents: Faster Shortest Paths in CCHs&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;6&lt;/span&gt; &lt;a href="#input-graph" &gt;Input Graph&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7&lt;/span&gt; &lt;a href="#initial-algorithm" &gt;Initial Algorithm&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7.1&lt;/span&gt; &lt;a href="#permute-input" &gt;Permute input&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7.2&lt;/span&gt; &lt;a href="#chordal-completion-and-parents" &gt;Chordal Completion and Parents&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7.3&lt;/span&gt; &lt;a href="#customize" &gt;Customize&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7.4&lt;/span&gt; &lt;a href="#query" &gt;Query&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8&lt;/span&gt; &lt;a href="#optimizing-things" &gt;Optimizing things&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.1&lt;/span&gt; &lt;a href="#binary-searching-in-find-edge" &gt;Binary searching in &lt;code&gt;find_edge&lt;/code&gt;&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.2&lt;/span&gt; &lt;a href="#hashmap-of-edges" &gt;&lt;code&gt;HashMap&lt;/code&gt; of edges&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.3&lt;/span&gt; &lt;a href="#ranges-of-neighbours" &gt;Ranges of neighbours&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.4&lt;/span&gt; &lt;a href="#linear-scan" &gt;Linear scan&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.5&lt;/span&gt; &lt;a href="#proper-query-algorithm" &gt;Proper query algorithm&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.6&lt;/span&gt; &lt;a href="#pruning-edges" &gt;Pruning edges&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.7&lt;/span&gt; &lt;a href="#pruning" &gt;Pruning&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.8&lt;/span&gt; &lt;a href="#unconditional-edge-relaxing" &gt;Unconditional edge relaxing&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.9&lt;/span&gt; &lt;a href="#early-edge-break" &gt;Early edge break&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.10&lt;/span&gt; &lt;a href="#dfs-ordering-the-nodes" &gt;DFS-ordering the nodes&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.11&lt;/span&gt; &lt;a href="#not-inclining-queries" &gt;Not inlining queries&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;9&lt;/span&gt; &lt;a href="#some-stats" &gt;Some stats&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;10&lt;/span&gt; &lt;a href="#serializing-the-final-structure" &gt;Serializing the final structure&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;11&lt;/span&gt; &lt;a href="#merging-adjacent-edges" &gt;Merging adjacent edges&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;11.1&lt;/span&gt; &lt;a href="#perf-stat" &gt;Perf stat&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;11.2&lt;/span&gt; &lt;a href="#all-ranges-are-multiples-of-8" &gt;All ranges are multiples of 8&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;11.3&lt;/span&gt; &lt;a href="#all-ranges-have-size-8" &gt;All ranges have size 8&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;11.4&lt;/span&gt; &lt;a href="#finding-the-bottleneck" &gt;Finding the bottleneck&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;12&lt;/span&gt; &lt;a href="#bugfixing" &gt;Bugfixing&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;13&lt;/span&gt; &lt;a href="#further-ideas" &gt;Further ideas&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;13.1&lt;/span&gt; &lt;a href="#failed-doubling-the-graph" &gt;Failed: Doubling the graph&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;13.2&lt;/span&gt; &lt;a href="#edge-pruning" &gt;Edge pruning&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;13.3&lt;/span&gt; &lt;a href="#failed-expanding-a-node-and-its-parent-in-parallel" &gt;Failed: Expanding a node and its parent in parallel&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;14&lt;/span&gt; &lt;a href="#current-best-results" &gt;Current best results&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;14.1&lt;/span&gt; &lt;a href="#bottleneck" &gt;Bottleneck&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;15&lt;/span&gt; &lt;a href="#d41d8c" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; &lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;!--endtoc--&gt;
&lt;p&gt;These are some notes on &lt;em&gt;customizable contraction hierarchies&lt;/em&gt;, based on talks
with Michael Zündorf and the survey paper by Bläsius, Buchhold, Wagner, Zeitz, and Zündorf (&lt;a href="#citeproc_bib_item_1"&gt;2025&lt;/a&gt;).&lt;/p&gt;</description></item><item><title>Trying to understand DDR memory</title><link>https://curiouscoding.nl/posts/ddr/</link><pubDate>Tue, 20 Jan 2026 00:00:00 +0100</pubDate><guid>https://curiouscoding.nl/posts/ddr/</guid><description>&lt;div class="ox-hugo-toc toc has-section-numbers"&gt;
&lt;div class="heading"&gt;Table of Contents&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;1&lt;/span&gt; &lt;a href="#questions" &gt;Questions&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2&lt;/span&gt; &lt;a href="#a-load-of-articles-blogs-pages-to-read" &gt;A load of articles/blogs/pages to read&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.1&lt;/span&gt; &lt;a href="#wikipedia-articles" &gt;Wikipedia articles&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.2&lt;/span&gt; &lt;a href="#more-posts" &gt;More posts&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.3&lt;/span&gt; &lt;a href="#notes" &gt;Notes&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.4&lt;/span&gt; &lt;a href="#my-own-ram" &gt;My own RAM&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.5&lt;/span&gt; &lt;a href="#continued-notes" &gt;Continued notes&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.6&lt;/span&gt; &lt;a href="#address-mapping-notation" &gt;Address mapping notation&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.7&lt;/span&gt; &lt;a href="#intel-spec" &gt;Intel spec&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.8&lt;/span&gt; &lt;a href="#rank-interleaving" &gt;Rank interleaving&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;2.9&lt;/span&gt; &lt;a href="#nontemporal-reads-writes" &gt;Nontemporal reads/writes&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;3&lt;/span&gt; &lt;a href="#remap-using-performance-counters" &gt;reMap: using Performance counters&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4&lt;/span&gt; &lt;a href="#sudoku" &gt;Sudoku&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4.1&lt;/span&gt; &lt;a href="#step-1-dram-addressing-functions" &gt;Step 1: DRAM addressing functions&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4.2&lt;/span&gt; &lt;a href="#step-2-row-column-bits" &gt;Step 2: row/column bits&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4.3&lt;/span&gt; &lt;a href="#step-3-validation" &gt;Step 3: validation&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4.4&lt;/span&gt; &lt;a href="#step-4-which-function-is-what" &gt;Step 4: which function is what?&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4.5&lt;/span&gt; &lt;a href="#refreshes" &gt;Refreshes&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;4.6&lt;/span&gt; &lt;a href="#consecutive-accesses" &gt;Consecutive Accesses&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;5&lt;/span&gt; &lt;a href="#sudoku-now-with-only-1-dimm" &gt;Sudoku, now with only 1 DIMM&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;5.1&lt;/span&gt; &lt;a href="#setup" &gt;setup&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;5.2&lt;/span&gt; &lt;a href="#1-dot-reverse-functions" &gt;1. reverse functions&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;5.3&lt;/span&gt; &lt;a href="#2-dot-identify-bits" &gt;2. identify bits&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;5.4&lt;/span&gt; &lt;a href="#3-dot-validate-mapping" &gt;3. validate mapping&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;5.5&lt;/span&gt; &lt;a href="#4-dot-decompose-functions" &gt;4. decompose functions&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;6&lt;/span&gt; &lt;a href="#results" &gt;Final results&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7&lt;/span&gt; &lt;a href="#decode-dimms" &gt;&lt;code&gt;decode-dimms&lt;/code&gt;&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7.1&lt;/span&gt; &lt;a href="#bank-groups" &gt;Bank groups&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7.2&lt;/span&gt; &lt;a href="#refresh" &gt;Refresh&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;7.3&lt;/span&gt; &lt;a href="#random-access-throughput" &gt;Random access throughput&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8&lt;/span&gt; &lt;a href="#cpu-benchmarks" &gt;CPU benchmarks&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.1&lt;/span&gt; &lt;a href="#cpu-benchmarks" &gt;cpu-benchmarks&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.1.1&lt;/span&gt; &lt;a href="#random-access-throughput-1-dimm" &gt;random access throughput 1 DIMM&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.1.2&lt;/span&gt; &lt;a href="#random-access-throughput-2-dimm" &gt;random access throughput 2 DIMM&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.2&lt;/span&gt; &lt;a href="#memory-read-experiment" &gt;memory-read-experiment&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.2.1&lt;/span&gt; &lt;a href="#strided-reading-1-dimm" &gt;strided reading 1 DIMM&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;8.2.2&lt;/span&gt; &lt;a href="#strided-reading-2-dimm" &gt;strided reading 2 DIMM&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;9&lt;/span&gt; &lt;a href="#tinymembench" &gt;&lt;code&gt;tinymembench&lt;/code&gt;&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="section-num"&gt;10&lt;/span&gt; &lt;a href="#remaining-questions" &gt;Remaining questions&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;!--endtoc--&gt;
&lt;p&gt;These are chronological (and thus, only lightly organized) notes on my attempt to
understand how DDR4 and DDR5 RAM memory work.&lt;/p&gt;</description></item><item><title>Mod-minimizers and other minimizers</title><link>https://curiouscoding.nl/posts/mod-minimizers/</link><pubDate>Thu, 18 Jan 2024 00:00:00 +0100</pubDate><guid>https://curiouscoding.nl/posts/mod-minimizers/</guid><description>&lt;div class="ox-hugo-toc toc"&gt;
&lt;div class="heading"&gt;Table of Contents&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#applications" &gt;Applications&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#background" &gt;Background&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#minimizers" &gt;Minimizers&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#density-bounds" &gt;Density bounds&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#robust-minimizers" &gt;Robust minimizers&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#pasha" &gt;PASHA&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#miniception" &gt;Miniception&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#closed-syncmers" &gt;Closed syncmers&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#bd-anchors" &gt;Bd-anchors&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#new-mod-minimizers" &gt;New: Mod-minimizers&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#experiments" &gt;Experiments&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#conclusion" &gt;Conclusion&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#small-k-experiments" &gt;Small k experiments&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#search-methods" &gt;Search methods&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#directed-minimizer" &gt;Directed minimizer&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#k-1-w-2" &gt;\(k=1\), \(w=2\)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#k-1-w-4" &gt;\(k=1\), \(w=4\)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#k-1-w-5" &gt;\(k=1\), \(w=5\)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#k-2-w-2" &gt;\(k=2\), \(w=2\)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#k-2-w-4" &gt;\(k=2\), \(w=4\)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#notes" &gt;Notes&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#reading-list" &gt;Reading list&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;!--endtoc--&gt;
&lt;p&gt;\[
\newcommand{\d}{\mathrm{d}}
\newcommand{\L}{\mathcal{L}}
\]&lt;/p&gt;
&lt;p&gt;This post introduces some background for minimizers and some
experiments for a new minimizer variant. That new variant is now called the
&lt;em&gt;mod-minimizer&lt;/em&gt; and published at WABI24 (&lt;a href="https://doi.org/10.4230/LIPIcs.WABI.2024.11" class="external-link" target="_blank" rel="noopener"&gt;&lt;strong&gt;DOI&lt;/strong&gt;&lt;/a&gt;, &lt;a href="https://curiouscoding.nl/papers/modmini.pdf" &gt;&lt;strong&gt;PDF&lt;/strong&gt;&lt;/a&gt;) (&lt;a href="#citeproc_bib_item_5"&gt;Groot Koerkamp and Pibiri 2024&lt;/a&gt;). The paper
also includes a review of existing methods, including pseudocode for
most of the methods covered below.&lt;/p&gt;</description></item><item><title>One Billion Row Challenge</title><link>https://curiouscoding.nl/posts/1brc/</link><pubDate>Wed, 03 Jan 2024 00:00:00 +0100</pubDate><guid>https://curiouscoding.nl/posts/1brc/</guid><description>&lt;div class="ox-hugo-toc toc"&gt;
&lt;div class="heading"&gt;Table of Contents&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#external-links" &gt;External links&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#the-problem" &gt;The problem&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#initial-solution-105s" &gt;Initial solution: 105s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#first-flamegraph" &gt;First flamegraph&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#bytes-instead-of-strings-72s" &gt;Bytes instead of strings: 72s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#manual-parsing-61s" &gt;Manual parsing: 61s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#inline-hash-keys-50s" &gt;Inline hash keys: 50s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#faster-hash-function-41s" &gt;Faster hash function: 41s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#a-new-flame-graph" &gt;A new flame graph&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#perf-it-is" &gt;Perf it is&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#something-simple-allocating-the-right-size-41s" &gt;Something simple: allocating the right size: 41s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#memchr-for-scanning-47s" &gt;&lt;code&gt;memchr&lt;/code&gt; for scanning: 47s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#memchr-crate-29s" &gt;&lt;code&gt;memchr&lt;/code&gt; crate: 29s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#get-unchecked-28s" &gt;&lt;code&gt;get_unchecked&lt;/code&gt;: 28s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#manual-simd-29s" &gt;Manual SIMD: 29s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#profiling" &gt;Profiling&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#revisiting-the-key-function-23s" &gt;Revisiting the key function: 23s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#ptrhash-perfect-hash-function-17s" &gt;PtrHash perfect hash function: 17s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#larger-masks-15s" &gt;Larger masks: 15s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#reduce-pattern-matching-14s" &gt;Reduce pattern matching: 14s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#memory-map-12s" &gt;Memory map: 12s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#parallelization-2-dot-0s" &gt;Parallelization: 2.0s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#branchless-parsing-1-dot-7s" &gt;Branchless parsing: 1.7s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#purging-all-branches-1-dot-67s" &gt;Purging all branches: 1.67s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#some-more-attempts" &gt;Some more attempts&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#faster-perfect-hashing-1-dot-55s" &gt;Faster perfect hashing: 1.55s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#bug-time-back-up-to-1-dot-71s" &gt;Bug time: Back up to 1.71s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#temperatures-less-than-100-1-dot-62s" &gt;Temperatures less than 100: 1.62s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#computing-min-as-a-max-1-dot-50" &gt;Computing &lt;code&gt;min&lt;/code&gt; as a &lt;code&gt;max&lt;/code&gt;: 1.50&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#intermezzo-hyperthreading-1-dot-34s" &gt;Intermezzo: Hyperthreading: 1.34s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#not-parsing-negative-numbers-1-dot-48s" &gt;Not parsing negative numbers: 1.48s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#more-efficient-parsing-1-dot-44s" &gt;More efficient parsing: 1.44s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#fixing-undefined-behaviour-back-to-1-dot-56s" &gt;Fixing undefined behaviour: back to 1.56s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#lazily-subtracting-b-0-1-dot-52s" &gt;Lazily subtracting &lt;code&gt;b'0'&lt;/code&gt;: 1.52s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#min-max-without-parsing-1-dot-55s" &gt;Min/max without parsing: 1.55s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#parsing-using-a-single-multiplication-doesn-t-work" &gt;Parsing using a single multiplication: doesn&amp;rsquo;t work&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#parsing-using-a-single-multiplication-does-work-after-all-1-dot-48s" &gt;Parsing using a single multiplication does work after all! 1.48s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#a-side-note-ascii" &gt;A side note: ASCII&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#skip-parsing-using-pdep-1-dot-42s" &gt;Skip parsing using &lt;code&gt;PDEP&lt;/code&gt;: 1.42s&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#improved" &gt;Improved&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#a-further-note" &gt;A further note&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#branchy-min-max-1-dot-37s" &gt;Branchy min/max: 1.37s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#no-counting-1-dot-34s" &gt;No counting: 1.34s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#arbitrary-long-city-names-1-dot-34" &gt;Arbitrarily long city names: 1.34&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#4-entries-in-parallel-1-dot-23s" &gt;4 entries in parallel: 1.23s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#mmap-per-thread" &gt;Mmap per thread&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#reordering-some-operations-1-dot-19s" &gt;Reordering some operations: 1.19s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#reordering-more-1-dot-11s" &gt;Reordering more: 1.11s&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#even-more-ilp-1-dot-05" &gt;Even more ILP: 1.05&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#compliance-1-ok-i-ll-count-1-dot-06" &gt;Compliance 1, OK I&amp;rsquo;ll count: 1.06&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#d41d8c" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; &lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#postscript" &gt;Postscript&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;!--endtoc--&gt;
&lt;p&gt;A youtube video on this post is &lt;a href="https://youtu.be/e_9ziFKcEhw?si=JHy4aVliKw9gfryf&amp;amp;t=896" class="external-link" target="_blank" rel="noopener"&gt;here&lt;/a&gt;.&lt;/p&gt;</description></item><item><title>Notes on implementing Longest Common Repeat (LCR)</title><link>https://curiouscoding.nl/posts/longest-common-repeat/</link><pubDate>Wed, 06 Dec 2023 00:00:00 +0100</pubDate><guid>https://curiouscoding.nl/posts/longest-common-repeat/</guid><description>&lt;div class="ox-hugo-toc toc"&gt;
&lt;div class="heading"&gt;Table of Contents&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#notes" &gt;Notes&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#coloured-tree-problem" &gt;Coloured Tree Problem&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#generic-sparse-suffix-array" &gt;Generic sparse suffix array&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#sparse-suffix-array-on-minimizers" &gt;Sparse suffix array on minimizers&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#discussion-todos" &gt;Discussion / TODOs&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#evals" &gt;Evals&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;!--endtoc--&gt;
&lt;p&gt;These are my running notes on implementing an algorithm for Longest Common
Repeat using minimizers.&lt;/p&gt;
&lt;h2 id="notes"&gt;
 Notes
 &lt;a class="heading-link" href="#notes"&gt;
 &lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
 &lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
 &lt;/a&gt;
&lt;/h2&gt;
&lt;h3 id="coloured-tree-problem"&gt;
 Coloured Tree Problem
 &lt;a class="heading-link" href="#coloured-tree-problem"&gt;
 &lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
 &lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
 &lt;/a&gt;
&lt;/h3&gt;
&lt;p&gt;See Lemma 3 &lt;a href="https://drops.dagstuhl.de/storage/00lipics/lipics-vol105-cpm2018/LIPIcs.CPM.2018.23/LIPIcs.CPM.2018.23.pdf" class="external-link" target="_blank" rel="noopener"&gt;here&lt;/a&gt;.&lt;/p&gt;
&lt;h3 id="generic-sparse-suffix-array"&gt;
 Generic sparse suffix array
 &lt;a class="heading-link" href="#generic-sparse-suffix-array"&gt;
 &lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
 &lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
 &lt;/a&gt;
&lt;/h3&gt;
&lt;ul&gt;
&lt;li&gt;paper: &lt;a href="https://arxiv.org/pdf/2310.09023.pdf" class="external-link" target="_blank" rel="noopener"&gt;https://arxiv.org/pdf/2310.09023.pdf&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;code: &lt;a href="https://github.com/lorrainea/SSA/blob/main/PA/ssa.cc" class="external-link" target="_blank" rel="noopener"&gt;https://github.com/lorrainea/SSA/blob/main/PA/ssa.cc&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;For random strings and \(b \leq n / \log n\), direct radix sort on
\(2\log n + \log\log n\)-bit
prefixes is sufficient for \(O(n)\) runtime. In fact, since computer word size
\(w\geq \log n\), we only need at most \(2\) rounds of radix sort! (See simple-saca.)&lt;/p&gt;</description></item><item><title>PtrHash: Notes on adapting PTHash in Rust</title><link>https://curiouscoding.nl/posts/ptrhash-log/</link><pubDate>Thu, 21 Sep 2023 00:00:00 +0200</pubDate><guid>https://curiouscoding.nl/posts/ptrhash-log/</guid><description>&lt;div class="ox-hugo-toc toc"&gt;
&lt;div class="heading"&gt;Table of Contents&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#questions-and-remarks-on-pthash-paper" &gt;Questions and remarks on PTHash paper&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#ideas-for-improvement" &gt;Ideas for improvement&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#parameters" &gt;Parameters&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#align-packed-vectors-to-cachelines" &gt;Align packed vectors to cachelines&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#prefetching" &gt;Prefetching&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#faster-modulo-operations" &gt;Faster modulo operations&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#store-dictionary-d-sorted-using-elias-fano-coding" &gt;Store dictionary \(D\) sorted using Elias-Fano coding&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#how-many-bits-of-n-and-hash-entropy-do-we-need" &gt;How many bits of \(n\) and hash entropy do we need?&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#ideas-for-faster-construction" &gt;Ideas for faster construction&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#implementation-log" &gt;Implementation log&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#hashing-function" &gt;Hashing function&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#bitpacking-crates" &gt;Bitpacking crates&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#construction" &gt;Construction&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#fastmod" &gt;Fastmod&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#try-out-fastdivide-and-reciprocal-crates" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Try out &lt;code&gt;fastdivide&lt;/code&gt; and &lt;code&gt;reciprocal&lt;/code&gt; crates&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#first-benchmark" &gt;First benchmark&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#faster-bucket-computation" &gt;Faster bucket computation&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#branchless-for-real-now--aka-the-trick-of-thirds" &gt;Branchless, for real now! (aka the trick-of-thirds)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#compiling-and-benchmarking-pthash" &gt;Compiling and benchmarking PTHash&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#compact-encoding" &gt;Compact encoding&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#find-the-x-differences" &gt;Find the \(x\) differences&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#fastreduce-revisited" &gt;&lt;code&gt;FastReduce&lt;/code&gt; revisited&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#is-there-a-problem-if-gcd--m-n--is-large" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Is there a problem if \(\gcd(m, n)\) is large?&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#faster-hashing" &gt;Faster hashing&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#try-xxhash" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Try xxhash&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#an-experiment" &gt;An experiment&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#compiler-struggles" &gt;Compiler struggles&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#prefetching-at-last" &gt;Prefetching, at last&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#prefetching-with-vectorization" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Prefetching with vectorization&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#inverting-hki" &gt;Inverting \(h(k_i)\)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#another-day-of-progress" &gt;Another day of progress&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#possible-sorting-algorithms" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Possible sorting algorithms&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#diving-into-the-inverse-hash-problem" &gt;Diving into the inverse hash problem&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#bringing-it-home" &gt;Bringing it home&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#hash-inversion-for-faster-pthash-construction" &gt;Hash-inversion for faster PTHash construction&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#fast-path-for-small-buckets" &gt;Fast path for small buckets&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#dictionary-encoding" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Dictionary encoding&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#larger-buckets" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Larger buckets&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#prefetching-free-slots" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Prefetching free slots&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#filling-the-last-few-empty-slots-needs-very-high-k-i" &gt;Filling the last few empty slots needs very high \(k_i\)!&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#perfect-matching-for-the-tail" &gt;Perfect matching for the tail&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#peeling-for-size-1-buckets" &gt;Peeling for size-1 buckets&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#greedy-peeling-1-assigning-from-hard-to-easy" &gt;Greedy peeling 1: Assigning from hard to easy&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#peeling-and-cuckoo-hashing-for-larger-buckets-dot" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Peeling and cuckoo hashing for larger buckets.&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#sunday-morning-ideas" &gt;Sunday morning ideas&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#dinic" &gt;Dinic&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#new-iterative-greedy-assignment-idea" &gt;New iterative greedy assignment idea&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#cuckoo-hashing-again" &gt;Cuckoo hashing, again&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#cuckoo-hashing-displacing-for-real-now" &gt;Cuckoo hashing / displacing, &lt;em&gt;for real now&lt;/em&gt;&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#displacing-globally" &gt;Displacing globally&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#running-it" &gt;Running it&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#limitations" &gt;Limitations&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#cleanup-and-revisiting-defaults" &gt;Cleanup and revisiting defaults&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#sum-instead-of-xor" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Sum instead of xor?&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#revisiting-alpha-1" &gt;Revisiting \(\alpha &amp;lt; 1\)&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#elias-fano-for-the-remap-dictionary" &gt;Elias-Fano for the remap-dictionary&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#global-iterative-prioritizing" &gt;Global iterative prioritizing&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#cleanup-removing-peeling-and-suboptimal-displacing-code" &gt;Cleanup: removing peeling and suboptimal displacing code&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#some-speedups-to-the-displacement-algorithm" &gt;Some speedups to the displacement algorithm&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#runtime-analysis-of-displacement-algorithm" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Runtime analysis of displacement algorithm&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#optimal-prefetching-strategy" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; Optimal prefetching strategy&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#are-we-close-to-the-memory-bandwidth" &gt;Are we close to the memory bandwidth?&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#more-sorting-algorithm-resources" &gt;More sorting algorithm resources&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#and-some-resources-on-partitioning" &gt;And some resources on partitioning&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#partitioning-to-reduce-memory-latency" &gt;Partitioning to reduce memory latency&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#back-from-a-break" &gt;Back from a break!&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#speeding-up-the-search-for-pilots" &gt;Speeding up the search for pilots&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#multiplyreduce" &gt;&lt;code&gt;MultiplyReduce&lt;/code&gt;&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#linux-hugepages" &gt;Linux hugepages?&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#dropping-the-bucket-split" &gt;Dropping the bucket split?&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#build-performance" &gt;Build performance&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#an-alternative" &gt;An alternative&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#query-performance" &gt;Query performance&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#query-memory-bandwidth" &gt;Query memory bandwidth&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#some-more-experiments" &gt;Some more experiments&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#multithreading-benchmark" &gt;Multithreading benchmark&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#multithreading-queries-satisfaction-at-last" &gt;Multithreading queries: satisfaction at last&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#packing-difference-from-expected-position" &gt;Packing difference from expected position&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#local-packing-ideas" &gt;Local packing ideas&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#query-times-for-different-remapping-structures" &gt;Query times for different remapping structures&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#sharding" &gt;Sharding&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#128bit-hashing" &gt;128bit hashing&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="#varying-the-partition-size" &gt;Varying the partition size&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="#ptrhash-part-2" &gt;PtrHash, part 2&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#phobic" &gt;Phobic&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="#for-ptrhash" &gt;&lt;span class="org-todo todo TODO"&gt;TODO&lt;/span&gt; for PtrHash&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;!--endtoc--&gt;
&lt;p&gt;\[
%\newcommand{\mm}{\,\%\,}
\newcommand{\mm}{\bmod}
\newcommand{\lxor}{\oplus}
\newcommand{\K}{\mathcal K}
\]&lt;/p&gt;</description></item></channel></rss>