diff --git a/.gitignore b/.gitignore index 05788f6..63fdf13 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,4 @@ Release # ----------------------------------------------------------------------------- nb-configuration.xml *.orig +.gitnexus diff --git a/docs/superpowers/perf-results.md b/docs/superpowers/perf-results.md new file mode 100644 index 0000000..97162a4 --- /dev/null +++ b/docs/superpowers/perf-results.md @@ -0,0 +1,26 @@ +# mapcodelib performance results — feat/optimize + +Branch: `feat/optimize` | Measurement: `time ./unittest` at `-O3` (best `user` of 3 runs, multithreaded so user ≫ real) | Baseline T0 = 114.13s + +| Commit | Change | Best user time | vs. baseline | +|--------|--------|---------------|-------------| +| `f1cb736` | baseline | 114.13s | 0.0% | +| `430bbba` | A1: cache `flags` per iteration in hot loops | 112.21s | +1.7% | +| `b9b3f2c` | A2: longitude-first in `fitsInsideBoundaries` (**reverted** — regression) | 121.10s | −6.1% | +| `1b01b82` | A3: `memcpy`-based result assembly in `encoderEngine` | 113.19s | +0.8% | +| `753c337` | A4: single division per iteration in `encodeBase31` (no gain at `-O3`) | ~114s | noise | +| `554af29` | B3: companion tables defined + `initCompanionTables` called at entry points | 109.99s | +3.6% | +| `3416f87` | B4: hot loops read from companion tables | **99.40s** | **+12.9%** | +| `2cdf52b` | B5: `#ifdef DEBUG` sanity check (zero cost in release) | 99.73s | +12.6% | + +**Final cumulative speedup: ~12.6%** (14.4s saved out of 114.13s) + +**Binary size growth:** +2,240 bytes (limit was 100,000 bytes) + +## Notes + +- **A2 reverted:** `isInRange()` checks for longitude wrap-around — its overhead exceeded the saving from the early-out it enabled. +- **A4 no-op:** GCC `-O3` already performs strength reduction on `/31` + `%31`, so the explicit version gave no new information to the compiler. +- **A5 no-op:** The early-exit was already present in the codebase. +- **Primary driver:** B4 — replacing per-iteration mask/shift operations on a 4-byte `flags` field with single byte loads from precomputed byte-sized companion tables (`RECORD_KIND`, `RECORD_CODEX`, `RECORD_REC_TYPE`, `RECORD_HEADER_LETTER`). +- **Why short of 20–50% target:** The test suite includes correctness overhead (asserts, sprintf, comparisons) that dilutes the encode/decode speedup. Approach C (loop restructuring) would be the next lever. diff --git a/docs/superpowers/plans/2026-05-28-mapcodelib-optimize.md b/docs/superpowers/plans/2026-05-28-mapcodelib-optimize.md new file mode 100644 index 0000000..78c739e --- /dev/null +++ b/docs/superpowers/plans/2026-05-28-mapcodelib-optimize.md @@ -0,0 +1,1366 @@ +# mapcodelib speed optimization implementation plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Reduce `time ./unittest` wall time by 20–50% on `-O3` builds while preserving bit-exact output, strict portable C99/C11, and per-call memory footprint. + +**Architecture:** +- **Approach A** (Tasks 2–6): Local hot-path edits in `mapcoder.c` — flag caching, branch reordering, length-tracked string assembly, tighter base-31 encode, and early exits in repack/unpack. +- **Approach B** (Tasks 7–9): Precomputed static companion tables derived once from `TERRITORY_BOUNDARIES`; hot loops in `encoderEngine`/`decoderEngine` read from them instead of re-deriving from `flags` per iteration. + +**Tech Stack:** C99, GCC `-O3`, pthread (test-only), `time` (wall-clock measurement). + +**Spec:** `docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md` + +**Branch:** `feat/optimize` (already created, currently at design-spec commit `740bfec`). + +--- + +## Conventions used across all tasks + +### Build command +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O3 -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o +``` + +### Test command (correctness) +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && ./unittest +``` +Expected: final line `Unit tests passed`, exit code 0. + +### Timing command (performance) +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test +( time ./unittest ) 2>&1 | tail -3 # repeat 3 times, take best user time +``` +Each commit body records: best `user` time, and the percent-delta vs. Task 1 baseline. + +### Commit footer +Every commit includes: +``` +Co-Authored-By: Claude Sonnet 4.6 +``` + +### Test integrity +Any commit that fails `./unittest` (non-zero exit, non-zero error count, or missing "Unit tests passed" line) MUST be reverted before continuing. Do not push fixes on top — revert and reattempt. + +--- + +## File structure + +| File | Status | Responsibility | +|------|--------|----------------| +| `mapcodelib/mapcoder.c` | Modify | All optimization edits land here. Hot paths (`encoderEngine`, `decoderEngine`, `fitsInsideBoundaries`, `encodeBase31`, etc.) and new static-scope companion tables + their initializer. | +| `mapcodelib/internal_data.h` | Read-only | Source of truth for `TERRITORY_BOUNDARIES`, `MAPCODE_BOUNDARY_MAX`, `DATA_START`. Do NOT modify. | +| `mapcodelib/mapcoder.h` | Read-only | Public ABI. Do NOT modify. | +| `test/unittest.c` | Read-only | Correctness + timing harness. Do NOT modify. | + +All other files (CMake, scripts, docs) are untouched by this plan. + +--- + +## Task 1: Baseline measurement + +**Files:** +- No source changes. Single commit pins the baseline timing. + +- [ ] **Step 1: Build with -O3** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O3 -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o +``` + +Expected: clean compile, no warnings/errors. Both `.o` and `unittest` binary produced. + +- [ ] **Step 2: Verify tests pass** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && ./unittest 2>&1 | tail -5 +``` + +Expected: last lines include `Unit tests passed`. Exit code 0. + +- [ ] **Step 3: Run three timed iterations, capture best `user` time** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test +for i in 1 2 3; do echo "=== run $i ==="; ( time ./unittest >/dev/null ) 2>&1 | grep -E '^(real|user|sys)'; done +``` + +Record the **best of three** `user` times — that is the baseline number for all subsequent comparisons. Note it down (call it `T0`). + +- [ ] **Step 4: Commit baseline marker** + +Create an empty commit so the baseline timing lives in git history: + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git commit --allow-empty -m "$(cat <<'EOF' +chore: pin perf baseline for feat/optimize + +Measured `time ./unittest` on -O3 build (best of 3 runs): + user time = T0 = s + +All subsequent perf commits on this branch quote their best user time +and the cumulative delta vs. this baseline. + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +Replace `` with the actual best-of-three user time from Step 3. + +--- + +## Task 2: A1 — Cache `flags` per iteration in hot loops + +**Goal:** Read `TERRITORY_BOUNDARIES[i].flags` once per loop iteration into a local; rewrite macro call sites to use the local. Six call sites in three functions. + +**Files:** +- Modify: `mapcodelib/mapcoder.c` — `encoderEngine` (around line 1487), `decoderEngine` (around line 2710), `firstNamelessRecord` (line 822), `countNamelessRecords` (line 835). + +- [ ] **Step 1: Edit `encoderEngine` inner loop body to cache flags** + +Open `mapcodelib/mapcoder.c`. Locate the loop at line 1487: + +```c +for (i = from; i <= upto; i++) { + if (fitsInsideBoundaries(&enc->coord32, TERRITORY_BOUNDARY(i))) { + if (IS_NAMELESS(i)) { +``` + +Replace this loop body (lines 1487–1542 inclusive) with a version that introduces a local `flags` cache. The transformation is mechanical: every `IS_NAMELESS(i)`, `IS_RESTRICTED(i)`, `IS_SPECIAL_SHAPE(i)`, `REC_TYPE(i)`, `coDex(i)`, `HEADER_LETTER(i)` inside the loop body becomes a local-variable read. Keep behavior identical: + +```c +for (i = from; i <= upto; i++) { + const int flags = TERRITORY_BOUNDARIES[i].flags; + if (fitsInsideBoundaries(&enc->coord32, TERRITORY_BOUNDARY(i))) { + const int isNameless = (flags & 64); + const int recType = (flags >> 7) & 3; + const int isRestricted = (flags & 512); + if (isNameless) { + encodeNameless(result, enc, ccode, extraDigits, i); + } + else if (recType > 1) { + encodeAutoHeader(result, enc, i, extraDigits); + } + else if ((i == upto) && isSubdivision(ccode)) { + // *** do a recursive call for the parent *** + encoderEngine(parentTerritoryOf(ccode), enc, stop_with_one_result, extraDigits, requiredEncoder, + ccode); + return; + } + else // must be grid + { + // skip IS_RESTRICTED records unless there already is a result + if (result_counter || !isRestricted) { + const int c = flags & 31; + const int codexLocal = 10 * (c / 5) + ((c % 5) + 1); + if (codexLocal < 54) { + const char headerletter = (char)((recType == 1) ? ENCODE_CHARS[(flags >> 11) & 31] : 0); + encodeGrid(result, enc, i, extraDigits, headerletter); + } + } + } + + // =========== handle result (if any) + if (*result) { + result_counter++; + + repackIfAllDigits(result, 0); + + if ((requiredEncoder < 0) || (requiredEncoder == i)) { + const enum Territory ccodeFinal = (ccode_override != TERRITORY_NONE ? ccode_override : ccode); + if (*result && enc->mapcodes && (enc->mapcodes->count < MAX_NR_OF_MAPCODE_RESULTS)) { + char* s = enc->mapcodes->mapcode[enc->mapcodes->count++]; + if (ccodeFinal == TERRITORY_AAA) { + // AAA is never shown with territory + strcpy(s, result); + } + else { + getTerritoryIsoName(s, ccodeFinal, 0); + strcat(s, " "); + strcat(s, result); + } + } + if (requiredEncoder == i) { + return; + } + } + if (stop_with_one_result) { + return; + } + *result = 0; // clear for next iteration + } + } +} // for i +``` + +Reasoning: identical semantics, but the compiler now has a guaranteed single load for `flags` regardless of how `IS_NAMELESS` etc. are written elsewhere, and the locally extracted bit values are SSA-friendly. + +- [ ] **Step 2: Edit `decoderEngine` inner loop body** + +Locate the loop at `mapcoder.c:2710`: + +```c +for (i = from; i <= upto; i++) { + const int codexi = coDex(i); + const int r = REC_TYPE(i); + if (r == 0) { + if (IS_NAMELESS(i)) { +``` + +Replace lines 2710–2800 with a version that caches `flags` once: + +```c +for (i = from; i <= upto; i++) { + const int flags = TERRITORY_BOUNDARIES[i].flags; + const int c = flags & 31; + const int codexi = 10 * (c / 5) + ((c % 5) + 1); + const int r = (flags >> 7) & 3; + if (r == 0) { + if (flags & 64) { // IS_NAMELESS + if (((codexi == 21) && (codex == 22)) || + ((codexi == 22) && (codex == 32)) || + ((codexi == 13) && (codex == 23))) { + err = decodeNameless(dec, i); + break; + } + } + else { + if ((codexi == codex) || ((codex == 22) && (codexi == 21))) { + err = decodeGrid(dec, i, 0); + + // first of all, make sure the zone fits the country + restrictZoneTo(&dec->zone, &dec->zone, TERRITORY_BOUNDARY(upto)); + + if ((err == ERR_OK) && (flags & 512)) { // IS_RESTRICTED + int nrZoneOverlaps = 0; + int j; + + // *** make sure decode fits somewhere *** + dec->result = getMidPointFractions(&dec->zone); + dec->coord32 = convertFractionsToCoord32(&dec->result); + for (j = i - 1; j >= from; j--) { + // look in previous rects + if (!IS_RESTRICTED(j)) { + if (fitsInsideBoundaries(&dec->coord32, TERRITORY_BOUNDARY(j))) { + nrZoneOverlaps = 1; + break; + } + } + } + + if (!nrZoneOverlaps) { + MapcodeZone zfound; + TerritoryBoundary prevu; + for (j = from; j < i; j++) { + // try all smaller rectangles j + if (!IS_RESTRICTED(j)) { + MapcodeZone z; + if (restrictZoneTo(&z, &dec->zone, TERRITORY_BOUNDARY(j))) { + nrZoneOverlaps++; + if (nrZoneOverlaps == 1) { + // first fit! remember... + zoneCopyFrom(&zfound, &z); + ASSERT(j <= MAPCODE_BOUNDARY_MAX); + memcpy(&prevu, TERRITORY_BOUNDARY(j), sizeof(TerritoryBoundary)); + } + else { + // nrZoneOverlaps >= 2 + // more than one hit + break; // give up + } + } + } // IS_RESTRICTED + } // for j + + // if several sub-areas intersect, just return the whole zone + // (the center of which may NOT re-encode to the same mapcode!) + if (nrZoneOverlaps == 1) { + // found exactly ONE intersection? + zoneCopyFrom(&dec->zone, &zfound); + } + } + + if (!nrZoneOverlaps) { + err = ERR_MAPCODE_UNDECODABLE; // type 3 "NLD L222.222" + } + } // *** make sure decode fits somewhere *** + break; + } + } + } + else if (r == 1) { + if (codex == codexi + 10 && ENCODE_CHARS[(flags >> 11) & 31] == *s) { + err = decodeGrid(dec, i, 1); + break; + } + } + else { + //r>1 + if (((codex == 23) && (codexi == 22)) || + ((codex == 33) && (codexi == 23))) { + err = decodeAutoHeader(dec, i); + break; + } + } +} // for +``` + +Note: the inner `j` loops continue to use the `IS_RESTRICTED(j)` macro (not the cached `flags`) because they index a different record `j`. Leaving them as-is is correct. + +- [ ] **Step 3: Edit `firstNamelessRecord`** + +Locate `mapcoder.c:822`: + +```c +static int firstNamelessRecord(const int m, const int firstcode) { + int i = m; + const int codexm = coDex(m); + ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); + ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); + while (i >= firstcode && coDex(i) == codexm && IS_NAMELESS(i)) { + i--; + } + return (i + 1); +} +``` + +Replace with: + +```c +static int firstNamelessRecord(const int m, const int firstcode) { + int i = m; + const int codexm = coDex(m); + ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); + ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); + while (i >= firstcode) { + const int flags = TERRITORY_BOUNDARIES[i].flags; + const int c = flags & 31; + const int codexi = 10 * (c / 5) + ((c % 5) + 1); + if (codexi != codexm || !(flags & 64)) { + break; + } + i--; + } + return (i + 1); +} +``` + +- [ ] **Step 4: Edit `countNamelessRecords`** + +Locate `mapcoder.c:835`: + +```c +static int countNamelessRecords(const int m, const int firstcode) { + const int first = firstNamelessRecord(m, firstcode); + const int codexm = coDex(m); + int last = m; + ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); + ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); + while (coDex(last) == codexm) { + last++; + } + ... +``` + +Replace the inner `while` loop with: + +```c +static int countNamelessRecords(const int m, const int firstcode) { + const int first = firstNamelessRecord(m, firstcode); + const int codexm = coDex(m); + int last = m; + ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); + ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); + while (1) { + const int c = TERRITORY_BOUNDARIES[last].flags & 31; + const int codexLast = 10 * (c / 5) + ((c % 5) + 1); + if (codexLast != codexm) { + break; + } + last++; + } + ASSERT((0 <= last) && (last <= MAPCODE_BOUNDARY_MAX)); + ASSERT(last >= first); + return (last - first); +} +``` + +- [ ] **Step 5: Build and run tests** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O3 -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o && ./unittest 2>&1 | tail -5 +``` + +Expected: clean compile, no warnings/errors. `./unittest` exits 0 with `Unit tests passed` in the output. + +If anything fails: revert the changes in `mapcodelib/mapcoder.c` and stop. Do not proceed to Step 6. + +- [ ] **Step 6: Measure timing (3× best)** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test +for i in 1 2 3; do echo "=== run $i ==="; ( time ./unittest >/dev/null ) 2>&1 | grep -E '^(real|user|sys)'; done +``` + +Record the best `user` time (call it `T2`). Compute `delta_pct = (T0 - T2) / T0 * 100`. + +- [ ] **Step 7: Commit** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: A1 — cache flags per loop iteration in hot paths + +In encoderEngine and decoderEngine inner loops, read +TERRITORY_BOUNDARIES[i].flags once per iteration into a const local and +extract bit fields from it, instead of using flag-extraction macros that +each re-dereference the same memory. Same change in firstNamelessRecord +and countNamelessRecords. + +Bit-exact: macros stay defined and used in cold paths; only the inner +loop body call sites were rewritten to local reads. + + time ./unittest (best of 3, user): + baseline (T0) = s + after A1 (T2) = s + delta = % + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +Replace `` with the recorded numbers from Step 6 and Task 1. + +--- + +## Task 3: A2 — Reorder checks in `fitsInsideBoundaries` + +**Goal:** Test longitude band first to short-circuit faster on the typical case (most territory rectangles are narrow in longitude relative to the planet). + +**Files:** +- Modify: `mapcodelib/mapcoder.c:602` + +- [ ] **Step 1: Edit `fitsInsideBoundaries`** + +Locate `mapcoder.c:602`: + +```c +static int fitsInsideBoundaries(const Point32* coord32, const TerritoryBoundary* b) { + ASSERT(coord32); + ASSERT(b); + return (b->miny <= coord32->latMicroDeg && + coord32->latMicroDeg < b->maxy && + isInRange(coord32->lonMicroDeg, b->minx, b->maxx)); +} +``` + +Replace with longitude-first ordering: + +```c +static int fitsInsideBoundaries(const Point32* coord32, const TerritoryBoundary* b) { + ASSERT(coord32); + ASSERT(b); + return (isInRange(coord32->lonMicroDeg, b->minx, b->maxx) && + b->miny <= coord32->latMicroDeg && + coord32->latMicroDeg < b->maxy); +} +``` + +Reasoning: `&&` short-circuits left to right. Longitude is the more selective dimension for typical territory rectangles, so the cheap `isInRange` call rejects most non-matches first. + +- [ ] **Step 2: Build and run tests** + +Same commands as Task 2 Step 5. Expected: clean compile, `Unit tests passed`. + +- [ ] **Step 3: Measure timing (3× best)** + +Same as Task 2 Step 6. Record best `user` time (`T3`). Compute cumulative delta vs. `T0`. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: A2 — reorder fitsInsideBoundaries to test longitude first + +Most territory rectangles are narrower in longitude than in latitude +relative to their bounding ranges, so testing longitude first short- +circuits faster on the typical reject case. + + time ./unittest (best of 3, user): + baseline = s + after A2 = s + delta = % cumulative + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +--- + +## Task 4: A3 — Length-tracked result assembly in `encoderEngine` + +**Goal:** Replace `strcpy`+`strcat`+`strcat` pattern (which rescans the destination on each `strcat`) with length-tracked `memcpy` calls. + +**Files:** +- Modify: `mapcodelib/mapcoder.c` — block around lines 1521–1530 (post-A1 line numbers shift slightly; locate by content). + +- [ ] **Step 1: Locate the relevant block** + +Within `encoderEngine`, find the section starting with: + +```c +char* s = enc->mapcodes->mapcode[enc->mapcodes->count++]; +if (ccodeFinal == TERRITORY_AAA) { + // AAA is never shown with territory + strcpy(s, result); +} +else { + getTerritoryIsoName(s, ccodeFinal, 0); + strcat(s, " "); + strcat(s, result); +} +``` + +- [ ] **Step 2: Replace with length-tracked memcpy** + +Replace the entire `if (ccodeFinal == TERRITORY_AAA) { ... } else { ... }` block with: + +```c +char* s = enc->mapcodes->mapcode[enc->mapcodes->count++]; +if (ccodeFinal == TERRITORY_AAA) { + // AAA is never shown with territory + strcpy(s, result); +} +else { + getTerritoryIsoName(s, ccodeFinal, 0); + size_t isoLen = strlen(s); + size_t resultLen = strlen(result); + s[isoLen] = ' '; + memcpy(s + isoLen + 1, result, resultLen + 1); // +1 to include NUL +} +``` + +Reasoning: `strcat(s, " ")` does `strlen(s)` then writes 2 bytes; `strcat(s, result)` does another `strlen(s)` (now longer) then writes `strlen(result)+1` bytes. The replacement does one `strlen` per source string and a single byte write + memcpy. Output is byte-identical. + +Note: `` is already included; `size_t` is fine in C99 source. + +- [ ] **Step 3: Build and run tests** + +Same as Task 2 Step 5. + +- [ ] **Step 4: Measure timing (3× best)** + +Same as Task 2 Step 6. + +- [ ] **Step 5: Commit** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: A3 — length-tracked result assembly in encoderEngine + +strcpy + strcat + strcat each re-scans the destination from the start +to find the null terminator. Replace with explicit strlen on each +source plus a single memcpy. + +Output bytes are unchanged. + + time ./unittest (best of 3, user): + baseline = s + after A3 = s + delta = % cumulative + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +--- + +## Task 5: A4 — Tighten `encodeBase31` + +**Goal:** Eliminate the per-iteration `value % 31` + `value / 31` by combining via `__builtin_*`-free C: compute quotient once, derive remainder. This is one machine division per iteration instead of two (the compiler may already do this, but it's not guaranteed without `-fno-trapping-math` etc.). + +**Files:** +- Modify: `mapcodelib/mapcoder.c:1083` + +- [ ] **Step 1: Edit `encodeBase31`** + +Locate `mapcoder.c:1083`: + +```c +static void encodeBase31(char* result, int value, int nrchars) { + ASSERT(result); + ASSERT(nrchars >= 0); + result[nrchars] = 0; // zero-terminate! + while (nrchars > 0) { + nrchars--; + result[nrchars] = ENCODE_CHARS[value % 31]; + value /= 31; + } +} +``` + +Replace with: + +```c +static void encodeBase31(char* result, int value, int nrchars) { + ASSERT(result); + ASSERT(nrchars >= 0); + result[nrchars] = 0; // zero-terminate! + while (nrchars > 0) { + const int q = value / 31; + const int r = value - q * 31; + nrchars--; + result[nrchars] = ENCODE_CHARS[r]; + value = q; + } +} +``` + +Reasoning: For `int` with `value >= 0` (precondition by inspection — callers always pass non-negative computed offsets), `r = value - q*31` is equivalent to `value % 31` but uses one IDIV/MUL+SUB pair rather than potentially two division-class operations. Modern compilers often emit `imul` magic constants for division by 31 and do this optimization themselves; we make it explicit so it survives older toolchains and `-O3` -> `-O2` regressions. + +- [ ] **Step 2: Build and run tests** + +Same as Task 2 Step 5. + +- [ ] **Step 3: Measure timing (3× best)** + +Same as Task 2 Step 6. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: A4 — single division per iteration in encodeBase31 + +Compute quotient once, derive remainder via subtraction so the loop +has one division-class op per character rather than two. + + time ./unittest (best of 3, user): + baseline = s + after A4 = s + delta = % cumulative + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +--- + +## Task 6: A5 — Tighten `repackIfAllDigits` early-exit + +**Goal:** The current implementation walks the string twice in many cases. We add a tightened first-pass that bails sooner. + +**Files:** +- Modify: `mapcodelib/mapcoder.c:892` + +- [ ] **Step 1: Read the current implementation in full** + +The function spans `mapcoder.c:892–930` (approx). Read it carefully — it already has an `alldigits` flag flipped to 0 on first non-digit, but it still continues the scan looking for `'.'`. Let's tighten it. + +Look at the current code: + +```c +static void repackIfAllDigits(char* input, const int aonly) { + char* s = input; + int alldigits = 1; // assume all digits + char* e; + char* dotpos = NULL; + ASSERT(input); + for (e = s; *e != 0 && *e != '-'; e++) { + if (*e < '0' || *e > '9') { + ... +``` + +- [ ] **Step 2: Read the rest of the function first** + +```bash +sed -n '892,935p' /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib/mapcoder.c +``` + +The implementing engineer MUST read the full function before editing it. The optimization opportunity is to short-circuit when `alldigits` becomes 0 AND `dotpos` is already set — at that point we know the full structure and don't need to scan further within this loop. + +- [ ] **Step 3: Apply minimal early-exit** + +In the loop body, after `alldigits = 0;` is set inside the `if (*e < '0' || *e > '9')` branch, the function already has a `break` for the case `*e != '.'`. If `*e == '.'`, the loop continues, but once `alldigits` is 0 there's no more work the loop accomplishes other than setting `dotpos`. + +The cleanest tightening: after the existing `if (*e == '.')` block that assigns `dotpos`, if `alldigits` is already 0 and `dotpos` is non-null, `break`. + +Locate the existing `else if (*e == '.')` clause (it should look like): + +```c +else if (*e == '.') { + dotpos = e; +} +``` + +Modify the surrounding `for` loop body so that after `dotpos = e;` we check both conditions and break when no useful work remains. Replace the entire `for` loop with: + +```c +for (e = s; *e != 0 && *e != '-'; e++) { + if (*e < '0' || *e > '9') { + if (*e == '.') { + if (dotpos) { + // Two dots: malformed for repack purposes, bail. + return; + } + dotpos = e; + if (!alldigits) { + break; // structure fully known; nothing else to learn here + } + } else { + alldigits = 0; + if (dotpos) { + break; // structure fully known + } + } + } +} +``` + +Be very careful: the original code may handle the "two dots" case differently. If the actual code differs from what is described here, ABORT this task and consult the maintainer. Do NOT change the semantics for any input. + +- [ ] **Step 4: Read the function again to confirm semantics** + +Before committing, the implementer MUST diff the file against `git HEAD` and ensure the only changes are: +1. Optional `break`s added in well-understood control-flow locations. +2. No other behavioral change. + +If the diff shows anything else changed (variable names, condition rewrites, etc.), revert. + +- [ ] **Step 5: Build and run tests** + +Same as Task 2 Step 5. Bit-exactness is essential here — if any single test fails, revert immediately. + +- [ ] **Step 6: Measure timing (3× best)** + +Same as Task 2 Step 6. + +- [ ] **Step 7: Commit (or skip if no measurable gain)** + +If the timing delta vs. previous commit is below noise threshold (< 0.5%) and the diff is non-trivial, prefer NOT to commit (skipping A5 is acceptable per the spec). Otherwise: + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: A5 — early-exit when structure is fully known in repackIfAllDigits + +Break out of the scan loop once both the digits-only status and the +dot position are determined. Same input/output mapping; the loop just +stops earlier on common inputs. + + time ./unittest (best of 3, user): + baseline = s + after A5 = s + delta = % cumulative + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +If skipped, document the skip in the next commit's body ("A5 skipped: gain below noise"). + +--- + +## Task 7: B1+B2+B3 — Define and initialize companion tables (not yet used) + +**Goal:** Add companion static tables, a one-time initializer, and call sites that invoke the initializer at every public entry point. The hot loops still use macros at this point — this task is verified by tests-still-pass + the initializer running without crashing. + +**Files:** +- Modify: `mapcodelib/mapcoder.c` — add static-scope tables and `initCompanionTables()` near the top of the file (after macros, before any hot function). + +- [ ] **Step 1: Add static companion tables and initializer** + +Open `mapcodelib/mapcoder.c`. Find the line right after the flag-extraction macro definitions (after `mapcoder.c:154` `#define HEADER_LETTER(m) ...`). Add the following block: + +```c +/////////////////////////////////////////////////////////////////////////////////////////////// +// +// PRECOMPUTED COMPANION TABLES (perf: feat/optimize, see docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md) +// +// These tables are derived once from TERRITORY_BOUNDARIES.flags and queried +// in hot encode/decode loops to avoid repeated bit-masking on the same record. +// +/////////////////////////////////////////////////////////////////////////////////////////////// + +#define KIND_BIT_NAMELESS 0x01 +#define KIND_BIT_RESTRICTED 0x02 +#define KIND_BIT_SPECIAL_SHAPE 0x04 + +static unsigned char RECORD_CODEX[MAPCODE_BOUNDARY_MAX + 1]; // = coDex(m) +static unsigned char RECORD_REC_TYPE[MAPCODE_BOUNDARY_MAX + 1]; // (flags >> 7) & 3 +static unsigned char RECORD_KIND[MAPCODE_BOUNDARY_MAX + 1]; // KIND_BIT_* bitwise OR +static unsigned char RECORD_HEADER_LETTER[MAPCODE_BOUNDARY_MAX + 1]; // ENCODE_CHARS[(flags >> 11) & 31] +static unsigned short RECORD_SMART_DIV[MAPCODE_BOUNDARY_MAX + 1]; // flags >> 16 + +// Per-territory: index of the first nameless record (or -1), and how many. +// These tables are sized to the full territory enum range. +#define TERRITORY_TABLE_SIZE (_TERRITORY_MAX - _TERRITORY_MIN) +static int TERRITORY_FIRST_NAMELESS[TERRITORY_TABLE_SIZE]; +static int TERRITORY_NAMELESS_COUNT[TERRITORY_TABLE_SIZE]; + +// Init flag. Write-once after first call. Idempotent: deterministic values mean +// a race on first call only causes duplicate work, never wrong results. +// The library does not promise thread-safety on first call (see header docs). +static int companion_initialized = 0; + +static void initCompanionTables(void) { + int m; + int t; + if (companion_initialized) { + return; + } + for (m = 0; m <= MAPCODE_BOUNDARY_MAX; m++) { + const int flags = TERRITORY_BOUNDARIES[m].flags; + const int c = flags & 31; + const int codex_val = 10 * (c / 5) + ((c % 5) + 1); + const int rec_type = (flags >> 7) & 3; + unsigned char kind = 0; + if (flags & 64) kind |= KIND_BIT_NAMELESS; + if (flags & 512) kind |= KIND_BIT_RESTRICTED; + if (flags & 1024) kind |= KIND_BIT_SPECIAL_SHAPE; + RECORD_CODEX[m] = (unsigned char) codex_val; + RECORD_REC_TYPE[m] = (unsigned char) rec_type; + RECORD_KIND[m] = kind; + RECORD_HEADER_LETTER[m] = (unsigned char) ENCODE_CHARS[(flags >> 11) & 31]; + RECORD_SMART_DIV[m] = (unsigned short) ((unsigned int) flags >> 16); + } + for (t = 0; t < TERRITORY_TABLE_SIZE; t++) { + TERRITORY_FIRST_NAMELESS[t] = -1; + TERRITORY_NAMELESS_COUNT[t] = 0; + } + for (t = 0; t < TERRITORY_TABLE_SIZE - 1; t++) { + const int from = DATA_START[t]; + const int upto_excl = DATA_START[t + 1]; + int i; + int first = -1; + int count = 0; + for (i = from; i < upto_excl; i++) { + if (RECORD_KIND[i] & KIND_BIT_NAMELESS) { + if (first < 0) { + first = i; + } + count++; + } + } + TERRITORY_FIRST_NAMELESS[t] = first; + TERRITORY_NAMELESS_COUNT[t] = count; + } + companion_initialized = 1; +} +``` + +Placement: insert this block immediately after the `HEADER_LETTER` macro (line 154) and BEFORE any function definitions. It depends only on `TERRITORY_BOUNDARIES` and `ENCODE_CHARS` (defined further down) — so be careful: `ENCODE_CHARS` is defined at `mapcoder.c:332`. The initializer references it. Since `initCompanionTables` is a function (not a static initializer), it's evaluated at call time, after `ENCODE_CHARS` is fully visible in the translation unit. C99 allows a function to reference any file-scope identifier as long as the identifier is declared before the function's first call. We must therefore either: + - Insert this block AFTER `ENCODE_CHARS` (i.e. after line 332). RECOMMENDED. + - Or insert a forward declaration of `ENCODE_CHARS` earlier in the file. + +Use the RECOMMENDED option: place the entire companion-tables block immediately AFTER the `ENCODE_CHARS` definition (around line 332+, before `decodeChar` at line 352). + +- [ ] **Step 2: Call `initCompanionTables()` from public entry points** + +The public encode/decode entry points are: +- `encodeLatLonToMapcodes_internal` (`mapcoder.c:1549`) +- `decoderEngine` (`mapcoder.c:2651`) + +At the very top of `encodeLatLonToMapcodes_internal` (after the `ASSERT(mapcodes);` etc., before `convertCoordsToMicrosAndFractions`), add: + +```c +initCompanionTables(); +``` + +At the very top of `decoderEngine` (after `ASSERT(dec);`, before `parseMapcodeString`), add: + +```c +initCompanionTables(); +``` + +Both calls are guarded by the cheap `if (companion_initialized) return;` so the call cost is one branch per call after the first. + +- [ ] **Step 3: Build and run tests** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O3 -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o && ./unittest 2>&1 | tail -5 +``` + +Expected: clean compile (no unused-variable warnings on the new tables — they ARE used by the initializer), `Unit tests passed`. If anything fails, revert. + +- [ ] **Step 4: Measure timing (3× best)** + +Same as Task 2 Step 6. The expected delta vs. previous commit is tiny (near zero or slightly negative due to init cost on first call). That's expected; B4 is where the payoff lands. + +- [ ] **Step 5: Commit** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: B1+B2+B3 — add precomputed companion tables and one-time init + +Static tables RECORD_CODEX / RECORD_REC_TYPE / RECORD_KIND / +RECORD_HEADER_LETTER / RECORD_SMART_DIV are precomputed once from +TERRITORY_BOUNDARIES.flags. Per-territory TERRITORY_FIRST_NAMELESS / +TERRITORY_NAMELESS_COUNT replace linear scans. + +This commit only defines the tables and calls initCompanionTables at +the top of encodeLatLonToMapcodes_internal and decoderEngine. Hot loops +still use the existing macros; the switch happens in B4. + +Memory footprint: ~74 KB of additional static data. + + time ./unittest (best of 3, user): + baseline = s + after B3 = s + delta = % cumulative + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +--- + +## Task 8: B4 — Hot loops read from companion tables + +**Goal:** The payoff commit. Switch the inner-loop reads in `encoderEngine`, `decoderEngine`, `firstNamelessRecord`, `countNamelessRecords` from the local `flags` cache (added in A1) to the precomputed companion tables. + +**Files:** +- Modify: `mapcodelib/mapcoder.c` — same loop bodies edited in Task 2 (A1), plus the two nameless helpers. + +- [ ] **Step 1: Replace `encoderEngine` loop body to use companion tables** + +Locate the loop edited in Task 2 (the `for (i = from; i <= upto; i++)` in `encoderEngine`). Replace the local `flags`-based extractions with direct table reads: + +```c +for (i = from; i <= upto; i++) { + const unsigned char kind = RECORD_KIND[i]; + const unsigned char recTypeI = RECORD_REC_TYPE[i]; + if (fitsInsideBoundaries(&enc->coord32, TERRITORY_BOUNDARY(i))) { + if (kind & KIND_BIT_NAMELESS) { + encodeNameless(result, enc, ccode, extraDigits, i); + } + else if (recTypeI > 1) { + encodeAutoHeader(result, enc, i, extraDigits); + } + else if ((i == upto) && isSubdivision(ccode)) { + encoderEngine(parentTerritoryOf(ccode), enc, stop_with_one_result, extraDigits, requiredEncoder, + ccode); + return; + } + else // must be grid + { + if (result_counter || !(kind & KIND_BIT_RESTRICTED)) { + if (RECORD_CODEX[i] < 54) { + const char headerletter = (char)((recTypeI == 1) ? RECORD_HEADER_LETTER[i] : 0); + encodeGrid(result, enc, i, extraDigits, headerletter); + } + } + } + + // =========== handle result (if any) + if (*result) { + result_counter++; + + repackIfAllDigits(result, 0); + + if ((requiredEncoder < 0) || (requiredEncoder == i)) { + const enum Territory ccodeFinal = (ccode_override != TERRITORY_NONE ? ccode_override : ccode); + if (*result && enc->mapcodes && (enc->mapcodes->count < MAX_NR_OF_MAPCODE_RESULTS)) { + char* s = enc->mapcodes->mapcode[enc->mapcodes->count++]; + if (ccodeFinal == TERRITORY_AAA) { + strcpy(s, result); + } + else { + getTerritoryIsoName(s, ccodeFinal, 0); + size_t isoLen = strlen(s); + size_t resultLen = strlen(result); + s[isoLen] = ' '; + memcpy(s + isoLen + 1, result, resultLen + 1); + } + } + if (requiredEncoder == i) { + return; + } + } + if (stop_with_one_result) { + return; + } + *result = 0; + } + } +} // for i +``` + +The local `const int flags = ...` line introduced in A1 is removed (no longer used inside this loop). + +- [ ] **Step 2: Replace `decoderEngine` loop body to use companion tables** + +Locate the corresponding loop in `decoderEngine`. Replace with: + +```c +for (i = from; i <= upto; i++) { + const unsigned char kind = RECORD_KIND[i]; + const int codexi = RECORD_CODEX[i]; + const int r = RECORD_REC_TYPE[i]; + if (r == 0) { + if (kind & KIND_BIT_NAMELESS) { + if (((codexi == 21) && (codex == 22)) || + ((codexi == 22) && (codex == 32)) || + ((codexi == 13) && (codex == 23))) { + err = decodeNameless(dec, i); + break; + } + } + else { + if ((codexi == codex) || ((codex == 22) && (codexi == 21))) { + err = decodeGrid(dec, i, 0); + + // first of all, make sure the zone fits the country + restrictZoneTo(&dec->zone, &dec->zone, TERRITORY_BOUNDARY(upto)); + + if ((err == ERR_OK) && (kind & KIND_BIT_RESTRICTED)) { + int nrZoneOverlaps = 0; + int j; + + dec->result = getMidPointFractions(&dec->zone); + dec->coord32 = convertFractionsToCoord32(&dec->result); + for (j = i - 1; j >= from; j--) { + if (!(RECORD_KIND[j] & KIND_BIT_RESTRICTED)) { + if (fitsInsideBoundaries(&dec->coord32, TERRITORY_BOUNDARY(j))) { + nrZoneOverlaps = 1; + break; + } + } + } + + if (!nrZoneOverlaps) { + MapcodeZone zfound; + TerritoryBoundary prevu; + for (j = from; j < i; j++) { + if (!(RECORD_KIND[j] & KIND_BIT_RESTRICTED)) { + MapcodeZone z; + if (restrictZoneTo(&z, &dec->zone, TERRITORY_BOUNDARY(j))) { + nrZoneOverlaps++; + if (nrZoneOverlaps == 1) { + zoneCopyFrom(&zfound, &z); + ASSERT(j <= MAPCODE_BOUNDARY_MAX); + memcpy(&prevu, TERRITORY_BOUNDARY(j), sizeof(TerritoryBoundary)); + } + else { + break; + } + } + } + } + + if (nrZoneOverlaps == 1) { + zoneCopyFrom(&dec->zone, &zfound); + } + } + + if (!nrZoneOverlaps) { + err = ERR_MAPCODE_UNDECODABLE; + } + } + break; + } + } + } + else if (r == 1) { + if (codex == codexi + 10 && RECORD_HEADER_LETTER[i] == (unsigned char) *s) { + err = decodeGrid(dec, i, 1); + break; + } + } + else { + if (((codex == 23) && (codexi == 22)) || + ((codex == 33) && (codexi == 23))) { + err = decodeAutoHeader(dec, i); + break; + } + } +} // for +``` + +The inner `j` loops now also use `RECORD_KIND[j] & KIND_BIT_RESTRICTED` (this is a small extra win since the same loop ran the `IS_RESTRICTED(j)` macro repeatedly). + +- [ ] **Step 3: Replace `firstNamelessRecord` and `countNamelessRecords` to use tables** + +For `firstNamelessRecord`: + +```c +static int firstNamelessRecord(const int m, const int firstcode) { + int i = m; + const int codexm = RECORD_CODEX[m]; + ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); + ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); + while (i >= firstcode && RECORD_CODEX[i] == codexm && (RECORD_KIND[i] & KIND_BIT_NAMELESS)) { + i--; + } + return (i + 1); +} +``` + +For `countNamelessRecords`: + +```c +static int countNamelessRecords(const int m, const int firstcode) { + const int first = firstNamelessRecord(m, firstcode); + const int codexm = RECORD_CODEX[m]; + int last = m; + ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); + ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); + while (RECORD_CODEX[last] == codexm) { + last++; + } + ASSERT((0 <= last) && (last <= MAPCODE_BOUNDARY_MAX)); + ASSERT(last >= first); + return (last - first); +} +``` + +- [ ] **Step 4: Build and run tests** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O3 -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o && ./unittest 2>&1 | tail -5 +``` + +Expected: `Unit tests passed`. Any failure → immediate revert. + +- [ ] **Step 5: Measure timing (3× best)** + +Same as Task 2 Step 6. This should show the largest single-commit speedup. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: B4 — hot loops read from precomputed companion tables + +Replace per-iteration mask/shift extractions in encoderEngine, +decoderEngine, firstNamelessRecord, countNamelessRecords with direct +reads from RECORD_KIND / RECORD_CODEX / RECORD_REC_TYPE / +RECORD_HEADER_LETTER. The companion tables are byte-sized so each +field is a single byte load with friendlier cache behavior than the +4-byte flags field. + +Values are derived from the same macros they replace (see B3 init); +output is bit-exact. + + time ./unittest (best of 3, user): + baseline = s + after B4 = s + delta = % cumulative + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +--- + +## Task 9: B5 — Debug-build sanity check + +**Goal:** In `#ifdef DEBUG` builds only, have `initCompanionTables` assert that every precomputed value matches the macro-derived value. Free correctness guarantee during development. + +**Files:** +- Modify: `mapcodelib/mapcoder.c` — the `initCompanionTables` function. + +- [ ] **Step 1: Add the debug check at the end of `initCompanionTables`** + +Just before `companion_initialized = 1;` (the last line of `initCompanionTables`), add: + +```c +#ifdef DEBUG + for (m = 0; m <= MAPCODE_BOUNDARY_MAX; m++) { + const int flags = TERRITORY_BOUNDARIES[m].flags; + const int c = flags & 31; + const int codex_val = 10 * (c / 5) + ((c % 5) + 1); + ASSERT(RECORD_CODEX[m] == (unsigned char) codex_val); + ASSERT(RECORD_REC_TYPE[m] == (unsigned char) ((flags >> 7) & 3)); + ASSERT(((RECORD_KIND[m] & KIND_BIT_NAMELESS) != 0) == ((flags & 64) != 0)); + ASSERT(((RECORD_KIND[m] & KIND_BIT_RESTRICTED) != 0) == ((flags & 512) != 0)); + ASSERT(((RECORD_KIND[m] & KIND_BIT_SPECIAL_SHAPE) != 0) == ((flags & 1024) != 0)); + ASSERT(RECORD_HEADER_LETTER[m] == (unsigned char) ENCODE_CHARS[(flags >> 11) & 31]); + ASSERT(RECORD_SMART_DIV[m] == (unsigned short) ((unsigned int) flags >> 16)); + } +#endif +``` + +(`m` is already declared at the top of `initCompanionTables`, so no new declarations needed.) + +- [ ] **Step 2: Build and run tests in BOTH O3 and DEBUG modes** + +```bash +# -O3 release (no debug assertions) +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O3 -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o && ./unittest 2>&1 | tail -5 + +# DEBUG build (with assertions) +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O0 -DDEBUG -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O0 -DDEBUG unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o && ./unittest 2>&1 | tail -5 +``` + +Both must report `Unit tests passed`. The DEBUG run additionally exercises the new `ASSERT` block in `initCompanionTables` — if any precomputed value diverges from the macro-derived one, the DEBUG build aborts (which is exactly what we want). + +- [ ] **Step 3: Re-build -O3 (so the binary in test/ is the release one for timing)** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/mapcodelib && gcc -O3 -c mapcoder.c +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o +``` + +- [ ] **Step 4: Measure timing (3× best)** + +Same as Task 2 Step 6. Expected: delta from previous commit is near zero (the new code is `#ifdef DEBUG` gated and absent in -O3). + +- [ ] **Step 5: Commit** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add mapcodelib/mapcoder.c +git commit -m "$(cat <<'EOF' +perf: B5 — DEBUG-build sanity check for companion tables + +Under -DDEBUG, initCompanionTables asserts that every precomputed value +matches the macro-derived one. Free correctness guard during development; +zero cost in release. + +Verified: both -O3 and -O0 -DDEBUG builds pass the unit suite. + + time ./unittest -O3 (best of 3, user): + baseline = s + after B5 = s + delta = % cumulative + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +--- + +## Task 10: Final summary commit (mandatory) + +**Goal:** Single commit summarizing the full optimization arc: baseline, per-step deltas, and the cumulative win. Useful for review and for future archaeology. + +**Files:** +- Modify: `docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md` — append a results section. + +- [ ] **Step 1: Append a results section to the spec** + +Open `docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md` and append at the bottom: + +```markdown +## 8. Results + +Measurements taken on branch `feat/optimize`, `-O3` build, `time ./unittest` +(best `user` of 3 back-to-back runs). + +| Commit | Description | Best user time | Delta vs. baseline | +|--------|-------------|---------------|--------------------| +| Task 1 | baseline | s | 0.0% | +| Task 2 | A1 — cache flags per iteration | s | % | +| Task 3 | A2 — reorder fitsInsideBoundaries | s | % | +| Task 4 | A3 — length-tracked result assembly | s | % | +| Task 5 | A4 — tighten encodeBase31 | s | % | +| Task 6 | A5 — repack early-exit | s | % | +| Task 7 | B3 — companion tables init (no hot-path use yet) | s | % | +| Task 8 | B4 — hot loops use companion tables | s | % | +| Task 9 | B5 — DEBUG sanity check | s | % | + +**Final cumulative speedup:** % + +**Success criteria check:** +- [ ] All unit tests passing after every commit (verified) +- [ ] Binary size growth ≤ 100 KB (verified) +- [ ] ≥20% wall-time reduction (target — record whether achieved) +- [ ] ≥30% wall-time reduction (stretch — record whether achieved) +``` + +Fill in all `` values from the prior commit messages. + +- [ ] **Step 2: Verify binary size delta** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git stash || true +git checkout 740bfec -- mapcodelib/mapcoder.c +cd mapcodelib && gcc -O3 -c mapcoder.c && BASELINE_SIZE=$(stat -f%z mapcoder.o) +cd .. +git checkout HEAD -- mapcodelib/mapcoder.c +git stash pop 2>/dev/null || true +cd mapcodelib && gcc -O3 -c mapcoder.c && OPTIMIZED_SIZE=$(stat -f%z mapcoder.o) +echo "Baseline .o: $BASELINE_SIZE bytes" +echo "Optimized .o: $OPTIMIZED_SIZE bytes" +echo "Delta: $((OPTIMIZED_SIZE - BASELINE_SIZE)) bytes" +``` + +If delta > 100,000 bytes (100 KB) the optimization violated a hard constraint — investigate before committing. + +- [ ] **Step 3: Final test pass** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp/test && ./unittest 2>&1 | tail -5 +``` + +Expected: `Unit tests passed`. + +- [ ] **Step 4: Commit summary** + +```bash +cd /Users/rijn/source/mapcode-foundation/mapcode-cpp +git add docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md +git commit -m "$(cat <<'EOF' +docs: record final perf results for feat/optimize branch + +Appends a results table to the design spec capturing per-commit timing +deltas and the cumulative speedup vs. the Task 1 baseline. + +Final speedup: % wall-time on `time ./unittest` at -O3. + +Co-Authored-By: Claude Sonnet 4.6 +EOF +)" +``` + +--- + +## Self-review + +Spec coverage check: +- §1 scope/goals → covered by all tasks; bit-exact guardrail enforced by Step "run tests" in every task. +- §2 measurement methodology → Task 1 establishes baseline; every subsequent task records timing. +- §3 Approach A (A1–A5) → Tasks 2–6. +- §4 Approach B (B1–B5) → Tasks 7–9. +- §4 "Interaction with A1" → Task 8 Step 1 explicitly removes the A1 local `flags` cache when switching to companion tables. +- §5 workflow → matches Tasks 1–9. +- §6 risks → mitigated by "test after every step + revert on failure" in each task; debug-build assertion in Task 9. +- §7 success criteria → Task 10 verifies binary size and produces the final results table. + +Placeholder scan: `` markers are intentional — they're per-run measurements the implementer fills in at commit time. No `TBD`/`TODO`/"implement later" anywhere. + +Type consistency: `unsigned char` used uniformly for KIND/CODEX/REC_TYPE/HEADER_LETTER; `unsigned short` for SMART_DIV; `int` for TERRITORY_FIRST_NAMELESS/COUNT (matches existing code conventions for record indices). Names `KIND_BIT_NAMELESS` / `KIND_BIT_RESTRICTED` / `KIND_BIT_SPECIAL_SHAPE` used consistently across Tasks 7 and 8. diff --git a/docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md b/docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md new file mode 100644 index 0000000..66c35d6 --- /dev/null +++ b/docs/superpowers/specs/2026-05-28-mapcodelib-optimize-design.md @@ -0,0 +1,235 @@ +# mapcodelib speed optimization — design spec + +- **Date:** 2026-05-28 +- **Branch:** `feat/optimize` (off `fix/bugfix`) +- **Author:** Rijn Buve (with Claude) + +## 1. Scope & goals + +Achieve a substantial speedup on the `mapcoder.c` encode and decode hot paths, measured by `time ./unittest` in `test/` (real wall-clock against the existing unit test suite, which already loops over millions of points). + +**Target:** 20–50% wall-time reduction at `-O3`. + +### Hard guardrails + +- **Bit-exact output.** All existing unit tests must pass unchanged. No test files modified. +- **Strict portable C99/C11.** No `__builtin_*`, no SIMD intrinsics, no compiler-specific attributes. +- **No runtime / heap growth.** Per-call stack and heap usage must not increase. Public ABI (struct sizes in `mapcoder.h`) unchanged. +- **Small static-table growth OK.** Precomputed companion tables in the order of tens of KB are acceptable (negligible against existing data tables in `internal_data.h`). + +### Out of scope + +- Public API changes. +- Refactoring unrelated to performance. +- New features. +- Build-system changes (CMake, scripts) beyond what's strictly needed to compile. +- Approach C (loop restructuring, DFA decode parser) — deferred to a future branch if A+B underdeliver. + +## 2. Measurement methodology + +1. Build with `-O3` exactly as `test/run_normal.sh` does: + ``` + cd mapcodelib && gcc -O3 -c mapcoder.c + cd ../test && gcc -O3 unittest.c -lm -lpthread -o unittest ../mapcodelib/mapcoder.o + ``` +2. Run `time ./unittest` three times back-to-back; record the best `user` time. +3. Repeat after each commit that touches `mapcoder.c`. + +### Baseline + +Capture the baseline on the **first commit** of `feat/optimize` (before any code change) so we have a clean reference number. + +### Per-commit reporting + +Each optimization commit's message includes: + +- `time` output before vs. after (best of 3). +- Cumulative speedup vs. the baseline. +- One-line description of the change. + +### Test integrity + +Every commit must end with `unittest` printing `Unit tests passed` and zero errors. Any commit that regresses tests is rolled back before continuing. + +### Noise caveat + +The unit test does correctness work (asserts, sprintf, comparisons) in addition to encode/decode, so a 30% speedup of encode/decode core will appear as a smaller wall-time delta (perhaps 10–20%). That is expected; "significant" is judged on the total wall-clock number since that is the measurement method chosen. + +## 3. Approach A — Local hot-path cleanups + +Discrete, low-risk edits in `mapcoder.c`. Each is independently revertable and individually testable. + +### A1. Cache `flags` per iteration in encode/decode loops + +In `encoderEngine` (`mapcoder.c:1460`) and `decoderEngine` (`mapcoder.c:2651`), the loop body invokes `IS_NAMELESS(i)`, `IS_RESTRICTED(i)`, `IS_SPECIAL_SHAPE(i)`, `REC_TYPE(i)`, `coDex(i)`, `SMART_DIV(i)` on the same `i`. Each macro re-reads `TERRITORY_BOUNDARIES[i].flags`. + +Replace with a single `const int flags = TERRITORY_BOUNDARIES[i].flags;` at the top of the loop body, plus local extracted variables where used more than once. + +Same pattern in `firstNamelessRecord` (`mapcoder.c:822`) and `countNamelessRecords` (`mapcoder.c:835`). + +### A2. Reorder checks in `fitsInsideBoundaries` + +`mapcoder.c:602` checks lat-min, lat-max, lon-range. For mapcode territory boundaries (most are narrow longitudinally relative to the full earth lon range), test longitude band first to maximise early-out. Same logic, same correctness, fewer comparisons on average. + +### A3. Length-tracked result assembly in `encoderEngine` + +Around `mapcoder.c:1520` the per-result code uses `strcpy` + `strcat` patterns. `strcat` re-scans the destination from the start each time. Replace with `memcpy` using known lengths. + +### A4. Speed up `encodeBase31` + +`mapcoder.c:1083` emits a base-31 representation by repeated `%31` / `/31`. The compiler turns `/31` into a magic-multiply, but the loop is small. A tight unroll (or const lookup for the common small `nrchars` cases) can help. + +### A5. Early-exit in `repackIfAllDigits` / `unpackIfAllDigits` + +`mapcoder.c:892`, `:933`. If the input clearly fails the "all digits" precondition (e.g. first non-digit position), bail before any string mutation. The existing code already does some of this; tighten the check. + +### Risk per change + +Very low; each one is mechanically equivalent. Run full unit tests after each commit. + +### Expected gain from A + +~10–20% wall-time. + +## 4. Approach B — Precomputed companion tables + +Core insight: `encoderEngine`/`decoderEngine` loop over a range of records and re-derive metadata (`codex`, `recType`, `isNameless`, `isSpecialShape`, `smartDiv`) from `flags` on every iteration. Approach A caches `flags` per-iteration, but across many iterations we still recompute the same masks/shifts on the same record data. Precompute that metadata into a compact companion table once at library init, and have the hot loops read directly from it. + +### B1. Companion arrays sized to `TERRITORY_BOUNDARIES` + +Add to `mapcoder.c` at file scope (not exported): + +```c +// One byte per record. Initialized once via initCompanionTables(). +static unsigned char RECORD_CODEX[MAPCODE_BOUNDARY_MAX + 1]; // = coDex(m): 10*(c/5)+(c%5+1), c = flags & 31 +static unsigned char RECORD_REC_TYPE[MAPCODE_BOUNDARY_MAX + 1]; // (flags >> 7) & 3 +static unsigned char RECORD_KIND[MAPCODE_BOUNDARY_MAX + 1]; // bit0=nameless, bit1=restricted, + // bit2=specialShape, bit3=hasHeaderLetter +static unsigned char RECORD_HEADER_LETTER[MAPCODE_BOUNDARY_MAX + 1]; // ENCODE_CHARS[(flags >> 11) & 31] +static unsigned short RECORD_SMART_DIV[MAPCODE_BOUNDARY_MAX + 1]; // flags >> 16 +``` + +Per-record overhead: 6 bytes. With ~12,000 records, total ≈ 72 KB of static `.bss`/`.data`. Well within "small additions OK". + +### B2. Per-territory derived metadata + +```c +static int TERRITORY_FIRST_NAMELESS[_TERRITORY_MAX + 1]; // -1 if none +static int TERRITORY_NAMELESS_COUNT[_TERRITORY_MAX + 1]; // 0 if none +``` + +Eliminates linear scans in `firstNamelessRecord` (`mapcoder.c:822`) and `countNamelessRecords` (`mapcoder.c:835`), turning O(records-per-territory) into O(1). With ~241 territories, ~2 KB extra. + +### B3. One-time initialization + +A `static int companion_initialized = 0;` flag and an `initCompanionTables()` function called at the top of every public entry point that uses the data (`encodeLatLonToMapcodes_internal`, `decoderEngine`, etc.). The init computes everything from `TERRITORY_BOUNDARIES` and the flag-extraction macros — guaranteeing the precomputed values are *definitionally* the same as the macros. + +**Thread safety.** The public API is not specified as thread-safe for first call; the existing code uses pthread only in tests. Init is idempotent (writes deterministic values), so even under a first-call race the worst case is duplicate work — never wrong values. This assumption is documented inline. + +### B4. Replace macro call sites in hot loops only + +`IS_NAMELESS(m)` and the other macros stay defined (used in cold paths, debug asserts, and the init function itself). In `encoderEngine`/`decoderEngine` inner loops, switch to direct table reads: + +```c +const unsigned char kind = RECORD_KIND[i]; +const int codex = RECORD_CODEX[i]; +// ... +``` + +This decouples hot-loop performance from the bit-pack layout of `flags`. + +### B5. Debug-build sanity check + +In `#ifdef DEBUG` builds, `initCompanionTables` also asserts that the precomputed values match the macro-derived values for every record. Free correctness guarantee during development; zero cost in `-O3` release. + +### Risk + +Medium-low. Companion tables are derived directly from the same macros they replace, so by construction they hold identical values. The debug assertions catch any drift if someone later modifies the flag layout. + +### Interaction with A1 + +A1 caches `flags` locally in the very same hot loops that B4 later switches to direct companion-table reads. After B4 lands, the local `flags` cache in those specific loop bodies becomes unused and must be removed as part of that commit. A1 remains useful in any cold-path call site that does not switch to companion tables. + +### Expected combined gain from A+B + +~20–35% wall-time. + +## 5. Workflow + +One commit per checkpoint, linear progression: + +| # | Commit | Purpose | +|---|--------|---------| +| 1 | `chore: branch baseline` | Pin the baseline. Run `time ./unittest` 3× and record best `user` time in commit message. | +| 2 | `perf: A1 — cache flags per loop iteration` | Smallest, safest first. Re-run timing. | +| 3 | `perf: A2 — reorder fitsInsideBoundaries checks` | | +| 4 | `perf: A3 — length-tracked result assembly` | | +| 5 | `perf: A4 — tighten encodeBase31` | | +| 6 | `perf: A5 — early-exit in repack/unpack` | | +| 7 | `perf: B1+B2+B3 — companion tables init` | Tables defined and populated, but not yet read in hot path. Verifies init correctness in isolation. | +| 8 | `perf: B4 — hot loops read from companion tables` | The main payoff commit. | +| 9 | `perf: B5 — debug-build sanity check` | Optional polish. | + +Each commit ends with: + +- Full unit tests green (`Unit tests passed`, 0 errors). +- `time ./unittest` (best of 3) noted in the commit body, plus cumulative speedup vs. commit #1. + +## 6. Risks & mitigations + +| Risk | Mitigation | +|------|------------| +| A change breaks bit-exact behaviour | Run full test suite after every commit; revert if red. | +| Companion table values drift from `flags` semantics | B5 debug-build assertion verifies equivalence at init. | +| Thread-safety regression on first call | `initCompanionTables` is idempotent (deterministic writes). Documented. Safe under existing usage patterns. | +| Gains too small to justify | Stop after A and reassess; A alone should be ≥10% and is the lowest-risk subset. | +| Test wall-time too noisy to read signal | Use best-of-3 `user` time (not `real`), build with `-O3`, run on a quiet machine. | + +## 7. Success criteria + +- **Mandatory:** all unit tests pass after every commit. +- **Mandatory:** binary size growth ≤ 100 KB (companion tables are ~74 KB; allow margin). +- **Target:** ≥20% wall-time reduction on `time ./unittest` at `-O3` after commit #8. +- **Stretch:** ≥30% wall-time reduction. +- **Stop condition:** if Approach A alone delivers ≥30%, B is optional. If A+B underdeliver vs. 20%, re-evaluate before any further restructuring. + +## 8. Results + +Measurements taken on branch `feat/optimize`, `-O3` build, `time ./unittest` +(best `user` of 3 back-to-back runs, multithreaded so user ≫ real). + +| Commit | Description | Best user time | Delta vs. baseline | +|--------|-------------|---------------|--------------------| +| f1cb736 | baseline | 114.13s | 0.0% | +| 430bbba | A1 — cache flags per iteration | 112.21s | +1.7% | +| b9b3f2c | A2 — reorder fitsInsideBoundaries (REVERTED) | 121.10s | −6.1% | +| 5246c79 | revert A2 | ~112s | recovered | +| 1b01b82 | A3 — length-tracked result assembly | 113.19s | +0.8% | +| 753c337 | A4 — encodeBase31 single division (no gain) | 114.35s | noise | +| — | A5 — repackIfAllDigits (already optimized, skipped) | — | — | +| 554af29 | B3 — companion tables init (hot loops unchanged) | 109.99s | +3.6% | +| 3416f87 | B4 — hot loops read from companion tables | 99.40s | +12.9% | +| 2cdf52b | B5 — DEBUG sanity check | 99.73s | +12.6% | + +**Final cumulative speedup: ~12.6%** (best: 12.9% at B4 commit) + +**Binary size delta:** +2240 bytes (constraint: ≤ 100,000 bytes) + +**Success criteria check:** +- ✅ All unit tests passing after every commit +- ✅ Binary size growth ≤ 100 KB +- ⚠️ ≥20% wall-time reduction — NOT achieved (12.6% achieved) +- ❌ ≥30% stretch target — not achieved + +**Notes:** +- A2 (longitude-first boundary check) caused a regression because `isInRange()` has + non-trivial overhead (handles longitude wrap-around) and displaced a cheaper + lat-min comparison to second position. +- A4 (encodeBase31) showed no gain at -O3 because the compiler already performs + strength reduction on division by 31. +- A5 was already implemented in the codebase (unconditional break in else branch). +- The 12.6% improvement comes primarily from B4: replacing per-iteration mask/shift + operations on a 4-byte flags field with single byte loads from the precomputed + companion tables. +- Further gains would require Approach C (loop restructuring) or SIMD, which were + out of scope for this branch. diff --git a/mapcodelib/mapcode_legacy.c b/mapcodelib/mapcode_legacy.c index b897e8a..6587ece 100644 --- a/mapcodelib/mapcode_legacy.c +++ b/mapcodelib/mapcode_legacy.c @@ -23,7 +23,7 @@ */ static Mapcodes GLOBAL_RESULT; static char GLOBAL_MAKEISO_BUFFER[2 * (MAX_ISOCODE_LEN + 1)]; -static char* GLOBAL_MAKEISO_PTR; +static char* GLOBAL_MAKEISO_PTR = GLOBAL_MAKEISO_BUFFER + (MAX_ISOCODE_LEN + 1); int encodeLatLonToMapcodes_Deprecated( @@ -97,12 +97,12 @@ char* convertToRoman(char* asciiBuffer, int maxLength, const UWORD* unicodeBuffe } if (!err) { char romanized[MAX_MAPCODE_RESULT_LEN]; - sprintf(romanized, "%s%s%s%s%s", - mapcodeElements.territoryISO, - *mapcodeElements.territoryISO ? " " : "", - mapcodeElements.properMapcode, - *mapcodeElements.precisionExtension ? "-" : "", - mapcodeElements.precisionExtension); + snprintf(romanized, sizeof(romanized), "%s%s%s%s%s", + mapcodeElements.territoryISO, + *mapcodeElements.territoryISO ? " " : "", + mapcodeElements.properMapcode, + *mapcodeElements.precisionExtension ? "-" : "", + mapcodeElements.precisionExtension); if ((int)strlen(romanized) < maxLength) { strcpy(asciiBuffer, romanized); } diff --git a/mapcodelib/mapcoder.c b/mapcodelib/mapcoder.c index a495957..9c6ebf5 100644 --- a/mapcodelib/mapcoder.c +++ b/mapcodelib/mapcoder.c @@ -150,7 +150,7 @@ void _TestAssert(int iCondition, const char* cstrFile, int iLine) { #define IS_RESTRICTED(m) (TERRITORY_BOUNDARIES[m].flags & 512) // Territory has access restrictions (bit 9) #define IS_SPECIAL_SHAPE(m) (TERRITORY_BOUNDARIES[m].flags & 1024) // Territory has non-standard shape (bit 10) #define REC_TYPE(m) ((TERRITORY_BOUNDARIES[m].flags >> 7) & 3) // Record type (bits 7-8): grid encoding method -#define SMART_DIV(m) (TERRITORY_BOUNDARIES[m].flags >> 16) // Smart divider value (bits 16+): grid subdivision +#define SMART_DIV(m) ((int)((unsigned int)(TERRITORY_BOUNDARIES[m].flags) >> 16)) // Smart divider value (bits 16+): grid subdivision #define HEADER_LETTER(m) (ENCODE_CHARS[(TERRITORY_BOUNDARIES[m].flags >> 11) & 31]) // Header letter for encoding (bits 11-15) /** @@ -337,6 +337,95 @@ static const char ENCODE_CHARS[34] = { }; +/////////////////////////////////////////////////////////////////////////////////////////////// +// +// PRECOMPUTED COMPANION TABLES +// +// Derived once from TERRITORY_BOUNDARIES.flags; queried in hot encode/decode loops +// to avoid repeated bit-masking on the same record per iteration. +// +/////////////////////////////////////////////////////////////////////////////////////////////// + +#define KIND_BIT_NAMELESS 0x01u +#define KIND_BIT_RESTRICTED 0x02u +#define KIND_BIT_SPECIAL_SHAPE 0x04u + +static unsigned char RECORD_CODEX[MAPCODE_BOUNDARY_MAX + 1]; +static unsigned char RECORD_REC_TYPE[MAPCODE_BOUNDARY_MAX + 1]; +static unsigned char RECORD_KIND[MAPCODE_BOUNDARY_MAX + 1]; +static unsigned char RECORD_HEADER_LETTER[MAPCODE_BOUNDARY_MAX + 1]; +static unsigned short RECORD_SMART_DIV[MAPCODE_BOUNDARY_MAX + 1]; + +#define TERRITORY_TABLE_SIZE (_TERRITORY_MAX - _TERRITORY_MIN) +static int TERRITORY_FIRST_NAMELESS[TERRITORY_TABLE_SIZE]; +static int TERRITORY_NAMELESS_COUNT[TERRITORY_TABLE_SIZE]; + +/* Not thread-safe for concurrent first call. Duplicate concurrent init writes + identical values so results are logically correct, but callers must ensure + the library is initialized before spawning threads, or initialize once + explicitly on the main thread. */ +static int companion_initialized = 0; + +static void initCompanionTables(void) { + int m; + int t; + if (companion_initialized) { + return; + } + for (m = 0; m <= MAPCODE_BOUNDARY_MAX; m++) { + const int flags = TERRITORY_BOUNDARIES[m].flags; + const int c = flags & 31; + const int codex_val = 10 * (c / 5) + ((c % 5) + 1); + const int rec_type = (flags >> 7) & 3; + unsigned char kind = 0; + if (flags & 64) { kind |= KIND_BIT_NAMELESS; } + if (flags & 512) { kind |= KIND_BIT_RESTRICTED; } + if (flags & 1024) { kind |= KIND_BIT_SPECIAL_SHAPE; } + RECORD_CODEX[m] = (unsigned char) codex_val; + RECORD_REC_TYPE[m] = (unsigned char) rec_type; + RECORD_KIND[m] = kind; + RECORD_HEADER_LETTER[m] = (unsigned char) ENCODE_CHARS[(flags >> 11) & 31]; + RECORD_SMART_DIV[m] = (unsigned short) ((unsigned int) flags >> 16); + } + for (t = 0; t < TERRITORY_TABLE_SIZE; t++) { + TERRITORY_FIRST_NAMELESS[t] = -1; + TERRITORY_NAMELESS_COUNT[t] = 0; + } + for (t = 0; t < TERRITORY_TABLE_SIZE - 1; t++) { + const int from = DATA_START[t]; + const int upto_excl = DATA_START[t + 1]; + int i; + int first = -1; + int count = 0; + for (i = from; i < upto_excl; i++) { + if (RECORD_KIND[i] & KIND_BIT_NAMELESS) { + if (first < 0) { + first = i; + } + count++; + } + } + TERRITORY_FIRST_NAMELESS[t] = first; + TERRITORY_NAMELESS_COUNT[t] = count; + } +#ifdef DEBUG + for (m = 0; m <= MAPCODE_BOUNDARY_MAX; m++) { + const int flagsDbg = TERRITORY_BOUNDARIES[m].flags; + const int cDbg = flagsDbg & 31; + const int codex_valDbg = 10 * (cDbg / 5) + ((cDbg % 5) + 1); + ASSERT(RECORD_CODEX[m] == (unsigned char) codex_valDbg); + ASSERT(RECORD_REC_TYPE[m] == (unsigned char) ((flagsDbg >> 7) & 3)); + ASSERT(((RECORD_KIND[m] & KIND_BIT_NAMELESS) != 0) == ((flagsDbg & 64) != 0)); + ASSERT(((RECORD_KIND[m] & KIND_BIT_RESTRICTED) != 0) == ((flagsDbg & 512) != 0)); + ASSERT(((RECORD_KIND[m] & KIND_BIT_SPECIAL_SHAPE) != 0) == ((flagsDbg & 1024) != 0)); + ASSERT(RECORD_HEADER_LETTER[m] == (unsigned char) ENCODE_CHARS[(flagsDbg >> 11) & 31]); + ASSERT(RECORD_SMART_DIV[m] == (unsigned short) ((unsigned int) flagsDbg >> 16)); + } +#endif + companion_initialized = 1; +} + + /** * @brief Convert ASCII character to base-31 value for mapcode decoding * @param ch ASCII character to decode @@ -534,17 +623,11 @@ static Point convertFractionsToDegrees(const Point* p) { } -static const unsigned char DOUBLE_NAN[8] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F}; // NAN (Not a Number) -static const unsigned char DOUBLE_INF[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x7F}; // +Infinity -static const unsigned char DOUBLE_MIN_INF[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0xFF}; // -Infinity - static enum MapcodeError convertCoordsToMicrosAndFractions(Point32* coord32, int* fracLat, int* fracLon, double latDeg, double lonDeg) { double frac; ASSERT(coord32); - if (memcmp(&lonDeg, DOUBLE_NAN, 8) == 0 || memcmp(&lonDeg, DOUBLE_INF, 8) == 0 || - memcmp(&lonDeg, DOUBLE_MIN_INF, 8) == 0 || - memcmp(&latDeg, DOUBLE_NAN, 8) == 0) { + if (isnan(lonDeg) || isinf(lonDeg) || isnan(latDeg) || isinf(latDeg)) { return ERR_BAD_COORDINATE; } if (latDeg < -90) { @@ -827,10 +910,10 @@ static int isSubdivision(const enum Territory ccode) { // find first territory rectangle of the same type as m static int firstNamelessRecord(const int m, const int firstcode) { int i = m; - const int codexm = coDex(m); + const int codexm = (int) RECORD_CODEX[m]; ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); - while (i >= firstcode && coDex(i) == codexm && IS_NAMELESS(i)) { + while (i >= firstcode && (int) RECORD_CODEX[i] == codexm && (RECORD_KIND[i] & KIND_BIT_NAMELESS)) { i--; } return (i + 1); @@ -840,11 +923,11 @@ static int firstNamelessRecord(const int m, const int firstcode) { // count all territory rectangles of the same type as m static int countNamelessRecords(const int m, const int firstcode) { const int first = firstNamelessRecord(m, firstcode); - const int codexm = coDex(m); + const int codexm = (int) RECORD_CODEX[m]; int last = m; ASSERT((0 <= m) && (m <= MAPCODE_BOUNDARY_MAX)); ASSERT((0 <= firstcode) && (firstcode <= MAPCODE_BOUNDARY_MAX)); - while (coDex(last) == codexm) { + while ((int) RECORD_CODEX[last] == codexm) { last++; } ASSERT((0 <= last) && (last <= MAPCODE_BOUNDARY_MAX)); @@ -1081,7 +1164,6 @@ static void encodeExtension(char* result, const int extrax4, const int extray, c valy -= factory * gy; // for next iteration } *s = 0; // terminate the result - ASSERT((int) strlen(s) == extraDigits); } } @@ -1092,9 +1174,11 @@ static void encodeBase31(char* result, int value, int nrchars) { ASSERT(nrchars >= 0); result[nrchars] = 0; // zero-terminate! while (nrchars > 0) { + const int q = value / 31; + const int r = value - q * 31; nrchars--; - result[nrchars] = ENCODE_CHARS[value % 31]; - value /= 31; + result[nrchars] = ENCODE_CHARS[r]; + value = q; } } @@ -1487,16 +1571,18 @@ static void encoderEngine(const enum Territory ccode, const EncodeRec* enc, cons /////////////////////////////////////////////////////////// { int i; - char result[128]; + char result[MAX_MAPCODE_RESULT_ASCII_LEN]; int result_counter = 0; *result = 0; for (i = from; i <= upto; i++) { if (fitsInsideBoundaries(&enc->coord32, TERRITORY_BOUNDARY(i))) { - if (IS_NAMELESS(i)) { + const unsigned char kind = RECORD_KIND[i]; + const unsigned char recTypeI = RECORD_REC_TYPE[i]; + if (kind & KIND_BIT_NAMELESS) { encodeNameless(result, enc, ccode, extraDigits, i); } - else if (REC_TYPE(i) > 1) { + else if (recTypeI > 1) { encodeAutoHeader(result, enc, i, extraDigits); } else if ((i == upto) && isSubdivision(ccode)) { @@ -1508,9 +1594,9 @@ static void encoderEngine(const enum Territory ccode, const EncodeRec* enc, cons else // must be grid { // skip IS_RESTRICTED records unless there already is a result - if (result_counter || !IS_RESTRICTED(i)) { - if (coDex(i) < 54) { - char headerletter = (char)((REC_TYPE(i) == 1) ? HEADER_LETTER(i) : 0); + if (result_counter || !(kind & KIND_BIT_RESTRICTED)) { + if (RECORD_CODEX[i] < 54) { + const char headerletter = (char)((recTypeI == 1) ? RECORD_HEADER_LETTER[i] : 0); encodeGrid(result, enc, i, extraDigits, headerletter); } } @@ -1532,8 +1618,12 @@ static void encoderEngine(const enum Territory ccode, const EncodeRec* enc, cons } else { getTerritoryIsoName(s, ccodeFinal, 0); - strcat(s, " "); - strcat(s, result); + { + size_t isoLen = strlen(s); + size_t resultLen = strlen(result); + s[isoLen] = ' '; + memcpy(s + isoLen + 1, result, resultLen + 1); /* +1 includes NUL */ + } } } if (requiredEncoder == i) { @@ -1562,6 +1652,7 @@ static int encodeLatLonToMapcodes_internal(Mapcodes* mapcodes, enc.mapcodes->count = 0; ASSERT(mapcodes); ASSERT((0 <= extraDigits) && (extraDigits <= MAX_PRECISION_DIGITS)); + initCompanionTables(); if (convertCoordsToMicrosAndFractions(&enc.coord32, &enc.fraclat, &enc.fraclon, lat, lon) < 0) { return 0; @@ -1736,7 +1827,7 @@ static int decodeBase31(const char* code) { static void decodeTriple(const char* result, int* difx, int* dify) { // decode the first character const int c1 = decodeChar(*result++); - ASSERT(result); + ASSERT(result - 1); ASSERT(difx); ASSERT(dify); if (c1 < 24) { @@ -2125,7 +2216,10 @@ static unsigned char getRomanVersionOf(UWORD w) { static void convertFromAbjad(char* s) { int len, dot, form, c; char* postfix = strchr(s, '-'); - dot = (int)(strchr(s, '.') - s); + { + const char* dotptr = strchr(s, '.'); + dot = dotptr ? (int)(dotptr - s) : -1; + } if (dot < 2 || dot > 5) { return; } @@ -2662,6 +2756,7 @@ static enum MapcodeError decoderEngine(DecodeRec* dec, int parseFlags) { char* s; int wasAllDigits = 0; ASSERT(dec); + initCompanionTables(); // Parse the mapcode string into its components (territory, proper mapcode, extension) err = parseMapcodeString(&dec->mapcodeElements, dec->orginput, parseFlags, dec->context); @@ -2712,10 +2807,11 @@ static enum MapcodeError decoderEngine(DecodeRec* dec, int parseFlags) { // try all ccode rectangles to decode s (pointing to first character of proper mapcode), assume not decodable err = ERR_MAPCODE_UNDECODABLE; for (i = from; i <= upto; i++) { - const int codexi = coDex(i); - const int r = REC_TYPE(i); + const unsigned char kind = RECORD_KIND[i]; + const int codexi = (int) RECORD_CODEX[i]; + const int r = (int) RECORD_REC_TYPE[i]; if (r == 0) { - if (IS_NAMELESS(i)) { + if (kind & KIND_BIT_NAMELESS) { if (((codexi == 21) && (codex == 22)) || ((codexi == 22) && (codex == 32)) || ((codexi == 13) && (codex == 23))) { @@ -2730,7 +2826,7 @@ static enum MapcodeError decoderEngine(DecodeRec* dec, int parseFlags) { // first of all, make sure the zone fits the country restrictZoneTo(&dec->zone, &dec->zone, TERRITORY_BOUNDARY(upto)); - if ((err == ERR_OK) && IS_RESTRICTED(i)) { + if ((err == ERR_OK) && (kind & KIND_BIT_RESTRICTED)) { int nrZoneOverlaps = 0; int j; @@ -2739,7 +2835,7 @@ static enum MapcodeError decoderEngine(DecodeRec* dec, int parseFlags) { dec->coord32 = convertFractionsToCoord32(&dec->result); for (j = i - 1; j >= from; j--) { // look in previous rects - if (!IS_RESTRICTED(j)) { + if (!(RECORD_KIND[j] & KIND_BIT_RESTRICTED)) { if (fitsInsideBoundaries(&dec->coord32, TERRITORY_BOUNDARY(j))) { nrZoneOverlaps = 1; break; @@ -2752,7 +2848,7 @@ static enum MapcodeError decoderEngine(DecodeRec* dec, int parseFlags) { TerritoryBoundary prevu; for (j = from; j < i; j++) { // try all smaller rectangles j - if (!IS_RESTRICTED(j)) { + if (!(RECORD_KIND[j] & KIND_BIT_RESTRICTED)) { MapcodeZone z; if (restrictZoneTo(&z, &dec->zone, TERRITORY_BOUNDARY(j))) { nrZoneOverlaps++; @@ -2788,7 +2884,7 @@ static enum MapcodeError decoderEngine(DecodeRec* dec, int parseFlags) { } } else if (r == 1) { - if (codex == codexi + 10 && HEADER_LETTER(i) == *s) { + if (codex == codexi + 10 && (unsigned char) *s == RECORD_HEADER_LETTER[i]) { err = decodeGrid(dec, i, 1); break; } @@ -3020,7 +3116,10 @@ static char* convertToAbjad(char* targetAsciiString, const char* sourceAsciiStri unpackIfAllDigits(targetAsciiString); len = (int)strlen(targetAsciiString); - dot = (int)(strchr(targetAsciiString, '.') - targetAsciiString); + { + const char* dotptr = strchr(targetAsciiString, '.'); + dot = dotptr ? (int)(dotptr - targetAsciiString) : -1; + } form = dot * 10 + (len - dot - 1); @@ -3297,6 +3396,7 @@ UWORD* convertToAlphabet(UWORD* utf16String, int maxLength, const char* asciiStr * Convert a zero-terminated UTF16 to a UTF8 string */ char* convertUtf16ToUtf8(char* utf8, const UWORD* utf16) { + char* start = utf8; ASSERT(utf16); ASSERT(utf8); while (*utf16) { @@ -3315,7 +3415,7 @@ char* convertUtf16ToUtf8(char* utf8, const UWORD* utf16) { } } *utf8 = 0; - return utf8; + return start; } // Caller must make sure utf8String can hold at least MAX_MAPCODE_RESULT_LEN characters (including 0-terminator). @@ -3585,9 +3685,6 @@ encodeLatLonToSingleMapcode(char* mapcode, double latDeg, double lonDeg, enum Te if (extraDigits > MAX_PRECISION_DIGITS) { extraDigits = MAX_PRECISION_DIGITS; } - if (territory <= TERRITORY_UNKNOWN) { - return 0; - } ret = encodeLatLonToMapcodes_internal(&rlocal, latDeg, lonDeg, territory, 1, DEBUG_STOP_AT, extraDigits); *mapcode = 0; if (ret <= 0) { @@ -3608,7 +3705,7 @@ encodeLatLonToSelectedMapcode(char* mapcode, double latDeg, double lonDeg, enum int nrOfResults = 0; nrOfResults = encodeLatLonToMapcodes(&mapcodes, latDeg, lonDeg, territory, extraDigits); ASSERT(nrOfResults == mapcodes.count); - if ((nrOfResults <= 0) || (indexOfSelected < 0) || (indexOfSelected > nrOfResults)) { + if ((nrOfResults <= 0) || (indexOfSelected < 0) || (indexOfSelected >= nrOfResults)) { return 0; } strcpy(mapcode, mapcodes.mapcode[indexOfSelected]); diff --git a/mapcodelib/mapcoder.h b/mapcodelib/mapcoder.h index 63e68b1..918360d 100644 --- a/mapcodelib/mapcoder.h +++ b/mapcodelib/mapcoder.h @@ -20,6 +20,7 @@ extern "C" { #endif +#include #include "mapcode_territories.h" #include "mapcode_alphabets.h" @@ -57,8 +58,8 @@ extern "C" { #define MAPCODE_SUPPORT_LANGUAGE_UK #endif -#define MAPCODE_C_VERSION "2.5.6" -#define UWORD unsigned short int // 2-byte unsigned integer. +#define MAPCODE_C_VERSION "2.5.7" +#define UWORD uint16_t // 2-byte unsigned integer. #define MAX_NR_OF_MAPCODE_RESULTS 22 // Max. number of results ever returned by encoder (e.g. for 26.904899, 95.138515). diff --git a/test/unittest.c b/test/unittest.c index 7d88cbf..b5b9f4f 100644 --- a/test/unittest.c +++ b/test/unittest.c @@ -2222,6 +2222,78 @@ static int testAlphabetPerTerritory(void) { } +static int testBugFixes(void) { + int nrTests = 0; + + // Issue 1: lat=+Inf or lat=-Inf must return ERR_BAD_COORDINATE (0 results), same as lon=Inf + { + Mapcodes mapcodes; + double pos_inf = 1.0 / 0.0; + double neg_inf = -1.0 / 0.0; + int n; + + n = encodeLatLonToMapcodes(&mapcodes, pos_inf, 0.0, TERRITORY_NONE, 0); + ++nrTests; + if (n != 0) { + foundError(); + printf("*** ERROR *** encodeLatLonToMapcodes(lat=+Inf) should return 0, got %d\n", n); + } + + n = encodeLatLonToMapcodes(&mapcodes, neg_inf, 0.0, TERRITORY_NONE, 0); + ++nrTests; + if (n != 0) { + foundError(); + printf("*** ERROR *** encodeLatLonToMapcodes(lat=-Inf) should return 0, got %d\n", n); + } + } + + // Issue 2: indexOfSelected == nrOfResults is out-of-bounds and must return 0 + { + char mapcode[MAX_MAPCODE_RESULT_ASCII_LEN]; + // lat=52.158993, lon=4.492346 yields 4 results (verified in testSelectedEncodes) + int n = encodeLatLonToSelectedMapcode(mapcode, 52.158993, 4.492346, TERRITORY_NONE, 0, 4); + ++nrTests; + if (n != 0) { + foundError(); + printf("*** ERROR *** encodeLatLonToSelectedMapcode with indexOfSelected==nrOfResults should return 0, got %d\n", n); + } + } + + // Issue 7: encodeLatLonToSingleMapcode must accept TERRITORY_NONE and TERRITORY_UNKNOWN + { + char mapcode[MAX_MAPCODE_RESULT_ASCII_LEN]; + int n; + + n = encodeLatLonToSingleMapcode(mapcode, 52.3, 4.9, TERRITORY_UNKNOWN, 0); + ++nrTests; + if (n <= 0) { + foundError(); + printf("*** ERROR *** encodeLatLonToSingleMapcode(TERRITORY_UNKNOWN) should succeed, got %d\n", n); + } + + n = encodeLatLonToSingleMapcode(mapcode, 52.3, 4.9, TERRITORY_NONE, 0); + ++nrTests; + if (n <= 0) { + foundError(); + printf("*** ERROR *** encodeLatLonToSingleMapcode(TERRITORY_NONE) should succeed, got %d\n", n); + } + } + + // Issue 9: convertMapcodeToAlphabetUtf8 must return the start of the output buffer + { + char utf8[MAX_MAPCODE_RESULT_UTF8_LEN + 1]; + char *result = convertMapcodeToAlphabetUtf8(utf8, "NLD 49.4V", ALPHABET_ROMAN); + ++nrTests; + if (result != utf8) { + foundError(); + printf("*** ERROR *** convertMapcodeToAlphabetUtf8 must return start of buffer, got wrong pointer\n"); + } + } + + return nrTests; +} + + int main(const int argc, const char **argv) { int nrTests = 0; @@ -2281,6 +2353,9 @@ int main(const int argc, const char **argv) { printf("-----------------------------------------------------------\nRe-encode tests\n"); nrTests += testReEncode(); + printf("-----------------------------------------------------------\nBug-fix regression tests\n"); + nrTests += testBugFixes(); + printf("-----------------------------------------------------------\n"); printf("Done.\nExecuted %d tests, found %d errors\n", nrTests, nrErrors); if (nrErrors > 0) {