From b14eef084a8f4baba5817fcc69bde87925c4d01b Mon Sep 17 00:00:00 2001
From: Samuel Tyler <samuel@samuelt.me>
Date: Mon, 26 Jan 2026 22:38:07 +1100
Subject: [PATCH] libunistring: update to 1.4.1, remove pregenerated files

---
 parts.rst                                     | 112 +++---
 steps/SHA256SUMS.pkgs                         |   7 +-
 steps/libunistring-0.9.10/pass1.sh            |  21 --
 steps/libunistring-0.9.10/sources             |   2 -
 .../libunistring-1.4.1/files/gen-uninames.py  | 325 ++++++++++++++++++
 steps/libunistring-1.4.1/pass1.sh             |  69 ++++
 .../patches/reproducible-texi2html.patch      |  25 ++
 steps/libunistring-1.4.1/sources              |  19 +
 steps/manifest                                |  14 +-
 9 files changed, 508 insertions(+), 86 deletions(-)
 delete mode 100755 steps/libunistring-0.9.10/pass1.sh
 delete mode 100644 steps/libunistring-0.9.10/sources
 create mode 100644 steps/libunistring-1.4.1/files/gen-uninames.py
 create mode 100755 steps/libunistring-1.4.1/pass1.sh
 create mode 100644 steps/libunistring-1.4.1/patches/reproducible-texi2html.patch
 create mode 100644 steps/libunistring-1.4.1/sources

diff --git a/parts.rst b/parts.rst
index 1660b2c0..e8dfa6c4 100644
--- a/parts.rst
+++ b/parts.rst
@@ -1256,65 +1256,12 @@ texinfo 7.2
 Texinfo is a typesetting syntax used for generating documentation. We can now use
 ``makeinfo`` script to convert ``.texi`` files into ``.info`` documentation format.
 
-libunistring 0.9.10
-===================
-
-Library for manipulating Unicode and C strings according to Unicode standard. This
-is a dependency of GNU Guile.
-
 libffi 3.3
 ==========
 
 The libffi library provides a portable, high level programming interface to various
 calling conventions.
 
-boehm-gc 8.0.4
-==============
-
-The Boehm-Demers-Weiser conservative garbage collector can be used as a garbage
-collecting replacement for C malloc or C++ new.
-
-guile 3.0.9
-===========
-
-GNU Ubiquitous Intelligent Language for Extensions (GNU Guile) is the preferred
-extension language system for the GNU Project and features an implementation
-of the programming language Scheme.
-
-We use ``guile-psyntax-bootstrapping`` project on Guile 3.0.7 to bootstrap
-Guile's ``psyntax.pp`` without relying on pre-expanded code. This is then
-transplanted into Guile 3.0.9.
-
-which 2.21
-==========
-
-``which`` shows the full path of (shell) commands. It mostly duplicates
-bash built-in ``command -v`` but some scripts call ``which`` instead.
-In particular, ``autogen`` scripts use it.
-
-grep 3.7
-========
-
-Newer ``grep`` will be needed to bootstrap ``autogen``.
-
-sed 4.8
-=======
-
-Earlier ``sed`` was built with manual makefile with most features compiled out.
-Build a newer ``sed`` using GNU Autotools build system. In particular this will let
-sed keep executable bit on after in place editing.
-
-autogen 5.18.16
-===============
-
-GNU Autogen is a tool designed to simplify the creation and maintenance of
-programs that contain large amounts of repetitious text. Unfortunately, the
-source is full of pregenerated files that require ``autogen`` to rebuild.
-
-We use the `gnu-autogen-bootstrapping <https://github.com/schierlm/gnu-autogen-bootstrapping>`_
-project to rebuild those and create (slightly crippled) ``autogen`` that
-is then able to build a full-featured version.
-
 python 2.5.6
 ============
 
@@ -1404,6 +1351,65 @@ Differences to 3.8.16:
   make output of precompiled Python libraries (``.pyc``) deterministic.
   Finally, we can re-enable compiling of Python modules.
 
+libunistring 1.4.1
+==================
+
+Library for manipulating Unicode and C strings according to the Unicode standard.
+This is a dependency of GNU Guile.
+
+libatomic_ops 7.6.10
+====================
+
+``libatomic_ops`` provides semi-portable access to hardware-provided atomic memory
+update operations on a number of architectures.
+
+boehm-gc 8.0.4
+==============
+
+The Boehm-Demers-Weiser conservative garbage collector can be used as a garbage
+collecting replacement for C malloc or C++ new.
+
+guile 3.0.9
+===========
+
+GNU Ubiquitous Intelligent Language for Extensions (GNU Guile) is the preferred
+extension language system for the GNU Project and features an implementation
+of the programming language Scheme.
+
+We use the ``guile-psyntax-bootstrapping`` project on Guile 3.0.7 to bootstrap
+Guile's ``psyntax.pp`` without relying on pre-expanded code. This is then
+transplanted into Guile 3.0.9.
+
+which 2.21
+==========
+
+``which`` shows the full path of (shell) commands. It mostly duplicates
+bash built-in ``command -v`` but some scripts call ``which`` instead.
+In particular, ``autogen`` scripts use it.
+
+grep 3.7
+========
+
+Newer ``grep`` will be needed to bootstrap ``autogen``.
+
+sed 4.8
+=======
+
+Earlier ``sed`` was built with a manual makefile with most features compiled out.
+Build a newer ``sed`` using the GNU Autotools build system. In particular, this lets
+``sed`` keep the executable bit set after in-place editing.
+
+autogen 5.18.16
+===============
+
+GNU Autogen is a tool designed to simplify the creation and maintenance of
+programs that contain large amounts of repetitious text. Unfortunately, the
+source is full of pregenerated files that require ``autogen`` to rebuild.
+
+We use the `gnu-autogen-bootstrapping <https://github.com/schierlm/gnu-autogen-bootstrapping>`_
+project to rebuild those and create (slightly crippled) ``autogen`` that
+is then able to build a full-featured version.
+
 gcc 10.5.0
 ==========
 
diff --git a/steps/SHA256SUMS.pkgs b/steps/SHA256SUMS.pkgs
index 953b94ac..16db2b83 100644
--- a/steps/SHA256SUMS.pkgs
+++ b/steps/SHA256SUMS.pkgs
@@ -9,7 +9,7 @@ b687370caaa654ba2fc622188240f50ea305a1c926accbced153df9179697adf  autoconf-2.53_
 b363f22b42418fb2507bf722b0a6b5b03410ac58c44c400767d650d828696eaa  autoconf-2.69_0.tar.bz2
 cde51b816fcce47742064f9dc0b4871e4194190be5065dc1852b39339cc9115b  autoconf-2.71_0.tar.bz2
 6ff691e3ddd4f3d1796d28b07a2a96691dbd9801844c99ccbe660da59dabd74b  autoconf-archive-2021.02.19_0.tar.bz2
-19ecb44e6de8e6cf16b1b858224214997ee14c1f05d4f1992ac40c10ca4c1e4b  autogen-5.18.16_0.tar.bz2
+09f13ec44f02a28b0dc89c68b39be2c91460402f959d33d7dd6e5799707dff1e  autogen-5.18.16_0.tar.bz2
 02a426e82a52d4a2cd7e73fe3dc5a2c58b3ed100f24a92c57e42511fd86e247c  automake-1.10.3_0.tar.bz2
 34925adeaa2876a5e068adeb17b71fe7c7105244c3cb5439d33f7406310674da  automake-1.11.2_0.tar.bz2
 7e36e9e4b3957c95b6de8ff475c9b37aa007e2f9735b01c0eaf14b711fdd112f  automake-1.15.1_0.tar.bz2
@@ -64,7 +64,7 @@ d34de902de6de9204abedb834c4c39fbb95b993e545fbb090de2b028fa1d4a61  gcc-4.0.4_0.ta
 c1989fc2855dbb0350c4e5b677028306614a9f4b5cb6c01fd459c5e1abf592b9  grep-2.4_0.tar.bz2
 3f3247c7d05fcef0d9f624a523afa01257207584045eb7efb7fb8f29fc782c7a  grep-3.7_0.tar.bz2
 39320fce205c3d53209351c48c7cc59c6157373f940bd9f425cf3c51558fb09f  grub-2.06_0.tar.bz2
-95bcaad636bee2233c552448c6fb925a5f42c12a2cc1b90913b82da543f9a701  guile-3.0.9_0.tar.bz2
+878272c1858f8345cb5307b7ceb0b8785e4d4ed2942e723b4d7c6c1572cc1fb3  guile-3.0.9_0.tar.bz2
 893bb26efb8b1789702b8fd1a3ed72951990c8e38ab650c5220992405e79b96e  gzip-1.13_0.tar.bz2
 f27e1eaafa7ab198d2ed549aecfd8a1205d960f59a509789a159997565abd6d4  heirloom-devtools-070527_0.tar.bz2
 8d2015b87337abbf287f7a39ee4cf53514120b5d3e90a93fe7d533dcc43f14fa  help2man-1.36.4_0.tar.bz2
@@ -74,10 +74,11 @@ f27e1eaafa7ab198d2ed549aecfd8a1205d960f59a509789a159997565abd6d4  heirloom-devto
 838989f6b0aa02d6185d7cd9138036260bcf24ef484e516c4a50819d29b5e0d6  libarchive-3.5.2_0.tar.bz2
 3e392a5332e8db77c409f1566b6f05d1f92a85e1fbe902dbe97704548c0f6c8e  libbsd-0.11.8_0.tar.bz2
 bb1b1196caa7d4eebf37bdb8f355b63caa5b5642a696b7930c6c62c035180d59  libffi-3.3_0.tar.bz2
+ce758eeb2964ae8756c3f8a52c6d436f4b9f102cfcfbdfa2ad2b5019947ff6d5  libffi-3.5.2_0.tar.bz2
 9e1932db252894478dc98985f931312cbe5ee0eb40db251c75c79dd01294b15b  libmd-1.1.0_0.tar.bz2
 311cc3a5ad4126b030b32bc1e9e787bd55caa861626d210bab4eb81e50244125  libtool-2.2.4_0.tar.bz2
 a9ddd248e490623caeb0380089d03bc638e78af5c4fb65426f3b7f73521af604  libtool-2.4.7_0.tar.bz2
-d03e020c88f94f242f97f8126daa86be4b9a0164a0351379ad593712c8aaeec9  libunistring-0.9.10_0.tar.bz2
+f41fe28a2b4854e0a08e120f21685d89cc9ffbee7449627eff279a70882229ed  libunistring-1.4.1_0.tar.bz2
 e34195462aa376c9ad92d77d68ef519c911100c06fe2f3929b260d0a811a4288  linux-4.14.341-openela_0.tar.bz2
 c97644d0db5b3de127b048683afee6d31453441d97ba5dea71df5838b13542a4  linux-headers-4.14.341-openela_0.tar.bz2
 8283d7c23a656c2f47198561b102ca3517dd2a48614e818f36149100c2c96d66  m4-1.4.19_0.tar.bz2
diff --git a/steps/libunistring-0.9.10/pass1.sh b/steps/libunistring-0.9.10/pass1.sh
deleted file mode 100755
index 6a28c65b..00000000
--- a/steps/libunistring-0.9.10/pass1.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-# SPDX-FileCopyrightText: 2022 Andrius Štikonas <andrius@stikonas.eu>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-src_prepare() {
-    find . -name '*.info*' -delete
-
-    # libunistring does not specify which gnulib snapshot was used,
-    # pick a random one that works
-    GNULIB_TOOL=../gnulib-52a06cb3/gnulib-tool ./autogen.sh
-
-    # autogen.sh does not regenerate libtool files
-    autoreconf-2.69 -fi
-}
-
-src_configure() {
-    ./configure \
-        --prefix="${PREFIX}" \
-        --libdir="${LIBDIR}" \
-        --disable-shared
-}
diff --git a/steps/libunistring-0.9.10/sources b/steps/libunistring-0.9.10/sources
deleted file mode 100644
index a2523c98..00000000
--- a/steps/libunistring-0.9.10/sources
+++ /dev/null
@@ -1,2 +0,0 @@
-f https://mirrors.kernel.org/gnu/libunistring/libunistring-0.9.10.tar.xz eb8fb2c3e4b6e2d336608377050892b54c3c983b646c561836550863003c05d7
-g https://https.git.savannah.gnu.org/git/gnulib.git~52a06cb3 _ 009989b81c0bebc5f6550636ed653fbcb237dafc2af5c706f3522087ca571e4d gnulib-52a06cb3.tar.gz
diff --git a/steps/libunistring-1.4.1/files/gen-uninames.py b/steps/libunistring-1.4.1/files/gen-uninames.py
new file mode 100644
index 00000000..77234c91
--- /dev/null
+++ b/steps/libunistring-1.4.1/files/gen-uninames.py
@@ -0,0 +1,325 @@
+# SPDX-FileCopyrightText: 2026 Samuel Tyler <samuel@samuelt.me>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+ 
+### Creation of gnulib's uninames.h from the UnicodeData.txt and NameAliases.txt
+### tables.
+
+### Written by Bruno Haible <bruno@clisp.org>, 2000-12-28.
+### Translated into Python by Samuel Tyler, 2026-01-31.
+###
+### This program is free software.
+### It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
+### You can redistribute it and/or modify it under either
+###   - the terms of the GNU Lesser General Public License as published
+###     by the Free Software Foundation, either version 3, or (at your
+###     option) any later version, or
+###   - the terms of the GNU General Public License as published by the
+###     Free Software Foundation; either version 2, or (at your option)
+###     any later version, or
+###   - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+### Lesser General Public License and the GNU General Public License
+### for more details.
+###
+### You should have received a copy of the GNU Lesser General Public
+### License and of the GNU General Public License along with this
+### program.  If not, see <https://www.gnu.org/licenses/>.
+
+import sys
+
+add_comments = False
+
+class UnicodeChar:  # One character name (or alias name) read from the input tables.
+    def __init__(self, index, name):
+        self.index = index  # sequence number assigned by main(); an alias reuses its character's
+        self.name = name
+        self.word_indices = []  # indices of the words of the name, filled in by main()
+        self.word_indices_index = 0  # offset of this name's entries within unicode_names[]
+
+class Range:  # A run of consecutive code points kept by main().
+    def __init__(self, index, start_code, end_code):
+        self.index = index  # name index of the first character of the run
+        self.start_code = start_code  # first code point of the run (inclusive)
+        self.end_code = end_code  # last code point of the run (inclusive)
+
+class WordList:  # The distinct words of one particular length.
+    def __init__(self):
+        self.hashed = {}  # word -> True while collecting, later the word's global index
+        self.sorted = []  # the words; sorted by main() once all names are split
+        self.size = 0  # total number of characters over all words
+        self.length = 0  # number of words
+
+def main(inputfile, aliasfile, outputfile):
+    """Write gnulib's uninames.h tables to outputfile.
+
+    inputfile is UnicodeData.txt; aliasfile is NameAliases.txt (a false
+    value skips the aliases).
+    """
+    all_chars, all_aliases, all_ranges = [], [], []
+    all_chars_hashed = {}
+    name_index = 0
+    current_range = None
+    # Read all characters and names from the input file.
+    with open(inputfile, "r", encoding="utf-8") as istream:
+        for line in istream:
+            line = line.strip("\n")
+            if not line:
+                continue
+            # Fields are ';'-separated; only code point and name are used.
+            code_string, name_string = line.split(";", 2)[:2]
+            code = int(code_string, 16)
+
+            # Ignore characters whose name starts with "<".
+            if name_string.startswith('<'):
+                continue
+
+            # Also ignore Hangul syllables; they are treated specially.
+            if 0xAC00 <= code <= 0xD7A3:
+                continue
+
+            # Also ignore CJK compatibility ideographs; they are treated
+            # specially as well.
+            if (0xF900 <= code <= 0xFA2D) or (0xFA30 <= code <= 0xFA6A) or \
+               (0xFA70 <= code <= 0xFAD9) or (0x2F800 <= code <= 0x2FA1D):
+                continue
+
+            # Also ignore variation selectors; they are treated
+            # specially as well.
+            if (0xFE00 <= code <= 0xFE0F) or (0xE0100 <= code <= 0xE01EF):
+                continue
+
+            uc = UnicodeChar(name_index, name_string)
+            all_chars.append(uc)
+            all_chars_hashed[code] = uc
+
+            # Update the contiguous range, or start a new range.
+            if current_range and (current_range.end_code + 1 == code):
+                current_range.end_code = code
+            else:
+                if current_range:
+                    all_ranges.append(current_range)
+                current_range = Range(name_index, code, code)
+            name_index += 1
+
+    # Close the range that was still open when the input ended.
+    if current_range:
+        all_ranges.append(current_range)
+    # all_chars and all_ranges are now in input order.
+
+    if aliasfile:
+        # Read all characters and names from the alias file.
+        with open(aliasfile, "r", encoding="utf-8") as istream:
+            for line in istream:
+                line = line.strip("\n")
+                if not line or line.startswith('#'):
+                    continue
+
+                code_string, name_string = line.split(";", 2)[:2]
+                code = int(code_string, 16)
+
+                if code in all_chars_hashed:
+                    uc_alias = UnicodeChar(all_chars_hashed[code].index, name_string)
+                    all_aliases.append(uc_alias)
+
+    # Aliases go after the primary names; both lists are in input order.
+    all_chars_and_aliases = all_chars + all_aliases
+
+    # Split into words.
+    words_by_length = []
+    for name in ["HANGUL SYLLABLE", "CJK COMPATIBILITY", "VARIATION"] + \
+        [c.name for c in all_chars_and_aliases]:
+        i1 = 0
+        while i1 < len(name):
+            i2 = name.find(' ', i1)
+            if i2 == -1:
+                i2 = len(name)
+
+            word = name[i1:i2]
+            while len(word) >= len(words_by_length):
+                words_by_length.append(WordList())
+
+            word_list = words_by_length[len(word)]
+            if word not in word_list.hashed:
+                word_list.hashed[word] = True
+                word_list.sorted.append(word)
+
+            i1 = i2 + 1
+
+    # Sort the word lists and record, for each word length, the total
+    # number of characters (size) and of distinct words (length); the
+    # offset tables written below are built from these totals.  Every
+    # slot already holds a WordList (possibly empty), so none has to be
+    # created here.
+    for word_list in words_by_length:
+        word_list.sorted.sort()
+        word_list.size = sum(len(w) for w in word_list.sorted)
+        word_list.length = len(word_list.sorted)
+
+    # Output the tables.
+    with open(outputfile, 'w', encoding='ascii') as ostream:
+        ostream.write("/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n")
+        ostream.write("/*\n")
+        ostream.write(f" * {outputfile}\n")
+        ostream.write(" *\n")
+        ostream.write(" * Unicode character name table.\n")
+        ostream.write(" * Generated automatically by the gen-uninames utility.\n")
+        ostream.write(" */\n")
+        ostream.write("/* Copyright (C) 2000-2024 Free Software Foundation, Inc.\n")
+        ostream.write("\n")
+        ostream.write("   This file is free software.\n")
+        ostream.write('   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".\n')
+        ostream.write("   You can redistribute it and/or modify it under either\n")
+        ostream.write("     - the terms of the GNU Lesser General Public License as published\n")
+        ostream.write("       by the Free Software Foundation, either version 3, or (at your\n")
+        ostream.write("       option) any later version, or\n")
+        ostream.write("     - the terms of the GNU General Public License as published by the\n")
+        ostream.write("       Free Software Foundation; either version 2, or (at your option)\n")
+        ostream.write("       any later version, or\n")
+        ostream.write('     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".\n')
+        ostream.write("\n")
+        ostream.write("   This file is distributed in the hope that it will be useful,\n")
+        ostream.write("   but WITHOUT ANY WARRANTY; without even the implied warranty of\n")
+        ostream.write("   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n")
+        ostream.write("   Lesser General Public License and the GNU General Public License\n")
+        ostream.write("   for more details.\n")
+        ostream.write("\n")
+        ostream.write("   You should have received a copy of the GNU Lesser General Public\n")
+        ostream.write("   License and of the GNU General Public License along with this\n")
+        ostream.write("   program.  If not, see <https://www.gnu.org/licenses/>.  */\n")
+        ostream.write("\n")
+
+        total_size = sum(wl.size for wl in words_by_length)
+        ostream.write(f"static const char unicode_name_words[{total_size}] = {{\n")
+
+        for wl in words_by_length:
+            for word in wl.sorted:
+                # format " ~{ '~C',~}"
+                # space before loop, print each char in single quotes followed by comma
+                chars = "".join(f"'{c}'," for c in word)
+                ostream.write(f" {chars}\n")
+        ostream.write("};\n")
+
+        total_num_words = sum(wl.length for wl in words_by_length)
+        ostream.write(f"#define UNICODE_CHARNAME_NUM_WORDS {total_num_words}\n")
+
+        # unicode_name_by_length
+        ostream.write(
+            "static const struct { uint32_t extra_offset; uint16_t ind_offset; } "
+            f"unicode_name_by_length[{len(words_by_length) + 1}] = {{\n"
+        )
+
+        extra_offset = 0
+        ind_offset = 0
+        for wl in words_by_length:
+            ostream.write(f"  {{ {extra_offset}, {ind_offset} }},\n")
+            extra_offset += wl.size
+            ind_offset += wl.length
+        ostream.write(f"  {{ {extra_offset}, {ind_offset} }}\n")
+        ostream.write("};\n")
+
+        # Assign indices to hashed words
+        current_idx = 0
+        for wl in words_by_length:
+            for word in wl.sorted:
+                wl.hashed[word] = current_idx
+                current_idx += 1
+
+        # Defines specific words
+        for word in ["HANGUL", "SYLLABLE", "CJK", "COMPATIBILITY", "VARIATION"]:
+            wlen = len(word)
+            idx = words_by_length[wlen].hashed.get(word)
+            ostream.write(f"#define UNICODE_CHARNAME_WORD_{word} {idx}\n")
+
+        # Compute word-indices for every unicode-char, first word first.
+        for uc in all_chars_and_aliases:
+            indices = []
+            i1 = 0
+            name = uc.name
+            while i1 < len(name):
+                i2 = name.find(' ', i1)
+                if i2 == -1:
+                    i2 = len(name)
+                word = name[i1:i2]
+                wlen = len(word)
+                idx = words_by_length[wlen].hashed[word]
+                indices.append(idx)
+                i1 = i2 + 1
+            uc.word_indices = indices
+
+        # Sort the list of unicode-chars by word-indices
+        all_chars_and_aliases.sort(key=lambda x: x.word_indices)
+
+        # Output the word-indices
+        total_indices = sum(len(uc.word_indices) for uc in all_chars_and_aliases)
+        ostream.write(f"static const uint16_t unicode_names[{total_indices}] = {{\n")
+
+        i = 0
+        for uc in all_chars_and_aliases:
+            # Each entry is 2 * word index, with the low bit set on every
+            # word except the last one of the name ("more words follow").
+            wi = uc.word_indices
+            packed_indices = [
+                val * 2 + int(k < len(wi) - 1) for k, val in enumerate(wi)
+            ]
+            indices_str = " " + " ".join(f"{val}," for val in packed_indices)
+            ostream.write(indices_str)
+            if add_comments:
+                ostream.write(f"{' ' * (40 - len(indices_str))}/* {uc.name} */")
+            ostream.write("\n")
+
+            uc.word_indices_index = i
+            i += len(uc.word_indices)
+        ostream.write("};\n")
+
+        ostream.write(
+            "static const struct { uint16_t index; uint32_t name:24; } ATTRIBUTE_PACKED "
+            f"unicode_name_to_index[{len(all_chars_and_aliases)}] = {{\n"
+        )
+        for uc in all_chars_and_aliases:
+            content = f"  {{ 0x{uc.index:04X}, {uc.word_indices_index} }},"
+            ostream.write(content)
+            if add_comments:
+                ostream.write(f"{' ' * (21 - len(content))}/* {uc.name} */")
+            ostream.write("\n")
+        ostream.write("};\n")
+
+        ostream.write(
+            f"static const struct {{ uint16_t index; uint32_t name:24; }} ATTRIBUTE_PACKED "
+            f"unicode_index_to_name[{len(all_chars)}] = {{\n"
+        )
+        for uc in sorted(all_chars, key=lambda c: c.index):
+            content = f"  {{ 0x{uc.index:04X}, {uc.word_indices_index} }},"
+            ostream.write(content)
+            if add_comments:
+                ostream.write(f"{' ' * (21 - len(content))}/* {uc.name} */")
+            ostream.write("\n")
+        ostream.write("};\n")
+
+        # Max counts
+        max_len = max(len(uc.name) for uc in all_chars_and_aliases)
+        ostream.write(f"#define UNICODE_CHARNAME_MAX_LENGTH {max_len}\n")
+
+        max_words = max(len(uc.word_indices) for uc in all_chars_and_aliases)
+        ostream.write(f"#define UNICODE_CHARNAME_MAX_WORDS {max_words}\n")
+
+        # Ranges
+        ostream.write(
+            "static const struct { uint16_t index; uint32_t gap; uint16_t length; } "
+            f"unicode_ranges[{len(all_ranges)}] = {{\n"
+        )
+        for r in all_ranges:
+            ostream.write(
+                f"  {{ {r.index}, {r.start_code - r.index}, {1 + r.end_code - r.start_code} }},\n"
+            )
+        ostream.write("};\n")
+
+
+if __name__ == "__main__":
+    # A wrong invocation must fail the build step instead of exiting 0.
+    if len(sys.argv) < 4:
+        sys.exit("Usage: script.py <inputfile> <aliasfile> <outputfile>")
+    main(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/steps/libunistring-1.4.1/pass1.sh b/steps/libunistring-1.4.1/pass1.sh
new file mode 100755
index 00000000..6cb10af5
--- /dev/null
+++ b/steps/libunistring-1.4.1/pass1.sh
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: 2022 Andrius Štikonas <andrius@stikonas.eu>
+# SPDX-FileCopyrightText: 2026 Samuel Tyler <samuel@samuelt.me>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+src_prepare() {
+    default
+
+    find . \( -name '*.info*' -o -name "*.html" \) -delete
+
+    # Remove the pregenerated files so that they get rebuilt from source.
+    local GNULIB=../gnulib-6d64a31
+    rm lib/unictype/*_byname.h lib/unicase/locale-languages.h \
+        lib/iconv_open-*.h
+    rm {$GNULIB,.}/lib/unictype/{digit,pr_*,ctype_*,categ_*,combiningclass,numeric,blocks,[bij]*_of,sy_*,scripts,decdigit,mirror}.h
+    rm {$GNULIB,.}/lib/unictype/scripts_byname.gperf \
+        {$GNULIB,.}/lib/uniwbrk/wbrkprop.h \
+        {$GNULIB,.}/lib/uniwidth/width*.h \
+        {$GNULIB,.}/lib/unimetadata/u-version.c \
+        {$GNULIB,.}/lib/uninorm/{decomposition-table[12].h,composition-table.gperf,composition-table-bounds.h} \
+        lib/uninorm/composition-table.h \
+        {$GNULIB,.}/lib/unigbrk/gbrkprop.h \
+        {$GNULIB,.}/lib/uniname/uninames.h \
+        {$GNULIB,.}/lib/unilbrk/{lbrkprop[12].h,lbrktables.c} \
+        {$GNULIB,.}/lib/unicase/{special-casing-table.*,to*.h,ignorable.h,cased.h}
+    # Braces are not expanded inside quotes, so each suffix needs its own -name.
+    find {$GNULIB,.}/tests/{unicase,unigbrk,unictype} \( -name "test-*.c" -o -name "test-*.h" \) -delete -exec touch {} +
+    touch $GNULIB/lib/uniname/uninames.h
+
+    mv gen-uninames.py "$GNULIB/lib"
+    pushd "$GNULIB/lib"
+    mv ../../*.txt .
+    gcc -Iunictype -o gen-uni-tables gen-uni-tables.c
+    ./gen-uni-tables UnicodeData-17.0.0.txt \
+        PropList-17.0.0.txt \
+        DerivedCoreProperties-17.0.0.txt \
+        emoji-data-17.0.0.txt \
+        ArabicShaping-17.0.0.txt \
+        Scripts-17.0.0.txt \
+        Blocks-17.0.0.txt \
+        PropList-3.0.1.txt \
+        BidiMirroring-17.0.0.txt \
+        EastAsianWidth-17.0.0.txt \
+        LineBreak-17.0.0.txt \
+        WordBreakProperty-17.0.0.txt \
+        GraphemeBreakProperty-17.0.0.txt \
+        CompositionExclusions-17.0.0.txt \
+        SpecialCasing-17.0.0.txt \
+        CaseFolding-17.0.0.txt \
+        17.0.0
+    python3 gen-uninames.py \
+        UnicodeData-17.0.0.txt NameAliases-17.0.0.txt \
+        uniname/uninames.h
+    popd
+
+    # libunistring does not specify which gnulib snapshot was used,
+    # pick a random one that works
+    GNULIB_SRCDIR=$GNULIB ./autogen.sh
+
+    # autogen.sh does not regenerate libtool files
+    autoreconf -fi
+}
+
+src_configure() {
+    # Static libraries only, installed below PREFIX / LIBDIR.
+    ./configure --disable-shared \
+        --prefix="$PREFIX" \
+        --libdir="$LIBDIR"
+}
diff --git a/steps/libunistring-1.4.1/patches/reproducible-texi2html.patch b/steps/libunistring-1.4.1/patches/reproducible-texi2html.patch
new file mode 100644
index 00000000..00f1da5a
--- /dev/null
+++ b/steps/libunistring-1.4.1/patches/reproducible-texi2html.patch
@@ -0,0 +1,25 @@
+SPDX-FileCopyrightText: 2026 Samuel Tyler <samuel@samuelt.me>
+
+SPDX-License-Identifier: GPL-3.0-or-later
+
+Make the vendored copy of texi2html reproducible.
+
+--- libunistring-1.4.1/build-aux/texi2html	2026-02-08 09:56:18.855886133 +1100
++++ libunistring-1.4.1/build-aux/texi2html	2026-02-08 10:02:34.030891700 +1100
+@@ -8854,14 +8854,13 @@
+         if (!$Texi2HTML::Config::TEST)
+         {
+             print STDERR "# Setting date in $Texi2HTML::Config::LANG\n" if ($T2H_DEBUG);
+-            $Texi2HTML::THISDOC{'today'} = Texi2HTML::I18n::pretty_date($Texi2HTML::Config::LANG);  # like "20 September 1993";
++            $Texi2HTML::THISDOC{'today'} = '';
+         }
+         else
+         {
+             $Texi2HTML::THISDOC{'today'} = 'a sunny day';
+         }
+-        $Texi2HTML::THISDOC{'today'} = $Texi2HTML::Config::DATE 
+-            if (defined($Texi2HTML::Config::DATE));
++        $Texi2HTML::THISDOC{'today'} = '';
+         $::things_map_ref->{'today'} = $Texi2HTML::THISDOC{'today'};
+         $::pre_map_ref->{'today'} = $Texi2HTML::THISDOC{'today'};
+         $::texi_map_ref->{'today'} = $Texi2HTML::THISDOC{'today'};
diff --git a/steps/libunistring-1.4.1/sources b/steps/libunistring-1.4.1/sources
new file mode 100644
index 00000000..4955bb21
--- /dev/null
+++ b/steps/libunistring-1.4.1/sources
@@ -0,0 +1,19 @@
+f https://mirrors.kernel.org/gnu/libunistring/libunistring-1.4.1.tar.xz 67d88430892527861903788868c77802a217b0959990f7449f2976126a307763
+g https://https.git.savannah.gnu.org/git/gnulib.git~6d64a31 _ 04541986073955243d0ce7aa0e5f4017298ae17892a0064ae0b5513808cdad1b gnulib-6d64a31.tar.gz
+f http://ftp.unicode.org/Public/17.0.0/ucd/UnicodeData.txt 2e1efc1dcb59c575eedf5ccae60f95229f706ee6d031835247d843c11d96470c UnicodeData-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/PropList.txt 130dcddcaadaf071008bdfce1e7743e04fdfbc910886f017d9f9ac931d8c64dd PropList-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt 24c7fed1195c482faaefd5c1e7eb821c5ee1fb6de07ecdbaa64b56a99da22c08 DerivedCoreProperties-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/emoji/emoji-data.txt 2cb2bb9455cda83e8481541ecf5b6dfda66a3bb89efa3fa7c5297eccf607b72b emoji-data-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/ArabicShaping.txt 39afa01e680e27d0fd10b67a9b27be13fbaa3d0efecfb5be45991de9a0d267d0 ArabicShaping-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/Scripts.txt 9f5e50d3abaee7d6ce09480f325c706f485ae3240912527e651954d2d6b035bf Scripts-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/Blocks.txt c0edefaf1a19771e830a82735472716af6bf3c3975f6c2a23ffbe2580fbbcb15 Blocks-17.0.0.txt
+f http://ftp.unicode.org/Public/3.0-Update1/PropList-3.0.1.txt 909eef4adbeddbdddcd9487c856fe8cdbb8912aa8eb315ed7885b6ef65f4dc4c
+f http://ftp.unicode.org/Public/17.0.0/ucd/BidiMirroring.txt a2f16fb873ab4fcdf3221cb1a8a85a134ddd6ed03603181823ff5206af3741ce BidiMirroring-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/EastAsianWidth.txt ea7ce50f3444a050333448dffef1cadd9325af55cbb764b4a2280faf52170a33 EastAsianWidth-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/LineBreak.txt e6a18fa91f8f6a6f8e534b1d3f128c21ada45bfe152eb6b1bcc5e15fd8ac92e6 LineBreak-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/auxiliary/WordBreakProperty.txt 72274cac1e6b919507db35655c3e175aa27274668a1ece95c28d2069f2ad9852 WordBreakProperty-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/auxiliary/GraphemeBreakProperty.txt d6b51d1d2ae5c33b451b7ed994b48f1f4dc62b2272a5831e7fd418514a6bae89 GraphemeBreakProperty-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/CompositionExclusions.txt 2f239196ef3b5b61db5cc476e9bd80f534d15aa1b74e1be1dea5d042a344c85f CompositionExclusions-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/SpecialCasing.txt efc25faf19de21b92c1194c111c932e03d2a5eaf18194e33f1156e96de4c9588 SpecialCasing-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/CaseFolding.txt ff8d8fefbf123574205085d6714c36149eb946d717a0c585c27f0f4ef58c4183 CaseFolding-17.0.0.txt
+f http://ftp.unicode.org/Public/17.0.0/ucd/NameAliases.txt 793f6f1e4d15fd90f05ae66460191dc4d75d1fea90136a25f30dd6a4cb950eac NameAliases-17.0.0.txt
diff --git a/steps/manifest b/steps/manifest
index 0aec65ff..3187d885 100644
--- a/steps/manifest
+++ b/steps/manifest
@@ -201,14 +201,7 @@ build: gperf-3.3
 build: gettext-0.26
 build: perl-Text-CSV-2.06
 build: texinfo-7.2
-build: libunistring-0.9.10
 build: libffi-3.3
-build: gc-8.0.4
-build: guile-3.0.9
-build: which-2.21
-build: grep-3.7
-build: sed-4.8
-build: autogen-5.18.16
 build: python-2.5.6
 build: python-3.1.5
 build: python-3.1.5
@@ -217,6 +210,13 @@ build: python-3.4.10
 build: python-3.8.16
 build: python-3.11.1
 uninstall: python-2.0.1 python-2.3.7 python-3.1.5 python-3.3.7 python-3.4.10 python-3.8.16
+build: libunistring-1.4.1
+build: gc-8.0.4
+build: guile-3.0.9
+build: which-2.21
+build: grep-3.7
+build: sed-4.8
+build: autogen-5.18.16
 jump: break ( INTERNAL_CI == pass1 ) # scripts are generated in pass1
 build: gcc-10.5.0
 build: binutils-2.41