From 85c389044d8b673137e5a827a6533d7762e1b069 Mon Sep 17 00:00:00 2001 From: vxtls <187420201+vxtls@users.noreply.github.com> Date: Tue, 3 Mar 2026 08:33:19 -0500 Subject: [PATCH] fix(kernel-bootstrap): unify external raw container flow and remove default second disk --- Payload_img_design.md | 31 +++++++------ README.rst | 43 +++++++++-------- lib/generator.py | 104 ++++++++++++++++++++++++++---------------- rootfs.py | 33 ++++++-------- 4 files changed, 120 insertions(+), 91 deletions(-) diff --git a/Payload_img_design.md b/Payload_img_design.md index d37fae5f..0060abc5 100644 --- a/Payload_img_design.md +++ b/Payload_img_design.md @@ -2,10 +2,10 @@ This repository uses [`README.rst`](./README.rst) as the canonical main documentation. -## Kernel-bootstrap `payload.img` +## Kernel-bootstrap raw `external.img` -`payload.img` is a raw container disk used in kernel-bootstrap offline mode -(`--repo` and `--external-sources` are both unset). +`external.img` is a raw container disk used in kernel-bootstrap mode when +`--external-sources` is set and `--repo` is unset. ### Why not put everything in the initial image? @@ -20,7 +20,7 @@ around the Fiwix transition in QEMU or on bare metal). So the design is intentionally split: - Initial image: only what is required to reach `improve: import_payload` -- `payload.img`: the rest of offline distfiles +- `external.img`: the rest of distfiles This is not a patch-style workaround. It is a two-phase transport design that keeps early boot deterministic and moves bulk data import to a stage where the @@ -29,14 +29,14 @@ runtime is robust enough to process it safely. ### Why import from an external image and copy into main filesystem? Because the bootstrap still expects distfiles to end up under the normal local -path (`/external/distfiles`) for later steps. `payload.img` is used as a +path (`/external/distfiles`) for later steps. `external.img` is used as a transport medium only. The flow is: 1. Boot minimal initial image. 2. 
 Reach `improve: import_payload`. -3. Detect the payload disk by magic (`LBPAYLD1`) across detected block devices. +3. Detect the external container disk by magic (`LBPAYLD1`) across detected block devices. 4. Copy payload files into `/external/distfiles`. 5. Continue the build exactly as if files had been present locally all along. @@ -54,7 +54,7 @@ The importer probes detected block devices and selects the one with magic `LBPAY ### Manual creation without Python -Prepare `payload.list` as: +Prepare `external.list` as: ```text <name> <path> @@ -66,8 +66,8 @@ Then: cat > make-payload.sh <<'SH' #!/bin/sh set -e -out="${1:-payload.img}" -list="${2:-payload.list}" +out="${1:-external.img}" +list="${2:-external.list}" write_u32le() { v="$1" @@ -89,14 +89,17 @@ while read -r name path; do done < "${list}" SH chmod +x make-payload.sh -./make-payload.sh payload.img payload.list +./make-payload.sh external.img external.list ``` -Attach `payload.img` as an extra raw disk in QEMU, or as the second disk on bare metal. +Attach `external.img` as an extra raw disk in QEMU, or as the second disk on bare metal. ### When it is used -- Used in kernel-bootstrap offline mode. -- Not used when `--repo` or `--external-sources` is provided. -- `--build-guix-also` increases payload contents (includes post-early `steps-guix` +- Used in kernel-bootstrap with `--external-sources` and without `--repo`. +- Not used with `--repo` (that path still uses an ext filesystem disk). +- Without `--external-sources` and without `--repo`, there is no second disk: + the initial image only includes distfiles needed before `improve: get_network`, + and later distfiles are downloaded from mirrors. +- `--build-guix-also` increases container contents (includes post-early `steps-guix` sources), but does not change the mechanism. 
diff --git a/README.rst b/README.rst index decb33c7..1eb627e8 100644 --- a/README.rst +++ b/README.rst @@ -63,34 +63,36 @@ Without using Python: * *Only* copy distfiles listed in ``sources`` files for ``build:`` steps manifested before ``improve: get_network`` into this disk. - * In kernel-bootstrap offline mode (no ``--repo`` and no - ``--external-sources``), use the second image as ``payload.img``. - ``payload.img`` is a raw container (not a filesystem) used to carry the + * In kernel-bootstrap mode with ``--external-sources`` (and no ``--repo``), + use the second image as ``external.img``. + ``external.img`` is a raw container (not a filesystem) used to carry the distfiles that are not needed before ``improve: import_payload``. In other words, the first image only carries the minimal set needed to - reach the importer; the rest of the offline distfiles live in payload. + reach the importer; the rest of the distfiles live in ``external.img``. * Header magic: ``LBPAYLD1`` (8 bytes). * Then: little-endian ``u32`` file count. * Repeated for each file: little-endian ``u32`` name length, little-endian ``u32`` file size, raw file name bytes, raw file bytes. - * If you are not in that mode, the second disk can still be used as an - optional ext3 distfiles disk, as before. + * With ``--repo``, the second disk remains an ext3 distfiles/repo disk. + * Without ``--external-sources`` and without ``--repo``, no second disk is + used: the initial image includes only pre-network distfiles, and later + distfiles are downloaded from configured mirrors after networking starts. * Run QEMU, with 4+G RAM, optionally SMP (multicore), both drives (main - builder image plus payload/ext3 image), a NIC with model E1000 + builder image plus external image, when a second image is used), a NIC with model E1000 (``-nic user,model=e1000``), and ``-machine kernel-irqchip=split``. c. 
 **Bare metal:** Follow the same steps as QEMU, but the disks need to be two different *physical* disks, and boot from the first disk. -Manual ``payload.img`` preparation ----------------------------------- +Manual raw ``external.img`` preparation +--------------------------------------- -The following script creates a raw ``payload.img`` from a manually prepared +The following script creates a raw ``external.img`` from a manually prepared file list. This is equivalent to what ``rootfs.py`` does for kernel-bootstrap -offline mode. +with ``--external-sources`` (and no ``--repo``). -1. Prepare a ``payload.list`` with one file per line, formatted as: +1. Prepare an ``external.list`` with one file per line, formatted as: ``<name> <path>``. 2. Run: @@ -99,8 +101,8 @@ offline mode. cat > make-payload.sh <<'EOF' #!/bin/sh set -e - out="${1:-payload.img}" - list="${2:-payload.list}" + out="${1:-external.img}" + list="${2:-external.list}" write_u32le() { v="$1" @@ -122,16 +124,19 @@ offline mode. done < "${list}" EOF chmod +x make-payload.sh - ./make-payload.sh payload.img payload.list + ./make-payload.sh external.img external.list -3. Attach ``payload.img`` as an additional raw disk when booting in QEMU, or +3. Attach ``external.img`` as an additional raw disk when booting in QEMU, or as the second physical disk on bare metal. Notes: -* ``payload.img`` is used in kernel-bootstrap offline mode regardless of - ``--build-guix-also``. With ``--build-guix-also``, the payload content is - larger because it also includes post-early sources from ``steps-guix``. +* ``external.img`` raw container mode is used with ``--external-sources`` (and + no ``--repo``). With ``--build-guix-also``, the container content is larger + because it also includes post-early sources from ``steps-guix``. +* Without ``--external-sources`` and without ``--repo``, there is no second + image. The initial image only includes distfiles needed before + ``improve: get_network``; later distfiles are downloaded from mirrors. 
* The runtime importer identifies the correct disk by checking the magic ``LBPAYLD1`` on each detected block device, not by assuming a device name. diff --git a/lib/generator.py b/lib/generator.py index 964584ce..169df2a4 100755 --- a/lib/generator.py +++ b/lib/generator.py @@ -26,7 +26,7 @@ class Generator(): git_dir = os.path.join(os.path.dirname(os.path.join(__file__)), '..') distfiles_dir = os.path.join(git_dir, 'distfiles') - payload_magic = b'LBPAYLD1' + raw_container_magic = b'LBPAYLD1' # pylint: disable=too-many-arguments,too-many-positional-arguments def __init__(self, arch, external_sources, early_preseed, repo_path, mirrors, @@ -46,8 +46,9 @@ class Generator(): build_guix_also=self.build_guix_also ) self.bootstrap_source_manifest = self.source_manifest - self.payload_source_manifest = [] - self.payload_image = None + self.external_source_manifest = [] + self.external_image = None + self.kernel_bootstrap_mode = None self.target_dir = None self.external_dir = None @@ -59,13 +60,31 @@ class Generator(): self.external_dir = os.path.join(self.target_dir, 'external') self.distfiles() - def _prepare_kernel_bootstrap_payload_manifests(self): + def _select_kernel_bootstrap_mode(self): """ - Split early source payload from full offline payload. + Select how kernel-bootstrap should transport distfiles. """ - # Keep the early builder payload small: include only sources needed - # before improve: import_payload runs, so payload.img is the primary - # carrier for the rest of the offline distfiles. 
+ if self.repo_path: + self.kernel_bootstrap_mode = "repo" + self.external_source_manifest = [] + return + + if self.external_sources: + self.kernel_bootstrap_mode = "raw_external" + self._prepare_kernel_bootstrap_external_manifests() + return + + self.kernel_bootstrap_mode = "network_only" + self.bootstrap_source_manifest = self.early_source_manifest + self.external_source_manifest = [] + + def _prepare_kernel_bootstrap_external_manifests(self): + """ + Split distfiles between init image and external raw container. + """ + # Keep the early builder image small: include only sources needed + # before improve: import_payload runs, so external.img is the primary + # carrier for the remaining distfiles. self.bootstrap_source_manifest = self.get_source_manifest( stop_before_improve="import_payload", build_guix_also=False @@ -75,7 +94,7 @@ class Generator(): if self.bootstrap_source_manifest == full_manifest: raise ValueError("steps/manifest must include `improve: import_payload` in kernel-bootstrap mode.") bootstrap_set = set(self.bootstrap_source_manifest) - self.payload_source_manifest = [entry for entry in full_manifest if entry not in bootstrap_set] + self.external_source_manifest = [entry for entry in full_manifest if entry not in bootstrap_set] def _copy_manifest_distfiles(self, out_dir, manifest): os.makedirs(out_dir, exist_ok=True) @@ -92,7 +111,7 @@ class Generator(): self.download_file(url, directory, file_name) self.check_file(distfile_path, checksum) - def _create_raw_payload_image(self, target_path, manifest): + def _create_raw_container_image(self, target_path, manifest, image_name="external.img"): if manifest is None: manifest = [] @@ -103,31 +122,33 @@ class Generator(): files_by_name = {} for checksum, _, _, file_name in manifest: if file_name in files_by_name and files_by_name[file_name] != checksum: - raise ValueError(f"Conflicting payload file with same name but different hash: {file_name}") + raise ValueError( + f"Conflicting container file with 
same name but different hash: {file_name}" + ) files_by_name[file_name] = checksum - payload_path = os.path.join(target_path, "payload.img") + container_path = os.path.join(target_path, image_name) ordered_names = sorted(files_by_name.keys()) - with open(payload_path, "wb") as payload: - payload.write(self.payload_magic) - payload.write(struct.pack(" 0xFFFFFFFF: - raise ValueError(f"Payload file name too long: {file_name}") + raise ValueError(f"Container file name too long: {file_name}") src_path = os.path.join(self.distfiles_dir, file_name) file_size = os.path.getsize(src_path) if file_size > 0xFFFFFFFF: - raise ValueError(f"Payload file too large for raw container format: {file_name}") + raise ValueError(f"Container file too large for raw container format: {file_name}") - payload.write(struct.pack("