MWE 32 - PREGO comparison on synthetic PoreSpy blobs¶

Rendered output needs refresh

The source notebook has been updated to use spherical seed search, level-queue growth, and the shared hagen_poiseuille conductance model for all backends. Re-run the notebook and refresh this report before using the rendered figures or tables as current benchmark evidence.

This notebook compares extraction backends on synthetic 256^3 PoreSpy blobs volumes. It sends the same binary images through three extraction backends:

PoreSpy snow2, the established watershed/SNOW-style reference path in voids
prego, the native PREGO-style seed-based region-growing backend
native_maximal_ball, a separate native extraction reference already used elsewhere in this repository

Scientific scope and assumptions:

the images are synthetic binary blobs, not scanner-derived segmentations
the notebook compares extracted-network behavior, not direct image physics
the same voids hagen_poiseuille conductance model and boundary conditions are used after extraction, so permeability differences mostly reflect extracted topology and geometry
the PREGO implementation follows the paper's published algorithmic description, but it is not a bitwise reproduction of the authors' code
256^3 cases are large enough to expose runtime differences and give more meaningful network-size distributions, but they are still notebook-scale examples rather than a full scaling benchmark

from __future__ import annotations

import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import porespy as ps

try:
    from IPython.display import display
except ImportError:  # pragma: no cover - notebook convenience fallback
    display = print

from voids.graph.metrics import coordination_numbers
from voids.image import construct_spanning_network, has_spanning_cluster
from voids.physics.petrophysics import absolute_porosity, effective_porosity
from voids.physics.singlephase import (
    FluidSinglePhase,
    PressureBC,
    SinglePhaseOptions,
    solve,
)


def _find_project_root() -> Path:
    cwd = Path.cwd().resolve()
    for candidate in (cwd, *cwd.parents):
        if (candidate / "mkdocs.yml").exists() and (candidate / "notebooks").exists():
            return candidate
    return cwd


def _finite_median(values: np.ndarray) -> float:
    finite = np.asarray(values, dtype=float)
    finite = finite[np.isfinite(finite)]
    if finite.size == 0:
        return float("nan")
    return float(np.median(finite))


def _finite_mean(values: np.ndarray) -> float:
    finite = np.asarray(values, dtype=float)
    finite = finite[np.isfinite(finite)]
    if finite.size == 0:
        return float("nan")
    return float(np.mean(finite))


def _diameter_array(container: dict[str, np.ndarray]) -> np.ndarray:
    if "diameter_inscribed" in container:
        return np.asarray(container["diameter_inscribed"], dtype=float)
    if "diameter_equivalent" in container:
        return np.asarray(container["diameter_equivalent"], dtype=float)
    if "radius_inscribed" in container:
        return 2.0 * np.asarray(container["radius_inscribed"], dtype=float)
    return np.asarray([], dtype=float)

project_root = _find_project_root()
output_dir = (
    project_root / "notebooks" / "outputs" / "32_mwe_prego_blobs_backend_comparison"
)
output_dir.mkdir(parents=True, exist_ok=True)

flow_axis = "x"
axis_index = 0
voxel_size = 2.0e-6
fluid = FluidSinglePhase(viscosity=1.0e-3)
bc = PressureBC("inlet_xmin", "outlet_xmax", pin=1.0, pout=0.0)
options = SinglePhaseOptions(conductance_model="hagen_poiseuille", solver="direct")

sample_shape = (256, 256, 256)
case_specs = [
    {
        "case": "phi032_b1",
        "shape": sample_shape,
        "porosity": 0.32,
        "blobiness": 1,
        "seed_start": 100,
    },
    {
        "case": "phi040_b1",
        "shape": sample_shape,
        "porosity": 0.40,
        "blobiness": 1,
        "seed_start": 400,
    },
    {
        "case": "phi045_b1",
        "shape": sample_shape,
        "porosity": 0.45,
        "blobiness": 1,
        "seed_start": 500,
    },
]

backend_specs = [
    {
        "backend": "porespy",
        "label": "PoreSpy snow2",
        "extraction_kwargs": {"sigma": 0.4, "r_max": 4},
    },
    {
        "backend": "prego",
        "label": "PREGO",
        "extraction_kwargs": {
            "settings": {
                "r_max": 4,
                "sigma": 0.4,
                "peak_footprint": "sphere",
                "growth_mode": "level_queue",
                "distance_map_backend": "scipy",
            },
        },
    },
    {
        "backend": "native_maximal_ball",
        "label": "Native maximal-ball",
        "extraction_kwargs": {
            "distance_map_backend": "scipy",
            "apply_boundary_clipping": False,
            "settings": {"minimal_pore_radius_voxels": 1.0},
        },
    },
]

case_specs

[{'case': 'phi032_b1',
  'shape': (256, 256, 256),
  'porosity': 0.32,
  'blobiness': 1,
  'seed_start': 100},
 {'case': 'phi040_b1',
  'shape': (256, 256, 256),
  'porosity': 0.4,
  'blobiness': 1,
  'seed_start': 400},
 {'case': 'phi045_b1',
  'shape': (256, 256, 256),
  'porosity': 0.45,
  'blobiness': 1,
  'seed_start': 500}]

Generate spanning PoreSpy blob images¶

PoreSpy blobs images provide controlled, reproducible synthetic pore-space morphologies. Here each case is generated directly from porespy.generators.blobs, then accepted only if it has a face-connected void path across the selected flow axis. The PREGO branch uses spherical seed search and level-queue region growth; the faster cubic seed search and stamped-sphere growth path remain available as explicit opt-in settings for runtime-focused comparisons.

images: dict[str, np.ndarray] = {}
image_rows: list[dict[str, object]] = []

for spec in case_specs:
    accepted_seed: int | None = None
    accepted_image: np.ndarray | None = None
    for seed in range(int(spec["seed_start"]), int(spec["seed_start"]) + 50):
        candidate = ps.generators.blobs(
            shape=list(spec["shape"]),
            porosity=float(spec["porosity"]),
            blobiness=int(spec["blobiness"]),
            seed=seed,
        )
        if has_spanning_cluster(candidate, axis_index=axis_index):
            accepted_seed = seed
            accepted_image = np.asarray(candidate, dtype=bool)
            break
    if accepted_image is None or accepted_seed is None:
        raise RuntimeError(
            f"Could not generate a spanning blob image for {spec['case']}"
        )

    case_name = str(spec["case"])
    images[case_name] = accepted_image
    image_rows.append(
        {
            **spec,
            "seed_used": accepted_seed,
            "phi_image": float(accepted_image.mean()),
            "spans_x": has_spanning_cluster(accepted_image, axis_index=0),
            "spans_y": has_spanning_cluster(accepted_image, axis_index=1),
            "spans_z": has_spanning_cluster(accepted_image, axis_index=2),
        }
    )

image_df = pd.DataFrame(image_rows)
image_df

	case	shape	porosity	blobiness	seed_start	seed_used	phi_image	spans_x	spans_y	spans_z
0	phi032_b1	(256, 256, 256)	0.32	1	100	100	0.32	True	True	True
1	phi040_b1	(256, 256, 256)	0.40	1	400	400	0.40	True	True	True
2	phi045_b1	(256, 256, 256)	0.45	1	500	500	0.45	True	True	True

Extract, solve, and summarize¶

The metrics below follow the paper's comparison categories: pore count, throat count, pore/throat size, coordination number, porosity, permeability, and CPU time. The reported CPU time is notebook-level wall time for extraction or solve on these small cases; it should not be interpreted as the large-volume scaling result from the paper. All extraction backends are solved with the same hagen_poiseuille conductance model, so this comparison isolates extraction behavior rather than mixing conductance closures. If an extracted network lacks pore-throat-pore conduit lengths, hagen_poiseuille falls back to the one-throat circular Poiseuille law.

rows: list[dict[str, object]] = []
constructions: dict[tuple[str, str], object] = {}
solve_results: dict[tuple[str, str], object] = {}

for case_name, image in images.items():
    for backend_spec in backend_specs:
        extraction_start = time.perf_counter()
        construction = construct_spanning_network(
            backend=str(backend_spec["backend"]),
            phases=image.astype(int),
            voxel_size=voxel_size,
            flow_axis=flow_axis,
            extraction_kwargs=dict(backend_spec["extraction_kwargs"]),
            provenance_notes={
                "benchmark_kind": "prego_blobs_backend_comparison",
                "synthetic_generator": "porespy.generators.blobs",
                "case": case_name,
            },
        )
        extraction_seconds = time.perf_counter() - extraction_start

        solve_start = time.perf_counter()
        result = solve(
            construction.net,
            fluid=fluid,
            bc=bc,
            axis=flow_axis,
            options=options,
        )
        solve_seconds = time.perf_counter() - solve_start

        net = construction.net
        pore_diameter = _diameter_array(net.pore)
        throat_diameter = _diameter_array(net.throat)
        coordination = coordination_numbers(net)

        rows.append(
            {
                "case": case_name,
                "backend": backend_spec["backend"],
                "backend_label": backend_spec["label"],
                "backend_version": construction.backend_version,
                "conductance_model": options.conductance_model,
                "shape": str(image.shape),
                "phi_image": float(image.mean()),
                "phi_abs": float(absolute_porosity(net)),
                "phi_eff_x": float(effective_porosity(net, axis=flow_axis)),
                "Np": int(net.Np),
                "Nt": int(net.Nt),
                "mean_coordination": _finite_mean(coordination),
                "median_pore_diameter_m": _finite_median(pore_diameter),
                "median_throat_diameter_m": _finite_median(throat_diameter),
                "Kx_m2": float(result.permeability[flow_axis]),
                "Q_m3_s": float(result.total_flow_rate),
                "mass_balance_error": float(result.mass_balance_error),
                "extraction_seconds": float(extraction_seconds),
                "solve_seconds": float(solve_seconds),
            }
        )
        constructions[(case_name, str(backend_spec["backend"]))] = construction
        solve_results[(case_name, str(backend_spec["backend"]))] = result

summary_df = pd.DataFrame(rows)

reference = summary_df.loc[
    summary_df["backend"] == "porespy", ["case", "Kx_m2", "Np", "Nt"]
]
reference = reference.rename(
    columns={"Kx_m2": "Kx_snow2_m2", "Np": "Np_snow2", "Nt": "Nt_snow2"}
)
summary_df = summary_df.merge(reference, on="case", how="left")
summary_df["Kx_ratio_to_snow2"] = summary_df["Kx_m2"] / summary_df["Kx_snow2_m2"]
summary_df["Np_ratio_to_snow2"] = summary_df["Np"] / summary_df["Np_snow2"]
summary_df["Nt_ratio_to_snow2"] = summary_df["Nt"] / summary_df["Nt_snow2"]

display_columns = [
    "case",
    "backend_label",
    "phi_image",
    "phi_abs",
    "phi_eff_x",
    "Np",
    "Nt",
    "mean_coordination",
    "median_pore_diameter_m",
    "median_throat_diameter_m",
    "Kx_m2",
    "Kx_ratio_to_snow2",
    "extraction_seconds",
]
summary_df.loc[:, display_columns]

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.

	case	backend_label	phi_image	phi_abs	phi_eff_x	Np	Nt	mean_coordination	median_pore_diameter_m	median_throat_diameter_m	Kx_m2	Kx_ratio_to_snow2	extraction_seconds
0	phi032_b1	PoreSpy snow2	0.32	0.334429	0.334429	1237	1998	3.230396	0.000020	0.000022	1.048927e-12	1.000000	10.602591
1	phi032_b1	PREGO	0.32	0.317970	0.317970	769	1746	4.540962	0.000029	0.000022	1.428455e-12	1.361825	7.236538
2	phi032_b1	Native maximal-ball	0.32	0.317970	0.317970	650	1270	3.907692	0.000032	0.000018	3.608839e-12	3.440506	12.486288
3	phi040_b1	PoreSpy snow2	0.40	0.419214	0.419214	1401	2568	3.665953	0.000020	0.000023	2.518224e-12	1.000000	9.952713
4	phi040_b1	PREGO	0.40	0.398919	0.398919	861	2350	5.458769	0.000030	0.000023	3.021492e-12	1.199850	7.537757
5	phi040_b1	Native maximal-ball	0.40	0.398919	0.398919	688	1578	4.587209	0.000035	0.000019	8.241014e-12	3.272550	16.117709
6	phi045_b1	PoreSpy snow2	0.45	0.472200	0.472200	1442	2834	3.930652	0.000020	0.000025	4.326223e-12	1.000000	10.299894
7	phi045_b1	PREGO	0.45	0.449695	0.449695	858	2601	6.062937	0.000032	0.000024	4.936248e-12	1.141006	8.180918
8	phi045_b1	Native maximal-ball	0.45	0.449695	0.449695	645	1669	5.175194	0.000038	0.000020	1.583461e-11	3.660146	17.867414

Representative segmentation input¶

These are the binary images passed to all backends. The extraction backends differ only after this point.

representative_case = "phi040_b1"
representative_image = images[representative_case]
mid = representative_image.shape[0] // 2

fig, ax = plt.subplots(figsize=(5.6, 5.0))
ax.imshow(representative_image[mid, :, :], cmap="gray", origin="lower")
ax.set_title(f"{representative_case}: PoreSpy blobs mid-plane")
ax.set_xlabel("z")
ax.set_ylabel("y")
plt.tight_layout()
input_slice_path = output_dir / "representative_blobs_slice.png"
fig.savefig(input_slice_path, dpi=160, bbox_inches="tight")
plt.show()
print("Saved:", input_slice_path)

png

Saved: /Users/dtvolpatto/Work/voids/notebooks/outputs/32_mwe_prego_blobs_backend_comparison/representative_blobs_slice.png

Backend-level comparisons¶

PREGO in this notebook is allowed to be coarser than snow2: the paper itself reports differences in pore and throat sizes, coordination number, and contact areas. The important scientific question is whether those differences are stable and whether transport-relevant quantities remain acceptable for the intended application.

fig, axes = plt.subplots(1, 3, figsize=(15, 4.4), sharex=True)

for backend_label, group in summary_df.groupby("backend_label", sort=False):
    axes[0].plot(
        group["case"], group["Np"], marker="o", linewidth=2, label=backend_label
    )
    axes[1].plot(
        group["case"], group["Nt"], marker="o", linewidth=2, label=backend_label
    )
    axes[2].plot(
        group["case"], group["Kx_m2"], marker="o", linewidth=2, label=backend_label
    )

axes[0].set_ylabel("Pore count")
axes[1].set_ylabel("Throat count")
axes[2].set_ylabel("Kx [m^2]")
axes[2].set_yscale("log")
for ax in axes:
    ax.set_xlabel("Synthetic blob case")
    ax.grid(alpha=0.3, linestyle=":")
    ax.tick_params(axis="x", rotation=20)
axes[0].legend(fontsize=8)
fig.suptitle("Extraction backend comparison on PoreSpy blobs", fontsize=14)
plt.tight_layout()
comparison_path = output_dir / "backend_counts_and_permeability.png"
fig.savefig(comparison_path, dpi=160, bbox_inches="tight")
plt.show()
print("Saved:", comparison_path)

png

Saved: /Users/dtvolpatto/Work/voids/notebooks/outputs/32_mwe_prego_blobs_backend_comparison/backend_counts_and_permeability.png

fig, axes = plt.subplots(1, 3, figsize=(15, 4.2), sharex=True)

for backend_label, group in summary_df.groupby("backend_label", sort=False):
    axes[0].plot(
        group["case"], group["phi_abs"], marker="o", linewidth=2, label=backend_label
    )
    axes[1].plot(
        group["case"],
        group["mean_coordination"],
        marker="o",
        linewidth=2,
        label=backend_label,
    )
    axes[2].plot(
        group["case"],
        group["extraction_seconds"],
        marker="o",
        linewidth=2,
        label=backend_label,
    )

axes[0].set_ylabel("Absolute porosity [-]")
axes[1].set_ylabel("Mean coordination [-]")
axes[2].set_ylabel("Extraction wall time [s]")
for ax in axes:
    ax.set_xlabel("Synthetic blob case")
    ax.grid(alpha=0.3, linestyle=":")
    ax.tick_params(axis="x", rotation=20)
axes[0].legend(fontsize=8)
fig.suptitle("Structural and runtime diagnostics", fontsize=14)
plt.tight_layout()
diagnostic_path = output_dir / "backend_structural_runtime_diagnostics.png"
fig.savefig(diagnostic_path, dpi=160, bbox_inches="tight")
plt.show()
print("Saved:", diagnostic_path)

png

Saved: /Users/dtvolpatto/Work/voids/notebooks/outputs/32_mwe_prego_blobs_backend_comparison/backend_structural_runtime_diagnostics.png

Representative size distributions¶

The paper compares pore diameter, throat diameter, coordination number, and pore-to-pore spacing. Here we show the first three directly from the canonical voids.Network fields for the representative case.

fig, axes = plt.subplots(1, 3, figsize=(15, 4.2))

for backend_spec in backend_specs:
    backend = str(backend_spec["backend"])
    label = str(backend_spec["label"])
    net = constructions[(representative_case, backend)].net
    pore_diameter_um = 1.0e6 * _diameter_array(net.pore)
    throat_diameter_um = 1.0e6 * _diameter_array(net.throat)
    coordination = coordination_numbers(net)

    if pore_diameter_um.size:
        axes[0].hist(
            pore_diameter_um[np.isfinite(pore_diameter_um)], alpha=0.45, label=label
        )
    if throat_diameter_um.size:
        axes[1].hist(
            throat_diameter_um[np.isfinite(throat_diameter_um)], alpha=0.45, label=label
        )
    if coordination.size:
        axes[2].hist(coordination[np.isfinite(coordination)], alpha=0.45, label=label)

axes[0].set_xlabel("Pore diameter [um]")
axes[1].set_xlabel("Throat diameter [um]")
axes[2].set_xlabel("Coordination number [-]")
for ax in axes:
    ax.set_ylabel("Count")
    ax.grid(alpha=0.3, linestyle=":")
axes[0].legend(fontsize=8)
fig.suptitle(f"{representative_case}: extracted-network distributions", fontsize=14)
plt.tight_layout()
distribution_path = output_dir / "representative_backend_distributions.png"
fig.savefig(distribution_path, dpi=160, bbox_inches="tight")
plt.show()
print("Saved:", distribution_path)

png

Saved: /Users/dtvolpatto/Work/voids/notebooks/outputs/32_mwe_prego_blobs_backend_comparison/representative_backend_distributions.png

Save tabular outputs¶

The CSV files are useful for checking whether future PREGO changes move structural or permeability metrics.

summary_csv = output_dir / "prego_blobs_backend_summary.csv"
image_csv = output_dir / "prego_blobs_image_summary.csv"
summary_df.to_csv(summary_csv, index=False)
image_df.to_csv(image_csv, index=False)
print("Saved:", summary_csv)
print("Saved:", image_csv)

Saved: /Users/dtvolpatto/Work/voids/notebooks/outputs/32_mwe_prego_blobs_backend_comparison/prego_blobs_backend_summary.csv
Saved: /Users/dtvolpatto/Work/voids/notebooks/outputs/32_mwe_prego_blobs_backend_comparison/prego_blobs_image_summary.csv

ratio_table = summary_df.pivot(
    index="case", columns="backend_label", values="Kx_ratio_to_snow2"
)
count_table = summary_df.pivot(
    index="case", columns="backend_label", values="Np_ratio_to_snow2"
)

print("Kx ratios relative to PoreSpy snow2:")
display(ratio_table)

print("Pore-count ratios relative to PoreSpy snow2:")
display(count_table)

max_mass_balance_error = float(summary_df["mass_balance_error"].abs().max())
print(f"Max absolute mass-balance error: {max_mass_balance_error:.3e}")

if max_mass_balance_error < 1.0e-8:
    print(
        "All backend solves satisfy the mass-balance tolerance used in this notebook."
    )
else:
    print(
        "At least one backend solve has a large mass-balance residual; inspect before using Kx."
    )

Kx ratios relative to PoreSpy snow2:

backend_label	Native maximal-ball	PREGO	PoreSpy snow2
case
phi032_b1	3.440506	1.361825	1.0
phi040_b1	3.272550	1.199850	1.0
phi045_b1	3.660146	1.141006	1.0

Pore-count ratios relative to PoreSpy snow2:

backend_label	Native maximal-ball	PREGO	PoreSpy snow2
case
phi032_b1	0.525465	0.621665	1.0
phi040_b1	0.491078	0.614561	1.0
phi045_b1	0.447295	0.595007	1.0

Max absolute mass-balance error: 3.181e-26
All backend solves satisfy the mass-balance tolerance used in this notebook.