Skip to content

MWE 15 - External pnextract / pnflow benchmark against voids

This notebook compares voids against a committed reference dataset produced earlier with the Imperial College pnextract + pnflow workflow. The reference data now lives under examples/data/external_pnflow_benchmark/, so this notebook does not invoke pnextract or pnflow and remains runnable even if those binaries are unavailable in the future.

The committed benchmark bundle includes:

  • the exact binary input volumes used for the benchmark
  • pnflow report files (*_pnflow.prt, *_upscaled.tsv)
  • extracted-network files (*_node*.dat, *_link*.dat) for later inspection

Scientific scope and caveats: - this is an end-to-end workflow comparison, not a solver-only cross-check - any mismatch reflects both extraction differences and constitutive-model differences - the voids side uses snow2 extraction plus the selected conductance model - the external side uses previously saved pnextract geometry plus pnflow's internal single-phase model - the committed input volumes make this benchmark stable against future changes in the synthetic generator implementation - we keep mu = constant here on purpose because the checked pnflow code path uses scalar fluid viscosities rather than a thermodynamic mu(P, T) coupling

from __future__ import annotations

import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from voids.image import extract_spanning_pore_network
from voids.paths import data_path, project_root
from voids.physics.petrophysics import absolute_porosity, effective_porosity
from voids.physics.singlephase import (
    FluidSinglePhase,
    PressureBC,
    SinglePhaseOptions,
    solve,
)


def _require_match(pattern: str, text: str, *, label: str) -> re.Match[str]:
    match = re.search(pattern, text, flags=re.MULTILINE)
    if match is None:
        raise ValueError(f"Could not parse {label!r}")
    return match


def _parse_upscaled_metrics(path: Path, *, prefix: str) -> dict[str, float]:
    text = path.read_text()
    permeability = float(
        _require_match(
            rf"{re.escape(prefix)}_permeability:\s*([0-9.eE+-]+);",
            text,
            label=f"{prefix}_permeability",
        ).group(1)
    )
    porosity = float(
        _require_match(
            rf"{re.escape(prefix)}_porosity:\s*([0-9.eE+-]+);",
            text,
            label=f"{prefix}_porosity",
        ).group(1)
    )
    formation_factor = float(
        _require_match(
            rf"{re.escape(prefix)}_formationfactor:\s*([0-9.eE+-]+);",
            text,
            label=f"{prefix}_formationfactor",
        ).group(1)
    )
    return {
        "k_pnflow": permeability,
        "phi_pnflow": porosity,
        "formation_factor_pnflow": formation_factor,
    }


def _parse_pnflow_prt(path: Path) -> dict[str, float | int]:
    text = path.read_text()
    int_patterns = {
        "pnflow_n_pores": r"Number of pores:\s+([0-9]+)",
        "pnflow_n_throats": r"Number of throats:\s+([0-9]+)",
        "pnflow_n_inlet_connections": r"Number of inlet connections:\s+([0-9]+)",
        "pnflow_n_outlet_connections": r"Number of outlet connections:\s+([0-9]+)",
        "pnflow_n_isolated_elements": r"Number of isolated elements:\s+([0-9]+)",
    }
    float_patterns = {
        "phi_pnflow_prt": r"Total porosity:\s+([0-9.eE+-]+)",
        "k_pnflow_prt": r"Absolute permeability:\s+([0-9.eE+-]+)",
    }
    parsed: dict[str, float | int] = {}
    for key, pattern in int_patterns.items():
        value = _require_match(pattern, text, label=key).group(1)
        parsed[key] = int(value)
    for key, pattern in float_patterns.items():
        value = _require_match(pattern, text, label=key).group(1)
        parsed[key] = float(value)
    return parsed


def _load_reference_case(
    reference_root: Path, row: pd.Series
) -> tuple[np.ndarray, dict[str, object]]:
    case = str(row["case"])
    volume = np.load(reference_root / str(row["volume_file"]))
    prt_path = reference_root / str(row["pnflow_prt_file"])
    upscaled_path = reference_root / str(row["pnflow_upscaled_file"])
    metrics: dict[str, object] = {
        **_parse_upscaled_metrics(upscaled_path, prefix=case),
        **_parse_pnflow_prt(prt_path),
        "reference_case_dir": str((reference_root / case).relative_to(project_root())),
    }
    return np.asarray(volume, dtype=bool), metrics


def _run_voids_case(
    volume: np.ndarray,
    *,
    voxel_size: float,
    flow_axis: str,
    fluid: FluidSinglePhase,
    options: SinglePhaseOptions,
) -> dict[str, float | int]:
    extract = extract_spanning_pore_network(
        volume.astype(int),
        voxel_size=voxel_size,
        flow_axis=flow_axis,
        provenance_notes={"benchmark_kind": "external_pnflow_reference"},
    )
    bc = PressureBC(
        f"inlet_{flow_axis}min",
        f"outlet_{flow_axis}max",
        pin=2.0e5,
        pout=0.0,
    )
    result = solve(
        extract.net,
        fluid=fluid,
        bc=bc,
        axis=flow_axis,
        options=options,
    )
    return {
        "phi_image": float(np.asarray(volume, dtype=bool).mean()),
        "phi_abs_voids": float(absolute_porosity(extract.net)),
        "phi_eff_voids": float(effective_porosity(extract.net, axis=flow_axis)),
        "Np_voids": int(extract.net.Np),
        "Nt_voids": int(extract.net.Nt),
        "k_voids": float(result.permeability[flow_axis]),
        "Q_voids": float(result.total_flow_rate),
    }


def _make_permeability_comparison_figure(
    summary_frame: pd.DataFrame,
    *,
    output_path: Path,
) -> None:
    fig, axes = plt.subplots(1, 2, figsize=(13, 4.5))

    kmin = float(min(summary_frame["k_voids"].min(), summary_frame["k_pnflow"].min()))
    kmax = float(max(summary_frame["k_voids"].max(), summary_frame["k_pnflow"].max()))
    pad = 0.05 * max(kmax - kmin, 1.0e-30)

    axes[0].scatter(
        summary_frame["k_pnflow"], summary_frame["k_voids"], s=65, color="tab:blue"
    )
    axes[0].plot(
        [kmin - pad, kmax + pad],
        [kmin - pad, kmax + pad],
        color="black",
        lw=1.2,
        linestyle="--",
    )
    for row in summary_frame.itertuples(index=False):
        axes[0].annotate(
            row.case,
            (row.k_pnflow, row.k_voids),
            textcoords="offset points",
            xytext=(5, 4),
            fontsize=8,
        )
    axes[0].set_xlabel("pnflow permeability [m$^2$]")
    axes[0].set_ylabel("voids permeability [m$^2$]")
    axes[0].set_title("Permeability scatter")

    axes[1].bar(
        summary_frame["case"], 100.0 * summary_frame["k_rel_diff"], color="tab:orange"
    )
    axes[1].set_xlabel("case")
    axes[1].set_ylabel("relative difference [%]")
    axes[1].set_title("Per-case permeability mismatch")
    axes[1].tick_params(axis="x", rotation=25)

    fig.suptitle("External pnflow benchmark", fontsize=13)
    plt.tight_layout()
    fig.savefig(output_path, dpi=160, bbox_inches="tight")
    plt.show()


def _make_porosity_trend_figure(
    summary_frame: pd.DataFrame,
    *,
    output_path: Path,
) -> None:
    plot_df = summary_frame.sort_values(["phi_image", "blobiness"]).copy()
    fig, ax = plt.subplots(figsize=(7.5, 4.8))

    ax.semilogy(
        plot_df["phi_image"],
        plot_df["k_pnflow"],
        marker="o",
        lw=1.5,
        label="pnflow",
    )
    ax.semilogy(
        plot_df["phi_image"],
        plot_df["k_voids"],
        marker="s",
        lw=1.5,
        label="voids",
    )
    ax.set_xlabel("image porosity [-]")
    ax.set_ylabel("permeability [m$^2$]")
    ax.set_title("Permeability trend with porosity")
    ax.grid(True, which="both", alpha=0.3)
    ax.legend()

    plt.tight_layout()
    fig.savefig(output_path, dpi=160, bbox_inches="tight")
    plt.show()


examples_data = data_path()
reference_root = examples_data / "external_pnflow_benchmark"
manifest_path = reference_root / "manifest.csv"
report_dir = project_root() / "docs" / "assets" / "verification"
report_dir.mkdir(parents=True, exist_ok=True)
report_csv = report_dir / "pnflow_5_case_results.csv"
comparison_figure_path = report_dir / "pnflow_permeability_scatter_and_error.png"
porosity_figure_path = report_dir / "pnflow_porosity_vs_permeability.png"

if not manifest_path.exists():
    raise FileNotFoundError(
        "Committed reference data not found. Expected " f"{manifest_path} to exist."
    )

fluid = FluidSinglePhase(viscosity=1.0e-3)
options = SinglePhaseOptions(
    conductance_model="valvatne_blunt",
    solver="direct",
)
manifest_df = pd.read_csv(manifest_path)

Reference dataset provenance

The table below is the committed benchmark manifest. The notebook loads the exact saved volumes and previously generated pnflow reports from examples/data/external_pnflow_benchmark/.

manifest_df
case shape porosity_target blobiness seed_start seed_used flow_axis voxel_size_m volume_file pnflow_prt_file pnflow_upscaled_file
0 phi032_b14 32x32x32 0.32 1.4 401 401 x 0.000002 phi032_b14/void_volume.npy phi032_b14/phi032_b14_pnflow.prt phi032_b14/phi032_b14_upscaled.tsv
1 phi035_b16 32x32x32 0.35 1.6 501 501 x 0.000002 phi035_b16/void_volume.npy phi035_b16/phi035_b16_pnflow.prt phi035_b16/phi035_b16_upscaled.tsv
2 phi038_b18 32x32x32 0.38 1.8 601 601 x 0.000002 phi038_b18/void_volume.npy phi038_b18/phi038_b18_pnflow.prt phi038_b18/phi038_b18_upscaled.tsv
3 phi040_b18 32x32x32 0.40 1.8 901 901 x 0.000002 phi040_b18/void_volume.npy phi040_b18/phi040_b18_pnflow.prt phi040_b18/phi040_b18_upscaled.tsv
4 phi041_b20 32x32x32 0.41 2.0 701 701 x 0.000002 phi041_b20/void_volume.npy phi041_b20/phi041_b20_pnflow.prt phi041_b20/phi041_b20_upscaled.tsv

Representative benchmark input

The committed input volumes are binary segmented images (True = void) that were used in the original external pnextract / pnflow run.

representative_row = manifest_df.loc[manifest_df["case"] == "phi038_b18"].iloc[0]
representative_volume = np.load(
    reference_root / representative_row["volume_file"]
).astype(bool)
mid = representative_volume.shape[0] // 2

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
axes[0].imshow(representative_volume[mid, :, :], cmap="gray", origin="lower")
axes[0].set_title("YZ slice")
axes[0].set_xlabel("z")
axes[0].set_ylabel("y")

axes[1].imshow(representative_volume[:, mid, :], cmap="gray", origin="lower")
axes[1].set_title("XZ slice")
axes[1].set_xlabel("z")
axes[1].set_ylabel("x")

axes[2].imshow(representative_volume[:, :, mid], cmap="gray", origin="lower")
axes[2].set_title("XY slice")
axes[2].set_xlabel("y")
axes[2].set_ylabel("x")

fig.suptitle(
    f"Representative committed input: {representative_row['case']}", fontsize=13
)
plt.tight_layout()
plt.show()

print("shape =", representative_volume.shape)
print("void fraction =", float(representative_volume.mean()))

png

shape = (32, 32, 32)
void fraction = 0.3800048828125

Run voids against the committed external references

For each case, the notebook loads the saved external outputs, runs the current voids workflow on the same exact volume, and then compares permeability, porosity, and network size.

rows: list[dict[str, object]] = []

for row in manifest_df.itertuples(index=False):
    row_series = pd.Series(row._asdict())
    volume, pnflow_metrics = _load_reference_case(reference_root, row_series)
    voids_metrics = _run_voids_case(
        volume,
        voxel_size=float(row_series["voxel_size_m"]),
        flow_axis=str(row_series["flow_axis"]),
        fluid=fluid,
        options=options,
    )
    rows.append(
        {
            **row._asdict(),
            **pnflow_metrics,
            **voids_metrics,
        }
    )

summary_df = pd.DataFrame(rows)
summary_df["k_ratio_voids_to_pnflow"] = summary_df["k_voids"] / summary_df["k_pnflow"]
summary_df["k_rel_diff"] = np.abs(
    summary_df["k_voids"] - summary_df["k_pnflow"]
) / np.maximum(
    np.maximum(np.abs(summary_df["k_voids"]), np.abs(summary_df["k_pnflow"])),
    1.0e-30,
)
summary_df["phi_abs_diff"] = summary_df["phi_abs_voids"] - summary_df["phi_pnflow"]
summary_df["np_rel_diff"] = np.abs(
    summary_df["Np_voids"] - summary_df["pnflow_n_pores"]
) / np.maximum(summary_df["pnflow_n_pores"], 1.0)
summary_df["nt_rel_diff"] = np.abs(
    summary_df["Nt_voids"] - summary_df["pnflow_n_throats"]
) / np.maximum(summary_df["pnflow_n_throats"], 1.0)

display_columns = [
    "case",
    "seed_used",
    "porosity_target",
    "blobiness",
    "phi_image",
    "phi_abs_voids",
    "phi_pnflow",
    "Np_voids",
    "pnflow_n_pores",
    "Nt_voids",
    "pnflow_n_throats",
    "k_voids",
    "k_pnflow",
    "k_rel_diff",
]
summary_df.loc[:, display_columns]
case seed_used porosity_target blobiness phi_image phi_abs_voids phi_pnflow Np_voids pnflow_n_pores Nt_voids pnflow_n_throats k_voids k_pnflow k_rel_diff
0 phi032_b14 401 0.32 1.4 0.320007 0.320190 0.275787 53 80 150 202 6.096766e-15 9.751930e-15 0.374814
1 phi035_b16 501 0.35 1.6 0.350006 0.349762 0.295502 26 71 79 198 1.702232e-14 1.331850e-14 0.217586
2 phi038_b18 601 0.38 1.8 0.380005 0.380005 0.316315 36 64 106 180 2.091703e-14 1.199270e-14 0.426654
3 phi040_b18 901 0.40 1.8 0.399994 0.399994 0.343414 45 83 134 280 2.033442e-14 1.575700e-14 0.225107
4 phi041_b20 701 0.41 2.0 0.410004 0.410004 0.354004 37 72 106 247 3.925603e-14 1.436830e-14 0.633985

Save derived comparison artifacts

The committed external reference data under examples/data/ are treated as inputs. The CSV and figures written here remain derived benchmark reports for documentation and review.

summary_df.to_csv(report_csv, index=False)
print("Saved:", report_csv)
Saved: /Users/dtvolpatto/Work/voids/docs/assets/verification/pnflow_5_case_results.csv
_make_permeability_comparison_figure(
    summary_df,
    output_path=comparison_figure_path,
)
print("Saved:", comparison_figure_path)

png

Saved: /Users/dtvolpatto/Work/voids/docs/assets/verification/pnflow_permeability_scatter_and_error.png
_make_porosity_trend_figure(
    summary_df,
    output_path=porosity_figure_path,
)
print("Saved:", porosity_figure_path)

png

Saved: /Users/dtvolpatto/Work/voids/docs/assets/verification/pnflow_porosity_vs_permeability.png

Interpretation

The permeability comparison is the main benchmark output. The porosity and network-size columns help diagnose whether a mismatch comes primarily from extraction topology, pore/throat geometry, or the transport model itself.

Points to keep in mind when interpreting the numbers: - pnflow reports porosity after its own extracted-network construction, not the binary-image void fraction - voids absolute porosity is computed from the pruned spanning network and may differ even when the original binary volume is identical - a close permeability match would be encouraging, but it would not prove geometric equivalence - a large mismatch is not automatically a bug in voids; it may reflect different extraction and conductance assumptions in the two workflows