MWE 15 - External pnextract / pnflow benchmark against voids¶
This notebook compares voids against a committed reference dataset produced earlier with the
Imperial College pnextract + pnflow workflow. The reference data now lives under
examples/data/external_pnflow_benchmark/, so this notebook does not invoke pnextract or
pnflow and remains runnable even if those binaries are unavailable in the future.
The committed benchmark bundle includes:
- the exact binary input volumes used for the benchmark
pnflowreport files (*_pnflow.prt,*_upscaled.tsv)- extracted-network files (
*_node*.dat,*_link*.dat) for later inspection
Scientific scope and caveats:
- this is an end-to-end workflow comparison, not a solver-only cross-check
- any mismatch reflects both extraction differences and constitutive-model differences
- the voids side uses snow2 extraction plus the selected conductance model
- the external side uses previously saved pnextract geometry plus pnflow's internal
single-phase model
- the committed input volumes make this benchmark stable against future changes in the synthetic
generator implementation
- we keep mu = constant here on purpose because the checked pnflow code path uses scalar fluid
viscosities rather than a thermodynamic mu(P, T) coupling
from __future__ import annotations
import re
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from voids.image import extract_spanning_pore_network
from voids.paths import data_path, project_root
from voids.physics.petrophysics import absolute_porosity, effective_porosity
from voids.physics.singlephase import (
FluidSinglePhase,
PressureBC,
SinglePhaseOptions,
solve,
)
def _require_match(pattern: str, text: str, *, label: str) -> re.Match[str]:
match = re.search(pattern, text, flags=re.MULTILINE)
if match is None:
raise ValueError(f"Could not parse {label!r}")
return match
def _parse_upscaled_metrics(path: Path, *, prefix: str) -> dict[str, float]:
text = path.read_text()
permeability = float(
_require_match(
rf"{re.escape(prefix)}_permeability:\s*([0-9.eE+-]+);",
text,
label=f"{prefix}_permeability",
).group(1)
)
porosity = float(
_require_match(
rf"{re.escape(prefix)}_porosity:\s*([0-9.eE+-]+);",
text,
label=f"{prefix}_porosity",
).group(1)
)
formation_factor = float(
_require_match(
rf"{re.escape(prefix)}_formationfactor:\s*([0-9.eE+-]+);",
text,
label=f"{prefix}_formationfactor",
).group(1)
)
return {
"k_pnflow": permeability,
"phi_pnflow": porosity,
"formation_factor_pnflow": formation_factor,
}
def _parse_pnflow_prt(path: Path) -> dict[str, float | int]:
text = path.read_text()
int_patterns = {
"pnflow_n_pores": r"Number of pores:\s+([0-9]+)",
"pnflow_n_throats": r"Number of throats:\s+([0-9]+)",
"pnflow_n_inlet_connections": r"Number of inlet connections:\s+([0-9]+)",
"pnflow_n_outlet_connections": r"Number of outlet connections:\s+([0-9]+)",
"pnflow_n_isolated_elements": r"Number of isolated elements:\s+([0-9]+)",
}
float_patterns = {
"phi_pnflow_prt": r"Total porosity:\s+([0-9.eE+-]+)",
"k_pnflow_prt": r"Absolute permeability:\s+([0-9.eE+-]+)",
}
parsed: dict[str, float | int] = {}
for key, pattern in int_patterns.items():
value = _require_match(pattern, text, label=key).group(1)
parsed[key] = int(value)
for key, pattern in float_patterns.items():
value = _require_match(pattern, text, label=key).group(1)
parsed[key] = float(value)
return parsed
def _load_reference_case(
reference_root: Path, row: pd.Series
) -> tuple[np.ndarray, dict[str, object]]:
case = str(row["case"])
volume = np.load(reference_root / str(row["volume_file"]))
prt_path = reference_root / str(row["pnflow_prt_file"])
upscaled_path = reference_root / str(row["pnflow_upscaled_file"])
metrics: dict[str, object] = {
**_parse_upscaled_metrics(upscaled_path, prefix=case),
**_parse_pnflow_prt(prt_path),
"reference_case_dir": str((reference_root / case).relative_to(project_root())),
}
return np.asarray(volume, dtype=bool), metrics
def _run_voids_case(
volume: np.ndarray,
*,
voxel_size: float,
flow_axis: str,
fluid: FluidSinglePhase,
options: SinglePhaseOptions,
) -> dict[str, float | int]:
extract = extract_spanning_pore_network(
volume.astype(int),
voxel_size=voxel_size,
flow_axis=flow_axis,
provenance_notes={"benchmark_kind": "external_pnflow_reference"},
)
bc = PressureBC(
f"inlet_{flow_axis}min",
f"outlet_{flow_axis}max",
pin=2.0e5,
pout=0.0,
)
result = solve(
extract.net,
fluid=fluid,
bc=bc,
axis=flow_axis,
options=options,
)
return {
"phi_image": float(np.asarray(volume, dtype=bool).mean()),
"phi_abs_voids": float(absolute_porosity(extract.net)),
"phi_eff_voids": float(effective_porosity(extract.net, axis=flow_axis)),
"Np_voids": int(extract.net.Np),
"Nt_voids": int(extract.net.Nt),
"k_voids": float(result.permeability[flow_axis]),
"Q_voids": float(result.total_flow_rate),
}
def _make_permeability_comparison_figure(
summary_frame: pd.DataFrame,
*,
output_path: Path,
) -> None:
fig, axes = plt.subplots(1, 2, figsize=(13, 4.5))
kmin = float(min(summary_frame["k_voids"].min(), summary_frame["k_pnflow"].min()))
kmax = float(max(summary_frame["k_voids"].max(), summary_frame["k_pnflow"].max()))
pad = 0.05 * max(kmax - kmin, 1.0e-30)
axes[0].scatter(
summary_frame["k_pnflow"], summary_frame["k_voids"], s=65, color="tab:blue"
)
axes[0].plot(
[kmin - pad, kmax + pad],
[kmin - pad, kmax + pad],
color="black",
lw=1.2,
linestyle="--",
)
for row in summary_frame.itertuples(index=False):
axes[0].annotate(
row.case,
(row.k_pnflow, row.k_voids),
textcoords="offset points",
xytext=(5, 4),
fontsize=8,
)
axes[0].set_xlabel("pnflow permeability [m$^2$]")
axes[0].set_ylabel("voids permeability [m$^2$]")
axes[0].set_title("Permeability scatter")
axes[1].bar(
summary_frame["case"], 100.0 * summary_frame["k_rel_diff"], color="tab:orange"
)
axes[1].set_xlabel("case")
axes[1].set_ylabel("relative difference [%]")
axes[1].set_title("Per-case permeability mismatch")
axes[1].tick_params(axis="x", rotation=25)
fig.suptitle("External pnflow benchmark", fontsize=13)
plt.tight_layout()
fig.savefig(output_path, dpi=160, bbox_inches="tight")
plt.show()
def _make_porosity_trend_figure(
summary_frame: pd.DataFrame,
*,
output_path: Path,
) -> None:
plot_df = summary_frame.sort_values(["phi_image", "blobiness"]).copy()
fig, ax = plt.subplots(figsize=(7.5, 4.8))
ax.semilogy(
plot_df["phi_image"],
plot_df["k_pnflow"],
marker="o",
lw=1.5,
label="pnflow",
)
ax.semilogy(
plot_df["phi_image"],
plot_df["k_voids"],
marker="s",
lw=1.5,
label="voids",
)
ax.set_xlabel("image porosity [-]")
ax.set_ylabel("permeability [m$^2$]")
ax.set_title("Permeability trend with porosity")
ax.grid(True, which="both", alpha=0.3)
ax.legend()
plt.tight_layout()
fig.savefig(output_path, dpi=160, bbox_inches="tight")
plt.show()
examples_data = data_path()
reference_root = examples_data / "external_pnflow_benchmark"
manifest_path = reference_root / "manifest.csv"
report_dir = project_root() / "docs" / "assets" / "verification"
report_dir.mkdir(parents=True, exist_ok=True)
report_csv = report_dir / "pnflow_5_case_results.csv"
comparison_figure_path = report_dir / "pnflow_permeability_scatter_and_error.png"
porosity_figure_path = report_dir / "pnflow_porosity_vs_permeability.png"
if not manifest_path.exists():
raise FileNotFoundError(
"Committed reference data not found. Expected " f"{manifest_path} to exist."
)
fluid = FluidSinglePhase(viscosity=1.0e-3)
options = SinglePhaseOptions(
conductance_model="valvatne_blunt",
solver="direct",
)
manifest_df = pd.read_csv(manifest_path)
Reference dataset provenance¶
The table below is the committed benchmark manifest. The notebook loads the exact saved volumes
and previously generated pnflow reports from examples/data/external_pnflow_benchmark/.
| case | shape | porosity_target | blobiness | seed_start | seed_used | flow_axis | voxel_size_m | volume_file | pnflow_prt_file | pnflow_upscaled_file | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | phi032_b14 | 32x32x32 | 0.32 | 1.4 | 401 | 401 | x | 0.000002 | phi032_b14/void_volume.npy | phi032_b14/phi032_b14_pnflow.prt | phi032_b14/phi032_b14_upscaled.tsv |
| 1 | phi035_b16 | 32x32x32 | 0.35 | 1.6 | 501 | 501 | x | 0.000002 | phi035_b16/void_volume.npy | phi035_b16/phi035_b16_pnflow.prt | phi035_b16/phi035_b16_upscaled.tsv |
| 2 | phi038_b18 | 32x32x32 | 0.38 | 1.8 | 601 | 601 | x | 0.000002 | phi038_b18/void_volume.npy | phi038_b18/phi038_b18_pnflow.prt | phi038_b18/phi038_b18_upscaled.tsv |
| 3 | phi040_b18 | 32x32x32 | 0.40 | 1.8 | 901 | 901 | x | 0.000002 | phi040_b18/void_volume.npy | phi040_b18/phi040_b18_pnflow.prt | phi040_b18/phi040_b18_upscaled.tsv |
| 4 | phi041_b20 | 32x32x32 | 0.41 | 2.0 | 701 | 701 | x | 0.000002 | phi041_b20/void_volume.npy | phi041_b20/phi041_b20_pnflow.prt | phi041_b20/phi041_b20_upscaled.tsv |
Representative benchmark input¶
The committed input volumes are binary segmented images (True = void) that were used in the
original external pnextract / pnflow run.
representative_row = manifest_df.loc[manifest_df["case"] == "phi038_b18"].iloc[0]
representative_volume = np.load(
reference_root / representative_row["volume_file"]
).astype(bool)
mid = representative_volume.shape[0] // 2
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
axes[0].imshow(representative_volume[mid, :, :], cmap="gray", origin="lower")
axes[0].set_title("YZ slice")
axes[0].set_xlabel("z")
axes[0].set_ylabel("y")
axes[1].imshow(representative_volume[:, mid, :], cmap="gray", origin="lower")
axes[1].set_title("XZ slice")
axes[1].set_xlabel("z")
axes[1].set_ylabel("x")
axes[2].imshow(representative_volume[:, :, mid], cmap="gray", origin="lower")
axes[2].set_title("XY slice")
axes[2].set_xlabel("y")
axes[2].set_ylabel("x")
fig.suptitle(
f"Representative committed input: {representative_row['case']}", fontsize=13
)
plt.tight_layout()
plt.show()
print("shape =", representative_volume.shape)
print("void fraction =", float(representative_volume.mean()))

shape = (32, 32, 32)
void fraction = 0.3800048828125
Run voids against the committed external references¶
For each case, the notebook loads the saved external outputs, runs the current voids workflow on
the same exact volume, and then compares permeability, porosity, and network size.
rows: list[dict[str, object]] = []
for row in manifest_df.itertuples(index=False):
row_series = pd.Series(row._asdict())
volume, pnflow_metrics = _load_reference_case(reference_root, row_series)
voids_metrics = _run_voids_case(
volume,
voxel_size=float(row_series["voxel_size_m"]),
flow_axis=str(row_series["flow_axis"]),
fluid=fluid,
options=options,
)
rows.append(
{
**row._asdict(),
**pnflow_metrics,
**voids_metrics,
}
)
summary_df = pd.DataFrame(rows)
summary_df["k_ratio_voids_to_pnflow"] = summary_df["k_voids"] / summary_df["k_pnflow"]
summary_df["k_rel_diff"] = np.abs(
summary_df["k_voids"] - summary_df["k_pnflow"]
) / np.maximum(
np.maximum(np.abs(summary_df["k_voids"]), np.abs(summary_df["k_pnflow"])),
1.0e-30,
)
summary_df["phi_abs_diff"] = summary_df["phi_abs_voids"] - summary_df["phi_pnflow"]
summary_df["np_rel_diff"] = np.abs(
summary_df["Np_voids"] - summary_df["pnflow_n_pores"]
) / np.maximum(summary_df["pnflow_n_pores"], 1.0)
summary_df["nt_rel_diff"] = np.abs(
summary_df["Nt_voids"] - summary_df["pnflow_n_throats"]
) / np.maximum(summary_df["pnflow_n_throats"], 1.0)
display_columns = [
"case",
"seed_used",
"porosity_target",
"blobiness",
"phi_image",
"phi_abs_voids",
"phi_pnflow",
"Np_voids",
"pnflow_n_pores",
"Nt_voids",
"pnflow_n_throats",
"k_voids",
"k_pnflow",
"k_rel_diff",
]
summary_df.loc[:, display_columns]
| case | seed_used | porosity_target | blobiness | phi_image | phi_abs_voids | phi_pnflow | Np_voids | pnflow_n_pores | Nt_voids | pnflow_n_throats | k_voids | k_pnflow | k_rel_diff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | phi032_b14 | 401 | 0.32 | 1.4 | 0.320007 | 0.320190 | 0.275787 | 53 | 80 | 150 | 202 | 6.096766e-15 | 9.751930e-15 | 0.374814 |
| 1 | phi035_b16 | 501 | 0.35 | 1.6 | 0.350006 | 0.349762 | 0.295502 | 26 | 71 | 79 | 198 | 1.702232e-14 | 1.331850e-14 | 0.217586 |
| 2 | phi038_b18 | 601 | 0.38 | 1.8 | 0.380005 | 0.380005 | 0.316315 | 36 | 64 | 106 | 180 | 2.091703e-14 | 1.199270e-14 | 0.426654 |
| 3 | phi040_b18 | 901 | 0.40 | 1.8 | 0.399994 | 0.399994 | 0.343414 | 45 | 83 | 134 | 280 | 2.033442e-14 | 1.575700e-14 | 0.225107 |
| 4 | phi041_b20 | 701 | 0.41 | 2.0 | 0.410004 | 0.410004 | 0.354004 | 37 | 72 | 106 | 247 | 3.925603e-14 | 1.436830e-14 | 0.633985 |
Save derived comparison artifacts¶
The committed external reference data under examples/data/ are treated as inputs. The CSV and
figures written here remain derived benchmark reports for documentation and review.
Saved: /Users/dtvolpatto/Work/voids/docs/assets/verification/pnflow_5_case_results.csv
_make_permeability_comparison_figure(
summary_df,
output_path=comparison_figure_path,
)
print("Saved:", comparison_figure_path)

Saved: /Users/dtvolpatto/Work/voids/docs/assets/verification/pnflow_permeability_scatter_and_error.png
_make_porosity_trend_figure(
summary_df,
output_path=porosity_figure_path,
)
print("Saved:", porosity_figure_path)

Saved: /Users/dtvolpatto/Work/voids/docs/assets/verification/pnflow_porosity_vs_permeability.png
Interpretation¶
The permeability comparison is the main benchmark output. The porosity and network-size columns help diagnose whether a mismatch comes primarily from extraction topology, pore/throat geometry, or the transport model itself.
Points to keep in mind when interpreting the numbers:
- pnflow reports porosity after its own extracted-network construction, not the binary-image
void fraction
- voids absolute porosity is computed from the pruned spanning network and may differ even when
the original binary volume is identical
- a close permeability match would be encouraging, but it would not prove geometric equivalence
- a large mismatch is not automatically a bug in voids; it may reflect different extraction and
conductance assumptions in the two workflows