Source code for att.embedding.validation
"""Embedding quality validation via condition number analysis."""
import numpy as np
[docs]
class EmbeddingDegeneracyWarning(UserWarning):
    """Warning category for embeddings whose condition number is dangerously high."""
[docs]
def validate_embedding(
cloud: np.ndarray,
expected_dim: int | None = None,
condition_threshold: float = 1e4,
) -> dict:
"""Check embedding quality via SVD of the centered point cloud matrix.
The condition number is σ_max / σ_min. High values mean some embedding
dimensions are near-linear combinations of others — the manifold is
collapsed along those directions.
Parameters
----------
cloud : (n_points, dimension) array
expected_dim : expected intrinsic dimension (informational only)
condition_threshold : condition number above which embedding is flagged
as degenerate. Default 1e4 calibrated on coupled Rössler-Lorenz.
Returns
-------
dict with keys:
condition_number: float
singular_values: np.ndarray
effective_rank: int (singular values > 1e-3 * σ_max)
degenerate: bool
warning: str | None
"""
cloud = np.asarray(cloud)
if cloud.ndim != 2:
raise ValueError(f"Expected 2D array, got shape {cloud.shape}")
# Center the cloud
centered = cloud - cloud.mean(axis=0)
# SVD
singular_values = np.linalg.svd(centered, compute_uv=False)
sigma_max = singular_values[0]
sigma_min = singular_values[-1]
if sigma_min < 1e-15:
condition_number = float("inf")
else:
condition_number = sigma_max / sigma_min
effective_rank = int(np.sum(singular_values > 1e-3 * sigma_max))
degenerate = condition_number > condition_threshold
warning_msg = None
if degenerate:
warning_msg = (
f"Embedding is near-degenerate: condition number = {condition_number:.1e} "
f"(threshold = {condition_threshold:.1e}). "
f"Effective rank = {effective_rank}/{cloud.shape[1]}. "
f"Consider per-channel delay estimation or SVD denoising."
)
return {
"condition_number": condition_number,
"singular_values": singular_values,
"effective_rank": effective_rank,
"degenerate": degenerate,
"warning": warning_msg,
}
[docs]
def svd_embedding(
X: np.ndarray,
delay: int,
dimension: int,
n_components: int | None = None,
) -> np.ndarray:
"""SVD-projected delay embedding for noise reduction.
Constructs the delay matrix then projects onto the top n_components
principal components.
Parameters
----------
X : (n_samples,) 1D time series
delay : time delay
dimension : embedding dimension
n_components : number of SVD components to keep (default: dimension)
Returns
-------
(n_valid, n_components) projected point cloud
"""
X = np.asarray(X).ravel()
n = len(X) - (dimension - 1) * delay
if n <= 0:
raise ValueError("Time series too short for given delay and dimension.")
if n_components is None:
n_components = dimension
# Build delay matrix
cloud = np.zeros((n, dimension))
for i in range(dimension):
cloud[:, i] = X[i * delay: i * delay + n]
# Center and SVD
centered = cloud - cloud.mean(axis=0)
U, S, Vt = np.linalg.svd(centered, full_matrices=False)
# Project onto top components
return U[:, :n_components] * S[:n_components]