# Source code for att.llm.intrinsic_dim
"""Intrinsic dimension estimators for LLM hidden-state point clouds.
Direction 7: tracks how representation complexity changes across layers
and difficulty levels using TwoNN (Facco et al. 2017) and PHD (Birdal et al.
2021) estimators.
"""
from __future__ import annotations
import numpy as np
from scipy.spatial.distance import cdist
[docs]
def twonn_dimension(cloud: np.ndarray, fraction: float = 0.9) -> float:
"""Estimate intrinsic dimension via the TwoNN method (Facco et al. 2017).
Uses the ratio of distances to the second and first nearest neighbours.
The ID is estimated as d = 1 / mean(log(mu)) where mu = r2/r1.
Parameters
----------
cloud : (n, d) point cloud.
fraction : float
Fraction of points to use after trimming high-mu outliers (0, 1].
Returns
-------
float : estimated intrinsic dimension.
"""
n = cloud.shape[0]
if n < 3:
return 0.0
# Pairwise distances
if n <= 5000:
D = cdist(cloud, cloud)
else:
# For large clouds, use sklearn for efficiency
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(n_neighbors=3, algorithm="auto")
nn.fit(cloud)
dists, _ = nn.kneighbors(cloud)
r1 = dists[:, 1] # skip self
r2 = dists[:, 2]
valid = r1 > 1e-15
r1 = r1[valid]
r2 = r2[valid]
if len(r1) == 0:
return 0.0
mu = r2 / r1
mu = np.sort(mu)
n_use = max(1, int(len(mu) * fraction))
mu = mu[:n_use]
log_mu = np.log(mu)
mean_log_mu = np.mean(log_mu)
if mean_log_mu < 1e-15:
return 0.0
return 1.0 / mean_log_mu
# Full pairwise path
np.fill_diagonal(D, np.inf)
sorted_dists = np.sort(D, axis=1)
r1 = sorted_dists[:, 0]
r2 = sorted_dists[:, 1]
valid = r1 > 1e-15
r1 = r1[valid]
r2 = r2[valid]
if len(r1) == 0:
return 0.0
mu = r2 / r1
mu = np.sort(mu)
n_use = max(1, int(len(mu) * fraction))
mu = mu[:n_use]
log_mu = np.log(mu)
mean_log_mu = np.mean(log_mu)
if mean_log_mu < 1e-15:
return 0.0
return 1.0 / mean_log_mu
def phd_dimension(diagrams: list[np.ndarray], dim: int = 1) -> float:
    """Estimate intrinsic dimension from persistence diagram lifetimes.

    In dimension d the expected lifetime of H_k features scales as
    n^(-1/d) (Birdal et al. 2021), so the log-log slope of the empirical
    survival function of lifetimes estimates -d.

    Parameters
    ----------
    diagrams : list of (n_features, 2) arrays (persistence diagrams).
    dim : int
        Homology dimension to use (default 1 for loops).

    Returns
    -------
    float : estimated intrinsic dimension (0.0 if insufficient features).
    """
    if dim >= len(diagrams):
        return 0.0
    diagram = diagrams[dim]
    if len(diagram) < 5:
        return 0.0
    spans = diagram[:, 1] - diagram[:, 0]
    spans = spans[spans > 1e-15]  # discard zero-lifetime (noise) features
    count = len(spans)
    if count < 5:
        return 0.0
    # Largest lifetime first: the i-th largest (0-based) has empirical
    # survival probability (i + 1) / count.
    spans = np.sort(spans)[::-1]
    log_t = np.log(spans)
    log_surv = np.log(np.arange(1, count + 1) / count)
    # Least-squares line through the log-log survival curve.
    # Survival decays as t^{-d}, so the slope is -d.
    slope, _intercept = np.polyfit(log_t, log_surv, 1)
    return float(max(0.0, -slope))
def id_profile(
    loader,
    levels: list[int] | None = None,
    n_pca_components: int = 50,
    method: str = "twonn",
    fraction: float = 0.9,
) -> dict[int, np.ndarray]:
    """Compute intrinsic dimension profile across layers for each difficulty level.

    Parameters
    ----------
    loader : HiddenStateLoader
        Loaded hidden-state archive.
    levels : list of int or None
        Difficulty levels to analyze. None = all levels.
    n_pca_components : int
        PCA components before ID estimation (avoids curse of ambient dim).
    method : str
        "twonn" (default) or "phd".
    fraction : float
        Fraction parameter for TwoNN trimming.

    Returns
    -------
    dict mapping level -> (n_layers,) array of ID estimates. Layers with
    fewer than 5 points keep an estimate of 0.0.

    Raises
    ------
    ValueError
        If ``method`` is not "twonn" or "phd".
    """
    from sklearn.decomposition import PCA

    # Fail fast on a bad method name; previously this was only detected
    # inside the innermost loop, so a typo with all-tiny clouds silently
    # returned all-zero profiles.
    if method not in ("twonn", "phd"):
        raise ValueError(f"Unknown method: {method}")
    if levels is None:
        levels = sorted(loader.unique_levels.tolist())
    n_layers = loader.num_layers  # invariant across levels
    profiles: dict[int, np.ndarray] = {}
    for level in levels:
        ids = np.zeros(n_layers)
        for layer_idx in range(n_layers):
            cloud = loader.get_level_cloud(level, layer=layer_idx)
            n_pts = cloud.shape[0]
            if n_pts < 5:
                # Too few points for a meaningful estimate; leave 0.0.
                continue
            # Reduce ambient dimension first: both estimators are
            # distance-based and degrade in very high ambient dimension.
            n_comp = min(n_pca_components, n_pts - 1, cloud.shape[1])
            pca = PCA(n_components=n_comp)
            cloud_pca = pca.fit_transform(cloud)
            if method == "twonn":
                ids[layer_idx] = twonn_dimension(cloud_pca, fraction=fraction)
            else:  # method == "phd"
                # Lazy import: the topology stack is only required when
                # the PHD estimator is actually requested.
                from att.topology.persistence import PersistenceAnalyzer

                pa = PersistenceAnalyzer(max_dim=1, backend="ripser")
                result = pa.fit_transform(cloud_pca, subsample=min(n_pts, 200))
                ids[layer_idx] = phd_dimension(result["diagrams"], dim=1)
        profiles[level] = ids
    return profiles