76 lines
2.6 KiB
Python
76 lines
2.6 KiB
Python
from pathlib import Path
|
|
import numpy as np
|
|
import matplotlib
|
|
matplotlib.use("Agg") # 仅保存图片,不弹窗
|
|
import matplotlib.pyplot as plt
|
|
from umap import UMAP
|
|
|
|
def umap_dir_to_pngs(dir_path: str) -> None:
|
|
"""
|
|
对目录内每个 .npy 文件执行 UMAP(30D->2D) 并保存散点图。
|
|
- 输入 .npy 期望形状为 (n_samples, 30) 或 (30, n_samples)
|
|
- 输出图片保存在同目录,命名为 <原文件名>_umap.png
|
|
"""
|
|
p = Path(dir_path)
|
|
if not p.is_dir():
|
|
raise ValueError(f"{dir_path!r} 不是有效文件夹")
|
|
|
|
files = sorted(p.glob("*.npy"))
|
|
if not files:
|
|
print(f"目录 {p} 中未找到 .npy 文件")
|
|
return
|
|
|
|
for f in files:
|
|
try:
|
|
data = np.load(f)
|
|
if data.ndim == 2:
|
|
if data.shape[1] == 30:
|
|
X = data
|
|
elif data.shape[0] == 30:
|
|
X = data.T
|
|
else:
|
|
print(f"[跳过] {f.name}: shape={data.shape}, 未检测到 30 维特征")
|
|
continue
|
|
else:
|
|
print(f"[跳过] {f.name}: 期望二维数组,实际 shape={data.shape}")
|
|
continue
|
|
|
|
# 清理非数值行
|
|
mask = np.isfinite(X).all(axis=1)
|
|
if not np.all(mask):
|
|
X = X[mask]
|
|
print(f"[提示] {f.name}: 移除了含 NaN/Inf 的样本行")
|
|
|
|
n_samples = X.shape[0]
|
|
if n_samples < 3:
|
|
print(f"[跳过] {f.name}: 样本数过少(n={n_samples}),无法稳定降维")
|
|
continue
|
|
|
|
# 确保 n_neighbors 合法
|
|
n_neighbors = min(15, max(2, n_samples - 1))
|
|
reducer = UMAP(
|
|
n_components=2,
|
|
n_neighbors=n_neighbors,
|
|
min_dist=0.1,
|
|
metric="euclidean",
|
|
random_state=42,
|
|
)
|
|
emb = reducer.fit_transform(X)
|
|
|
|
fig, ax = plt.subplots(figsize=(6, 5), dpi=150)
|
|
ax.scatter(emb[:, 0], emb[:, 1], s=6, c="#1f77b4", alpha=0.8, edgecolors="none")
|
|
ax.set_title(f"{f.name} • UMAP (n={len(X)}, nn={n_neighbors})", fontsize=10)
|
|
ax.set_xlabel("UMAP-1")
|
|
ax.set_ylabel("UMAP-2")
|
|
ax.grid(True, linestyle="--", linewidth=0.3, alpha=0.5)
|
|
fig.tight_layout()
|
|
|
|
out_png = f.with_suffix("").as_posix() + "_umap.png"
|
|
fig.savefig(out_png)
|
|
plt.close(fig)
|
|
print(f"[完成] {f.name} -> {out_png}")
|
|
except Exception as e:
|
|
print(f"[错误] 处理 {f.name} 失败: {e}")
|
|
|
|
if __name__=="__main__":
|
|
umap_dir_to_pngs("data") |