一些小修改
This commit is contained in:
76
GPUMD/Umap/umap_make.py
Normal file
76
GPUMD/Umap/umap_make.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use("Agg") # 仅保存图片,不弹窗
|
||||
import matplotlib.pyplot as plt
|
||||
from umap import UMAP
|
||||
|
||||
def umap_dir_to_pngs(dir_path: str, *, n_features: int = 30) -> None:
    """Embed every ``.npy`` file in a directory with UMAP and save one scatter plot per file.

    Each file is reduced independently from ``n_features`` dimensions to 2-D,
    so coordinates are not comparable between the resulting images.

    - A file must hold a 2-D array shaped ``(n_samples, n_features)`` or
      ``(n_features, n_samples)``; the latter is transposed. When both axes
      equal ``n_features`` the array is used as-is.
    - Rows containing NaN/Inf are dropped before fitting.
    - The image is written next to the input as ``<stem>_umap.png``.
    - A failure on one file is reported and does not stop the remaining files.

    Args:
        dir_path: Directory searched (non-recursively) for ``*.npy`` files.
        n_features: Expected feature dimension of each sample (default 30).

    Raises:
        ValueError: If ``dir_path`` is not an existing directory.
    """
    p = Path(dir_path)
    if not p.is_dir():
        raise ValueError(f"{dir_path!r} 不是有效文件夹")

    files = sorted(p.glob("*.npy"))
    if not files:
        print(f"目录 {p} 中未找到 .npy 文件")
        return

    for f in files:
        fig = None  # tracked so the figure is released even when plotting/saving fails
        try:
            data = np.load(f)
            if data.ndim != 2:
                print(f"[跳过] {f.name}: 期望二维数组,实际 shape={data.shape}")
                continue

            if data.shape[1] == n_features:
                X = data
            elif data.shape[0] == n_features:
                X = data.T
            else:
                print(f"[跳过] {f.name}: shape={data.shape}, 未检测到 {n_features} 维特征")
                continue

            # Drop rows that contain non-finite values.
            mask = np.isfinite(X).all(axis=1)
            if not np.all(mask):
                X = X[mask]
                print(f"[提示] {f.name}: 移除了含 NaN/Inf 的样本行")

            n_samples = X.shape[0]
            if n_samples < 3:
                print(f"[跳过] {f.name}: 样本数过少(n={n_samples}),无法稳定降维")
                continue

            # Keep n_neighbors within [2, n_samples - 1], capped at 15.
            n_neighbors = min(15, max(2, n_samples - 1))
            reducer = UMAP(
                n_components=2,
                n_neighbors=n_neighbors,
                min_dist=0.1,
                metric="euclidean",
                random_state=42,
            )
            emb = reducer.fit_transform(X)

            fig, ax = plt.subplots(figsize=(6, 5), dpi=150)
            ax.scatter(emb[:, 0], emb[:, 1], s=6, c="#1f77b4", alpha=0.8, edgecolors="none")
            ax.set_title(f"{f.name} • UMAP (n={len(X)}, nn={n_neighbors})", fontsize=10)
            ax.set_xlabel("UMAP-1")
            ax.set_ylabel("UMAP-2")
            ax.grid(True, linestyle="--", linewidth=0.3, alpha=0.5)
            fig.tight_layout()

            out_png = f.with_suffix("").as_posix() + "_umap.png"
            fig.savefig(out_png)
            print(f"[完成] {f.name} -> {out_png}")
        except Exception as e:
            # Best-effort batch job: report the failure and move on to the next file.
            print(f"[错误] 处理 {f.name} 失败: {e}")
        finally:
            if fig is not None:
                plt.close(fig)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: process the "data" directory relative to the current working directory.
    umap_dir_to_pngs(dir_path="data")
|
||||
161
GPUMD/Umap/umap_make_2.py
Normal file
161
GPUMD/Umap/umap_make_2.py
Normal file
@@ -0,0 +1,161 @@
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
try:
|
||||
from umap import UMAP
|
||||
except Exception:
|
||||
from umap.umap_ import UMAP
|
||||
|
||||
|
||||
def umap_dir_shared_coords(
    dir_path: str,
    *,
    metric: str = "cosine",
    n_neighbors: int = 15,
    min_dist: float = 0.0,
    spread: float = 1.2,
    standardize: bool = False,
    context: bool = True,
    make_joint: bool = True,
    init: str = "random",
    jitter: float = 0.0,
    random_state: int = 42
) -> None:
    """Fit one UMAP on all ``.npy`` files of a directory and plot each file in that shared 2-D space.

    All usable files are stacked, embedded together, and then drawn with
    identical axis limits so the images can be compared directly.

    - Each file must be a 2-D array shaped ``(n_samples, 30)`` or ``(30, n_samples)``.
    - Rows with NaN/Inf are removed; files with fewer than 3 remaining rows are skipped.
    - One ``<stem>_umap_shared.png`` is written per file; with ``make_joint`` an
      additional ``umap_shared_overview.png`` is written into ``dir_path``.

    Args:
        dir_path: Directory searched (non-recursively) for ``*.npy`` files.
        metric: Distance metric handed to UMAP.
        n_neighbors: UMAP neighbourhood size (values below 2 are raised to 2).
        min_dist: UMAP ``min_dist``.
        spread: UMAP ``spread``.
        standardize: If true, z-score every feature over the stacked data
            (zero-variance features are left unscaled).
        context: If true, draw all points in grey behind each per-file plot
            and add a legend.
        make_joint: If true, also write the overview figure.
        init: UMAP initialisation; the default "random" sidesteps the
            spectral-initialisation warning ("pca" is another option).
        jitter: Standard deviation of Gaussian noise added before fitting;
            0 disables it.
        random_state: Seed for the jitter noise and for UMAP.

    Raises:
        ValueError: If ``dir_path`` is not an existing directory.
    """
    root = Path(dir_path)
    if not root.is_dir():
        raise ValueError(f"{dir_path!r} 不是有效文件夹")

    npy_files = sorted(root.glob("*.npy"))
    if not npy_files:
        print(f"目录 {root} 中未找到 .npy 文件")
        return

    # ---- load and validate every file ----
    matrices, sources, sizes = [], [], []
    for npy in npy_files:
        try:
            arr = np.load(npy)
            if arr.ndim != 2:
                print(f"[跳过] {npy.name}: 期望二维数组,实际 shape={arr.shape}")
                continue

            n_rows, n_cols = arr.shape
            if n_cols == 30:
                feats = arr
            elif n_rows == 30:
                feats = arr.T
            else:
                print(f"[跳过] {npy.name}: shape={arr.shape}, 未检测到 30 维特征")
                continue

            finite_rows = np.isfinite(feats).all(axis=1)
            if not finite_rows.all():
                feats = feats[finite_rows]
                print(f"[提示] {npy.name}: 移除了含 NaN/Inf 的样本行")

            kept = feats.shape[0]
            if kept < 3:
                print(f"[跳过] {npy.name}: 样本数过少(n={kept})")
                continue

            matrices.append(feats)
            sources.append(npy)
            sizes.append(kept)
        except Exception as err:
            print(f"[错误] 读取 {npy.name} 失败: {err}")

    if not matrices:
        print("未找到可用的数据文件")
        return

    # ---- optional preprocessing on the stacked data ----
    stacked = np.vstack(matrices)

    if standardize:
        mu = stacked.mean(axis=0)
        sigma = stacked.std(axis=0)
        sigma[sigma == 0] = 1.0  # avoid division by zero for constant features
        stacked = (stacked - mu) / sigma

    if jitter and jitter > 0:
        noise_rng = np.random.default_rng(random_state)
        stacked = stacked + noise_rng.normal(scale=jitter, size=stacked.shape)

    # ---- one shared embedding ----
    embedding = UMAP(
        n_components=2,
        n_neighbors=int(max(2, n_neighbors)),
        min_dist=float(min_dist),
        spread=float(spread),
        metric=metric,
        init=init,
        random_state=random_state,
    ).fit_transform(stacked)

    x_min, x_max = float(embedding[:, 0].min()), float(embedding[:, 0].max())
    y_min, y_max = float(embedding[:, 1].min()), float(embedding[:, 1].max())
    pad_x = 0.05 * (x_max - x_min) if x_max > x_min else 1.0
    pad_y = 0.05 * (y_max - y_min) if y_max > y_min else 1.0
    x_limits = (x_min - pad_x, x_max + pad_x)
    y_limits = (y_min - pad_y, y_max + pad_y)

    palette = [
        "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
        "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
    ]

    # Row ranges of each file inside the stacked embedding.
    bounds = np.cumsum([0] + sizes)
    per_file = [
        (idx, src, embedding[int(bounds[idx]):int(bounds[idx + 1])])
        for idx, src in enumerate(sources)
    ]

    # ---- one figure per file ----
    for idx, src, pts in per_file:
        fig, ax = plt.subplots(figsize=(6, 5), dpi=150)
        if context:
            ax.scatter(embedding[:, 0], embedding[:, 1], s=5, c="#cccccc",
                       alpha=0.35, edgecolors="none", label="All")
        ax.scatter(pts[:, 0], pts[:, 1], s=10,
                   c=palette[idx % len(palette)],
                   alpha=0.9, edgecolors="none", label=src.name)

        ax.set_title(
            f"{src.name} • UMAP(shared) (nn={n_neighbors}, min={min_dist}, metric={metric}, init={init})",
            fontsize=9
        )
        ax.set_xlabel("UMAP-1")
        ax.set_ylabel("UMAP-2")
        ax.set_xlim(*x_limits)
        ax.set_ylim(*y_limits)
        ax.grid(True, linestyle="--", linewidth=0.3, alpha=0.5)
        if context:
            ax.legend(loc="best", fontsize=8, frameon=False)
        fig.tight_layout()

        out_png = src.with_suffix("").as_posix() + "_umap_shared.png"
        fig.savefig(out_png)
        plt.close(fig)
        print(f"[完成] {src.name} -> {out_png}")

    if not make_joint:
        return

    # ---- overview figure with all files ----
    fig, ax = plt.subplots(figsize=(7, 6), dpi=150)
    for idx, src, pts in per_file:
        ax.scatter(pts[:, 0], pts[:, 1], s=8,
                   c=palette[idx % len(palette)],
                   alpha=0.85, edgecolors="none", label=src.name)
    ax.set_title(f"UMAP(shared) overview (metric={metric}, nn={n_neighbors}, min={min_dist}, init={init})",
                 fontsize=10)
    ax.set_xlabel("UMAP-1")
    ax.set_ylabel("UMAP-2")
    ax.set_xlim(*x_limits)
    ax.set_ylim(*y_limits)
    ax.grid(True, linestyle="--", linewidth=0.3, alpha=0.5)
    ax.legend(loc="best", fontsize=8, frameon=False, ncol=1)
    fig.tight_layout()
    out_png = root / "umap_shared_overview.png"
    fig.savefig(out_png.as_posix())
    plt.close(fig)
    print(f"[完成] 总览 -> {out_png}")
|
||||
if __name__ == "__main__":
    # Script entry point: embed all .npy files found in ./data with the default settings.
    umap_dir_shared_coords(dir_path="data")
|
||||
88
GPUMD/data_POSCAR/origin/pnma.vasp
Normal file
88
GPUMD/data_POSCAR/origin/pnma.vasp
Normal file
@@ -0,0 +1,88 @@
|
||||
Li Y Cl
|
||||
1.0000000000000000
|
||||
12.1082364219999992 -0.0000000000000000 0.0000000000000000
|
||||
0.0000420925000000 12.6964871139000000 0.0000000000000000
|
||||
0.0000111360000000 0.0000097283000000 11.1520040839999997
|
||||
Li Y Cl
|
||||
24 8 48
|
||||
Cartesian
|
||||
3.0170113299999999 11.0208475999999997 6.5429541999999996
|
||||
9.0710813300000002 11.0208076100000003 6.5429413099999998
|
||||
3.0372732299999998 1.6755553700000001 0.9669378900000000
|
||||
9.0914532300000008 1.6755853700000001 0.9668849900000001
|
||||
5.9960454600000004 8.0228419300000002 4.6273539599999998
|
||||
12.0502254600000001 8.0228319300000006 4.6273410699999999
|
||||
6.0439837100000000 8.0239104000000001 0.9669839400000000
|
||||
12.0980237099999997 8.0238703999999998 0.9669410500000000
|
||||
2.9687930800000002 11.0219791300000001 10.2032142199999996
|
||||
9.0228930799999993 11.0219691300000004 10.2032413300000009
|
||||
3.0851749800000001 1.6745967300000000 4.6273578600000000
|
||||
9.1393549800000002 1.6745967399999999 4.6273049700000000
|
||||
0.0581325900000000 4.6736939399999997 10.2033481199999994
|
||||
6.1122525899999998 4.6736539400000003 10.2033652299999993
|
||||
0.0102008400000000 4.6725925699999999 6.5430081500000004
|
||||
6.0643308400000002 4.6725225699999999 6.5430152499999998
|
||||
6.0582017800000001 11.3105425600000000 6.4882826299999996
|
||||
0.0040517800000000 11.3105725600000007 6.4882655299999996
|
||||
3.0311532099999998 7.7341853599999997 0.9123054900000001
|
||||
9.0851632099999993 7.7341953600000002 0.9123126000000000
|
||||
3.0230812999999999 4.9623175699999997 6.4882665900000003
|
||||
9.0772212999999997 4.9622875700000000 6.4882637000000001
|
||||
12.1043127199999994 1.3859403699999999 0.9123036700000000
|
||||
6.0501727199999999 1.3859103699999999 0.9123265600000000
|
||||
0.0501691200000000 4.7065010400000000 2.8276881199999999
|
||||
6.1042791200000002 4.7064910400000004 2.8277152200000000
|
||||
6.0040072100000001 7.9899634099999997 8.4036839699999994
|
||||
12.0581272100000003 7.9900134100000004 8.4037010799999994
|
||||
3.0772167300000000 1.6417582399999999 8.4037178800000003
|
||||
9.1312967300000007 1.6417482400000001 8.4036949900000000
|
||||
2.9769896100000000 11.0547561999999999 2.8276842100000001
|
||||
9.0310896100000004 11.0547362000000007 2.8277013100000001
|
||||
4.5156189400000004 10.0925444199999994 4.7485038499999996
|
||||
10.5696489400000004 10.0924044199999994 4.7485109599999999
|
||||
1.5387092400000000 2.6040715400000001 10.3245482299999995
|
||||
7.5927392300000003 2.6040115400000001 10.3245253399999992
|
||||
1.3626281400000000 9.1096203100000004 6.4718857500000002
|
||||
7.4167081399999999 9.1095703100000005 6.4718328500000002
|
||||
1.4883697199999999 8.9523616199999996 10.3245457500000004
|
||||
7.5425797200000000 8.9522516299999992 10.3245128600000005
|
||||
4.3896168400000004 9.9351631099999995 0.8958039700000000
|
||||
10.4437368399999997 9.9351431100000003 0.8958210800000000
|
||||
1.6644776500000000 2.7613398100000000 6.4718681100000000
|
||||
7.7185976500000004 2.7613198099999998 6.4718352200000000
|
||||
4.6916063499999998 3.5868826100000000 0.8958463400000000
|
||||
10.7457363499999996 3.5868826100000000 0.8958134500000000
|
||||
4.5657084499999998 3.7442043400000000 4.7485363400000002
|
||||
10.6197684500000005 3.7442543399999999 4.7484934399999998
|
||||
1.4271435299999999 5.7840809399999999 8.3327970300000000
|
||||
7.4812435300000004 5.7840109399999999 8.3328141400000000
|
||||
4.6270127299999997 6.9124334500000000 2.7567950600000000
|
||||
10.6811427299999995 6.9124334500000000 2.7567721600000001
|
||||
4.4542422500000001 0.5641837300000000 2.7567976500000002
|
||||
10.5083422500000001 0.5641637400000000 2.7567847500000000
|
||||
1.5999540200000000 12.1323306500000001 8.3327844399999993
|
||||
7.6540440199999997 12.1323306500000001 8.3327815399999992
|
||||
1.4488319300000001 5.8551694799999998 4.7388569900000004
|
||||
7.5029319299999999 5.8551594800000002 4.7388941000000004
|
||||
4.6052662299999998 6.8413464700000004 10.3148350900000008
|
||||
10.6594162299999997 6.8413164799999997 10.3148222000000001
|
||||
4.4984539600000000 0.5241951300000000 6.4733476400000001
|
||||
10.5525539599999991 0.5241751300000000 6.4733547500000004
|
||||
4.4759357399999997 0.4930566900000000 10.3148076700000004
|
||||
10.5300357400000006 0.4930866900000000 10.3148047700000003
|
||||
1.4714200500000001 5.8240679200000001 0.8973369900000000
|
||||
7.5254900500000002 5.8240779299999996 0.8973441000000000
|
||||
4.5827044399999997 6.8724549899999996 6.4733550900000001
|
||||
10.6368744399999997 6.8724349900000004 6.4733521999999999
|
||||
1.5557505300000001 12.1723877900000002 0.8973444400000000
|
||||
7.6098205300000004 12.1722777900000008 0.8973515500000000
|
||||
1.5782524200000001 12.2033792699999992 4.7388244200000003
|
||||
7.6324124199999996 12.2033992700000002 4.7388615300000003
|
||||
1.5507855200000000 2.5414585299999999 2.7575782499999999
|
||||
7.6049855199999996 2.5414785300000000 2.7574953600000001
|
||||
4.5034907500000001 10.1550858599999998 8.3335738300000006
|
||||
10.5574507499999992 10.1550558599999992 8.3335109400000000
|
||||
4.5777702700000003 3.8068257399999998 8.3335863099999994
|
||||
10.6319002699999992 3.8067957400000001 8.3335634100000000
|
||||
1.4764060000000001 8.8896886500000001 2.7575657800000002
|
||||
7.5304859999999998 8.8896686500000008 2.7575728900000001
|
||||
149
GPUMD/raw2xyz.py
Normal file
149
GPUMD/raw2xyz.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
|
||||
def convert_raw_to_gpumd_xyz(input_folder: str, output_filename: str = "gpumd_nep_training_data.xyz"):
    """Convert DeePMD-kit style ``.raw`` training data into an extended-XYZ file for GPUMD NEP training.

    Every frame is written as: an atom-count line, a comment line carrying
    ``Config_type``, ``Weight``, ``Lattice``, ``Energy``, ``Virial`` (nine
    components), ``pbc`` and ``Properties``, followed by one line per atom
    with symbol, position and force.

    The atom-type sequence is taken from ``type.raw`` once and reused for all
    frames. ``type.raw`` may list all types on its first line or one type per
    line. Datasets with a single frame are supported.

    Args:
        input_folder: Folder containing the ``.raw`` files (e.g. ``'./set.000/'``).
        output_filename: Path of the extended-XYZ file to write; missing
            parent directories are created.

    Raises:
        FileNotFoundError: If a required ``.raw`` file is missing.
        ValueError: If the files cannot be read or their shapes are inconsistent.
    """
    required_files = [
        'box.raw', 'coord.raw', 'energy.raw', 'force.raw',
        'type.raw', 'type_map.raw', 'virial.raw'
    ]
    # Fail early, before any parsing, when a required file is absent.
    for filename in required_files:
        filepath = os.path.join(input_folder, filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError(
                f"Missing required file: {filepath}. Please ensure all .raw files are in the specified folder.")
    print(f"Loading raw from folder: {input_folder}")

    def raw_path(name):
        return os.path.join(input_folder, name)

    # --- 1. Read the data ---
    try:
        with open(raw_path('type_map.raw'), 'r') as f:
            type_map_list = [line.strip() for line in f if line.strip()]  # ignore blank lines

        # The first coord.raw row fixes the atom count (3 numbers per atom).
        first_coord_line = np.loadtxt(raw_path('coord.raw'), max_rows=1, ndmin=1)
        num_atoms = first_coord_line.shape[0] // 3
        if num_atoms == 0:
            raise ValueError("Could not determine num_atoms from coord.raw. It seems empty or malformed.")

        with open(raw_path('type.raw'), 'r') as f:
            all_types_lines = [line for line in f if line.strip()]  # ignore blank lines
        if not all_types_lines:
            raise ValueError(f"{raw_path('type.raw')} is empty or malformed.")

        # The type sequence is assumed identical for all frames, so only one copy is needed.
        first_type_config = [int(x) for x in all_types_lines[0].split()]
        if len(first_type_config) == num_atoms:
            # Usual layout: the first line lists the type of every atom.
            atom_types_numeric = first_type_config
        elif len(all_types_lines) == num_atoms:
            # Alternative layout: one atom type per line.
            atom_types_numeric = [int(line.strip()) for line in all_types_lines]
        else:
            raise ValueError(
                f"Mismatch between num_atoms ({num_atoms}) derived from coord.raw and type.raw format. "
                f"First line of type.raw has {len(first_type_config)} types, total lines {len(all_types_lines)}. "
                f"Please check type.raw format and adjust script.")

        # Reject out-of-range indices explicitly; a negative index would otherwise
        # silently wrap around and pick the wrong element.
        for t in atom_types_numeric:
            if not 0 <= t < len(type_map_list):
                raise ValueError(
                    f"type.raw contains type index {t}, but type_map.raw defines only "
                    f"{len(type_map_list)} element(s).")
        atom_symbols = [type_map_list[t] for t in atom_types_numeric]

        # ndmin keeps a leading frame axis even when the file holds a single frame.
        boxes = np.loadtxt(raw_path('box.raw')).reshape(-1, 3, 3)
        coords_flat = np.loadtxt(raw_path('coord.raw'), ndmin=2)
        energies = np.loadtxt(raw_path('energy.raw'), ndmin=1)
        forces_flat = np.loadtxt(raw_path('force.raw'), ndmin=2)
        virials_flat = np.loadtxt(raw_path('virial.raw'), ndmin=2)  # nine components per frame expected

    except Exception as e:
        raise ValueError(f"Error reading .raw files. Please check their format. Details: {e}") from e

    # --- Validate shapes ---
    num_configs = len(energies)
    expected_coord_cols = num_atoms * 3
    expected_virial_cols = 9

    if coords_flat.shape[1] != expected_coord_cols:
        raise ValueError(
            f"coord.raw has {coords_flat.shape[1]} columns, but expected {expected_coord_cols} (N_atoms * 3).")
    if coords_flat.shape[0] != num_configs:
        raise ValueError(
            f"coord.raw has {coords_flat.shape[0]} configurations, but expected {num_configs}. Data mismatch.")
    if boxes.shape[0] != num_configs:
        raise ValueError(f"box.raw has {boxes.shape[0]} configurations, but expected {num_configs}. Data mismatch.")
    if forces_flat.shape[1] != expected_coord_cols:
        raise ValueError(
            f"force.raw has {forces_flat.shape[1]} columns, but expected {expected_coord_cols} (N_atoms * 3). Check file format.")
    if forces_flat.shape[0] != num_configs:
        raise ValueError(
            f"force.raw has {forces_flat.shape[0]} configurations, but expected {num_configs}. Data mismatch.")
    if virials_flat.shape[0] != num_configs or virials_flat.shape[1] != expected_virial_cols:
        raise ValueError(
            f"virial.raw has shape {virials_flat.shape}, but expected ({num_configs}, {expected_virial_cols}). Check file format.")

    coords = coords_flat.reshape(num_configs, num_atoms, 3)
    forces = forces_flat.reshape(num_configs, num_atoms, 3)

    print(f"Loaded {num_configs} configurations with {num_atoms} atoms each.")

    # --- 2. Write the extended-XYZ file ---
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    output_filepath = output_filename
    weight_str = "Weight=1.0"
    properties_str = "Properties=species:S:1:pos:R:3:forces:R:3"

    with open(output_filepath, 'w') as f:
        for i in range(num_configs):
            # Line 1: number of atoms.
            frame_lines = [f"{num_atoms}\n"]

            # Line 2: per-frame metadata; box and virial are flattened row by row.
            box_str = " ".join(f"{x:.10f}" for x in boxes[i].flatten())
            energy_str = f"{energies[i]:.10f}"
            virial_str = " ".join(f"{x:.10f}" for x in virials_flat[i])
            config_type_str = f"Config_type=dpgen_iter{i:03d}"  # frame index used as a label
            frame_lines.append(
                f'{config_type_str} {weight_str} Lattice="{box_str}" Energy={energy_str} Virial="{virial_str}" pbc="T T T" {properties_str}\n'
            )

            # Remaining lines: symbol, position, force for each atom.
            for symbol, (x, y, z), (fx, fy, fz) in zip(atom_symbols, coords[i], forces[i]):
                frame_lines.append(
                    f"{symbol} {x:.10f} {y:.10f} {z:.10f} {fx:.10f} {fy:.10f} {fz:.10f}\n")

            f.writelines(frame_lines)

    print(f"Successfully converted {num_configs} configurations to {output_filepath}")
    print(f"Output file saved at: {output_filepath}")
|
||||
|
||||
|
||||
# --- Example usage ---
if __name__ == "__main__":
    # Hard-coded example paths; adjust them to the dataset that should be converted.
    input_folder_path = 'data/dpmd_data/lyc/training_data/p3m1_data/raw'
    output_file_path = 'data/dpmd_data/lyc/training_data/p3m1_data/p3m1_train.xyz'

    convert_raw_to_gpumd_xyz(input_folder_path, output_filename=output_file_path)
|
||||
180
GPUMD/swap_li.py
Normal file
180
GPUMD/swap_li.py
Normal file
@@ -0,0 +1,180 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: ascii -*-
|
||||
"""
|
||||
Randomly swap one Li-Y pair in a VASP5 POSCAR and write N new files.
|
||||
- Keeps coordinate mode (Direct/Cartesian), Selective Dynamics flags, and Velocities.
|
||||
- Requires VASP5+ POSCAR (with element symbols line).
|
||||
"""
|
||||
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
|
||||
def _is_ints(tokens):
    """Return True if every token can be converted with int(), otherwise False.

    An empty sequence yields True because there is no token that fails.
    """
    for token in tokens:
        try:
            int(token)
        except ValueError:
            return False
    return True
|
||||
|
||||
def _find_species_index(species, target):
    """Return the position of the first entry in *species* equal to *target*, ignoring case.

    Raises:
        ValueError: If no entry matches.
    """
    wanted = target.lower()
    hits = (pos for pos, symbol in enumerate(species) if symbol.lower() == wanted)
    found = next(hits, None)
    if found is None:
        raise ValueError("Element '%s' not found in species line: %s" % (target, " ".join(species)))
    return found
|
||||
|
||||
def parse_poscar(lines):
    """Split the lines of a VASP5 POSCAR into its sections without interpreting the numbers.

    Lines are kept as text (trailing newlines removed) so they can be written
    back unchanged by a builder. Blank lines directly after the position block
    are skipped and not preserved.

    Args:
        lines: Sequence of text lines of the POSCAR file.

    Returns:
        dict with the keys ``comment``, ``scale``, ``lv`` (three lattice-vector
        lines), ``species``, ``counts``, ``counts_line``, ``selective``,
        ``sel_line``, ``coord_line``, ``natoms``, ``pos_lines``,
        ``vel_header``, ``vel_lines``, ``tail_lines`` and ``species_ranges``
        (a list of ``(symbol, start, end)`` half-open index ranges into
        ``pos_lines``).

    Raises:
        ValueError: If the file is too short, has no element-symbol line
            (VASP4), lists a different number of species and counts, or holds
            fewer position/velocity lines than atoms.
    """
    if len(lines) < 8:
        raise ValueError("POSCAR too short")

    comment = lines[0].rstrip("\n")
    scale = lines[1].rstrip("\n")
    lv = [lines[j].rstrip("\n") for j in range(2, 5)]

    i = 5
    tokens = lines[i].split()
    if _is_ints(tokens):
        raise ValueError("VASP4 format (no element symbols line) is not supported.")
    species = tokens
    i += 1

    counts_line = lines[i].rstrip("\n")
    counts = [int(x) for x in counts_line.split()]
    # A mismatch would otherwise surface later as an IndexError or leave atoms without a species.
    if len(counts) != len(species):
        raise ValueError(
            "Species line has %d entries but counts line has %d." % (len(species), len(counts)))
    i += 1

    # Optional "Selective dynamics" line (recognised by its first letter).
    selective = False
    sel_line = None
    if i < len(lines) and lines[i].strip().lower().startswith("s"):
        selective = True
        sel_line = lines[i].rstrip("\n")
        i += 1

    # Coordinate-mode line (Direct/Cartesian); guard against a truncated file.
    if i >= len(lines):
        raise ValueError("POSCAR too short")
    coord_line = lines[i].rstrip("\n")
    i += 1

    natoms = sum(counts)
    pos_start = i
    pos_end = i + natoms
    if pos_end > len(lines):
        raise ValueError("Atom count exceeds file length.")
    pos_lines = [lines[j].rstrip("\n") for j in range(pos_start, pos_end)]

    # Optional velocities section: only recognised when introduced by a
    # header line starting with "veloc"; anything else ends up in tail_lines.
    k = pos_end
    while k < len(lines) and lines[k].strip() == "":
        k += 1

    vel_header = None
    vel_lines = None
    vel_end = k
    if k < len(lines) and lines[k].strip().lower().startswith("veloc"):
        vel_header = lines[k].rstrip("\n")
        vel_start = k + 1
        vel_end = vel_start + natoms
        if vel_end > len(lines):
            raise ValueError("Velocities section length inconsistent with atom count.")
        vel_lines = [lines[j].rstrip("\n") for j in range(vel_start, vel_end)]

    tail_lines = [lines[j].rstrip("\n") for j in range(vel_end, len(lines))]

    # Half-open atom index range of every species, in file order.
    species_ranges = []
    offset = 0
    for sp, count in zip(species, counts):
        species_ranges.append((sp, offset, offset + count))
        offset += count

    return {
        "comment": comment,
        "scale": scale,
        "lv": lv,
        "species": species,
        "counts": counts,
        "counts_line": counts_line,
        "selective": selective,
        "sel_line": sel_line,
        "coord_line": coord_line,
        "natoms": natoms,
        "pos_lines": pos_lines,
        "vel_header": vel_header,
        "vel_lines": vel_lines,
        "tail_lines": tail_lines,
        "species_ranges": species_ranges,
    }
|
||||
|
||||
def build_poscar(data, pos_lines, vel_lines=None):
    """Assemble POSCAR text from a parsed-section dict and the given position/velocity lines.

    Args:
        data: Mapping with the keys ``comment``, ``scale``, ``lv``, ``species``,
            ``counts_line``, ``selective``, ``sel_line``, ``coord_line``,
            ``vel_header`` and ``tail_lines``.
        pos_lines: Position lines to write after the coordinate-mode line.
        vel_lines: Velocity lines; written only when ``data["vel_header"]``
            is not None as well.

    Returns:
        The file content as one string, each line terminated by a newline.
    """
    header = [
        data["comment"],
        data["scale"],
        *data["lv"],
        " ".join(data["species"]),
        data["counts_line"],
    ]
    if data["selective"]:
        stored = data["sel_line"]
        header.append("Selective dynamics" if stored is None else stored)
    header.append(data["coord_line"])

    body = list(pos_lines)
    has_velocities = not (data["vel_header"] is None or vel_lines is None)
    if has_velocities:
        body.append(data["vel_header"])
        body.extend(vel_lines)
    if data["tail_lines"]:
        body.extend(data["tail_lines"])

    return "\n".join(header + body) + "\n"
|
||||
|
||||
def _swap_once(data, rng, li_label="Li", y_label="Y"):
    """Exchange the lines of one randomly chosen atom of each of two species.

    One atom index is drawn from the range of ``li_label`` and one from the
    range of ``y_label`` (in that order); the two position lines are exchanged
    and, when velocity lines are present, the matching velocity lines too.
    The lists stored in ``data`` are copied, not modified.

    Args:
        data: Parsed-POSCAR dict providing ``species``, ``species_ranges``,
            ``pos_lines`` and ``vel_lines``.
        rng: Object offering ``randrange(start, stop)``.
        li_label: Symbol of the first species.
        y_label: Symbol of the second species.

    Returns:
        ``(new_pos, new_vel, (first_index, second_index))`` where ``new_vel``
        is None if ``data["vel_lines"]`` is None.
    """
    first_species = _find_species_index(data["species"], li_label)
    second_species = _find_species_index(data["species"], y_label)
    ranges = data["species_ranges"]
    li_lo, li_hi = ranges[first_species][1:]
    y_lo, y_hi = ranges[second_species][1:]

    a = rng.randrange(li_lo, li_hi)
    b = rng.randrange(y_lo, y_hi)

    def exchanged(rows):
        # Work on a copy so the parsed data can be reused for further swaps.
        copy = list(rows)
        copy[a], copy[b] = copy[b], copy[a]
        return copy

    velocities = data["vel_lines"]
    new_pos = exchanged(data["pos_lines"])
    new_vel = None if velocities is None else exchanged(velocities)

    return new_pos, new_vel, (a, b)
|
||||
|
||||
def swap(n, input_file, output_dir, seed=None):
    """Generate ``n`` POSCAR files, each with one random Li-Y swap applied to the input structure.

    Every output starts from the unmodified input, so swaps do not accumulate;
    the same pair may be drawn more than once across files.

    Args:
        n: Number of files to write.
        input_file: Path of the VASP5 POSCAR to read.
        output_dir: Directory for the results (created if missing). Files are
            named ``swap_<k>_<input file name>`` with ``k`` from 1 to ``n``.
        seed: Optional seed for the random generator. ``None`` (the default)
            seeds from system entropy; pass a value to make the generated set
            reproducible.

    Returns:
        list of Path to written files.
    """
    input_path = Path(input_file)
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    lines = input_path.read_text().splitlines()
    data = parse_poscar(lines)

    rng = random.Random(seed)
    base = input_path.name

    out_paths = []
    for k in range(1, n + 1):
        new_pos, new_vel, picked = _swap_once(data, rng)
        txt = build_poscar(data, new_pos, new_vel)
        out_path = out_dir / f"swap_{k}_{base}"
        out_path.write_text(txt)
        out_paths.append(out_path)
        print(f"Wrote {out_path} (swapped Li idx {picked[0]} <-> Y idx {picked[1]})")
    return out_paths
|
||||
# ---------------- Defaults used when the file is run as a script ----------------
INPUT_FILE = "data_POSCAR/origin/p3m1.vasp"  # POSCAR that is read
OUTPUT_DIR = "data_POSCAR/p3m1"  # directory receiving the swapped structures
N = 5  # how many swapped files to write
# ---------------------------------------------------------------------------------

if __name__ == "__main__":
    # Change INPUT_FILE / OUTPUT_DIR / N above to alter what a direct run does.
    swap(N, INPUT_FILE, OUTPUT_DIR)
|
||||
140
GPUMD/t-SNE/t-SNE.py
Normal file
140
GPUMD/t-SNE/t-SNE.py
Normal file
@@ -0,0 +1,140 @@
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.manifold import TSNE
|
||||
from sklearn.decomposition import PCA
|
||||
|
||||
def tsne_dir_shared_coords(
|
||||
dir_path: str,
|
||||
*,
|
||||
metric: str = "euclidean", # 可试 "cosine";想保留尺度差异用 "euclidean"
|
||||
perplexity: float = 50.0, # 30k~50k 样本建议 30~50
|
||||
n_iter: int = 1000,
|
||||
early_exaggeration: float = 12.0,
|
||||
learning_rate = "auto",
|
||||
standardize: bool = False,
|
||||
pca_dim: int | None = None, # 先用 PCA 降到 pca_dim(如 20) 再跑 t-SNE,可提速
|
||||
context: bool = True,
|
||||
make_joint: bool = True,
|
||||
init: str = "pca",
|
||||
random_state: int = 42
|
||||
) -> None:
|
||||
p = Path(dir_path)
|
||||
if not p.is_dir():
|
||||
raise ValueError(f"{dir_path!r} 不是有效文件夹")
|
||||
|
||||
files = sorted(p.glob("*.npy"))
|
||||
if not files:
|
||||
print(f"目录 {p} 中未找到 .npy 文件")
|
||||
return
|
||||
|
||||
X_list, paths, counts = [], [], []
|
||||
for f in files:
|
||||
try:
|
||||
data = np.load(f)
|
||||
if data.ndim != 2:
|
||||
print(f"[跳过] {f.name}: 期望二维数组,实际 shape={data.shape}")
|
||||
continue
|
||||
|
||||
# 统一到 (n_samples, 30)
|
||||
if data.shape[1] == 30:
|
||||
X = data
|
||||
elif data.shape[0] == 30:
|
||||
X = data.T
|
||||
else:
|
||||
print(f"[跳过] {f.name}: shape={data.shape}, 未检测到 30 维特征")
|
||||
continue
|
||||
|
||||
mask = np.isfinite(X).all(axis=1)
|
||||
if not np.all(mask):
|
||||
X = X[mask]
|
||||
print(f"[提示] {f.name}: 移除了含 NaN/Inf 的样本行")
|
||||
|
||||
if X.shape[0] < 3:
|
||||
print(f"[跳过] {f.name}: 样本数过少(n={X.shape[0]})")
|
||||
continue
|
||||
|
||||
X_list.append(X)
|
||||
paths.append(f)
|
||||
counts.append(X.shape[0])
|
||||
except Exception as e:
|
||||
print(f"[错误] 读取 {f.name} 失败: {e}")
|
||||
|
||||
if not X_list:
|
||||
print("未找到可用的数据文件")
|
||||
return
|
||||
|
||||
X_all = np.vstack(X_list)
|
||||
|
||||
if standardize:
|
||||
mean = X_all.mean(axis=0)
|
||||
std = X_all.std(axis=0); std[std == 0] = 1.0
|
||||
X_all = (X_all - mean) / std
|
||||
|
||||
if pca_dim is not None and pca_dim > 2:
|
||||
X_all = PCA(n_components=pca_dim, random_state=random_state).fit_transform(X_all)
|
||||
|
||||
tsne = TSNE(
|
||||
n_components=2,
|
||||
metric=metric,
|
||||
perplexity=float(perplexity),
|
||||
early_exaggeration=float(early_exaggeration),
|
||||
learning_rate=learning_rate,
|
||||
init=init,
|
||||
random_state=random_state,
|
||||
method="barnes_hut", # 适合大样本
|
||||
angle=0.5,
|
||||
verbose=0,
|
||||
)
|
||||
Z_all = tsne.fit_transform(X_all)
|
||||
|
||||
# 统一坐标轴范围
|
||||
x_min, x_max = float(Z_all[:, 0].min()), float(Z_all[:, 0].max())
|
||||
y_min, y_max = float(Z_all[:, 1].min()), float(Z_all[:, 1].max())
|
||||
pad_x = 0.05 * (x_max - x_min) if x_max > x_min else 1.0
|
||||
pad_y = 0.05 * (y_max - y_min) if y_max > y_min else 1.0
|
||||
|
||||
colors = [
|
||||
"#1f77b4","#ff7f0e","#2ca02c","#d62728","#9467bd",
|
||||
"#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"
|
||||
]
|
||||
|
||||
# 分文件出图
|
||||
start = 0
|
||||
for i, (f, n) in enumerate(zip(paths, counts)):
|
||||
Zi = Z_all[start:start + n]; start += n
|
||||
fig, ax = plt.subplots(figsize=(6, 5), dpi=150)
|
||||
if context:
|
||||
ax.scatter(Z_all[:, 0], Z_all[:, 1], s=5, c="#cccccc", alpha=0.35, edgecolors="none", label="All")
|
||||
ax.scatter(Zi[:, 0], Zi[:, 1], s=8, c=colors[i % len(colors)], alpha=0.9, edgecolors="none", label=f.name)
|
||||
ax.set_title(f"{f.name} • t-SNE(shared) (perp={perplexity}, metric={metric})", fontsize=9)
|
||||
ax.set_xlabel("t-SNE-1"); ax.set_ylabel("t-SNE-2")
|
||||
ax.set_xlim(x_min - pad_x, x_max + pad_x); ax.set_ylim(y_min - pad_y, y_max + pad_y)
|
||||
ax.grid(True, linestyle="--", linewidth=0.3, alpha=0.5)
|
||||
if context: ax.legend(loc="best", fontsize=8, frameon=False)
|
||||
fig.tight_layout()
|
||||
out_png = f.with_suffix("").as_posix() + "_tsne_shared.png"
|
||||
fig.savefig(out_png); plt.close(fig)
|
||||
print(f"[完成] {f.name} -> {out_png}")
|
||||
|
||||
# 总览图
|
||||
if make_joint:
|
||||
start = 0
|
||||
fig, ax = plt.subplots(figsize=(7, 6), dpi=150)
|
||||
for i, (f, n) in enumerate(zip(paths, counts)):
|
||||
Zi = Z_all[start:start + n]; start += n
|
||||
ax.scatter(Zi[:, 0], Zi[:, 1], s=8, c=colors[i % len(colors)], alpha=0.85, edgecolors="none", label=f.name)
|
||||
ax.set_title(f"t-SNE(shared) overview (perp={perplexity}, metric={metric})", fontsize=10)
|
||||
ax.set_xlabel("t-SNE-1"); ax.set_ylabel("t-SNE-2")
|
||||
ax.set_xlim(x_min - pad_x, x_max + pad_x); ax.set_ylim(y_min - pad_y, y_max + pad_y)
|
||||
ax.grid(True, linestyle="--", linewidth=0.3, alpha=0.5)
|
||||
ax.legend(loc="best", fontsize=8, frameon=False)
|
||||
fig.tight_layout()
|
||||
out_png = Path(dir_path) / "tsne_shared_overview.png"
|
||||
fig.savefig(out_png.as_posix()); plt.close(fig)
|
||||
print(f"[完成] 总览 -> {out_png}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: embed all .npy files found in ./data with the default settings.
    tsne_dir_shared_coords(dir_path="data")
|
||||
Reference in New Issue
Block a user