nep框架搭建

This commit is contained in:
2025-12-08 22:05:06 +08:00
parent 5057d18e98
commit cba2afb403
9 changed files with 498 additions and 5 deletions

2
.idea/NEP-auto.iml generated
View File

@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" /> <orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
</module> </module>

View File

@@ -25,7 +25,12 @@ systems:
command: "nep" command: "nep"
env_setup: "" env_setup: ""
gpu_id: 0 gpu_id: 0
gpumdkit:
# gpumdkit.sh is a helper script toolkit (e.g. structure conversion via -addlabel); it is run as a plain serial command
command: "gpumdkit.sh"
env_setup: ""
# 即使是 local 模式,有时也需要指定并行度
n_procs: 1
# 3. VASP (GPU 版) 配置 # 3. VASP (GPU 版) 配置
vasp: vasp:
# 假设是 GPU 版本,可能不需要 mpirun 或者只需要少量核 # 假设是 GPU 版本,可能不需要 mpirun 或者只需要少量核

View File

@@ -32,9 +32,7 @@ notification:
# --- 3. 各模块具体的物理/算法参数 --- # --- 3. 各模块具体的物理/算法参数 ---
params: params:
preheat: preheat:
temp: 300 template_file: "run_ramp.in"
steps: 10000
# 这里不需要指定 gpumd 路径,只需要指定物理量
select: select:
target_min: 60 target_min: 60

View File

@@ -0,0 +1,113 @@
import shutil
import logging
from pathlib import Path
from .base_module import BaseModule
class PreheatModule(BaseModule):
    """Preheat stage of one iteration: stage GPUMD inputs and run one MD job.

    The working directory is ``<iter_dir>/00.md/preheat``. Attributes such as
    ``iter_dir``, ``root``, ``iter_id``, ``iter_name``, ``logger``, ``runner``,
    ``config_param`` and ``config_sys`` are expected to be provided by
    ``BaseModule`` (not visible in this file).
    """

    def __init__(self, driver, iter_id):
        super().__init__(driver, iter_id)
        # Sub-directory of the template tree that holds this stage's inputs.
        self.template_subdir = "00_md"

    def get_work_dir(self):
        """Return the preheat working directory of the current iteration."""
        return self.iter_dir / "00.md" / "preheat"

    def initialize(self):
        """Stage run.in, nep.in, nep.txt and model.xyz in the working directory.

        Raises:
            FileNotFoundError: if the potential or the initial structure is missing.
            RuntimeError: if the structure conversion produced no model.xyz.
        """
        super().initialize()  # presumably creates the working directory

        # 1. run.in: the template name is read from params.preheat.template_file.
        template_name = self.config_param['params']['preheat'].get('template_file', 'run.in')
        self.copy_template(template_name, "run.in")
        # 2. nep.in, copied under its own name.
        self.copy_template("nep.in")
        # 3. nep.txt (potential).
        self._prepare_potential()
        # 4. model.xyz (structure).
        self._prepare_structure()

    def _prepare_potential(self):
        """Copy the potential file to ``nep.txt`` in the working directory.

        Iteration 1 uses ``system.initial_potential`` from the system config;
        later iterations use ``nep.txt`` from the previous iteration's
        ``03.train`` directory.

        Raises:
            FileNotFoundError: if the source potential does not exist.
        """
        dst = self.get_work_dir() / "nep.txt"
        if self.iter_id == 1:
            # First iteration: initial potential defined in the system config.
            init_pot = Path(self.config_sys['system']['initial_potential'])
            if not init_pot.exists():
                raise FileNotFoundError(f"Initial potential not found: {init_pot}")
            shutil.copy(init_pot, dst)
            self.logger.info(f" -> Copied initial potential: {init_pot.name}")
        else:
            # Later iterations: training result of the previous iteration.
            prev_iter = f"iter_{self.iter_id - 1:03d}"
            src = self.root / prev_iter / "03.train" / "nep.txt"
            if not src.exists():
                raise FileNotFoundError(f"Previous potential not found: {src}")
            shutil.copy(src, dst)
            self.logger.info(f" -> Copied potential from {prev_iter}")

    def _prepare_structure(self):
        """Create ``model.xyz`` from the configured initial VASP structure.

        The structure file is copied into the working directory and converted
        with ``gpumdkit -addlabel <file> <elements...>`` through the runner.

        Raises:
            FileNotFoundError: if the initial structure file does not exist.
            RuntimeError: if neither ``model.xyz`` nor ``<stem>.xyz`` was produced.
        """
        work_dir = self.get_work_dir()
        # Preheat currently always starts from the initial structure.
        vasp_path = Path(self.config_sys['system']['initial_structure'])
        if not vasp_path.exists():
            raise FileNotFoundError(f"Structure file not found: {vasp_path}")

        local_vasp = work_dir / vasp_path.name
        shutil.copy(vasp_path, local_vasp)

        # Command form: gpumdkit.sh -addlabel file.vasp Li Y Cl
        elements = " ".join(self.config_sys['system']['elements'])
        self.logger.info(" -> Converting VASP to model.xyz...")
        # Requires a 'gpumdkit' entry in the machine configuration.
        cmd_args = f"-addlabel {local_vasp.name} {elements}"
        self.runner.run("gpumdkit", cwd=work_dir, extra_args=cmd_args)

        model_xyz = work_dir / "model.xyz"
        if not model_xyz.exists():
            # Tolerate the converter naming its output after the input file
            # (e.g. LiYCl.xyz) and rename it to model.xyz.
            fallback = work_dir / (local_vasp.stem + ".xyz")
            if fallback.exists():
                shutil.move(fallback, model_xyz)
            else:
                raise RuntimeError("Failed to generate model.xyz from gpumdkit")

    def run(self):
        """Run the preheat MD job unless a completed run is already present."""
        work_dir = self.get_work_dir()
        # Use the same completion criterion as check_done(): an empty
        # thermo.out (e.g. left by a crashed run) must not cause a skip.
        if self.check_done():
            self.logger.info(" -> Pre-check: thermo.out exists, skipping preheat.")
            return

        self.logger.info(f"🔥 Running Preheat in {self.iter_name}")
        self.initialize()
        # gpumd is invoked without extra arguments.
        self.runner.run("gpumd", cwd=work_dir)
        self.logger.info(" -> Preheat finished.")

    def check_done(self):
        """Return True when ``thermo.out`` exists and is non-empty."""
        thermo = self.get_work_dir() / "thermo.out"
        return thermo.exists() and thermo.stat().st_size > 0

View File

@@ -0,0 +1,116 @@
import shutil
import glob
from pathlib import Path
from nep_auto.modules.base_module import BaseModule
class MDModule(BaseModule):
    """MD sampling stage: sample the preheat trajectory and run one MD task per template.

    Inputs are taken from ``<iter_dir>/00.md/preheat``; work happens in
    ``<iter_dir>/00.md/md``. Attributes such as ``iter_dir``, ``iter_id``,
    ``logger``, ``runner`` and ``driver`` are expected to be provided by
    ``BaseModule`` (not visible in this file).
    """

    def __init__(self, driver, iter_id):
        super().__init__(driver, iter_id)
        self.template_subdir = "00_md"
        # Preheat directory (input source).
        self.preheat_dir = self.iter_dir / "00.md" / "preheat"
        # MD directory (work area).
        self.work_dir = self.iter_dir / "00.md" / "md"

    def get_work_dir(self):
        """Return the MD working directory of the current iteration."""
        return self.work_dir

    @staticmethod
    def _append_file(src, outfile):
        """Append the bytes of *src* to *outfile*, terminating with a newline if missing."""
        with open(src, 'rb') as infile:
            shutil.copyfileobj(infile, outfile)
            if infile.tell() > 0:
                # whence=2: seek relative to end of file to inspect the last byte.
                infile.seek(-1, 2)
                if infile.read(1) != b"\n":
                    # Keep the next file's first line from fusing with this one's last.
                    outfile.write(b"\n")

    def run(self):
        """Sample the preheat dump, run every MD task and merge their dumps.

        Raises:
            FileNotFoundError: if the preheat dump, the potential or every run
                template is missing.
            RuntimeError: if the merged ``dump.xyz`` is missing or empty.
        """
        self.logger.info(f"🌪️ [MD] Starting Sampling Phase Iter {self.iter_id}...")
        self.initialize()

        # ----------------------------------------
        # 1. Sample from the preheat trajectory
        # ----------------------------------------
        preheat_dump = self.preheat_dir / "dump.xyz"
        if not preheat_dump.exists():
            raise FileNotFoundError(f"Preheat dump not found: {preheat_dump}")

        kit_root = self.driver.config_param['env']['gpumdkit_root']
        script = f"{kit_root}/Scripts/sample_structures/sample_structures.py"

        # Work on a local copy of the dump.
        local_dump = self.work_dir / "preheat_dump.xyz"
        shutil.copy(preheat_dump, local_dump)

        self.logger.info(" -> Sampling structures from preheat trajectory...")
        # Invocation: sample_structures.py <dump> uniform 4
        # TODO: the sample count (4) is hard-coded; consider reading it from the parameter file.
        self.runner.run(
            "python_script",
            cwd=self.work_dir,
            extra_args=f"{script} preheat_dump.xyz uniform 4"
        )

        # One task directory (sample_1, sample_2, ...) per run template.
        # NOTE(review): the template path is relative to the process working
        # directory, unlike copy_template() used by sibling modules — confirm.
        tpl_path = Path("template") / self.template_subdir
        run_templates = sorted(tpl_path.glob("md_run_*.in"))
        if not run_templates:
            self.logger.warning(f"⚠️ No 'md_run_*.in' found in {tpl_path}, looking for 'run.in'...")
            run_templates = list(tpl_path.glob("run.in"))
        if not run_templates:
            # Without any template no task would run and the stage would
            # silently "succeed" with an empty dump.
            raise FileNotFoundError(f"No MD run templates found in {tpl_path}")

        sub_tasks = []
        nep_source = self.preheat_dir / "nep.txt"  # reuse the preheat potential
        preheat_model = self.preheat_dir / "model.xyz"

        for idx, tpl in enumerate(run_templates, start=1):
            task_dir = self.work_dir / f"sample_{idx}"
            task_dir.mkdir(exist_ok=True)
            sub_tasks.append(task_dir)

            # 1. run.in
            shutil.copy(tpl, task_dir / "run.in")
            # 2. nep.txt
            shutil.copy(nep_source, task_dir / "nep.txt")
            # 3. Structure: simplified, every task starts from the preheat
            #    model.xyz; run.in is assumed to be configured accordingly.
            if preheat_model.exists():
                shutil.copy(preheat_model, task_dir / "model.xyz")

        # ----------------------------------------
        # 2. Run all sample tasks (sequentially)
        # ----------------------------------------
        self.logger.info(f" -> Submitting {len(sub_tasks)} MD tasks...")
        for task_dir in sub_tasks:
            self.logger.info(f" -> Running {task_dir.name}...")
            self.runner.run("gpumd", cwd=task_dir)

        # ----------------------------------------
        # 3. Merge results (portable equivalent of `cat sample_*/dump.xyz`)
        # ----------------------------------------
        self.logger.info(" -> Merging dump files...")
        target_dump = self.work_dir / "dump.xyz"
        with open(target_dump, 'wb') as outfile:
            for task_dir in sub_tasks:
                src = task_dir / "dump.xyz"
                if src.exists():
                    self._append_file(src, outfile)
                else:
                    self.logger.warning(f"⚠️ {task_dir.name} generated no dump.xyz")

        self.check_done()

    def check_done(self):
        """Return True if the merged ``dump.xyz`` exists and is non-empty.

        Raises:
            RuntimeError: if the merged dump is missing or empty.
        """
        merged = self.work_dir / "dump.xyz"
        # The merge step always creates the file, so existence alone proves nothing.
        if merged.exists() and merged.stat().st_size > 0:
            self.logger.info("✅ MD Sampling finished.")
            return True
        raise RuntimeError("MD failed: dump.xyz not created or empty.")

View File

@@ -0,0 +1,105 @@
import shutil
import re
from pathlib import Path
from nep_auto.modules.base_module import BaseModule
class SelectModule(BaseModule):
    """Selection stage: pick structures from the MD dump by tuning a distance threshold.

    Works in ``<iter_dir>/01.select`` and reads the merged dump from
    ``<iter_dir>/00.md/md``. Attributes such as ``iter_dir``, ``root``,
    ``iter_id``, ``logger``, ``runner``, ``driver`` and ``config_param`` are
    expected to be provided by ``BaseModule`` (not visible in this file).
    """

    def __init__(self, driver, iter_id):
        super().__init__(driver, iter_id)
        self.work_dir = self.iter_dir / "01.select"
        self.md_dir = self.iter_dir / "00.md" / "md"

    def get_work_dir(self):
        """Return the selection working directory of the current iteration."""
        return self.work_dir

    def get_frame_count(self, xyz_file):
        """Return the number of ``Lattice=`` occurrences in *xyz_file*.

        Used as the frame count of an extended-XYZ file. Returns 0 when the
        file does not exist or cannot be read.
        """
        if not xyz_file.exists():
            return 0
        try:
            # Stream line by line instead of loading the whole trajectory.
            with open(xyz_file, 'r', encoding='utf-8', errors='replace') as f:
                return sum(line.count("Lattice=") for line in f)
        except OSError:
            return 0

    def _stage_reference_train(self):
        """Copy the reference ``train.xyz`` that the selection script is given."""
        if self.iter_id == 1:
            # First iteration: initial data set, if one was provided.
            src = self.root / "00.data" / "train.xyz"
        else:
            # Later iterations: accumulated training set of the previous iteration.
            src = self.root / f"iter_{self.iter_id - 1:03d}" / "03.train" / "train.xyz"
        if src.exists():
            shutil.copy(src, self.work_dir / "train.xyz")
        else:
            self.logger.warning(f"Reference train.xyz not found: {src}")

    def _stage_potential(self):
        """Copy ``nep.txt`` from the MD directory, falling back to the preheat directory."""
        candidates = (
            self.md_dir / "nep.txt",
            self.iter_dir / "00.md" / "preheat" / "nep.txt",
        )
        for src in candidates:
            if src.exists():
                shutil.copy(src, self.work_dir / "nep.txt")
                return
        raise FileNotFoundError(f"nep.txt not found in any of: {[str(c) for c in candidates]}")

    def run(self):
        """Run the selection script repeatedly until the selected count is in range.

        Raises:
            FileNotFoundError: if the MD dump or the potential is missing.
            RuntimeError: if no ``selected.xyz`` exists after the loop.
        """
        self.logger.info(f"🔍 [Select] Starting Active Learning Selection Iter {self.iter_id}...")
        self.initialize()

        # Required files in the working directory: dump.xyz, train.xyz, nep.txt.
        shutil.copy(self.md_dir / "dump.xyz", self.work_dir / "dump.xyz")
        self._stage_reference_train()
        self._stage_potential()

        cfg = self.config_param['params']['select']
        target_min = cfg.get('target_min', 60)
        target_max = cfg.get('target_max', 120)
        threshold = cfg.get('init_threshold', 0.01)

        kit_root = self.driver.config_param['env']['gpumdkit_root']
        script = f"{kit_root}/Scripts/sample_structures/neptrain_select_structs.py"

        step = 0.01
        max_attempts = 10
        for attempt in range(max_attempts):
            self.logger.info(f" -> Attempt {attempt + 1}: Threshold = {threshold}")

            # NOTE(review): assumes the script accepts --distance and
            # --auto_confirm on the command line — confirm against the script.
            cmd_args = f"{script} dump.xyz train.xyz nep.txt --distance {threshold} --auto_confirm"
            try:
                self.runner.run("python_script", cwd=self.work_dir, extra_args=cmd_args)
            except Exception as e:
                # Best effort: a failing attempt is reported and the loop goes on.
                self.logger.warning(f"Select script warning: {e}")

            count = self.get_frame_count(self.work_dir / "selected.xyz")
            self.logger.info(f" -> Selected {count} structures.")

            if target_min <= count <= target_max:
                self.logger.info("✅ Selection criteria met!")
                break
            if count < target_min:
                self.logger.info(" -> Too few, lowering threshold...")
                lowered = round(threshold - step, 10)
                # Never let the threshold reach zero or go negative: halve instead.
                threshold = lowered if lowered > 0 else threshold / 2
            else:
                self.logger.info(" -> Too many, raising threshold (+0.01)...")
                # Rounding avoids accumulating binary floating-point drift.
                threshold = round(threshold + step, 10)
        else:
            self.logger.warning("⚠️ Max attempts reached in selection. Proceeding with current best.")

        self.check_done()

    def check_done(self):
        """Return True if ``selected.xyz`` exists.

        Raises:
            RuntimeError: if ``selected.xyz`` is missing.
        """
        if (self.work_dir / "selected.xyz").exists():
            return True
        raise RuntimeError("Selection failed: selected.xyz not found")

View File

@@ -0,0 +1,91 @@
import shutil
from pathlib import Path
from ase.io import read, write
from nep_auto.modules.base_module import BaseModule
class SCFModule(BaseModule):
    """SCF stage: run one VASP job per selected structure and collect the results.

    Works in ``<iter_dir>/02.scf`` and reads ``selected.xyz`` from
    ``<iter_dir>/01.select``. Attributes such as ``iter_dir``, ``iter_id``,
    ``logger`` and ``runner`` are expected to be provided by ``BaseModule``
    (not visible in this file).
    """

    # Input files copied from the template directory into every task directory.
    _TEMPLATE_FILES = ("INCAR", "KPOINTS", "POTCAR")

    def __init__(self, driver, iter_id):
        super().__init__(driver, iter_id)
        self.template_subdir = "02_scf"
        self.work_dir = self.iter_dir / "02.scf"
        self.select_dir = self.iter_dir / "01.select"

    def get_work_dir(self):
        """Return the SCF working directory of the current iteration."""
        return self.work_dir

    def run(self):
        """Prepare task directories, run VASP in each and write ``NEP-dataset.xyz``.

        Raises:
            FileNotFoundError: if ``selected.xyz`` is missing.
            RuntimeError: if no OUTCAR could be read.
        """
        self.logger.info(f"⚛️ [SCF] Starting DFT Calculation Iter {self.iter_id}...")
        self.initialize()

        # 1. Read selected.xyz
        selected_xyz = self.select_dir / "selected.xyz"
        if not selected_xyz.exists():
            raise FileNotFoundError("selected.xyz missing")

        self.logger.info(" -> Reading structures using ASE...")
        atoms_list = read(selected_xyz, index=':')
        self.logger.info(f" -> Found {len(atoms_list)} structures.")

        # 2. Prepare task directories.
        # The templates do not depend on the task, so stage them once in
        # self.work_dir and distribute from there.
        for name in self._TEMPLATE_FILES:
            self.copy_template(name, target_name=None)

        task_dirs = []
        for i, atoms in enumerate(atoms_list):
            task_dir = self.work_dir / f"task.{i:03d}"
            task_dir.mkdir(exist_ok=True)
            task_dirs.append(task_dir)

            write(task_dir / "POSCAR", atoms, format='vasp')
            for name in self._TEMPLATE_FILES:
                shutil.copy(self.work_dir / name, task_dir / name)

        # 3. Run the jobs sequentially; a failing task does not stop the others.
        self.logger.info(" -> Running VASP jobs...")
        success_count = 0
        for task_dir in task_dirs:
            self.logger.info(f" -> Running {task_dir.name}...")
            try:
                # Uses the 'vasp' entry of the machine configuration.
                self.runner.run("vasp", cwd=task_dir)
                # Simple check: an OUTCAR was produced.
                if (task_dir / "OUTCAR").exists():
                    success_count += 1
            except Exception as e:
                self.logger.error(f"Task {task_dir.name} failed: {e}")

        self.logger.info(f" -> Finished. Success: {success_count}/{len(task_dirs)}")

        # 4. Collect data (OUTCAR -> NEP-dataset.xyz)
        self.logger.info(" -> Collecting data...")
        valid_atoms = []
        for task_dir in task_dirs:
            try:
                atoms = read(task_dir / "OUTCAR", format='vasp-outcar')
            except Exception as e:
                # Skip unreadable results, but leave a trace instead of hiding them.
                self.logger.warning(f"Skipping {task_dir.name}: cannot read OUTCAR ({e})")
                continue
            valid_atoms.append(atoms)

        if not valid_atoms:
            raise RuntimeError("No valid OUTCARs found!")
        write(self.work_dir / "NEP-dataset.xyz", valid_atoms, format='extxyz')

        self.check_done()

    def check_done(self):
        """Return True if ``NEP-dataset.xyz`` exists.

        Raises:
            RuntimeError: if ``NEP-dataset.xyz`` is missing.
        """
        if (self.work_dir / "NEP-dataset.xyz").exists():
            return True
        raise RuntimeError("SCF failed: NEP-dataset.xyz not generated")

View File

@@ -0,0 +1,57 @@
import shutil
from nep_auto.modules.base_module import BaseModule
class TrainModule(BaseModule):
    """Training stage: build the accumulated ``train.xyz`` and run ``nep``.

    Works in ``<iter_dir>/03.train``. Attributes such as ``iter_dir``,
    ``root``, ``iter_id``, ``logger`` and ``runner`` are expected to be
    provided by ``BaseModule`` (not visible in this file).
    """

    def __init__(self, driver, iter_id):
        super().__init__(driver, iter_id)
        self.template_subdir = "03_train"
        self.work_dir = self.iter_dir / "03.train"

    def get_work_dir(self):
        """Return the training working directory of the current iteration."""
        return self.work_dir

    @staticmethod
    def _append_file(src, outfile):
        """Append the bytes of *src* to *outfile*, terminating with a newline if missing."""
        with open(src, 'rb') as infile:
            shutil.copyfileobj(infile, outfile)
            if infile.tell() > 0:
                # whence=2: seek relative to end of file to inspect the last byte.
                infile.seek(-1, 2)
                if infile.read(1) != b"\n":
                    # Keep the next file's first line from fusing with this one's last.
                    outfile.write(b"\n")

    def run(self):
        """Assemble ``train.xyz``, stage ``nep.in`` and run the training.

        ``train.xyz`` is the previous training set (the initial data set in
        ``00.data`` for iteration 1, if present) followed by this iteration's
        ``02.scf/NEP-dataset.xyz``.

        Raises:
            FileNotFoundError: if this iteration's ``NEP-dataset.xyz`` is missing.
            RuntimeError: if no ``nep.txt`` exists after training.
        """
        self.logger.info(f"🧠 [Train] Starting Training Iter {self.iter_id}...")
        self.initialize()

        # 1. Prepare train.xyz
        # Validate the mandatory input first so a failure does not leave a
        # truncated train.xyz behind.
        new_data = self.iter_dir / "02.scf" / "NEP-dataset.xyz"
        if not new_data.exists():
            raise FileNotFoundError("New training data (NEP-dataset.xyz) missing!")

        if self.iter_id == 1:
            # Optional initial training set; without it only this iteration's
            # SCF data is used.
            base_data = self.root / "00.data" / "train.xyz"
        else:
            base_data = self.root / f"iter_{self.iter_id - 1:03d}" / "03.train" / "train.xyz"

        current_train_xyz = self.work_dir / "train.xyz"
        with open(current_train_xyz, 'wb') as outfile:
            # A. Previous (or initial) data
            if base_data.exists():
                self._append_file(base_data, outfile)
            elif self.iter_id > 1:
                self.logger.warning(f"Previous train.xyz not found: {base_data}")
            # B. New data of this iteration
            self._append_file(new_data, outfile)

        # 2. Prepare nep.in
        self.copy_template("nep.in")

        # 3. Run the training
        self.logger.info(" -> Running NEP training...")
        self.runner.run("nep", cwd=self.work_dir)

        self.check_done()

    def check_done(self):
        """Return True if ``nep.txt`` exists in the working directory.

        Raises:
            RuntimeError: if ``nep.txt`` is missing.
        """
        if (self.work_dir / "nep.txt").exists():
            self.logger.info("✅ Training finished.")
            return True
        raise RuntimeError("Training failed: nep.txt not generated")

View File

@@ -0,0 +1,8 @@
# Two-stage temperature-ramp MD input (presumably the GPUMD run.in template used by the preheat stage — confirm).
# Potential file, read from the job's working directory.
potential ./nep.txt
# Initial velocities; 100 is presumably the initial temperature in K — confirm.
velocity 100
# Stage 1: NPT (MTTK) with temperature going from 100 to 400, anisotropic pressure target 0.
ensemble npt_mttk temp 100 400 aniso 0 0
run 100000
# Stage 2: NPT (MTTK) with temperature going from 400 to 1200, anisotropic pressure target 0.
ensemble npt_mttk temp 400 1200 aniso 0 0
# NOTE(review): the dump keywords appear only before the second `run`; if output
# settings apply per run, stage 1 writes no thermo/trajectory output — confirm this is intended.
dump_thermo 10
dump_exyz 10000
run 100000