nep框架重构 02.scf

This commit is contained in:
2025-12-09 20:24:52 +08:00
parent ceff569583
commit f19d8ac4f0
2 changed files with 133 additions and 2 deletions

View File

@@ -22,6 +22,6 @@ executors:
cmd: "gpumd" # 对应 config/scripts/gpumd.sh
# 3. Slurm 提交测试 (VASP CPU)
vasp_cpu:
vasp_gpu:
type: "local"
cmd: "mpirun -np 1 vasp_std"

View File

@@ -310,4 +310,135 @@ class Workflow:
return
else:
self.logger.info("Skipping Select (Already Done).")
self.logger.info("Skipping Select (Already Done).")
# ==========================
# Step: 02.scf (VASP Calculation)
# ==========================
elif step_name == "02.scf":
step_dir = os.path.join(iter_path, "02.scf")
task_id_scf = f"{iter_name}.02.scf"
if not self.tracker.is_done(task_id_scf):
self.logger.info("=== Step: 02.scf (VASP) ===")
os.makedirs(step_dir, exist_ok=True)
# 1. 准备 selected.xyz
# 尝试从同轮次的 01.select 获取
select_step_dir = os.path.join(iter_path, "01.select")
src_selected = os.path.join(select_step_dir, "selected.xyz")
if not os.path.exists(src_selected):
self.logger.error(f"selected.xyz not found in {select_step_dir}")
return
dst_selected = os.path.join(step_dir, "selected.xyz")
if os.path.exists(dst_selected): os.remove(dst_selected)
os.symlink(os.path.abspath(src_selected), dst_selected)
# 2. 运行 301 拆分结构
# 命令: echo -e "301\niter" | gpumdkit.sh
# 这会生成 iterX_1, iterX_2... 和 fp 文件夹
kit_path = self.machine.config['paths'].get('gpumdkit', 'gpumdkit.sh')
input_str_301 = "301\niter" # "iter" is the folder-name prefix; gpumdkit appends the numeric suffix automatically
self.logger.info("Splitting structures (301)...")
if not run_cmd_with_log(kit_path, step_dir, "scf_setup.log", input_str=input_str_301):
self.logger.error("301 command failed.")
return
# 3. 准备 VASP 输入文件到 'fp' 文件夹
# gpumdkit 生成的 fp 文件夹通常存放公共文件,子文件夹会软链过去
fp_dir = os.path.join(step_dir, "fp")
if not os.path.exists(fp_dir):
self.logger.error("'fp' directory was not created by 301.")
return
self.logger.info("Distributing VASP inputs to 'fp' folder...")
# A. POTCAR (来自 Data)
potcar_src = os.path.join(self.data_dir, self.param['files']['potcar'])
if os.path.exists(potcar_src):
shutil.copy(potcar_src, os.path.join(fp_dir, "POTCAR"))
else:
self.logger.error(f"POTCAR missing: {potcar_src}")
return
# B. INCAR (来自 Template)
# Template 路径: template/02.scf/INCAR
incar_src = os.path.join(self.template_dir, "02.scf", "INCAR")
if os.path.exists(incar_src):
shutil.copy(incar_src, os.path.join(fp_dir, "INCAR"))
else:
self.logger.error(f"INCAR missing in template: {incar_src}")
return
# C. KPOINTS (来自 Template, 可选)
kpoints_src = os.path.join(self.template_dir, "02.scf", "KPOINTS")
if os.path.exists(kpoints_src):
shutil.copy(kpoints_src, os.path.join(fp_dir, "KPOINTS"))
else:
self.logger.info("KPOINTS not found in template, assuming KSPACING in INCAR.")
# 4. 生成并提交计算任务
# We ignore the presub.sh generated by gpumdkit and instead generate our own script based on machine.yaml
executor_name = step_conf.get('executor', 'vasp_gpu') # falls back to the 'vasp_gpu' executor when the step config names none
# 获取执行命令 (例如 "mpirun -np 32 vasp_std")
# 这里的逻辑需要调用 machine 模块的一个新功能:批量生成提交脚本
# 但为了简化,我们在 Local 模式下生成一个遍历脚本
self.logger.info(f"Generating batch submission script for {executor_name}...")
# 读取 machine 配置里的命令
exec_conf = self.machine.config['executors'].get(executor_name, {})
vasp_cmd = exec_conf.get('cmd', 'mpirun -np 1 vasp_std') # 默认值
# 生成 run_vasp.sh
run_script_path = os.path.join(step_dir, "run_vasp.sh")
with open(run_script_path, 'w') as f:
f.write("#!/bin/bash\n")
# 遍历 iter* 目录
f.write(f"for dir in iter*_*; do\n")
f.write(f" if [ -d \"$dir\" ]; then\n")
f.write(f" echo \"Running VASP in $dir ...\"\n")
f.write(f" cd $dir\n")
# 写入具体的 VASP 执行命令
f.write(f" {vasp_cmd} > vasp.log 2>&1\n") # 重定向日志
f.write(f" cd ..\n")
f.write(f" fi\n")
f.write(f"done\n")
os.chmod(run_script_path, 0o755)
# 执行 VASP 计算
# NOTE: on Slurm, run_vasp.sh should be submitted here and the job ID used to wait for completion
# 目前 Local 模式直接运行
self.logger.info(">>> Executing VASP batch calculations (this may take time)...")
if not run_cmd_with_log("./run_vasp.sh", step_dir, "scf_exec.log"):
self.logger.error("VASP batch execution failed.")
return
# 5. 结果收集 (out2xyz)
self.logger.info("Collecting results (out2xyz)...")
cmd_collect = f"{kit_path} -out2xyz ."
if run_cmd_with_log(cmd_collect, step_dir, "scf_collect.log"):
# 检查结果
res_dir = os.path.join(step_dir, "NEPdataset-multiple_frames")
res_file = os.path.join(res_dir, "NEP-dataset.xyz")
if os.path.exists(res_file):
self.logger.info(f"VASP data collected: {res_file}")
# 保存这个路径供 Train 使用
self.new_data_chunk = res_file
self.tracker.mark_done(task_id_scf)
else:
self.logger.error("NEP-dataset.xyz not found after collection.")
else:
self.logger.error("out2xyz failed.")
else:
self.logger.info("Skipping SCF (Already Done).")
# 即使跳过,也要尝试恢复 self.new_data_chunk 变量,防止 Train 找不到数据
# 这里简单推断一下
res_file = os.path.join(step_dir, "NEPdataset-multiple_frames", "NEP-dataset.xyz")
if os.path.exists(res_file):
self.new_data_chunk = res_file