nep框架重构 02.scf
This commit is contained in:
@@ -22,6 +22,6 @@ executors:
|
|||||||
cmd: "gpumd" # 对应 config/scripts/gpumd.sh
|
cmd: "gpumd" # 对应 config/scripts/gpumd.sh
|
||||||
|
|
||||||
# 3. Slurm 提交测试 (VASP CPU)
|
# 3. Slurm 提交测试 (VASP CPU)
|
||||||
vasp_cpu:
|
vasp_gpu:
|
||||||
type: "local"
|
type: "local"
|
||||||
cmd: "mpirun -np 1 vasp_std"
|
cmd: "mpirun -np 1 vasp_std"
|
||||||
133
src/workflow.py
133
src/workflow.py
@@ -310,4 +310,135 @@ class Workflow:
|
|||||||
return
|
return
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.logger.info("Skipping Select (Already Done).")
|
self.logger.info("Skipping Select (Already Done).")
|
||||||
|
# ==========================
|
||||||
|
# Step: 02.scf (VASP Calculation)
|
||||||
|
# ==========================
|
||||||
|
elif step_name == "02.scf":
|
||||||
|
step_dir = os.path.join(iter_path, "02.scf")
|
||||||
|
task_id_scf = f"{iter_name}.02.scf"
|
||||||
|
|
||||||
|
if not self.tracker.is_done(task_id_scf):
|
||||||
|
self.logger.info("=== Step: 02.scf (VASP) ===")
|
||||||
|
os.makedirs(step_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# 1. 准备 selected.xyz
|
||||||
|
# 尝试从同轮次的 01.select 获取
|
||||||
|
select_step_dir = os.path.join(iter_path, "01.select")
|
||||||
|
src_selected = os.path.join(select_step_dir, "selected.xyz")
|
||||||
|
if not os.path.exists(src_selected):
|
||||||
|
self.logger.error(f"selected.xyz not found in {select_step_dir}")
|
||||||
|
return
|
||||||
|
|
||||||
|
dst_selected = os.path.join(step_dir, "selected.xyz")
|
||||||
|
if os.path.exists(dst_selected): os.remove(dst_selected)
|
||||||
|
os.symlink(os.path.abspath(src_selected), dst_selected)
|
||||||
|
|
||||||
|
# 2. 运行 301 拆分结构
|
||||||
|
# 命令: echo -e "301\niter" | gpumdkit.sh
|
||||||
|
# 这会生成 iterX_1, iterX_2... 和 fp 文件夹
|
||||||
|
kit_path = self.machine.config['paths'].get('gpumdkit', 'gpumdkit.sh')
|
||||||
|
input_str_301 = "301\niter" # 这里 "iter" 是文件夹前缀名,gpumdkit 会自动加数字
|
||||||
|
|
||||||
|
self.logger.info("Splitting structures (301)...")
|
||||||
|
if not run_cmd_with_log(kit_path, step_dir, "scf_setup.log", input_str=input_str_301):
|
||||||
|
self.logger.error("301 command failed.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# 3. 准备 VASP 输入文件到 'fp' 文件夹
|
||||||
|
# gpumdkit 生成的 fp 文件夹通常存放公共文件,子文件夹会软链过去
|
||||||
|
fp_dir = os.path.join(step_dir, "fp")
|
||||||
|
if not os.path.exists(fp_dir):
|
||||||
|
self.logger.error("'fp' directory was not created by 301.")
|
||||||
|
return
|
||||||
|
|
||||||
|
self.logger.info("Distributing VASP inputs to 'fp' folder...")
|
||||||
|
|
||||||
|
# A. POTCAR (来自 Data)
|
||||||
|
potcar_src = os.path.join(self.data_dir, self.param['files']['potcar'])
|
||||||
|
if os.path.exists(potcar_src):
|
||||||
|
shutil.copy(potcar_src, os.path.join(fp_dir, "POTCAR"))
|
||||||
|
else:
|
||||||
|
self.logger.error(f"POTCAR missing: {potcar_src}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# B. INCAR (来自 Template)
|
||||||
|
# Template 路径: template/02.scf/INCAR
|
||||||
|
incar_src = os.path.join(self.template_dir, "02.scf", "INCAR")
|
||||||
|
if os.path.exists(incar_src):
|
||||||
|
shutil.copy(incar_src, os.path.join(fp_dir, "INCAR"))
|
||||||
|
else:
|
||||||
|
self.logger.error(f"INCAR missing in template: {incar_src}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# C. KPOINTS (来自 Template, 可选)
|
||||||
|
kpoints_src = os.path.join(self.template_dir, "02.scf", "KPOINTS")
|
||||||
|
if os.path.exists(kpoints_src):
|
||||||
|
shutil.copy(kpoints_src, os.path.join(fp_dir, "KPOINTS"))
|
||||||
|
else:
|
||||||
|
self.logger.info("KPOINTS not found in template, assuming KSPACING in INCAR.")
|
||||||
|
|
||||||
|
# 4. 生成并提交计算任务
|
||||||
|
# 这里我们不理会 gpumdkit 生成的 presub.sh,而是根据 machine.yaml 生成自己的
|
||||||
|
executor_name = step_conf.get('executor', 'vasp_gpu') # defaults to the 'vasp_gpu' executor when the step config names none
|
||||||
|
|
||||||
|
# 获取执行命令 (例如 "mpirun -np 32 vasp_std")
|
||||||
|
# 这里的逻辑需要调用 machine 模块的一个新功能:批量生成提交脚本
|
||||||
|
# 但为了简化,我们在 Local 模式下生成一个遍历脚本
|
||||||
|
|
||||||
|
self.logger.info(f"Generating batch submission script for {executor_name}...")
|
||||||
|
|
||||||
|
# 读取 machine 配置里的命令
|
||||||
|
exec_conf = self.machine.config['executors'].get(executor_name, {})
|
||||||
|
vasp_cmd = exec_conf.get('cmd', 'mpirun -np 1 vasp_std') # 默认值
|
||||||
|
|
||||||
|
# 生成 run_vasp.sh
|
||||||
|
run_script_path = os.path.join(step_dir, "run_vasp.sh")
|
||||||
|
with open(run_script_path, 'w') as f:
|
||||||
|
f.write("#!/bin/bash\n")
|
||||||
|
# Loop over every directory matching iter*_* (the glob used in the generated script)
|
||||||
|
f.write(f"for dir in iter*_*; do\n")
|
||||||
|
f.write(f" if [ -d \"$dir\" ]; then\n")
|
||||||
|
f.write(f" echo \"Running VASP in $dir ...\"\n")
|
||||||
|
f.write(f" cd $dir\n")
|
||||||
|
# 写入具体的 VASP 执行命令
|
||||||
|
f.write(f" {vasp_cmd} > vasp.log 2>&1\n") # 重定向日志
|
||||||
|
f.write(f" cd ..\n")
|
||||||
|
f.write(f" fi\n")
|
||||||
|
f.write(f"done\n")
|
||||||
|
|
||||||
|
os.chmod(run_script_path, 0o755)
|
||||||
|
|
||||||
|
# 执行 VASP 计算
|
||||||
|
# 注意:如果是在 Slurm 上,这里应该提交 run_vasp.sh,并使用 Job ID 等待
|
||||||
|
# 目前 Local 模式直接运行
|
||||||
|
self.logger.info(">>> Executing VASP batch calculations (this may take time)...")
|
||||||
|
if not run_cmd_with_log("./run_vasp.sh", step_dir, "scf_exec.log"):
|
||||||
|
self.logger.error("VASP batch execution failed.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# 5. 结果收集 (out2xyz)
|
||||||
|
self.logger.info("Collecting results (out2xyz)...")
|
||||||
|
cmd_collect = f"{kit_path} -out2xyz ."
|
||||||
|
if run_cmd_with_log(cmd_collect, step_dir, "scf_collect.log"):
|
||||||
|
# 检查结果
|
||||||
|
res_dir = os.path.join(step_dir, "NEPdataset-multiple_frames")
|
||||||
|
res_file = os.path.join(res_dir, "NEP-dataset.xyz")
|
||||||
|
|
||||||
|
if os.path.exists(res_file):
|
||||||
|
self.logger.info(f"VASP data collected: {res_file}")
|
||||||
|
# 保存这个路径供 Train 使用
|
||||||
|
self.new_data_chunk = res_file
|
||||||
|
self.tracker.mark_done(task_id_scf)
|
||||||
|
else:
|
||||||
|
self.logger.error("NEP-dataset.xyz not found after collection.")
|
||||||
|
else:
|
||||||
|
self.logger.error("out2xyz failed.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.logger.info("Skipping SCF (Already Done).")
|
||||||
|
# 即使跳过,也要尝试恢复 self.new_data_chunk 变量,防止 Train 找不到数据
|
||||||
|
# 这里简单推断一下
|
||||||
|
res_file = os.path.join(step_dir, "NEPdataset-multiple_frames", "NEP-dataset.xyz")
|
||||||
|
if os.path.exists(res_file):
|
||||||
|
self.new_data_chunk = res_file
|
||||||
Reference in New Issue
Block a user