From f19d8ac4f037138e4539e03b6c6d6dd5a12fa020 Mon Sep 17 00:00:00 2001 From: koko <1429659362@qq.com> Date: Tue, 9 Dec 2025 20:24:52 +0800 Subject: [PATCH] =?UTF-8?q?nep=E6=A1=86=E6=9E=B6=E9=87=8D=E6=9E=84=2002.sc?= =?UTF-8?q?f?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/machine.yaml | 2 +- src/workflow.py | 133 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/config/machine.yaml b/config/machine.yaml index dca3693..0d975d7 100644 --- a/config/machine.yaml +++ b/config/machine.yaml @@ -22,6 +22,6 @@ executors: cmd: "gpumd" # 对应 config/scripts/gpumd.sh # 3. Slurm 提交测试 (VASP CPU) - vasp_cpu: + vasp_gpu: type: "local" cmd: "mpirun -np 1 vasp_std" \ No newline at end of file diff --git a/src/workflow.py b/src/workflow.py index 155105a..66798c3 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -310,4 +310,135 @@ class Workflow: return else: - self.logger.info("Skipping Select (Already Done).") \ No newline at end of file + self.logger.info("Skipping Select (Already Done).") + # ========================== + # Step: 02.scf (VASP Calculation): split selected.xyz with gpumdkit, run VASP in every generated directory, then collect NEP-dataset.xyz + # ========================== + elif step_name == "02.scf": + step_dir = os.path.join(iter_path, "02.scf") + task_id_scf = f"{iter_name}.02.scf" + + if not self.tracker.is_done(task_id_scf): + self.logger.info("=== Step: 02.scf (VASP) ===") + os.makedirs(step_dir, exist_ok=True) + + # 1. Prepare selected.xyz + # Taken from the 01.select step of the same iteration + select_step_dir = os.path.join(iter_path, "01.select") + src_selected = os.path.join(select_step_dir, "selected.xyz") + if not os.path.exists(src_selected): + self.logger.error(f"selected.xyz not found in {select_step_dir}") + return + + dst_selected = os.path.join(step_dir, "selected.xyz") + if os.path.exists(dst_selected): os.remove(dst_selected) # NOTE(review): os.path.exists() follows symlinks, so a dangling link left by an earlier run would not be removed here; confirm whether os.path.lexists() is wanted + os.symlink(os.path.abspath(src_selected), dst_selected) + + # 2. 
运行 301 拆分结构 + # Command: echo -e "301\niter" | gpumdkit.sh + # This generates iterX_1, iterX_2... and the fp folder + kit_path = self.machine.config['paths'].get('gpumdkit', 'gpumdkit.sh') + input_str_301 = "301\niter" # "iter" is the folder-name prefix; gpumdkit appends the numbers automatically + + self.logger.info("Splitting structures (301)...") + if not run_cmd_with_log(kit_path, step_dir, "scf_setup.log", input_str=input_str_301): + self.logger.error("301 command failed.") + return + + # 3. Place the VASP input files in the 'fp' folder + # The fp folder generated by gpumdkit usually holds the shared files; the sub-folders symlink to them + fp_dir = os.path.join(step_dir, "fp") + if not os.path.exists(fp_dir): + self.logger.error("'fp' directory was not created by 301.") + return + + self.logger.info("Distributing VASP inputs to 'fp' folder...") + + # A. POTCAR (from Data) + potcar_src = os.path.join(self.data_dir, self.param['files']['potcar']) + if os.path.exists(potcar_src): + shutil.copy(potcar_src, os.path.join(fp_dir, "POTCAR")) + else: + self.logger.error(f"POTCAR missing: {potcar_src}") + return + + # B. INCAR (from Template) + # Template path: template/02.scf/INCAR + incar_src = os.path.join(self.template_dir, "02.scf", "INCAR") + if os.path.exists(incar_src): + shutil.copy(incar_src, os.path.join(fp_dir, "INCAR")) + else: + self.logger.error(f"INCAR missing in template: {incar_src}") + return + + # C. KPOINTS (from Template, optional) + kpoints_src = os.path.join(self.template_dir, "02.scf", "KPOINTS") + if os.path.exists(kpoints_src): + shutil.copy(kpoints_src, os.path.join(fp_dir, "KPOINTS")) + else: + self.logger.info("KPOINTS not found in template, assuming KSPACING in INCAR.") + + # 4. 
生成并提交计算任务 + # The presub.sh generated by gpumdkit is ignored here; our own script is generated from machine.yaml instead + executor_name = step_conf.get('executor', 'vasp_gpu') # falls back to the 'vasp_gpu' executor when the step config names none + + # Get the run command (e.g. "mpirun -np 32 vasp_std") + # This logic should eventually call a new machine-module feature that generates submission scripts in batch + # For simplicity, in Local mode a script that loops over the directories is generated instead + + self.logger.info(f"Generating batch submission script for {executor_name}...") + + # Read the command from the machine config + exec_conf = self.machine.config['executors'].get(executor_name, {}) + vasp_cmd = exec_conf.get('cmd', 'mpirun -np 1 vasp_std') # fallback when the executor entry has no 'cmd' + + # Generate run_vasp.sh + run_script_path = os.path.join(step_dir, "run_vasp.sh") + with open(run_script_path, 'w') as f: + f.write("#!/bin/bash\n") + # Loop over the directories matching iter*_* + f.write(f"for dir in iter*_*; do\n") + f.write(f" if [ -d \"$dir\" ]; then\n") + f.write(f" echo \"Running VASP in $dir ...\"\n") + f.write(f" cd $dir\n") + # Write the actual VASP run command + f.write(f" {vasp_cmd} > vasp.log 2>&1\n") # redirect stdout and stderr to vasp.log + f.write(f" cd ..\n") + f.write(f" fi\n") + f.write(f"done\n") + + os.chmod(run_script_path, 0o755) + + # Run the VASP calculations + # NOTE: on Slurm, run_vasp.sh should be submitted here and waited on via its Job ID + # For now, Local mode runs it directly. NOTE(review): the generated loop never checks the VASP exit status, so this check presumably only catches a failure of the script itself; confirm against run_cmd_with_log + self.logger.info(">>> Executing VASP batch calculations (this may take time)...") + if not run_cmd_with_log("./run_vasp.sh", step_dir, "scf_exec.log"): + self.logger.error("VASP batch execution failed.") + return + + # 5. Collect the results (out2xyz) + self.logger.info("Collecting results (out2xyz)...") + cmd_collect = f"{kit_path} -out2xyz ." 
+ if run_cmd_with_log(cmd_collect, step_dir, "scf_collect.log"): + # Check the result + res_dir = os.path.join(step_dir, "NEPdataset-multiple_frames") + res_file = os.path.join(res_dir, "NEP-dataset.xyz") + + if os.path.exists(res_file): + self.logger.info(f"VASP data collected: {res_file}") + # Keep this path for the Train step to use + self.new_data_chunk = res_file + self.tracker.mark_done(task_id_scf) + else: + self.logger.error("NEP-dataset.xyz not found after collection.") + else: + self.logger.error("out2xyz failed.") + + else: + self.logger.info("Skipping SCF (Already Done).") + # Even when skipped, try to restore self.new_data_chunk so that Train can still find the data + # The path is simply inferred here + res_file = os.path.join(step_dir, "NEPdataset-multiple_frames", "NEP-dataset.xyz") + if os.path.exists(res_file): + self.new_data_chunk = res_file \ No newline at end of file