NEP框架重构

2025-12-09 10:46:24 +08:00
parent 99d7742c21
commit 13d9ce4385
1 changed file with 69 additions and 52 deletions
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -50,75 +50,92 @@ class Workflow:
                if step_name == "00.md":
                    step_dir = os.path.join(iter_path, "00.md")
-                    # --- A. 第一轮初始化：POSCAR -> model.xyz ---
+                    # 1. 第一轮初始化：POSCAR -> model.xyz (保持不变)
                    if iter_id == 0:
                        os.makedirs(step_dir, exist_ok=True)
                        poscar_name = self.param['files']['poscar']
                        poscar_src = os.path.join(self.data_dir, poscar_name)
                        if os.path.exists(poscar_src):
                            shutil.copy(poscar_src, os.path.join(step_dir, poscar_name))
                            atom_labels = self.param['files'].get('label', '')
                            kit_path = self.machine.config['paths'].get('gpumdkit', 'gpumdkit.sh')
                            cmd = f"{kit_path} -addlabel {poscar_name} {atom_labels}"
                            self.logger.info(f"Initializing model.xyz: {cmd}")
                            subprocess.check_call(cmd, shell=True, cwd=step_dir)
                        else:
                            self.logger.error(f"POSCAR missing: {poscar_src}")
-                            continue  # 跳过这一步
+                            continue
-                    # 检查 model.xyz 是否存在 (无论是刚生成的，还是上一轮传过来的)
+                    # 确保 gpumdkit 路径可用
-                    # 注意：如果是 Iter > 0，这里需要逻辑从上一轮获取，目前先保证 Iter 0
+                    kit_path = self.machine.config['paths'].get('gpumdkit', 'gpumdkit.sh')
                    # ----------------------------------------------------
                    # 2. 核心修改：分别处理 preheat 和 production
                    # ----------------------------------------------------
                    # === Sub-task 1: Preheat (预热) ===
                    # 逻辑：复制model.xyz -> 跑MD -> 跑201采样 -> 生成 sampled_structures.xyz
                    preheat_dir = os.path.join(step_dir, "preheat")
                    os.makedirs(preheat_dir, exist_ok=True)
                    # 准备文件
                    current_model_xyz = os.path.join(step_dir, "model.xyz")
-                    if not os.path.exists(current_model_xyz):
+                    shutil.copy(current_model_xyz, os.path.join(preheat_dir, "model.xyz"))
-                        self.logger.error(f"Critical: model.xyz not found in {step_dir}")
+                    shutil.copy(self.current_nep_pot, os.path.join(preheat_dir, "nep.txt"))
-                        continue
+                    shutil.copy(os.path.join(self.template_dir, "00.md", "preheat", "run.in"),
                                os.path.join(preheat_dir, "run.in"))
-                    # --- B. 执行子任务 (Preheat / Production) ---
+                    self.logger.info(">>> Running Preheat MD...")
-                    sub_tasks = step_conf.get('sub_tasks', [])
+                    # 使用 Machine 运行 GPUMD (假设 machine.yaml 里 gpumd 是基础命令)
-                    if not sub_tasks:
+                    self.machine.execute("gpumd", preheat_dir)
                        self.logger.warning("No sub_tasks defined for 00.md in param.yaml!")
-                    for sub in sub_tasks:
+                    # [关键] Preheat 后处理：采样
-                        template_sub_name = sub.get('template_sub')
+                    if os.path.exists(os.path.join(preheat_dir, "dump.xyz")):
-                        if not template_sub_name: continue
+                        self.logger.info(">>> Running Sampling (201)...")
                        # 构造命令: echo -e "201\ndump.xyz\nuniform\n4" | gpumdkit.sh
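                        # NOTE: the input stream below follows the described prompt sequence "dump.xyz uniform 4" (after selecting 201).
                        # If the script's interactive prompts come in a different order, adjust this string accordingly.
                        # Each \n here stands for pressing Enter.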
                        input_str = "201\ndump.xyz\nuniform\n4"
-                        # 1. 定义子目录 (e.g., iter_00/00.md/preheat)
+                        try:
-                        sub_work_dir = os.path.join(step_dir, template_sub_name)
+                            # 调用 gpumdkit
-                        os.makedirs(sub_work_dir, exist_ok=True)
+                            process = subprocess.Popen(
                                kit_path,
                                shell=True,
                                cwd=preheat_dir,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                text=True
                            )
                            stdout, stderr = process.communicate(input=input_str)
-                        self.logger.info(f"--> Setup Sub-task: {template_sub_name}")
+                            if os.path.exists(os.path.join(preheat_dir, "sampled_structures.xyz")):
-
+                                self.logger.info("Sampled structures generated successfully.")
                        # 2. 【关键】将 model.xyz 分发到子目录
                        shutil.copy(current_model_xyz, os.path.join(sub_work_dir, "model.xyz"))
                        # 3. 准备 Template (run.in)
                        # 路径: template/00.md/preheat/
                        template_path = os.path.join(self.template_dir, "00.md", template_sub_name)
                        if not os.path.exists(template_path):
                            self.logger.error(f"Template not found: {template_path}")
                            continue
                        # 4. 实例化 MD 任务
                        # 这里的 name 仅仅是日志用的标识
                        md_task = MDStep(f"MD-{template_sub_name}", sub_work_dir, self.machine, self.config)
                        # 5. 运行 (传入势函数路径 和 模板路径)
                        # run() 方法内部会处理 nep.txt 的复制和 run.in 的复制
                        success = md_task.run(self.current_nep_pot, template_path)
                        if success:
                            # 记录最新的 dump.xyz 位置，供下一步 Select 使用
                            self.last_dump_path = os.path.join(sub_work_dir, "dump.xyz")
                            # 同时也把最新的 model.xyz (如果MD改变了结构) 或 dump 更新为下一次的起点？
                            # 目前逻辑：Select 步会用 self.last_dump_path
                            else:
-                            self.logger.error(f"MD Sub-task {template_sub_name} Failed.")
+                                self.logger.error(f"Sampling failed. Output: {stdout} Error: {stderr}")
-                            # 根据策略决定是否中断，这里暂时中断
+                                continue  # 如果没生成采样文件，后续Production没法做
-                            break
+                        except Exception as e:
-                            # ==========================
+                            self.logger.error(f"Error executing sampling: {e}")
                            continue
                    else:
                        self.logger.error("Preheat dump.xyz missing.")
                        continue
                    # === Sub-task 2: Production (加工/正式采样) ===
                    # 逻辑：链接 sampled_structures -> 跑302 -> 跑presub.sh
                    prod_dir = os.path.join(step_dir, "production")
                    os.makedirs(prod_dir, exist_ok=True)
                    # 1. 建立软链接 (sampled_structures.xyz)
                    src_sample = os.path.abspath(os.path.join(preheat_dir, "sampled_structures.xyz"))
                    dst_sample = os.path.join(prod_dir, "sampled_structures.xyz")
                    if os.path.exists(dst_sample): os.remove(dst_sample)  # 清理旧的
                    os.symlink(src_sample, dst_sample)
                    # 2. 准备基础文件
                    self.logger.error(f"presub.sh execution failed: {e}")
                # Step: 01.select
                # ==========================
                elif step_name == "01.select":