NEP-auto/src/workflow.py

# src/workflow.py
import os
import shutil
import logging
from src.utils import load_yaml
from src.machine import MachineManager
from src.steps import MDStep, SelectStep, SCFStep, TrainStep


class Workflow:
    def __init__(self, root_dir):
        self.root_dir = root_dir

        # 1. 加载配置
        self.param = load_yaml(os.path.join(root_dir, "config/param.yaml"))

        # 2. 初始化机器管理器
        self.machine = MachineManager(os.path.join(root_dir, "config/machine.yaml"))

        # 3. 初始化路径变量
        self.workspace = os.path.join(root_dir, "workspace")
        self.data_dir = os.path.join(root_dir, "data")
        self.template_dir = os.path.join(root_dir, "template")
        self.logger = logging.getLogger()

        # 状态追踪变量
        self.current_nep_pot = os.path.join(self.data_dir, self.param['files']['initial_pot'])
        # 假设第一轮之前的 train set 也是空的或者由用户提供，这里先指向一个基础文件
        self.current_train_set = os.path.join(self.workspace, "accumulated_train.xyz")

    def run(self):
        self.logger.info(f"Workflow Started: {self.param['project']}")

        # 遍历每一轮迭代
        for iteration in self.param['iterations']:
            iter_id = iteration['id']
            iter_name = f"iter_{iter_id:02d}"
            iter_path = os.path.join(self.workspace, iter_name)

            self.logger.info(f"\n >>> Starting Iteration: {iter_id} <<<")
            os.makedirs(iter_path, exist_ok=True)

            # --- 执行该轮定义的各个 Step ---
            for step_conf in iteration['steps']:
                step_name = step_conf['name']

                # ==========================
                # Step: 00.md
                # ==========================
                if step_name == "00.md":
                    # 1. 【修正点】先定义 step_dir，确保后续都能访问到
                    step_dir = os.path.join(iter_path, "00.md")

                    # --- 第一轮初始化：POSCAR -> model.xyz ---
                    if iter_id == 0:
                        # 2. 现在使用 step_dir 是安全的
                        os.makedirs(step_dir, exist_ok=True)

                        # 获取文件名和路径
                        poscar_name = self.param['files']['poscar']
                        poscar_src = os.path.join(self.data_dir, poscar_name)

                        # 复制原始 POSCAR
                        if not os.path.exists(poscar_src):
                            self.logger.error(f"Initial POSCAR not found: {poscar_src}")
                            return
                        shutil.copy(poscar_src, os.path.join(step_dir, poscar_name))

                        # 获取标签
                        atom_labels = self.param['files'].get('label', '')
                        if not atom_labels:
                            self.logger.error("Missing 'label' in param.yaml files section.")
                            return

                        # 执行转换
                        kit_path = self.machine.config['paths'].get('gpumdkit', 'gpumdkit.sh')
                        # 确保使用绝对路径，防止 subprocess 找不到
                        if not os.path.isabs(kit_path):
                            # 假设相对于项目根目录，或者在 PATH 中
                            pass

                        cmd = f"{kit_path} -addlabel {poscar_name} {atom_labels}"

                        self.logger.info(f"Initializing model.xyz with command: {cmd}")
                        try:
                            subprocess.check_call(cmd, shell=True, cwd=step_dir)
                        except subprocess.CalledProcessError as e:
                            self.logger.error(f"Failed to convert POSCAR: {e}")
                            return

                        if not os.path.exists(os.path.join(step_dir, "model.xyz")):
                            self.logger.error("model.xyz was not generated.")
                            return
                        else:
                            self.logger.info("Successfully generated model.xyz")

                    # --- 遍历子任务 (preheat, production...) ---
                    for sub in step_conf.get('sub_tasks', []):
                        template_sub_name = sub['template_sub']
                        sub_work_dir = os.path.join(step_dir, template_sub_name)
                        template_path = os.path.join(self.template_dir, "00.md", template_sub_name)

                # ==========================
                # Step: 01.select
                # ==========================
                elif step_name == "01.select":
                    step_dir = os.path.join(iter_path, "01.select")
                    select_task = SelectStep("Select", step_dir, self.machine, self.config)

                    # 使用上一步产生的 dump 和 当前的训练集/势函数
                    select_task.run(
                        dump_path=getattr(self, 'last_dump_path', None),
                        train_path=self.current_train_set,
                        nep_path=self.current_nep_pot,
                        method=step_conf.get('method'),
                        params=step_conf.get('params')
                    )

                # ==========================
                # Step: 02.scf
                # ==========================
                elif step_name == "02.scf":
                    step_dir = os.path.join(iter_path, "02.scf")
                    scf_task = SCFStep("SCF", step_dir, self.machine, self.config)

                    template_path = os.path.join(self.template_dir, "02.scf")
                    potcar_path = os.path.join(self.data_dir, self.param['files']['potcar'])

                    scf_task.run(template_path, potcar_path)

                    # 假装产生了一些新数据
                    self.new_data_chunk = os.path.join(step_dir, "scf_results.xyz")

                # ==========================
                # Step: 03.train
                # ==========================
                elif step_name == "03.train":
                    step_dir = os.path.join(iter_path, "03.train")
                    train_task = TrainStep("Train", step_dir, self.machine, self.config)

                    template_path = os.path.join(self.template_dir, "03.train")

                    # 实际逻辑应该是把 self.new_data_chunk 合并到 total_train.xyz
                    # 这里直接传入
                    train_task.run(template_path, getattr(self, 'new_data_chunk', None))

                    # 更新当前势函数路径，供下一轮使用
                    self.current_nep_pot = os.path.join(step_dir, "nep.txt")

        self.logger.info("Workflow Finished Successfully.")

    @property
    def config(self):
        return self.param  # 简单透传