# NEP-auto/nep_auto/modules/m1_md.py

import shutil
import glob
from pathlib import Path
from nep_auto.modules.base_module import BaseModule


class MDModule(BaseModule):
    """MD sampling stage of one iteration.

    The stage runs the gpumdkit ``sample_structures.py`` script on the preheat
    trajectory, creates one ``sample_<n>`` task directory per run template
    found under ``template/<template_subdir>``, runs GPUMD in each of them and
    concatenates the per-task ``dump.xyz`` files into ``<work_dir>/dump.xyz``.
    """

    # Arguments passed to sample_structures.py after the input file name.
    # NOTE(review): these could be read from the run configuration instead;
    # the defaults reproduce the previously hard-coded "uniform 4".
    SAMPLE_METHOD = "uniform"
    SAMPLE_COUNT = 4

    def __init__(self, driver, iter_id):
        super().__init__(driver, iter_id)
        self.template_subdir = "00_md"
        # Preheat directory (input source).
        self.preheat_dir = self.iter_dir / "00.md" / "preheat"
        # MD directory (workspace).
        self.work_dir = self.iter_dir / "00.md" / "md"

    def get_work_dir(self):
        """Return the MD workspace directory of this iteration."""
        return self.work_dir

    def run(self):
        """Execute the whole sampling stage.

        Raises:
            FileNotFoundError: if the preheat ``dump.xyz``, every run
                template, or the preheat ``nep.txt`` is missing.
            RuntimeError: if no merged ``dump.xyz`` with content was produced.
        """
        self.logger.info(f"🌪️ [MD] Starting Sampling Phase Iter {self.iter_id}...")
        self.initialize()

        # 1. Sample from the preheat trajectory.
        self._sample_preheat()

        # 2. Build one task directory per run template and run them all.
        sub_tasks = self._prepare_tasks(self._find_run_templates())
        self.logger.info(f"   -> Submitting {len(sub_tasks)} MD tasks...")
        for task_dir in sub_tasks:
            self.logger.info(f"      -> Running {task_dir.name}...")
            self.runner.run("gpumd", cwd=task_dir)

        # 3. Merge the results.
        self.logger.info("   -> Merging dump files...")
        self._merge_dumps(sub_tasks)

        self.check_done()

    def _sample_preheat(self):
        """Copy the preheat dump into the workspace and run the sampler on it."""
        preheat_dump = self.preheat_dir / "dump.xyz"
        if not preheat_dump.exists():
            raise FileNotFoundError(f"Preheat dump not found: {preheat_dump}")

        kit_root = self.driver.config_param['env']['gpumdkit_root']
        script = f"{kit_root}/Scripts/sample_structures/sample_structures.py"

        # The script is invoked with cwd=work_dir and a relative file name,
        # so the dump has to live inside the workspace.
        shutil.copy(preheat_dump, self.work_dir / "preheat_dump.xyz")

        self.logger.info("   -> Sampling structures from preheat trajectory...")
        # NOTE(review): the sampler output is not consumed by this class; the
        # tasks below receive the preheat model.xyz instead. Confirm that the
        # run.in templates load the intended starting structure.
        self.runner.run(
            "python_script",
            cwd=self.work_dir,
            extra_args=(
                f"{script} preheat_dump.xyz "
                f"{self.SAMPLE_METHOD} {self.SAMPLE_COUNT}"
            ),
        )

    def _find_run_templates(self):
        """Return the run templates, preferring ``md_run_*.in`` over ``run.in``.

        Raises:
            FileNotFoundError: if neither kind of template exists. Without
                this check the stage would run zero tasks and still write an
                (empty) merged dump.
        """
        # The template directory is resolved relative to the process cwd.
        tpl_path = Path("template") / self.template_subdir
        run_templates = sorted(tpl_path.glob("md_run_*.in"))

        if not run_templates:
            self.logger.warning(f"⚠️ No 'md_run_*.in' found in {tpl_path}, looking for 'run.in'...")
            run_templates = list(tpl_path.glob("run.in"))

        if not run_templates:
            raise FileNotFoundError(
                f"No MD run template ('md_run_*.in' or 'run.in') found in {tpl_path}"
            )
        return run_templates

    def _prepare_tasks(self, run_templates):
        """Create ``sample_<n>`` directories (n starts at 1) and fill them.

        Each directory receives the template as ``run.in``, the preheat
        ``nep.txt`` and, when it exists, the preheat ``model.xyz``.
        Returns the list of task directories in template order.
        """
        # Reuse the potential from the preheat stage. Checked up front so a
        # missing file fails before any task directory is half-populated.
        nep_source = self.preheat_dir / "nep.txt"
        if not nep_source.exists():
            raise FileNotFoundError(f"Preheat potential not found: {nep_source}")

        model_source = self.preheat_dir / "model.xyz"
        has_model = model_source.exists()

        sub_tasks = []
        for idx, tpl in enumerate(run_templates, start=1):
            task_dir = self.work_dir / f"sample_{idx}"
            task_dir.mkdir(exist_ok=True)
            sub_tasks.append(task_dir)

            shutil.copy(tpl, task_dir / "run.in")
            shutil.copy(nep_source, task_dir / "nep.txt")
            # The structure file is optional here: the template's run.in is
            # assumed to be configured for whatever input is provided.
            if has_model:
                shutil.copy(model_source, task_dir / "model.xyz")
        return sub_tasks

    def _merge_dumps(self, sub_tasks):
        """Concatenate every existing ``<task>/dump.xyz`` into the workspace.

        Done in Python rather than with ``cat`` so it works on any platform.
        Tasks without a ``dump.xyz`` are logged and skipped.
        """
        target_dump = self.work_dir / "dump.xyz"
        with open(target_dump, 'wb') as outfile:
            for task_dir in sub_tasks:
                src = task_dir / "dump.xyz"
                if not src.exists():
                    self.logger.warning(f"⚠️ {task_dir.name} generated no dump.xyz")
                    continue
                with open(src, 'rb') as infile:
                    shutil.copyfileobj(infile, outfile)

    def check_done(self):
        """Return True when the merged ``dump.xyz`` exists and has content.

        Raises:
            RuntimeError: if the merged dump is missing or empty. The merge
                step always creates the file, so existence alone would not
                show that any MD task produced output.
        """
        merged = self.work_dir / "dump.xyz"
        if not merged.exists():
            raise RuntimeError("MD failed: dump.xyz not created.")
        if merged.stat().st_size == 0:
            raise RuntimeError("MD failed: dump.xyz is empty (no task produced output).")
        self.logger.info("✅ MD Sampling finished.")
        return True