diff --git a/.idea/NEP-auto.iml b/.idea/NEP-auto.iml
index d0876a7..f571432 100644
--- a/.idea/NEP-auto.iml
+++ b/.idea/NEP-auto.iml
@@ -2,7 +2,7 @@
-
+
\ No newline at end of file
diff --git a/config/machine.yaml b/config/machine.yaml
index 4cd5174..b8b1db5 100644
--- a/config/machine.yaml
+++ b/config/machine.yaml
@@ -25,7 +25,12 @@ systems:
     command: "nep"
     env_setup: ""
     gpu_id: 0
-
+  gpumdkit:
+    # gpumdkit.sh helper script (the preheat stage calls it with -addlabel)
+    command: "gpumdkit.sh"
+    env_setup: ""
+    # Even in local mode a degree of parallelism sometimes has to be given
+    n_procs: 1
   # 3. VASP (GPU 版) 配置
   vasp:
     # 假设是 GPU 版本,可能不需要 mpirun 或者只需要少量核
diff --git a/config/param.yaml b/config/param.yaml
index 4fbad98..52d2ac8 100644
--- a/config/param.yaml
+++ b/config/param.yaml
@@ -32,9 +32,7 @@ notification:
 # --- 3. 各模块具体的物理/算法参数 ---
 params:
   preheat:
-    temp: 300
-    steps: 10000
-    # 这里不需要指定 gpumd 路径,只需要指定物理量
+    template_file: "run_ramp.in"
 
   select:
     target_min: 60
diff --git a/nep_auto/modules/m0_preheat.py b/nep_auto/modules/m0_preheat.py
index e69de29..ff89202 100644
--- a/nep_auto/modules/m0_preheat.py
+++ b/nep_auto/modules/m0_preheat.py
@@ -0,0 +1,120 @@
+import shutil
+import logging
+from pathlib import Path
+from .base_module import BaseModule
+
+
+class PreheatModule(BaseModule):
+    """Preheat stage: a single GPUMD run in ``<iter>/00.md/preheat``."""
+
+    def __init__(self, driver, iter_id):
+        super().__init__(driver, iter_id)
+        self.template_subdir = "00_md"
+
+    def get_work_dir(self):
+        """Return the preheat working directory of this iteration."""
+        return self.iter_dir / "00.md" / "preheat"
+
+    def initialize(self):
+        """Create the work dir and stage run.in, nep.in, nep.txt and model.xyz."""
+        super().initialize()  # creates the directory
+
+        # 1. run.in: template name comes from params.preheat.template_file
+        template_name = self.config_param['params']['preheat'].get('template_file', 'run.in')
+        self.copy_template(template_name, "run.in")
+
+        # 2. nep.in is staged alongside run.in (its content may be minimal)
+        self.copy_template("nep.in")
+
+        # 3. nep.txt (the potential)
+        self._prepare_potential()
+
+        # 4. model.xyz (the structure)
+        self._prepare_structure()
+
+    def _prepare_potential(self):
+        """Copy the potential for this iteration into the work dir as nep.txt.
+
+        Raises:
+            FileNotFoundError: if the source potential does not exist.
+        """
+        dst = self.get_work_dir() / "nep.txt"
+
+        if self.iter_id == 1:
+            # First iteration: initial potential configured in system.yaml
+            init_pot = Path(self.config_sys['system']['initial_potential'])
+            if not init_pot.exists():
+                raise FileNotFoundError(f"Initial potential not found: {init_pot}")
+            shutil.copy(init_pot, dst)
+            self.logger.info(f" -> Copied initial potential: {init_pot.name}")
+        else:
+            # Later iterations: result of the previous iteration's training
+            prev_iter = f"iter_{self.iter_id - 1:03d}"
+            src = self.root / prev_iter / "03.train" / "nep.txt"
+
+            if not src.exists():
+                raise FileNotFoundError(f"Previous potential not found: {src}")
+            shutil.copy(src, dst)
+            self.logger.info(f" -> Copied potential from {prev_iter}")
+
+    def _prepare_structure(self):
+        """Convert the initial VASP structure into model.xyz with gpumdkit.
+
+        Raises:
+            FileNotFoundError: if the configured structure file is missing.
+            RuntimeError: if the conversion produced no usable xyz file.
+        """
+        work_dir = self.get_work_dir()
+
+        # Preheat currently always starts from the initial structure
+        vasp_path = Path(self.config_sys['system']['initial_structure'])
+        if not vasp_path.exists():
+            raise FileNotFoundError(f"Structure file not found: {vasp_path}")
+
+        # Copy into the work dir so the converter can be run there
+        local_vasp = work_dir / vasp_path.name
+        shutil.copy(vasp_path, local_vasp)
+
+        # Command format: gpumdkit.sh -addlabel file.vasp Li Y Cl
+        # str() guards against element symbols the YAML loader did not keep as strings
+        elements = " ".join(str(el) for el in self.config_sys['system']['elements'])
+
+        self.logger.info(" -> Converting VASP to model.xyz...")
+
+        # Requires a 'gpumdkit' entry in machine.yaml
+        cmd_args = f"-addlabel {local_vasp.name} {elements}"
+        self.runner.run("gpumdkit", cwd=work_dir, extra_args=cmd_args)
+
+        model_xyz = work_dir / "model.xyz"
+        if model_xyz.exists():
+            return
+
+        # Fallback: the converter may name its output after the input file
+        # (e.g. LiYCl.vasp -> LiYCl.xyz); rename that to model.xyz.
+        fallback = work_dir / (local_vasp.stem + ".xyz")
+        if not fallback.exists():
+            raise RuntimeError("Failed to generate model.xyz from gpumdkit")
+        shutil.move(fallback, model_xyz)
+
+    def run(self):
+        """Run the GPUMD preheat unless a completed run is already present."""
+        work_dir = self.get_work_dir()
+
+        # Use the same criterion as check_done(): an empty thermo.out left by a
+        # crashed run must not make this stage skip itself forever.
+        if self.check_done():
+            self.logger.info(" -> Pre-check: thermo.out exists, skipping preheat.")
+            return
+
+        self.logger.info(f"🔥 Running Preheat in {self.iter_name}")
+        self.initialize()
+
+        # gpumd is invoked without extra arguments
+        self.runner.run("gpumd", cwd=work_dir)
+
+        self.logger.info(" -> Preheat finished.")
+
+    def check_done(self):
+        """Return True when thermo.out exists and is non-empty."""
+        f = self.get_work_dir() / "thermo.out"
+        return f.exists() and f.stat().st_size > 0
\ No newline at end of file
diff --git a/nep_auto/modules/m1_md.py b/nep_auto/modules/m1_md.py
index e69de29..777bb85 100644
--- a/nep_auto/modules/m1_md.py
+++ b/nep_auto/modules/m1_md.py
@@ -0,0 +1,121 @@
+import shutil
+import glob
+from pathlib import Path
+from nep_auto.modules.base_module import BaseModule
+
+
+class MDModule(BaseModule):
+    """Sampling stage: GPUMD runs in ``<iter>/00.md/md/sample_<n>``."""
+
+    def __init__(self, driver, iter_id):
+        super().__init__(driver, iter_id)
+        self.template_subdir = "00_md"
+        # Preheat directory (input source)
+        self.preheat_dir = self.iter_dir / "00.md" / "preheat"
+        # MD directory (work area)
+        self.work_dir = self.iter_dir / "00.md" / "md"
+
+    def get_work_dir(self):
+        """Return the MD working directory of this iteration."""
+        return self.work_dir
+
+    def run(self):
+        """Sample the preheat trajectory, run every MD task and merge the dumps.
+
+        Raises:
+            FileNotFoundError: if the preheat dump.xyz or nep.txt is missing, or
+                if no run template exists.
+            RuntimeError: from check_done() if the merged dump.xyz is empty.
+        """
+        self.logger.info(f"🌪️ [MD] Starting Sampling Phase Iter {self.iter_id}...")
+        self.initialize()
+
+        # ----------------------------------------
+        # 1. Sample the preheat trajectory (dump.xyz -> sampled_structures.xyz)
+        # ----------------------------------------
+        preheat_dump = self.preheat_dir / "dump.xyz"
+        if not preheat_dump.exists():
+            raise FileNotFoundError(f"Preheat dump not found: {preheat_dump}")
+
+        kit_root = self.driver.config_param['env']['gpumdkit_root']
+        script = f"{kit_root}/Scripts/sample_structures/sample_structures.py"
+
+        # Copy the dump into the work dir so the script can run there
+        local_dump = self.work_dir / "preheat_dump.xyz"
+        shutil.copy(preheat_dump, local_dump)
+
+        self.logger.info(" -> Sampling structures from preheat trajectory...")
+        # Invocation: sample_structures.py <input> <method> <number>
+        # NOTE(review): the sample count 4 is hard-coded; consider reading it from param.yaml
+        self.runner.run(
+            "python_script",
+            cwd=self.work_dir,
+            extra_args=f"{script} preheat_dump.xyz uniform 4"
+        )
+
+        # One task per template/00_md/md_run_*.in, falling back to a single run.in
+        tpl_path = Path("template") / self.template_subdir
+        run_templates = sorted(tpl_path.glob("md_run_*.in"))
+
+        if not run_templates:
+            self.logger.warning(f"⚠️ No 'md_run_*.in' found in {tpl_path}, looking for 'run.in'...")
+            run_templates = list(tpl_path.glob("run.in"))
+
+        if not run_templates:
+            # Without any task the merge below would only create an empty dump.xyz
+            raise FileNotFoundError(f"No MD run template found in {tpl_path}")
+
+        sub_tasks = []
+        nep_source = self.preheat_dir / "nep.txt"  # reuse the preheat potential
+        model_source = self.preheat_dir / "model.xyz"
+
+        for idx, tpl in enumerate(run_templates, start=1):
+            task_dir = self.work_dir / f"sample_{idx}"
+            task_dir.mkdir(exist_ok=True)
+            sub_tasks.append(task_dir)
+
+            shutil.copy(tpl, task_dir / "run.in")
+            shutil.copy(nep_source, task_dir / "nep.txt")
+
+            # Simplification: every task starts from the preheat model.xyz;
+            # sampled_structures.xyz is not distributed to the tasks here.
+            if model_source.exists():
+                shutil.copy(model_source, task_dir / "model.xyz")
+
+        # ----------------------------------------
+        # 2. Run all sample tasks
+        # ----------------------------------------
+        self.logger.info(f" -> Submitting {len(sub_tasks)} MD tasks...")
+
+        for task_dir in sub_tasks:
+            self.logger.info(f" -> Running {task_dir.name}...")
+            self.runner.run("gpumd", cwd=task_dir)
+
+        # ----------------------------------------
+        # 3. Merge the results (equivalent to: cat sample_*/dump.xyz > dump.xyz)
+        # ----------------------------------------
+        self.logger.info(" -> Merging dump files...")
+        target_dump = self.work_dir / "dump.xyz"
+        with open(target_dump, 'wb') as outfile:
+            for task_dir in sub_tasks:
+                src = task_dir / "dump.xyz"
+                if src.exists():
+                    with open(src, 'rb') as infile:
+                        shutil.copyfileobj(infile, outfile)
+                else:
+                    self.logger.warning(f"⚠️ {task_dir.name} generated no dump.xyz")
+
+        self.check_done()
+
+    def check_done(self):
+        """Return True if the merged dump.xyz exists and is non-empty.
+
+        Raises:
+            RuntimeError: if dump.xyz is missing or empty.
+        """
+        target_dump = self.work_dir / "dump.xyz"
+        # The merge step always creates the file, so existence alone proves nothing
+        if target_dump.exists() and target_dump.stat().st_size > 0:
+            self.logger.info("✅ MD Sampling finished.")
+            return True
+        raise RuntimeError("MD failed: dump.xyz not created.")
\ No newline at end of file
diff --git a/nep_auto/modules/m2_select.py b/nep_auto/modules/m2_select.py
index e69de29..419e1ff 100644
--- a/nep_auto/modules/m2_select.py
+++ b/nep_auto/modules/m2_select.py
@@ -0,0 +1,154 @@
+import shutil
+import re
+from pathlib import Path
+from nep_auto.modules.base_module import BaseModule
+
+
+class SelectModule(BaseModule):
+    """Selection stage: picks structures from the MD dump in ``<iter>/01.select``."""
+
+    def __init__(self, driver, iter_id):
+        super().__init__(driver, iter_id)
+        self.work_dir = self.iter_dir / "01.select"
+        self.md_dir = self.iter_dir / "00.md" / "md"
+
+    def get_work_dir(self):
+        """Return the selection working directory of this iteration."""
+        return self.work_dir
+
+    def get_frame_count(self, xyz_file):
+        """Count the frames of an extended-XYZ file by its "Lattice=" markers.
+
+        Returns 0 when the file is missing or cannot be read.
+        """
+        if not xyz_file.exists():
+            return 0
+        try:
+            # Stream line by line instead of loading the whole trajectory
+            with open(xyz_file, 'r') as f:
+                return sum(line.count("Lattice=") for line in f)
+        except (OSError, UnicodeDecodeError) as e:
+            self.logger.warning(f"Could not read {xyz_file}: {e}")
+            return 0
+
+    def _prepare_reference_train(self):
+        """Stage the reference train.xyz passed to the selection script.
+
+        Preference order: the previous iteration's 03.train/train.xyz (the
+        file TrainModule writes), then 00.data/train.xyz, then an empty file.
+        """
+        dst = self.work_dir / "train.xyz"
+        candidates = []
+        if self.iter_id > 1:
+            candidates.append(self.root / f"iter_{self.iter_id - 1:03d}" / "03.train" / "train.xyz")
+        candidates.append(self.root / "00.data" / "train.xyz")
+
+        for src in candidates:
+            if src.exists():
+                shutil.copy(src, dst)
+                self.logger.info(f" -> Using reference training set: {src}")
+                return
+
+        # NOTE(review): assumes the selection script accepts an empty reference set - confirm
+        self.logger.warning("⚠️ No reference train.xyz found, using an empty one.")
+        dst.write_bytes(b"")
+
+    def _prepare_potential(self):
+        """Copy the nep.txt used by the MD runs into the work dir.
+
+        MDModule copies nep.txt only into its sample_* sub-directories, so
+        md_dir/nep.txt is tried first and the preheat copy is the fallback.
+
+        Raises:
+            FileNotFoundError: if neither location holds a nep.txt.
+        """
+        candidates = (
+            self.md_dir / "nep.txt",
+            self.md_dir.parent / "preheat" / "nep.txt",
+        )
+        for src in candidates:
+            if src.exists():
+                shutil.copy(src, self.work_dir / "nep.txt")
+                return
+        searched = ", ".join(str(c) for c in candidates)
+        raise FileNotFoundError(f"Potential nep.txt not found (searched: {searched})")
+
+    def run(self):
+        """Run the selection script, tuning the distance threshold until the
+        number of selected structures falls inside [target_min, target_max].
+
+        Raises:
+            FileNotFoundError: if dump.xyz or nep.txt cannot be staged.
+            RuntimeError: from check_done() if selected.xyz was not produced.
+        """
+        self.logger.info(f"🔍 [Select] Starting Active Learning Selection Iter {self.iter_id}...")
+        self.initialize()
+
+        # Inputs required in the work dir: dump.xyz, train.xyz, nep.txt
+        shutil.copy(self.md_dir / "dump.xyz", self.work_dir / "dump.xyz")
+        self._prepare_reference_train()
+        self._prepare_potential()
+
+        cfg = self.config_param['params']['select']
+        target_min = cfg.get('target_min', 60)
+        target_max = cfg.get('target_max', 120)
+        threshold = cfg.get('init_threshold', 0.01)
+
+        kit_root = self.driver.config_param['env']['gpumdkit_root']
+        script = f"{kit_root}/Scripts/sample_structures/neptrain_select_structs.py"
+
+        step = 0.01
+        max_attempts = 10
+        selected_file = self.work_dir / "selected.xyz"
+
+        for attempt in range(max_attempts):
+            self.logger.info(f" -> Attempt {attempt + 1}: Threshold = {threshold}")
+
+            # Remove stale output so a failed attempt is not counted as a result
+            if selected_file.exists():
+                selected_file.unlink()
+
+            # NOTE(review): assumes the script accepts --distance and --auto_confirm
+            # on the command line (the stock script is interactive) - confirm
+            cmd_args = f"{script} dump.xyz train.xyz nep.txt --distance {threshold} --auto_confirm"
+
+            try:
+                self.runner.run("python_script", cwd=self.work_dir, extra_args=cmd_args)
+            except Exception as e:
+                # Best effort: a failed attempt counts as zero selected frames
+                self.logger.warning(f"Select script warning: {e}")
+
+            count = self.get_frame_count(selected_file)
+            self.logger.info(f" -> Selected {count} structures.")
+
+            if target_min <= count <= target_max:
+                self.logger.info("✅ Selection criteria met!")
+                break
+
+            if count >= target_min:
+                self.logger.info(" -> Too many, raising threshold (+0.01)...")
+                threshold = round(threshold + step, 10)
+                continue
+
+            lowered = round(threshold - step, 10)
+            if lowered > 0:
+                self.logger.info(" -> Too few, lowering threshold (-0.01)...")
+                threshold = lowered
+            else:
+                # A fixed step would give a zero or negative distance; halve instead
+                self.logger.info(" -> Too few, halving threshold...")
+                threshold = threshold / 2
+        else:
+            self.logger.warning("⚠️ Max attempts reached in selection. Proceeding with current best.")
+
+        self.check_done()
+
+    def check_done(self):
+        """Return True if selected.xyz exists.
+
+        Raises:
+            RuntimeError: if selected.xyz is missing.
+        """
+        if (self.work_dir / "selected.xyz").exists():
+            return True
+        raise RuntimeError("Selection failed: selected.xyz not found")
\ No newline at end of file
diff --git a/nep_auto/modules/m3_scf.py b/nep_auto/modules/m3_scf.py
index e69de29..0f73a85 100644
--- a/nep_auto/modules/m3_scf.py
+++ b/nep_auto/modules/m3_scf.py
@@ -0,0 +1,100 @@
+import shutil
+from pathlib import Path
+from ase.io import read, write
+from nep_auto.modules.base_module import BaseModule
+
+
+class SCFModule(BaseModule):
+    """SCF stage: one VASP task per selected structure in ``<iter>/02.scf``."""
+
+    def __init__(self, driver, iter_id):
+        super().__init__(driver, iter_id)
+        self.template_subdir = "02_scf"
+        self.work_dir = self.iter_dir / "02.scf"
+        self.select_dir = self.iter_dir / "01.select"
+
+    def get_work_dir(self):
+        """Return the SCF working directory of this iteration."""
+        return self.work_dir
+
+    def run(self):
+        """Build the VASP tasks, run them and collect NEP-dataset.xyz.
+
+        Raises:
+            FileNotFoundError: if selected.xyz is missing.
+            RuntimeError: if no OUTCAR could be parsed.
+        """
+        self.logger.info(f"⚛️ [SCF] Starting DFT Calculation Iter {self.iter_id}...")
+        self.initialize()
+
+        # 1. Read selected.xyz
+        selected_xyz = self.select_dir / "selected.xyz"
+        if not selected_xyz.exists():
+            raise FileNotFoundError("selected.xyz missing")
+
+        self.logger.info(" -> Reading structures using ASE...")
+        atoms_list = read(selected_xyz, index=':')
+        self.logger.info(f" -> Found {len(atoms_list)} structures.")
+
+        # 2. Prepare the task directories
+        # The templates are identical for every task: fetch them into work_dir
+        # once instead of once per structure, then distribute the copies.
+        vasp_inputs = ("INCAR", "KPOINTS", "POTCAR")
+        for name in vasp_inputs:
+            self.copy_template(name, target_name=None)
+
+        task_dirs = []
+        for i, atoms in enumerate(atoms_list):
+            task_dir = self.work_dir / f"task.{i:03d}"
+            task_dir.mkdir(exist_ok=True)
+            task_dirs.append(task_dir)
+
+            # NOTE(review): the shared POTCAR presumably has to match the species
+            # order ASE writes into each POSCAR - confirm
+            write(task_dir / "POSCAR", atoms, format='vasp')
+            for name in vasp_inputs:
+                shutil.copy(self.work_dir / name, task_dir / name)
+
+        # 3. Run the tasks one after another
+        self.logger.info(" -> Running VASP jobs...")
+
+        success_count = 0
+        for task_dir in task_dirs:
+            self.logger.info(f" -> Running {task_dir.name}...")
+            try:
+                # Uses the 'vasp' entry of machine.yaml
+                self.runner.run("vasp", cwd=task_dir)
+
+                # Minimal success check
+                if (task_dir / "OUTCAR").exists():
+                    success_count += 1
+            except Exception as e:
+                self.logger.error(f"Task {task_dir.name} failed: {e}")
+
+        self.logger.info(f" -> Finished. Success: {success_count}/{len(task_dirs)}")
+
+        # 4. Collect the results (OUTCAR -> NEP-dataset.xyz)
+        self.logger.info(" -> Collecting data...")
+        valid_atoms = []
+        for task_dir in task_dirs:
+            try:
+                valid_atoms.append(read(task_dir / "OUTCAR", format='vasp-outcar'))
+            except Exception as e:
+                # Best effort: skip unreadable tasks, but leave a trace in the log
+                self.logger.warning(f"Skipping {task_dir.name}: cannot read OUTCAR ({e})")
+
+        if not valid_atoms:
+            raise RuntimeError("No valid OUTCARs found!")
+        write(self.work_dir / "NEP-dataset.xyz", valid_atoms, format='extxyz')
+
+        self.check_done()
+
+    def check_done(self):
+        """Return True if NEP-dataset.xyz exists.
+
+        Raises:
+            RuntimeError: if NEP-dataset.xyz is missing.
+        """
+        if (self.work_dir / "NEP-dataset.xyz").exists():
+            return True
+        raise RuntimeError("SCF failed: NEP-dataset.xyz not generated")
\ No newline at end of file
diff --git a/nep_auto/modules/m4_train.py b/nep_auto/modules/m4_train.py
index e69de29..5f930d2 100644
--- a/nep_auto/modules/m4_train.py
+++ b/nep_auto/modules/m4_train.py
@@ -0,0 +1,66 @@
+import shutil
+from nep_auto.modules.base_module import BaseModule
+
+
+class TrainModule(BaseModule):
+    """Training stage: builds train.xyz and runs nep in ``<iter>/03.train``."""
+
+    def __init__(self, driver, iter_id):
+        super().__init__(driver, iter_id)
+        self.template_subdir = "03_train"
+        self.work_dir = self.iter_dir / "03.train"
+
+    def get_work_dir(self):
+        """Return the training working directory of this iteration."""
+        return self.work_dir
+
+    def run(self):
+        """Assemble train.xyz, stage nep.in and run the NEP training.
+
+        Raises:
+            FileNotFoundError: if this iteration's NEP-dataset.xyz is missing.
+            RuntimeError: from check_done() if nep.txt was not produced.
+        """
+        self.logger.info(f"🧠 [Train] Starting Training Iter {self.iter_id}...")
+        self.initialize()
+
+        # 1. Build train.xyz = earlier data + this iteration's 02.scf/NEP-dataset.xyz
+        # Validate the mandatory input before train.xyz is opened for writing,
+        # so a missing dataset does not leave a truncated file behind.
+        new_data = self.iter_dir / "02.scf" / "NEP-dataset.xyz"
+        if not new_data.exists():
+            raise FileNotFoundError("New training data (NEP-dataset.xyz) missing!")
+
+        if self.iter_id == 1:
+            # Optional initial training set; without it only the new data is used
+            earlier_data = self.root / "00.data" / "train.xyz"
+        else:
+            earlier_data = self.root / f"iter_{self.iter_id - 1:03d}" / "03.train" / "train.xyz"
+
+        sources = [src for src in (earlier_data, new_data) if src.exists()]
+
+        current_train_xyz = self.work_dir / "train.xyz"
+        with open(current_train_xyz, 'wb') as outfile:
+            for src in sources:
+                with open(src, 'rb') as infile:
+                    shutil.copyfileobj(infile, outfile)
+
+        # 2. Stage nep.in
+        self.copy_template("nep.in")
+
+        # 3. Run the training
+        self.logger.info(" -> Running NEP training...")
+        self.runner.run("nep", cwd=self.work_dir)
+
+        self.check_done()
+
+    def check_done(self):
+        """Return True if nep.txt exists.
+
+        Raises:
+            RuntimeError: if nep.txt is missing.
+        """
+        if (self.work_dir / "nep.txt").exists():
+            self.logger.info("✅ Training finished.")
+            return True
+        raise RuntimeError("Training failed: nep.txt not generated")
\ No newline at end of file
diff --git a/template/00_md/run_ramp.in b/template/00_md/run_ramp.in
new file mode 100644
index 0000000..2199a70
--- /dev/null
+++ b/template/00_md/run_ramp.in
@@ -0,0 +1,8 @@
+potential ./nep.txt
+velocity 100
+ensemble npt_mttk temp 100 400 aniso 0 0
+run 100000
+ensemble npt_mttk temp 400 1200 aniso 0 0
+dump_thermo 10
+dump_exyz 10000
+run 100000
\ No newline at end of file