nep框架重构

2025-12-09 01:15:38 +08:00
parent 19a6924a41
commit 91bdb0dab1
30 changed files with 7930 additions and 1001 deletions
--- a/config/machine.yaml
+++ b/config/machine.yaml
@@ -1,58 +1,21 @@
-# config/machine.yaml
+machine_name: "Local_Test_Env"
+root_dir: "." # <--- change this to your actual path

-# 当前使用的计算系统配置名
-current_system: "interactive_gpu"
+# Location of the script library
+script_dir: "config/scripts"

-systems:
-  # --- 配置 1: 交互式 GPU 环境 (当前使用) ---
-  # 场景: 你已经用 srun/tmux 申请到了资源，直接运行命令即可
-  interactive_gpu:
-    type: "local"  # local 表示直接运行 subprocess，不提交 sbatch
+executors:
+  # 1. Simple local command (e.g. NEP training)
+  nep_local:
+    type: "local"
+    cmd: "nep"

-    # 路径配置
-    gpumdkit_root: "/cluster/home/koko125/tool/GPUMDkit"
+  # 2. More complex local script (e.g. GPUMD)
+  gpumd:
+    type: "local"
+    cmd: "gpumd" # corresponds to config/scripts/gpumd.sh

-    tools:
-      # 1. GPUMD 配置
-      gpumd:
-        command: "gpumd"
-        # 运行前需要 source 的环境脚本
-        env_setup: ""
-        gpu_id: 0
-
-      # 2. NEP 配置 (同上)
-      nep:
-        command: "nep"
-        env_setup: ""
-        gpu_id: 0
-      gpumdkit:
-        # 假设是 GPU 版本，可能不需要 mpirun 或者只需要少量核
-        command: "gpumdkit.sh"
-        env_setup: ""
-        # 即使是 local 模式，有时也需要指定并行度
-        n_procs: 1
-      # 3. VASP (GPU 版) 配置
-      vasp:
-        # 假设是 GPU 版本，可能不需要 mpirun 或者只需要少量核
-        command: "mpirun -np 1 vasp_std"
-        env_setup: ""
-        # 即使是 local 模式，有时也需要指定并行度
-        n_procs: 1
-
-  # --- 配置 2: VASP CPU 集群模式 (预留，未来使用) ---
-  # 场景: 需要生成 submit.slurm 并 sbatch 提交
-  slurm_cpu_cluster:
-    type: "slurm"
-
-    gpumdkit_root: "/cluster/home/koko125/tool/GPUMDkit"
-
-    tools:
-      vasp:
-        command: "mpirun -np 4 vasp_std"
-        env_setup: "module load vasp/6.3-cpu"
-
-        # Slurm 头部参数
-        slurm_header:
-          partition: "cpu_long"
-          ntasks_per_node: 64
-          time: "24:00:00"
+  # 3. VASP (CPU) via mpirun. NOTE(review): labelled "Slurm submission test" but type is "local" — confirm intent
+  vasp_cpu:
+    type: "local"
+    cmd: "mpirun -np 1 vasp_std"
--- a/config/param.yaml
+++ b/config/param.yaml
@@ -1,44 +1,55 @@
-# config/param.yaml
+# param.yaml

-# --- 1. 流程控制 ---
-stages_def:
-  p: "preheat"
-  m: "md"
-  s: "select"
-  d: "scf"
-  t: "train"
-  pr: "predict"
-  o: "output"
+project: "LiYCl_Auto"

-# 默认流程
-default_workflow: ["p", "m", "s", "d", "t", "pr"]
+# 1. Initial input files (correspond to the data/ directory)
+files:
+  poscar: "LiYCl.vasp"
+  potcar: "POTCAR"
+  initial_pot: "nep89.txt" # potential used for the first-round MD

-# 自定义调度
-schedule:
-  1: ["p", "m", "s", "d", "t", "o"]
+# 2. Iteration workflow control
+iterations:
+  # --- Iteration 0 (first round) ---
+  - id: 0
+    steps:
+      # Step 1: MD (preheat + sampling)
+      # Flow: prepares nep.txt (taken from initial_pot) and model.xyz
+      - name: "00.md"
+        sub_tasks:
+          # A preheat stage and/or further processing stages may be needed; serial execution of sub-tasks is supported here
+          - template_sub: "preheat"     # uses template/00.md/preheat/run.in
+          - template_sub: "production"  # uses template/00.md/production/run.in
+        executor: "gpumd" # executor key in machine.yaml

-# --- 2. 容错与通知 ---
-control:
-  max_retries: 3
-  check_interval: 60
+      # Step 2: structure selection
+      - name: "01.select"
+        method: "distance"
+        params: [0.01, 60, 120] # presumably [init_threshold, target_min, target_max] as in the former select settings — TODO confirm

-notification:
-  enable_log: true
-  log_file: "./logs/sys_runtime.log"
-  enable_hook: true
-  hook_script: "python ./hooks/send_alert.py"
-  alert_events: ["fail", "finish"]
+      # Step 3: SCF (VASP)
+      # Flow: cp template/02.scf/INCAR; check KPOINTS; cp data/POTCAR
+      - name: "02.scf"
+        executor: "vasp_cpu" # executor key in machine.yaml (it defines vasp_cpu; there is no vasp_std key)

-# --- 3. 各模块具体的物理/算法参数 ---
-params:
-  preheat:
-    template_file: "run_ramp.in"
+      # Step 4: training
+      # Flow: cp template/03.train/nep.in
+      - name: "03.train"
+        executor: "nep_local"

-  select:
-    target_min: 60
-    target_max: 120
-    init_threshold: 0.01
+  # --- Iteration 1 (second round) ---
+  - id: 1
+    steps:
+      - name: "00.md"
+        sub_tasks:
+          - template_sub: "production" # the second round may only need sampling
+        # Note: in this round nep.txt automatically points to iter_00/03.train/nep.txt. NOTE(review): no executor is set for this step (iteration 0 uses "gpumd") — confirm a default applies

-  scf:
-    # 比如指定用 machine.yaml 里的哪个 tool 配置
-    tool_key: "vasp"
+      - name: "01.select"
+        method: "distance"
+        params: [0.012, 60, 120]
+
+      - name: "02.scf"
+        executor: "vasp_cpu" # executor key in machine.yaml (it defines vasp_cpu; there is no vasp_std key)
+
+      - name: "03.train" # NOTE(review): no executor set here (iteration 0 uses "nep_local") — confirm a default applies
--- a/config/scripts/vasp_cpu.sh
+++ b/config/scripts/vasp_cpu.sh
--- a/config/system.yaml
+++ b/config/system.yaml
@@ -1,16 +0,0 @@
-# config/system.yaml
-project_name: "LiYCl_Transport_v1"
-
-# 物理体系定义
-system:
-  elements: ["Li", "Y", "Cl"]
-
-  # 初始结构 (VASP格式)
-  initial_structure: "./initial_data/LiYCl.vasp"
-
-  # 初始势函数 (第一轮 preheat 使用)
-  # 如果是第一轮，使用此通用势；后续轮次自动使用上一轮训练结果
-  initial_potential: "./initial_data/nep89.txt"
-
-  # 晶格常数或扩胞设置 (可选，视具体模块逻辑而定)
-  supercell: [1, 1, 1]