nep框架重构
This commit is contained in:
@@ -1,58 +1,21 @@
|
||||
# config/machine.yaml
|
||||
machine_name: "Local_Test_Env"
|
||||
root_dir: "." # <--- 请修改这里为你的实际路径
|
||||
|
||||
# 当前使用的计算系统配置名
|
||||
current_system: "interactive_gpu"
|
||||
# 脚本库位置
|
||||
script_dir: "config/scripts"
|
||||
|
||||
systems:
|
||||
# --- 配置 1: 交互式 GPU 环境 (当前使用) ---
|
||||
# 场景: 你已经用 srun/tmux 申请到了资源,直接运行命令即可
|
||||
interactive_gpu:
|
||||
type: "local" # local 表示直接运行 subprocess,不提交 sbatch
|
||||
executors:
|
||||
# 1. 简单的本地命令 (如 NEP 训练)
|
||||
nep_local:
|
||||
type: "local"
|
||||
cmd: "nep"
|
||||
|
||||
# 路径配置
|
||||
gpumdkit_root: "/cluster/home/koko125/tool/GPUMDkit"
|
||||
# 2. 复杂的本地脚本 (如 GPUMD)
|
||||
gpumd:
|
||||
type: "local"
|
||||
cmd: "gpumd" # 对应 config/scripts/gpumd.sh
|
||||
|
||||
tools:
|
||||
# 1. GPUMD 配置
|
||||
gpumd:
|
||||
command: "gpumd"
|
||||
# 运行前需要 source 的环境脚本
|
||||
env_setup: ""
|
||||
gpu_id: 0
|
||||
|
||||
# 2. NEP 配置 (同上)
|
||||
nep:
|
||||
command: "nep"
|
||||
env_setup: ""
|
||||
gpu_id: 0
|
||||
gpumdkit:
|
||||
# 假设是 GPU 版本,可能不需要 mpirun 或者只需要少量核
|
||||
command: "gpumdkit.sh"
|
||||
env_setup: ""
|
||||
# 即使是 local 模式,有时也需要指定并行度
|
||||
n_procs: 1
|
||||
# 3. VASP (GPU 版) 配置
|
||||
vasp:
|
||||
# 假设是 GPU 版本,可能不需要 mpirun 或者只需要少量核
|
||||
command: "mpirun -np 1 vasp_std"
|
||||
env_setup: ""
|
||||
# 即使是 local 模式,有时也需要指定并行度
|
||||
n_procs: 1
|
||||
|
||||
# --- 配置 2: VASP CPU 集群模式 (预留,未来使用) ---
|
||||
# 场景: 需要生成 submit.slurm 并 sbatch 提交
|
||||
slurm_cpu_cluster:
|
||||
type: "slurm"
|
||||
|
||||
gpumdkit_root: "/cluster/home/koko125/tool/GPUMDkit"
|
||||
|
||||
tools:
|
||||
vasp:
|
||||
command: "mpirun -np 4 vasp_std"
|
||||
env_setup: "module load vasp/6.3-cpu"
|
||||
|
||||
# Slurm 头部参数
|
||||
slurm_header:
|
||||
partition: "cpu_long"
|
||||
ntasks_per_node: 64
|
||||
time: "24:00:00"
|
||||
# 3. VASP CPU (预留用于 Slurm 提交测试;当前 type 为 "local",即直接本地运行而不经 sbatch 提交)
|
||||
vasp_cpu:
|
||||
type: "local"
|
||||
cmd: "mpirun -np 1 vasp_std"
|
||||
@@ -1,44 +1,55 @@
|
||||
# config/param.yaml
|
||||
# param.yaml
|
||||
|
||||
# --- 1. 流程控制 ---
|
||||
stages_def:
|
||||
p: "preheat"
|
||||
m: "md"
|
||||
s: "select"
|
||||
d: "scf"
|
||||
t: "train"
|
||||
pr: "predict"
|
||||
o: "output"
|
||||
project: "LiYCl_Auto"
|
||||
|
||||
# 默认流程
|
||||
default_workflow: ["p", "m", "s", "d", "t", "pr"]
|
||||
# 1. 初始文件定义 (对应 data/ 目录)
|
||||
files:
|
||||
poscar: "LiYCl.vasp"
|
||||
potcar: "POTCAR"
|
||||
initial_pot: "nep89.txt" # 第一轮 MD 用的势函数
|
||||
|
||||
# 自定义调度
|
||||
schedule:
|
||||
1: ["p", "m", "s", "d", "t", "o"]
|
||||
# 2. 迭代流程控制
|
||||
iterations:
|
||||
# --- 第一轮 ---
|
||||
- id: 0
|
||||
steps:
|
||||
# Step 1: MD (预热 + 采样)
|
||||
# 逻辑:会把 nep.txt (来自 initial_pot) 和 model.xyz 准备好
|
||||
- name: "00.md"
|
||||
sub_tasks:
|
||||
# 子任务按列出的顺序串行执行:可以只有预热 (preheat),也可以在其后接正式采样 (production)
|
||||
- template_sub: "preheat" # 使用 template/00.md/preheat/run.in
|
||||
- template_sub: "production" # 使用 template/00.md/production/run.in
|
||||
executor: "gpumd" # 对应 machine.yaml
|
||||
|
||||
# --- 2. 容错与通知 ---
|
||||
control:
|
||||
max_retries: 3
|
||||
check_interval: 60
|
||||
# Step 2: 筛选
|
||||
- name: "01.select"
|
||||
method: "distance"
|
||||
params: [0.01, 60, 120]
|
||||
|
||||
notification:
|
||||
enable_log: true
|
||||
log_file: "./logs/sys_runtime.log"
|
||||
enable_hook: true
|
||||
hook_script: "python ./hooks/send_alert.py"
|
||||
alert_events: ["fail", "finish"]
|
||||
# Step 3: SCF (VASP)
|
||||
# 逻辑:cp template/02.scf/INCAR; check KPOINTS; cp data/POTCAR
|
||||
- name: "02.scf"
|
||||
executor: "vasp_std" # 应对应 machine.yaml 中的 executor 名称;注意:machine.yaml 目前只定义了 nep_local / gpumd / vasp_cpu (脚本为 config/scripts/vasp_cpu.sh),此处名称需确认是否一致
|
||||
|
||||
# --- 3. 各模块具体的物理/算法参数 ---
|
||||
params:
|
||||
preheat:
|
||||
template_file: "run_ramp.in"
|
||||
# Step 4: 训练
|
||||
# 逻辑:cp template/03.train/nep.in
|
||||
- name: "03.train"
|
||||
executor: "nep_local"
|
||||
|
||||
select:
|
||||
target_min: 60
|
||||
target_max: 120
|
||||
init_threshold: 0.01
|
||||
# --- 第二轮 ---
|
||||
- id: 1
|
||||
steps:
|
||||
- name: "00.md"
|
||||
sub_tasks:
|
||||
- template_sub: "production" # 第二轮可能只需要 sampling
|
||||
# 注意:这一轮的 nep.txt 会自动指向 iter_00/03.train/nep.txt
|
||||
|
||||
scf:
|
||||
# 比如指定用 machine.yaml 里的哪个 tool 配置
|
||||
tool_key: "vasp"
|
||||
- name: "01.select"
|
||||
method: "distance"
|
||||
params: [0.012, 60, 120]
|
||||
|
||||
- name: "02.scf"
|
||||
executor: "vasp_std"
|
||||
|
||||
- name: "03.train"
|
||||
0
config/scripts/vasp_cpu.sh
Normal file
0
config/scripts/vasp_cpu.sh
Normal file
@@ -1,16 +0,0 @@
|
||||
# config/system.yaml
|
||||
project_name: "LiYCl_Transport_v1"
|
||||
|
||||
# 物理体系定义
|
||||
system:
|
||||
elements: ["Li", "Y", "Cl"]
|
||||
|
||||
# 初始结构 (VASP格式)
|
||||
initial_structure: "./initial_data/LiYCl.vasp"
|
||||
|
||||
# 初始势函数 (第一轮 preheat 使用)
|
||||
# 如果是第一轮,使用此通用势;后续轮次自动使用上一轮训练结果
|
||||
initial_potential: "./initial_data/nep89.txt"
|
||||
|
||||
# 晶格常数或扩胞设置 (可选,视具体模块逻辑而定)
|
||||
supercell: [1, 1, 1]
|
||||
Reference in New Issue
Block a user