nep框架搭建
This commit is contained in:
4
.idea/vcs.xml
generated
4
.idea/vcs.xml
generated
@@ -1,4 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings" defaultProject="true" />
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
70
config/param.yaml
Normal file
70
config/param.yaml
Normal file
@@ -0,0 +1,70 @@
|
||||
# config/param.yaml
|
||||
|
||||
# --- 1. 环境与路径配置 ---
|
||||
env:
|
||||
# 可执行文件绝对路径
|
||||
vasp_std: "mpirun -np 1 /cluster/home/koko125/vasp/bin_gpu/vasp_std"
|
||||
gpumd: "/cluster/home/koko125/tool/GPUMD/src/gpumd"
|
||||
nep: "/cluster/home/koko125/tool/GPUMD/src/nep"
|
||||
|
||||
# GPUMDKit 脚本库根目录
|
||||
gpumdkit_root: "/cluster/home/koko125/tool/GPUMDkit"
|
||||
|
||||
# 【修改点】HPC 作业提交配置 (用于填充 submit.slurm 模板)
|
||||
# 这些变量会被自动替换到 .sh 脚本头部
|
||||
# slurm_config:
|
||||
# partition: "v100" # 队列分区名
|
||||
# account: "def-user" # 账户名 (如果有)
|
||||
# gpu_per_node: 1 # 每节点 GPU 数
|
||||
# ntasks_per_node: 32 # 每节点 CPU 核数
|
||||
# time_limit: "24:00:00" # 墙钟时间限制
|
||||
|
||||
# --- 2. 流程控制 ---
|
||||
# 阶段代号定义 (对应 modules 下的 Python 文件)
|
||||
stages_def:
|
||||
p: "preheat" # 00.md/preheat
|
||||
m: "md" # 00.md/md
|
||||
s: "select" # 01.select
|
||||
d: "scf" # 02.scf
|
||||
t: "train" # 03.train
|
||||
pr: "predict" # 04.predict (新增:用于性质预测)
|
||||
o: "output" # 05.output (始终默认执行:整理报告)
|
||||
|
||||
# 自定义流程调度
|
||||
# 注意:'o' (output) 不需要显式写在这里,代码逻辑会强制每轮最后执行它
|
||||
schedule:
|
||||
# 第1轮: 跑完训练,不做预测,看一眼结果
|
||||
1: ["p", "m", "s", "d", "t"]
|
||||
|
||||
# 第2轮: 跑完训练,加入预测步骤 (计算电导/扩散等)
|
||||
2: ["p", "m", "s", "d", "t", "pr"]
|
||||
|
||||
# 默认流程 (如果没有定义轮次)
|
||||
default_workflow: ["p", "m", "s", "d", "t", "pr"]
|
||||
|
||||
# --- 3. 容错与通知 ---
|
||||
control:
|
||||
max_retries: 3 # 任务失败自动重启次数
|
||||
check_interval: 60 # 状态检查间隔 (秒)
|
||||
|
||||
notification:
|
||||
enable_log: true
|
||||
log_file: "./logs/sys_runtime.log"
|
||||
|
||||
enable_hook: true
|
||||
hook_script: "python ./hooks/send_alert.py"
|
||||
alert_events: ["fail", "finish"]
|
||||
|
||||
# --- 4. 模块参数 ---
|
||||
params:
|
||||
preheat:
|
||||
temp: 300
|
||||
steps: 10000
|
||||
select:
|
||||
target_min: 60
|
||||
target_max: 120
|
||||
init_threshold: 0.01
|
||||
predict:
|
||||
# 预测阶段需要的参数,比如计算电导率的温度范围
|
||||
temperatures: [300, 400, 500]
|
||||
script_path: "scripts/calc_conductivity.py" # 具体的计算脚本
|
||||
16
config/system.yaml
Normal file
16
config/system.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
# config/system.yaml
|
||||
project_name: "LiYCl_Transport_v1"
|
||||
|
||||
# 物理体系定义
|
||||
system:
|
||||
elements: ["Li", "Y", "Cl"]
|
||||
|
||||
# 初始结构 (VASP格式)
|
||||
initial_structure: "./initial_data/LiYCl.vasp"
|
||||
|
||||
# 初始势函数 (第一轮 preheat 使用)
|
||||
# 如果是第一轮,使用此通用势;后续轮次自动使用上一轮训练结果
|
||||
initial_potential: "./initial_data/nep89.txt"
|
||||
|
||||
# 晶格常数或扩胞设置 (可选,视具体模块逻辑而定)
|
||||
supercell: [1, 1, 1]
|
||||
0
hooks/send_alert.py
Normal file
0
hooks/send_alert.py
Normal file
33
main.py
Normal file
33
main.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from nep_auto.driver import NEPDriver
|
||||
from nep_auto.utils.logger import setup_logger
|
||||
|
||||
|
||||
def main():
    """Program entry point: set up logging, build the driver and run it.

    Exits with status 0 when the user interrupts the run with Ctrl-C and
    with status 1 when any other exception escapes the driver; in the
    latter case the message and the full traceback are written to the log.
    """
    # 1. Global logger shared by the whole framework.
    logger = setup_logger("logs/sys_runtime.log")

    separator = "========================================"
    for banner_line in (
        separator,
        "🚀 NEP Automation Framework Starting...",
        separator,
    ):
        logger.info(banner_line)

    try:
        # 2. Build the driver, then 3. hand control to its main loop.
        NEPDriver().run()
    except KeyboardInterrupt:
        logger.warning("⚠️ 用户手动中断程序 (KeyboardInterrupt)")
        sys.exit(0)
    except Exception as exc:
        logger.error(f"❌ 程序发生严重崩溃: {exc!s}")
        logger.error(traceback.format_exc())
        # A crash notification could be sent from here.
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
0
nep_auto/__init__.py
Normal file
0
nep_auto/__init__.py
Normal file
37
nep_auto/driver.py
Normal file
37
nep_auto/driver.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import yaml
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from nep_auto.status_manager import StatusManager
|
||||
|
||||
|
||||
class NEPDriver:
    """Top-level driver: loads the YAML configuration and tracks progress.

    Attributes set by ``__init__``:
        logger: the shared ``"NEP_Auto"`` logger.
        root: project root, the current working directory (``Path(".")``).
        config_sys: parsed content of ``config/system.yaml``.
        config_param: parsed content of ``config/param.yaml``.
        status: ``StatusManager`` bound to ``<root>/workspace``.
    """

    def __init__(self):
        """Load both configuration files and create the status manager.

        Raises:
            FileNotFoundError: if either configuration file is missing.
        """
        self.logger = logging.getLogger("NEP_Auto")
        self.root = Path(".")

        # 1. Load configuration.
        self.config_sys = self._load_yaml("config/system.yaml")
        self.config_param = self._load_yaml("config/param.yaml")
        self.logger.info(f"项目名称: {self.config_sys.get('project_name')}")

        # 2. Initialise the status manager.
        self.status = StatusManager(self.root / "workspace")

    def _load_yaml(self, path):
        """Parse the YAML file at ``path`` and return its content.

        Args:
            path: location of the YAML file (string or path-like).

        Returns:
            The parsed document, or an empty dict when the file contains no
            document at all, so callers can always use ``.get`` on it.

        Raises:
            FileNotFoundError: if ``path`` does not exist.
        """
        if not Path(path).exists():
            raise FileNotFoundError(f"配置文件缺失: {path}")
        # The configuration files carry non-ASCII comments; decode them as
        # UTF-8 explicitly instead of relying on the locale default, which
        # is ASCII under a C/POSIX locale.
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # safe_load returns None for an empty file.
        return {} if data is None else data

    def run(self):
        """Main loop (placeholder): log the current iteration and return."""
        self.logger.info("✅ 驱动器初始化完成,准备进入主循环...")

        # Fetch the current iteration number.
        current_iter = self.status.get_current_iter()
        self.logger.info(f"当前进度: iter_{current_iter:03d}")

        # For now this only reports once and returns, as an environment check.
        self.logger.info("测试阶段:环境检查通过。等待模块代码实现...")
        # The scheduling loop will be implemented here later.
|
||||
0
nep_auto/modules/__init__.py
Normal file
0
nep_auto/modules/__init__.py
Normal file
0
nep_auto/modules/base_module.py
Normal file
0
nep_auto/modules/base_module.py
Normal file
0
nep_auto/modules/m0_preheat.py
Normal file
0
nep_auto/modules/m0_preheat.py
Normal file
0
nep_auto/modules/m1_md.py
Normal file
0
nep_auto/modules/m1_md.py
Normal file
0
nep_auto/modules/m2_select.py
Normal file
0
nep_auto/modules/m2_select.py
Normal file
0
nep_auto/modules/m3_scf.py
Normal file
0
nep_auto/modules/m3_scf.py
Normal file
0
nep_auto/modules/m4_train.py
Normal file
0
nep_auto/modules/m4_train.py
Normal file
0
nep_auto/modules/m5_predict.py
Normal file
0
nep_auto/modules/m5_predict.py
Normal file
27
nep_auto/status_manager.py
Normal file
27
nep_auto/status_manager.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class StatusManager:
    """Persist workflow progress in ``<workspace>/status.json``.

    The status document is a JSON object; a freshly created one is
    ``{"current_iter": 1, "stages": {}}``.
    """

    def __init__(self, workspace_path):
        """Bind to ``workspace_path`` and make sure a status file exists.

        Args:
            workspace_path: directory holding ``status.json``; it is created
                (including parents) when missing.
        """
        self.workspace = Path(workspace_path)
        self.status_file = self.workspace / "status.json"

        # exist_ok avoids the check-then-create race of a separate
        # exists() test followed by mkdir().
        self.workspace.mkdir(parents=True, exist_ok=True)

        # Create an initial status file when there is none yet.
        if not self.status_file.exists():
            self._save_status({"current_iter": 1, "stages": {}})

    def _save_status(self, data):
        """Write ``data`` to the status file as indented JSON.

        The document is first written to a sibling temporary file and then
        moved over the real one, so a serialisation error or an interrupted
        write never leaves a truncated ``status.json`` behind.

        Raises:
            TypeError: if ``data`` is not JSON-serialisable (the previous
                status file is left untouched).
        """
        tmp_file = self.status_file.with_name(self.status_file.name + ".tmp")
        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=4)
            os.replace(tmp_file, self.status_file)
        except BaseException:
            # Do not leave the partial temporary file lying around.
            if tmp_file.exists():
                tmp_file.unlink()
            raise

    def get_current_iter(self):
        """Return the stored ``current_iter`` value.

        Returns:
            The ``"current_iter"`` entry of the status file, or 1 when the
            file or the entry is missing.
        """
        try:
            with open(self.status_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return 1
        return data.get("current_iter", 1)
|
||||
0
nep_auto/utils/__init__.py
Normal file
0
nep_auto/utils/__init__.py
Normal file
33
nep_auto/utils/logger.py
Normal file
33
nep_auto/utils/logger.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def setup_logger(log_file="logs/runtime.log"):
    """Configure and return the shared ``"NEP_Auto"`` logger.

    On the first call the logger gets two handlers using the same format:
    one appending to ``log_file`` (UTF-8) and one writing to ``sys.stdout``.
    Later calls return the already configured logger without adding
    handlers again.

    Args:
        log_file: path of the log file. Its parent directory is created
            when missing; a bare file name (no directory part) is allowed.

    Returns:
        The ``logging.Logger`` named ``"NEP_Auto"``, set to level INFO.
    """
    # Make sure the log directory exists. dirname() is "" for a bare file
    # name, and os.makedirs("") would raise, so skip it in that case.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    logger = logging.getLogger("NEP_Auto")
    logger.setLevel(logging.INFO)

    # Avoid attaching duplicate handlers on repeated calls.
    if logger.handlers:
        return logger

    # Shared record format.
    formatter = logging.Formatter(
        '[%(asctime)s] [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )

    # File output.
    fh = logging.FileHandler(log_file, mode='a', encoding='utf-8')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # Console output.
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    return logger
|
||||
0
nep_auto/utils/notifier.py
Normal file
0
nep_auto/utils/notifier.py
Normal file
0
nep_auto/utils/runner.py
Normal file
0
nep_auto/utils/runner.py
Normal file
Reference in New Issue
Block a user