diff --git a/.idea/vcs.xml b/.idea/vcs.xml index d843f34..94a25f7 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -1,4 +1,6 @@ - + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/config/param.yaml b/config/param.yaml new file mode 100644 index 0000000..af9eefa --- /dev/null +++ b/config/param.yaml @@ -0,0 +1,70 @@ +# config/param.yaml + +# --- 1. 环境与路径配置 --- +env: + # 可执行文件绝对路径 + vasp_std: "mpirun -np 1 /cluster/home/koko125/vasp/bin_gpu/vasp_std" + gpumd: "/cluster/home/koko125/tool/GPUMD/src/gpumd" + nep: "/cluster/home/koko125/tool/GPUMD/src/nep" + + # GPUMDKit 脚本库根目录 + gpumdkit_root: "/cluster/home/koko125/tool/GPUMDkit" + + # 【修改点】HPC 作业提交配置 (用于填充 submit.slurm 模板) + # 这些变量会被自动替换到 .sh 脚本头部 +# slurm_config: +# partition: "v100" # 队列分区名 +# account: "def-user" # 账户名 (如果有) +# gpu_per_node: 1 # 每节点 GPU 数 +# ntasks_per_node: 32 # 每节点 CPU 核数 +# time_limit: "24:00:00" # 墙钟时间限制 + +# --- 2. 流程控制 --- +# 阶段代号定义 (对应 modules 下的 Python 文件) +stages_def: + p: "preheat" # 00.md/preheat + m: "md" # 00.md/md + s: "select" # 01.select + d: "scf" # 02.scf + t: "train" # 03.train + pr: "predict" # 04.predict (新增:用于性质预测) + o: "output" # 05.output (始终默认执行:整理报告) + +# 自定义流程调度 +# 注意:'o' (output) 不需要显式写在这里,代码逻辑会强制每轮最后执行它 +schedule: + # 第1轮: 跑完训练,不做预测,看一眼结果 + 1: ["p", "m", "s", "d", "t"] + + # 第2轮: 跑完训练,加入预测步骤 (计算电导/扩散等) + 2: ["p", "m", "s", "d", "t", "pr"] + +# 默认流程 (如果没有定义轮次) +default_workflow: ["p", "m", "s", "d", "t", "pr"] + +# --- 3. 容错与通知 --- +control: + max_retries: 3 # 任务失败自动重启次数 + check_interval: 60 # 状态检查间隔 (秒) + +notification: + enable_log: true + log_file: "./logs/sys_runtime.log" + + enable_hook: true + hook_script: "python ./hooks/send_alert.py" + alert_events: ["fail", "finish"] + +# --- 4. 
import sys
import time
import traceback
from nep_auto.driver import NEPDriver
from nep_auto.utils.logger import setup_logger


def main():
    """Entry point: set up logging, build the driver and run it.

    Exits with status 0 when the user interrupts the program with
    Ctrl-C and with status 1 when any other exception escapes the driver.
    """
    # Step 1: global logger shared by the whole framework.
    log = setup_logger("logs/sys_runtime.log")
    separator = "========================================"
    for banner_line in (
        separator,
        "🚀 NEP Automation Framework Starting...",
        separator,
    ):
        log.info(banner_line)

    try:
        # Step 2: build the driver (loads configuration, restores state),
        # Step 3: hand control to its main loop.
        NEPDriver().run()
    except KeyboardInterrupt:
        log.warning("⚠️ 用户手动中断程序 (KeyboardInterrupt)")
        sys.exit(0)
    except Exception as exc:
        crash_summary = f"❌ 程序发生严重崩溃: {str(exc)}"
        log.error(crash_summary)
        log.error(traceback.format_exc())
        # A crash notification could be dispatched from here.
        sys.exit(1)


if __name__ == "__main__":
    main()
import json
import os
from pathlib import Path


class StatusManager:
    """Persist and query workflow progress in ``<workspace>/status.json``.

    The status file is a JSON object; this class reads its
    ``"current_iter"`` entry and creates the file with initial content
    when it does not exist yet.
    """

    def __init__(self, workspace_path):
        """Prepare the workspace directory and the status file.

        Args:
            workspace_path: Directory (str or path-like) that holds
                ``status.json``. Created, including parents, if missing.
        """
        self.workspace = Path(workspace_path)
        self.status_file = self.workspace / "status.json"

        # exist_ok avoids the check-then-create race of a separate
        # ``exists()`` test followed by ``mkdir()``.
        self.workspace.mkdir(parents=True, exist_ok=True)

        # Create an initial status file when none is present.
        if not self.status_file.exists():
            self._save_status({"current_iter": 1, "stages": {}})

    def _save_status(self, data):
        """Write ``data`` to the status file as JSON, atomically.

        The JSON is written to a sibling temporary file first and then
        moved over ``status.json`` with ``os.replace``. If serialization or
        the write fails, the previous status file is left untouched.

        Args:
            data: JSON-serializable object to store.

        Raises:
            TypeError: If ``data`` is not JSON-serializable.
            OSError: If the file cannot be written or replaced.
        """
        tmp_file = self.status_file.with_name(self.status_file.name + ".tmp")
        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=4)
            os.replace(tmp_file, self.status_file)
        except Exception:
            # Do not leave a half-written temporary file behind.
            if tmp_file.exists():
                tmp_file.unlink()
            raise

    def get_current_iter(self):
        """Return the stored ``"current_iter"`` value.

        Returns:
            The value stored under ``"current_iter"``; ``1`` when the
            status file does not exist or has no such key.

        Raises:
            json.JSONDecodeError: If the status file is not valid JSON.
        """
        try:
            with open(self.status_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return 1
        return data.get("current_iter", 1)
import logging
import os
import sys


def setup_logger(log_file="logs/runtime.log"):
    """Return the shared ``"NEP_Auto"`` logger, configuring it on first use.

    On the first call the logger gets two handlers using the same format:
    one appending to ``log_file`` (UTF-8) and one writing to standard
    output. Later calls return the already configured logger unchanged, so
    a different ``log_file`` passed later has no effect.

    Args:
        log_file: Path of the log file. Its parent directory is created
            when the path has one; a bare file name such as
            ``"runtime.log"`` is written to the current directory.

    Returns:
        The ``logging.Logger`` named ``"NEP_Auto"`` at level ``INFO``.
    """
    # os.path.dirname() returns "" for a bare file name and
    # os.makedirs("") raises FileNotFoundError, so only create the
    # directory when there is one to create.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    logger = logging.getLogger("NEP_Auto")
    logger.setLevel(logging.INFO)

    # Avoid attaching duplicate handlers on repeated calls.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '[%(asctime)s] [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # File output.
    fh = logging.FileHandler(log_file, mode='a', encoding='utf-8')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # Console output.
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    return logger