From 1d416d4dd841c6a5bca49e3b207527e3d7809f99 Mon Sep 17 00:00:00 2001 From: koko <1429659362@qq.com> Date: Sun, 7 Dec 2025 16:01:42 +0800 Subject: [PATCH] =?UTF-8?q?calc=E2=80=94=5Fv2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.sh | 8 +-- py/step2_4_combined.py | 109 +++++++++++++++++++++++------------------ 2 files changed, 66 insertions(+), 51 deletions(-) diff --git a/main.sh b/main.sh index 33a65f0..e287767 100644 --- a/main.sh +++ b/main.sh @@ -6,11 +6,11 @@ # 1. 初始化设置 # 修改上一级目录权限 -chmod -R u+w ../Screen +chmod -R u+w ../screen # 启用 screen 环境 (Python 3.11) source $(conda info --base)/etc/profile.d/conda.sh -conda activate screen +conda activate ~/anaconda3/envs/screen # 设置当前目录为 PYTHONPATH cd py/ @@ -31,7 +31,7 @@ python make_sh.py # 2. 切换环境运行 Zeo++ echo "============ Stage 2: Zeo++ Calculations ============" conda deactivate -conda activate zeo +conda activate ~/anaconda3/envs/zeo # 进入数据目录执行所有生成的 shell 脚本 cd ../data/after_step1 @@ -46,7 +46,7 @@ fi echo "============ Stage 3: Data Extraction & Advanced Screening ============" # 切回 screen 环境 conda deactivate -conda activate screen +conda activate ~/anaconda3/envs/screen cd ../../py # 提取日志数据 diff --git a/py/step2_4_combined.py b/py/step2_4_combined.py index b8a5ebd..36e707e 100644 --- a/py/step2_4_combined.py +++ b/py/step2_4_combined.py @@ -1,12 +1,10 @@ import os import pandas as pd import math +import shutil # ================= 配置区域 ================= # 定义各阴离子的筛选阈值 -# perc: Percolation diameter (对应 Step 2, 大于此值) -# min_d: Minimum of d (对应 Step 3, 小于此值) -# node: Maximum node length (对应 Step 4, 大于此值) THRESHOLDS = { "O": {"perc": 0.50, "min_d": 3.0, "node": 2.2}, "S": {"perc": 0.55, "min_d": 3.0, "node": 2.2}, @@ -16,7 +14,7 @@ THRESHOLDS = { # 路径配置 CSV_ROOT_DIR = "../output" # CSV 所在的根目录 -DATA_SOURCE_DIR = "../data/after_step1" # 原始 CIF 文件所在的根目录 (用于创建链接源) +DATA_SOURCE_DIR = "../data/after_step1" # 原始 CIF 文件所在的根目录 TARGET_DIR = "../data/after_screening" # 筛选后放置软链接的目标目录 @@ -26,28 +24,21 @@ def check_requirements(row, anion_type): """ 检查单行数据是否符合要求 """ - # 获取该阴离子类型的阈值配置 config = THRESHOLDS.get(anion_type) if not config: - print(f"Warning: 未知的阴离子类型 {anion_type},跳过筛选。") return False try: - # 获取数值 (处理可能的空值或非数字情况) perc = float(row["Percolation Diameter (A)"]) min_d = float(row["Minimum of d"]) node = float(row["Maximum Node Length (A)"]) - # 检查是否为 NaN if math.isnan(perc) or math.isnan(min_d) or math.isnan(node): return False - # --- 筛选逻辑 --- - # Step 2: 连通孔径 > 阈值 + # 筛选逻辑 c1 = perc > config["perc"] - # Step 3: 最短距离 < 3.0 (所有元素目前都是3.0) c2 = min_d < config["min_d"] - # Step 4: 扩大锂离子节点 > 阈值 c3 = node > config["node"] return c1 and c2 and c3 @@ -56,39 +47,56 @@ def check_requirements(row, anion_type): return False -def create_symlink(group_name, anion_name, material_id): +def create_result_file(group_name, anion_name, material_id): """ - 创建软链接 - 源: ../data/after_step1/Group/Anion/ID/ID.cif - 目: ../data/after_screening/Group/Anion/ID.cif + 创建结果文件 (这里改为直接复制,软链接在跨文件系统或某些环境下可能不稳定,复制更稳妥) + 如果确实需要软链接,可以将 shutil.copy 换回 os.symlink """ - # 1. 构建源文件路径 (必须使用绝对路径以确保软链接在任何地方都有效) - # 注意:根据你修改后的 step1,文件在 ID 文件夹内,如 141/141.cif - rel_source_path = os.path.join(DATA_SOURCE_DIR, group_name, anion_name, material_id, f"{material_id}.cif") + # 1. 构建源文件路径 + # 正确路径: ../data/after_step1/Group/MaterialID/MaterialID.cif + # 例如: ../data/after_step1/S/195819/195819.cif + # 注意:如果原本结构是 S+O/S/ID... 这里会自动适配 + + # 这里的路径逻辑要非常小心,取决于 extract_data.py 是怎么生成 CSV 目录结构的 + # 如果 CSV 在 output/S/S.csv -> 对应源文件在 after_step1/S/... + # 如果 CSV 在 output/S+O/S/S.csv -> 对应源文件在 after_step1/S+O/S/... + + if group_name == anion_name: + # 单阴离子情况 (如 output/S/S.csv -> after_step1/S/ID/ID.cif) + rel_source_path = os.path.join(DATA_SOURCE_DIR, group_name, material_id, f"{material_id}.cif") + else: + # 混合阴离子情况 (如 output/S+O/S/S.csv -> after_step1/S+O/S/ID/ID.cif) + rel_source_path = os.path.join(DATA_SOURCE_DIR, group_name, anion_name, material_id, f"{material_id}.cif") + abs_source_path = os.path.abspath(rel_source_path) if not os.path.exists(abs_source_path): print(f"源文件不存在: {abs_source_path}") return - # 2. 构建目标文件夹路径 - target_subdir = os.path.join(TARGET_DIR, group_name, anion_name) + # 2. 构建目标文件夹路径 ../data/after_screening/Group/Anion + if group_name == anion_name: + target_subdir = os.path.join(TARGET_DIR, group_name) + else: + target_subdir = os.path.join(TARGET_DIR, group_name, anion_name) + if not os.path.exists(target_subdir): os.makedirs(target_subdir) - # 3. 构建目标链接路径 - target_link_path = os.path.join(target_subdir, f"{material_id}.cif") + # 3. 构建目标文件路径 + target_file_path = os.path.join(target_subdir, f"{material_id}.cif") - # 4. 创建链接 + # 4. 执行复制 (改为复制以确保结果独立) try: - # 如果目标已经存在(可能是旧的链接),先删除 - if os.path.exists(target_link_path) or os.path.islink(target_link_path): - os.remove(target_link_path) + if os.path.exists(target_file_path): + os.remove(target_file_path) + + shutil.copy(abs_source_path, target_file_path) + # 如果你非常确定要软链接,请注释上一行,解开下一行: + # os.symlink(abs_source_path, target_file_path) - os.symlink(abs_source_path, target_link_path) - # print(f"Link: {material_id} -> Passed") except OSError as e: - print(f"创建软链接失败 {material_id}: {e}") + print(f"创建文件失败 {material_id}: {e}") def process_all_csvs(): @@ -101,46 +109,53 @@ def process_all_csvs(): print("开始执行 Step 2-4 联合筛选...") - # 遍历 output 目录 - # 结构预期: ../output/Group/Anion/Anion.csv (例如 ../output/O+S/O/O.csv 或 ../output/O/O.csv) for root, dirs, files in os.walk(CSV_ROOT_DIR): for file in files: if file.endswith(".csv"): csv_path = os.path.join(root, file) - # 推断 Group 和 Anion - # root 的末尾应该是 .../Group/Anion - # 例如 root = ../output/O+S/O + # --- 核心修正 1: 路径解析逻辑 --- + # 获取相对于 output 根目录的路径部分 + # 例如 root = ../output/S -> rel_root = S + # 例如 root = ../output/S+O/S -> rel_root = S+O/S + rel_root = os.path.relpath(root, CSV_ROOT_DIR) + path_parts = rel_root.split(os.sep) - path_parts = os.path.normpath(root).split(os.sep) - # 倒数第一级是 Anion (O), 倒数第二级是 Group (O+S) - if len(path_parts) >= 2: - anion_name = path_parts[-1] - group_name = path_parts[-2] + # 解析 Group 和 Anion + if len(path_parts) == 1: + # 单层目录: output/S -> Group=S, Anion=S + group_name = path_parts[0] + anion_name = path_parts[0] + elif len(path_parts) >= 2: + # 双层目录: output/S+O/S -> Group=S+O, Anion=S + group_name = path_parts[0] + anion_name = path_parts[1] else: - print(f"跳过路径结构异常的 CSV: {csv_path}") + # 根目录下直接有csv的情况 (不应该发生) continue - # 确保这是一个有效的阴离子类型 if anion_name not in THRESHOLDS: + print(f"跳过不支持的阴离子类型: {anion_name}") continue print(f"正在处理: Group={group_name}, Anion={anion_name} ({file})") - # 读取 CSV - df = pd.read_csv(csv_path) + # --- 核心修正 2: 防止 Filename 被读取为浮点数 --- + # dtypeStr={'Filename': str} 强制将第一列读取为字符串 + df = pd.read_csv(csv_path, dtype={'Filename': str}) pass_count = 0 total_count = len(df) for index, row in df.iterrows(): - material_id = str(row['Filename']) + # 去除可能存在的 .0 后缀 (以防万一 CSV 里已经写成了浮点格式) + material_id = str(row['Filename']).replace('.0', '') if check_requirements(row, anion_name): - create_symlink(group_name, anion_name, material_id) + create_result_file(group_name, anion_name, material_id) pass_count += 1 - print(f" - 完成: {pass_count}/{total_count} 个材料通过筛选并建立链接。") + print(f" - 完成: {pass_count}/{total_count} 个材料通过筛选并保存至 {TARGET_DIR}。") if __name__ == "__main__":