CSM及TET，CS

2025-12-07 20:08:19 +08:00
parent 08f5a51fc4
commit 3d44b31194
8 changed files with 1087 additions and 0 deletions
--- a/py/CS_catulate.py
+++ b/py/CS_catulate.py
@@ -0,0 +1,118 @@
+import os
+import pandas as pd
+from pymatgen.core import Structure
+# 确保你的 utils 文件夹在 py 目录下，并且包含 CS_analyse.py
+from utils.CS_analyse import CS_catulate, check_only_corner_sharing
+from tqdm import tqdm
+
+# Path configuration. Both paths are relative, so they resolve against the
+# current working directory of the process.
+# CSV_ROOT_DIR: directory tree that run_cs_analysis() walks looking for .csv files.
+CSV_ROOT_DIR = "../output"
+# DATA_SOURCE_DIR: root under which get_cif_path() locates the CIF files.
+DATA_SOURCE_DIR = "../data/after_step1"
+
+
+def get_cif_path(group_name, anion_name, material_id):
+    """
+    Return the absolute path of the CIF file belonging to one material.
+
+    Layout below DATA_SOURCE_DIR:
+        single-anion group (group_name == anion_name):
+            <group>/<id>/<id>.cif          e.g. S/123/123.cif
+        mixed-anion group:
+            <group>/<anion>/<id>/<id>.cif  e.g. S+O/S/123/123.cif
+    """
+    # Directory components under DATA_SOURCE_DIR; the anion level only exists
+    # when it differs from the group name.
+    parents = [group_name]
+    if group_name != anion_name:
+        parents.append(anion_name)
+    parents.append(material_id)
+
+    cif_file = f"{material_id}.cif"
+    return os.path.abspath(os.path.join(DATA_SOURCE_DIR, *parents, cif_file))
+
+
+def process_single_csv(csv_path, group_name, anion_name):
+    """
+    Add an 'Is_Only_Corner_Sharing' column to one CSV file, in place.
+
+    For every row, the 'Filename' value is used as the material ID to locate
+    the CIF file via get_cif_path(). The structure is loaded, passed to
+    CS_catulate() and the result is reduced by check_only_corner_sharing().
+    The CSV is then overwritten with the extra column (an existing column of
+    the same name is replaced).
+
+    Column values:
+        - the return value of check_only_corner_sharing() on success
+        - "Error"           if loading or analysing the structure raised
+        - "File_Not_Found"  if the CIF file does not exist
+
+    Args:
+        csv_path: path of the CSV file to read and overwrite.
+        group_name: top-level group directory name (e.g. "S+O").
+        anion_name: anion directory name; equal to group_name for
+            single-anion groups.
+
+    Returns:
+        None. If the CSV cannot be read or has no 'Filename' column, a
+        message is printed and the file is left untouched.
+    """
+    print(f"正在处理 CSV: {csv_path}")
+
+    # Read the IDs as strings so numeric-looking IDs are not parsed as numbers.
+    try:
+        df = pd.read_csv(csv_path, dtype={'Filename': str})
+    except Exception as e:
+        print(f"读取 CSV 失败: {e}")
+        return
+
+    # Without the ID column there is nothing to look up; skip the file rather
+    # than fail with a KeyError halfway through.
+    if 'Filename' not in df.columns:
+        print(f"  - 警告: CSV 缺少 'Filename' 列，已跳过: {csv_path}")
+        return
+
+    if 'Is_Only_Corner_Sharing' in df.columns:
+        print("  - 'Is_Only_Corner_Sharing' 列已存在，将覆盖更新。")
+
+    # Anion species handed to CS_catulate(); the same for every row.
+    target_anions = ('O', 'S', 'Cl', 'F', 'Br', 'I', 'N', 'P')
+
+    results = []
+
+    # Only the 'Filename' column is needed, so iterate it directly.
+    for raw_id in tqdm(df['Filename'], total=df.shape[0], desc=f"Analyzing {anion_name}"):
+        material_id = str(raw_id)
+        # Drop a trailing ".0" float artefact only; a blanket replace would
+        # also corrupt IDs that contain ".0" elsewhere (e.g. "1.05").
+        if material_id.endswith('.0'):
+            material_id = material_id[:-2]
+        cif_path = get_cif_path(group_name, anion_name, material_id)
+
+        if not os.path.exists(cif_path):
+            # tqdm.write keeps the progress bar intact while printing.
+            tqdm.write(f"  - 警告: 找不到 CIF 文件 {cif_path}")
+            results.append("File_Not_Found")
+            continue
+
+        try:
+            struct = Structure.from_file(cif_path)
+            # A fresh list per call, in case the helper mutates its argument.
+            sharing_details = CS_catulate(struct, sp='Li', anion=list(target_anions))
+            results.append(check_only_corner_sharing(sharing_details))
+        except Exception as e:
+            # Best effort: record the failure for this row, but report why.
+            tqdm.write(f"  - 计算出错 {material_id}: {e}")
+            results.append("Error")
+
+    df['Is_Only_Corner_Sharing'] = results
+
+    # Overwrite the original file.
+    df.to_csv(csv_path, index=False)
+    print(f"  - 已更新 CSV: {csv_path}")
+
+
+def run_cs_analysis():
+    """
+    Walk CSV_ROOT_DIR and run process_single_csv() on every ``.csv`` file.
+
+    The directory of each CSV, relative to CSV_ROOT_DIR, decides where its
+    CIF files are looked up:
+        <group>/x.csv              -> group_name = anion_name = <group>
+        <group>/<anion>/.../x.csv  -> group_name = <group>, anion_name = <anion>
+
+    CSV files directly inside CSV_ROOT_DIR carry no group information and
+    are skipped. Prints a message and returns if CSV_ROOT_DIR does not exist.
+    """
+    if not os.path.exists(CSV_ROOT_DIR):
+        print(f"CSV 根目录不存在: {CSV_ROOT_DIR}")
+        return
+
+    for root, _dirs, files in os.walk(CSV_ROOT_DIR):
+        rel_root = os.path.relpath(root, CSV_ROOT_DIR)
+
+        # relpath() yields "." for the root itself; splitting that would give
+        # a bogus group named ".", so skip files at the top level explicitly.
+        if rel_root == os.curdir:
+            continue
+
+        # Group and anion depend only on the directory, not on the file.
+        path_parts = rel_root.split(os.sep)
+        group_name = path_parts[0]
+        anion_name = path_parts[1] if len(path_parts) >= 2 else group_name
+
+        for file in files:
+            if file.endswith(".csv"):
+                process_single_csv(os.path.join(root, file), group_name, anion_name)
+
+
+# Run the analysis only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    run_cs_analysis()