CSM及TET，CS

2025-12-07 20:08:19 +08:00
parent 08f5a51fc4
commit 3d44b31194
8 changed files with 1087 additions and 0 deletions
--- a/py/update_tet_occupancy.py
+++ b/py/update_tet_occupancy.py
@@ -0,0 +1,129 @@
+import os
+import pandas as pd
+from tqdm import tqdm
+
+# ================= Configuration =================
+# Root directory that is walked for CSV files
+CSV_ROOT_DIR = "../output"
+# Root directory holding the CSM .dat files (note: it lies inside CSV_ROOT_DIR)
+CSM_ROOT_DIR = "../output/CSM"
+
+
+# ===========================================
+
+def calculate_tet_ratio_from_dat(dat_path):
+    """
+    Parse a CSM .dat file and return the fraction of tetrahedral Li sites.
+
+    Lines are classified by plain substring matching: a line containing
+    "'type': 'tet'" counts as a tetrahedral site, a line containing
+    "'type': 'oct'" counts as an octahedral site, and every other line
+    is ignored.
+
+    Args:
+        dat_path: Path of the .dat file; it is opened as UTF-8 text.
+
+    Returns:
+        tet / (tet + oct) rounded to 4 decimals, or None when the file
+        does not exist, cannot be read or decoded, has "No environments
+        found" in its first line, or contains no tet/oct line at all
+        (the ratio is undefined in that case, so it is not reported as 0.0).
+    """
+    if not os.path.exists(dat_path):
+        return None
+
+    tet_count = 0
+    oct_count = 0
+
+    try:
+        with open(dat_path, 'r', encoding='utf-8') as f:
+            # Stream the file instead of loading every line into memory.
+            for line_no, line in enumerate(f):
+                # Only the first line is checked for the "nothing found" marker.
+                if line_no == 0 and "No environments found" in line:
+                    return None
+
+                # Substring matching avoids eval() on file content.
+                if "'type': 'tet'" in line:
+                    tet_count += 1
+                elif "'type': 'oct'" in line:
+                    oct_count += 1
+    except (OSError, UnicodeError) as e:
+        # Best effort: report the unreadable file and treat it as "no data".
+        print(f"解析出错 {dat_path}: {e}")
+        return None
+
+    total_count = tet_count + oct_count
+    if total_count == 0:
+        # No classified site at all: "no data", not "0 % tetrahedral".
+        return None
+
+    return round(tet_count / total_count, 4)
+
+
+def process_single_csv(csv_path, group_name, anion_name):
+    """
+    Add or overwrite the 'Tet_Li_Ratio' column of one CSV, in place.
+
+    For every row, the value of the 'Filename' column is turned into an
+    identifier, the matching "<identifier>.dat" file is looked up under
+    CSM_ROOT_DIR, and the result of calculate_tet_ratio_from_dat() for
+    that file is stored. The CSV is then written back to csv_path.
+
+    Args:
+        csv_path: Path of the CSV file to read and overwrite.
+        group_name: First directory level below CSM_ROOT_DIR.
+        anion_name: Second directory level below CSM_ROOT_DIR; when it
+            equals group_name the .dat files are expected directly in
+            CSM_ROOT_DIR/group_name.
+
+    Returns:
+        None. The function returns early, without writing, if the CSV
+        cannot be read or has no 'Filename' column.
+    """
+    print(f"正在更新 CSV: {csv_path}")
+
+    # Read 'Filename' as str so identifiers are not converted to numbers.
+    try:
+        df = pd.read_csv(csv_path, dtype={'Filename': str})
+    except Exception as e:
+        print(f"读取 CSV 失败: {e}")
+        return
+
+    # Skip CSVs without the column instead of aborting with a KeyError.
+    if 'Filename' not in df.columns:
+        print(f"CSV 缺少 'Filename' 列，已跳过: {csv_path}")
+        return
+
+    # Directory layout (must match whatever produced the .dat files):
+    #   single anion: CSM_ROOT_DIR/<group>/<id>.dat
+    #   mixed anion:  CSM_ROOT_DIR/<group>/<anion>/<id>.dat
+    if group_name == anion_name:
+        dat_dir = os.path.join(CSM_ROOT_DIR, group_name)
+    else:
+        dat_dir = os.path.join(CSM_ROOT_DIR, group_name, anion_name)
+
+    tet_ratios = []
+    for raw_name in tqdm(df['Filename'], total=df.shape[0], desc="Updating Occupancy"):
+        material_id = str(raw_name)
+        # Strip only a trailing ".0" (float artefact); a blanket replace
+        # would also mangle identifiers such as "10.05".
+        if material_id.endswith('.0'):
+            material_id = material_id[:-2]
+
+        dat_path = os.path.join(dat_dir, f"{material_id}.dat")
+        tet_ratios.append(calculate_tet_ratio_from_dat(dat_path))
+
+    # Add the column, or replace it if it already exists.
+    df['Tet_Li_Ratio'] = tet_ratios
+
+    df.to_csv(csv_path, index=False)
+    print(f"  - 已保存更新后的数据到: {csv_path}")
+
+
+def run_update():
+    """
+    Walk CSV_ROOT_DIR and update every *.csv file found below it.
+
+    The directory of each CSV, relative to CSV_ROOT_DIR, selects the
+    group and anion passed to process_single_csv():
+
+    * one path component  ("S")      -> group "S",   anion "S"
+    * two or more         ("S+O/S")  -> group "S+O", anion "S"
+
+    A CSV located directly in CSV_ROOT_DIR has the relative directory
+    ".", so both names are "." for it. The CSM_ROOT_DIR subtree is not
+    descended into, because it holds the .dat inputs rather than CSVs
+    to update.
+
+    Returns:
+        None. Prints a message and returns if CSV_ROOT_DIR is missing.
+    """
+    if not os.path.exists(CSV_ROOT_DIR):
+        print(f"CSV 目录不存在: {CSV_ROOT_DIR}")
+        return
+
+    csm_root = os.path.abspath(CSM_ROOT_DIR)
+
+    for root, dirs, files in os.walk(CSV_ROOT_DIR):
+        # Prune in place so os.walk never enters the CSM .dat tree.
+        dirs[:] = [
+            d for d in dirs
+            if os.path.abspath(os.path.join(root, d)) != csm_root
+        ]
+
+        # e.g. root "../output/S" -> rel_root "S"; computed once per directory.
+        rel_root = os.path.relpath(root, CSV_ROOT_DIR)
+        path_parts = rel_root.split(os.sep)
+
+        # split() always yields at least one element, so only two cases exist.
+        group_name = path_parts[0]
+        anion_name = path_parts[1] if len(path_parts) >= 2 else path_parts[0]
+
+        for file in files:
+            if not file.endswith(".csv"):
+                continue
+
+            csv_path = os.path.join(root, file)
+
+            # NOTE: no check is made that a matching CSM folder exists;
+            # rows without a .dat file simply get an empty ratio.
+            process_single_csv(csv_path, group_name, anion_name)
+
+
+# Run the update only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    run_update()