"""Add a ``Tet_Li_Ratio`` column to result CSVs from CSM ``.dat`` files.

For every CSV found under the CSV root directory, each row's ``Filename``
value is mapped to a ``<id>.dat`` file under the CSM root directory, the
fraction of tetrahedral sites in that file is computed, and the CSV is
rewritten in place with the new/updated ``Tet_Li_Ratio`` column.
"""

import os

import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; fall back to plain iteration so
    # the script still runs when tqdm is not installed.
    def tqdm(iterable, **_kwargs):
        return iterable


# ================= Configuration =================
# Root directory that contains the CSV files.
CSV_ROOT_DIR = "../output"
# Root directory that contains the CSM .dat files.
CSM_ROOT_DIR = "../output/CSM"
# =================================================


def calculate_tet_ratio_from_dat(dat_path):
    """Return the fraction of ``'tet'`` sites listed in a CSM ``.dat`` file.

    Each line is classified by plain substring matching (no ``eval``):
    a line containing ``'type': 'tet'`` counts as a tetrahedral site, a line
    containing ``'type': 'oct'`` counts as an octahedral site, and any other
    line is ignored.

    Args:
        dat_path: Path of the ``.dat`` file to parse.

    Returns:
        ``tet / (tet + oct)`` rounded to 4 decimals; ``0.0`` when the file
        contains no tet/oct lines at all; ``None`` when the file does not
        exist, its first line contains ``"No environments found"``, or it
        cannot be read/decoded (an error message is printed in that case).
    """
    if not os.path.exists(dat_path):
        return None

    tet_count = 0
    total_count = 0
    try:
        with open(dat_path, 'r', encoding='utf-8') as f:
            # Stream the file instead of loading every line into memory.
            for line_no, line in enumerate(f):
                # The "nothing found" marker is only looked for on line one.
                if line_no == 0 and "No environments found" in line:
                    return None
                if "'type': 'tet'" in line:
                    tet_count += 1
                    total_count += 1
                elif "'type': 'oct'" in line:
                    total_count += 1
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"解析出错 {dat_path}: {e}")
        return None

    if total_count == 0:
        return 0.0
    return round(tet_count / total_count, 4)


def _normalize_material_id(value):
    """Return ``value`` as a string with a single trailing ``.0`` removed.

    Only the suffix is stripped: an ID such as ``"1.05"`` must stay intact,
    whereas a float-formatted integer like ``"123.0"`` becomes ``"123"``.
    """
    material_id = str(value)
    if material_id.endswith('.0'):
        material_id = material_id[:-2]
    return material_id


def process_single_csv(csv_path, group_name, anion_name, csm_root_dir=None):
    """Compute ``Tet_Li_Ratio`` for every row of one CSV and save it in place.

    The ``.dat`` file for a row is looked up at
    ``<csm_root>/<group>/<id>.dat`` when ``group_name == anion_name``
    (single anion) and at ``<csm_root>/<group>/<anion>/<id>.dat`` otherwise
    (mixed anion), where ``<id>`` is the row's ``Filename`` value without a
    trailing ``.0``.

    Args:
        csv_path: CSV file to update; it is overwritten on success.
        group_name: First directory level below the CSM root.
        anion_name: Second directory level; equal to ``group_name`` for
            single-anion groups.
        csm_root_dir: Root of the CSM ``.dat`` tree. Defaults to the
            module-level ``CSM_ROOT_DIR``.

    Returns:
        None. A CSV that cannot be read, or that has no ``Filename`` column,
        is reported and left untouched.
    """
    if csm_root_dir is None:
        csm_root_dir = CSM_ROOT_DIR

    print(f"正在更新 CSV: {csv_path}")

    # Read the ID column as text so identifiers are not parsed as numbers.
    try:
        df = pd.read_csv(csv_path, dtype={'Filename': str})
    except Exception as e:  # best effort: one bad CSV must not stop the run
        print(f"读取 CSV 失败: {e}")
        return

    if 'Filename' not in df.columns:
        # Without the ID column there is nothing to look up; skip the file
        # instead of raising KeyError and aborting the whole run.
        print(f"CSV 缺少 'Filename' 列,已跳过: {csv_path}")
        return

    # The directory layout must match the one produced when the .dat files
    # were generated: single anion -> <root>/S, mixed anion -> <root>/S+O/S.
    if group_name == anion_name:
        dat_dir = os.path.join(csm_root_dir, group_name)
    else:
        dat_dir = os.path.join(csm_root_dir, group_name, anion_name)

    tet_ratios = [
        calculate_tet_ratio_from_dat(
            os.path.join(dat_dir, f"{_normalize_material_id(filename)}.dat")
        )
        for filename in tqdm(
            df['Filename'], total=df.shape[0], desc="Updating Occupancy"
        )
    ]

    # Add the column, or overwrite it if it already exists.
    df['Tet_Li_Ratio'] = tet_ratios

    df.to_csv(csv_path, index=False)
    print(f" - 已保存更新后的数据到: {csv_path}")


def run_update(csv_root_dir=None, csm_root_dir=None):
    """Walk the CSV root and update every ``.csv`` file found below it.

    The group and anion names are taken from the CSV's directory relative to
    the CSV root: the first path component is the group, the second (when
    present) is the anion, otherwise the anion equals the group.

    Args:
        csv_root_dir: Directory tree to scan for CSV files. Defaults to the
            module-level ``CSV_ROOT_DIR``.
        csm_root_dir: Root of the CSM ``.dat`` tree. Defaults to the
            module-level ``CSM_ROOT_DIR``.
    """
    if csv_root_dir is None:
        csv_root_dir = CSV_ROOT_DIR
    if csm_root_dir is None:
        csm_root_dir = CSM_ROOT_DIR

    if not os.path.exists(csv_root_dir):
        print(f"CSV 目录不存在: {csv_root_dir}")
        return

    csm_abs = os.path.normcase(os.path.abspath(csm_root_dir))

    for root, dirs, files in os.walk(csv_root_dir):
        # The CSM tree may live inside the CSV root (it does with the default
        # configuration). It holds inputs, not CSVs to update, so prune it
        # in place to keep os.walk from descending into it.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d)))
            != csm_abs
        ]

        # e.g. root "../output/S" -> "S"; "../output/S+O/S" -> "S+O", "S".
        path_parts = os.path.relpath(root, csv_root_dir).split(os.sep)
        group_name = path_parts[0]
        anion_name = path_parts[1] if len(path_parts) >= 2 else group_name

        for file in files:
            if file.endswith(".csv"):
                process_single_csv(
                    os.path.join(root, file),
                    group_name,
                    anion_name,
                    csm_root_dir,
                )


if __name__ == "__main__":
    run_update()