Files
screen/py/update_tet_occupancy.py
2025-12-07 20:08:19 +08:00

129 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import pandas as pd
from tqdm import tqdm
# ================= Configuration =================
# Root directory that contains the CSV files.
# (Relative path: it is resolved against the current working directory.)
CSV_ROOT_DIR = "../output"
# Root directory that contains the CSM .dat files.
CSM_ROOT_DIR = "../output/CSM"
# =================================================
def calculate_tet_ratio_from_dat(dat_path):
    """
    Parse a .dat file and compute the fraction of tetrahedral Li sites.

    A line containing ``'type': 'tet'`` counts as one tetrahedral site and a
    line containing ``'type': 'oct'`` as one octahedral site; all other lines
    are ignored.

    Args:
        dat_path: Path of the .dat file; it is read as UTF-8 text.

    Returns:
        ``tet / (tet + oct)`` rounded to 4 decimals (0.0 - 1.0);
        ``0.0`` when the file has content but no tet/oct line;
        ``None`` when the file does not exist, is empty (no non-blank line),
        has "No environments found" in its first line, or cannot be read.
    """
    if not os.path.exists(dat_path):
        return None

    # Only the I/O can fail here, so only the I/O is guarded.
    try:
        with open(dat_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"解析出错 {dat_path}: {e}")
        return None

    # An empty file carries no site information at all. Report "no data"
    # (None) instead of 0.0, which would read as "0 % tetrahedral".
    if not any(line.strip() for line in lines):
        return None

    # Simple check for the "No environments found" marker in the first line.
    if "No environments found" in lines[0]:
        return None

    tet_count = 0
    total_count = 0
    for line in lines:
        # Each line describes one site. Plain substring matching is safer
        # than eval() and fast enough.
        if "'type': 'tet'" in line:
            tet_count += 1
            total_count += 1
        elif "'type': 'oct'" in line:
            total_count += 1
        # Other site types could be added here, or every site line could be
        # counted into the total.

    if total_count == 0:
        return 0.0
    return round(tet_count / total_count, 4)
def process_single_csv(csv_path, group_name, anion_name):
    """
    Add (or overwrite) the ``Tet_Li_Ratio`` column of one CSV file in place.

    For every row, the ``Filename`` value is turned into the path of a .dat
    file below ``CSM_ROOT_DIR``, that file is passed to
    ``calculate_tet_ratio_from_dat``, and the result is stored in the new
    column. The CSV is then written back to ``csv_path``.

    Args:
        csv_path: CSV file to read and overwrite; needs a ``Filename`` column.
        group_name: First directory level below ``CSM_ROOT_DIR``.
        anion_name: Second directory level for mixed-anion groups. When it
            equals ``group_name`` (single anion) no second level is used.

    Returns:
        None. A CSV that cannot be read or has no ``Filename`` column is
        reported and left untouched.
    """
    print(f"正在更新 CSV: {csv_path}")

    # Read the IDs as strings so they are not parsed as numbers.
    # Broad catch on purpose: one unreadable CSV must not stop the batch.
    try:
        df = pd.read_csv(csv_path, dtype={'Filename': str})
    except Exception as e:
        print(f"读取 CSV 失败: {e}")
        return

    # Without this guard a CSV lacking the column raises KeyError in the loop
    # below and aborts the whole run.
    if 'Filename' not in df.columns:
        print(f"CSV 缺少 Filename 列,已跳过: {csv_path}")
        return

    # Directory layout (must match what analyze_csm.py generates):
    #   single anion: ../output/CSM/S/123.dat
    #   mixed anions: ../output/CSM/S+O/S/123.dat
    # The directory is the same for every row, so build it once.
    if group_name == anion_name:
        dat_dir = os.path.join(CSM_ROOT_DIR, group_name)
    else:
        dat_dir = os.path.join(CSM_ROOT_DIR, group_name, anion_name)

    tet_ratios = []
    for filename in tqdm(df['Filename'], total=df.shape[0], desc="Updating Occupancy"):
        material_id = str(filename)
        # Strip only a trailing ".0" (an ID that was stored as a float, e.g.
        # "123.0"). str.replace would also delete ".0" inside the ID.
        if material_id.endswith('.0'):
            material_id = material_id[:-2]

        dat_path = os.path.join(dat_dir, f"{material_id}.dat")
        tet_ratios.append(calculate_tet_ratio_from_dat(dat_path))

    # Add or update the column, then save over the original file.
    df['Tet_Li_Ratio'] = tet_ratios
    df.to_csv(csv_path, index=False)
    print(f" - 已保存更新后的数据到: {csv_path}")
def run_update():
    """
    Main routine: walk ``CSV_ROOT_DIR`` and update every ``.csv`` file in it.

    The directory of each CSV, taken relative to ``CSV_ROOT_DIR``, selects the
    group and anion passed to ``process_single_csv``:

    * one level  (``S``)      -> group ``S``,   anion ``S``
    * two levels (``S+O/S``)  -> group ``S+O``, anion ``S``
    * deeper levels use the first two components.

    CSV files lying directly in ``CSV_ROOT_DIR`` have no group directory and
    are skipped.

    Returns:
        None. Prints a message and returns early when the root is missing.
    """
    if not os.path.exists(CSV_ROOT_DIR):
        print(f"CSV 目录不存在: {CSV_ROOT_DIR}")
        return

    for root, _dirs, files in os.walk(CSV_ROOT_DIR):
        # e.g. root "../output/S" -> rel_root "S"
        rel_root = os.path.relpath(root, CSV_ROOT_DIR)

        # For the root itself relpath yields "."; splitting that never gives
        # an empty list, so an "else: continue" on the part count can never
        # run. Skip the root explicitly instead of using "." as group name.
        if rel_root == os.curdir:
            continue

        path_parts = rel_root.split(os.sep)
        group_name = path_parts[0]
        anion_name = path_parts[1] if len(path_parts) >= 2 else group_name

        for file in files:
            if file.endswith(".csv"):
                csv_path = os.path.join(root, file)
                process_single_csv(csv_path, group_name, anion_name)
# Script entry point: run the update only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_update()