CSM及TET,CS
This commit is contained in:
129
py/update_tet_occupancy.py
Normal file
129
py/update_tet_occupancy.py
Normal file
@@ -0,0 +1,129 @@
import os

import pandas as pd
from tqdm import tqdm

# ================= Configuration =================
# Root directory that is walked for CSV files.
CSV_ROOT_DIR = "../output"
# Root directory that holds the CSM .dat files.
CSM_ROOT_DIR = "../output/CSM"
# =================================================


def calculate_tet_ratio_from_dat(dat_path):
    """Return the fraction of sites in a ``.dat`` file typed as tetrahedral.

    Each line is treated as one site record. A line containing the literal
    text ``'type': 'tet'`` counts as tetrahedral, a line containing
    ``'type': 'oct'`` counts as octahedral, and every other line is ignored.

    Args:
        dat_path: Path of the ``.dat`` file to parse.

    Returns:
        ``tet / (tet + oct)`` rounded to 4 decimals, or ``None`` when the
        file does not exist, cannot be read or decoded, has
        "No environments found" in its first line, or contains no tet/oct
        line at all (including an empty file).
    """
    if not os.path.exists(dat_path):
        return None

    tet_count = 0
    total_count = 0

    try:
        with open(dat_path, 'r', encoding='utf-8') as f:
            # Stream the file instead of loading every line into memory.
            for line_no, line in enumerate(f):
                if line_no == 0 and "No environments found" in line:
                    return None

                # Plain substring matching is safer than eval() and fast
                # enough for this format.
                if "'type': 'tet'" in line:
                    tet_count += 1
                    total_count += 1
                elif "'type': 'oct'" in line:
                    total_count += 1
    except (OSError, UnicodeDecodeError) as e:
        print(f"解析出错 {dat_path}: {e}")
        return None

    # No recognised site means "no data"; 0.0 would read as "all octahedral".
    if total_count == 0:
        return None

    return round(tet_count / total_count, 4)


def process_single_csv(csv_path, group_name, anion_name):
    """Add or refresh the ``Tet_Li_Ratio`` column of one CSV file in place.

    For every row the ``Filename`` value is used as the material ID to build
    the path of a ``.dat`` file under ``CSM_ROOT_DIR``; the value returned by
    ``calculate_tet_ratio_from_dat`` for that path is stored in the
    ``Tet_Li_Ratio`` column and the CSV is written back to ``csv_path``.

    Args:
        csv_path: CSV file to read and overwrite.
        group_name: First-level directory name under ``CSM_ROOT_DIR``.
        anion_name: Sub-directory name under the group. When it equals
            ``group_name`` no sub-directory is used.

    Returns:
        None. A CSV that cannot be read, or that has no ``Filename`` column,
        is reported on stdout and left untouched.
    """
    print(f"正在更新 CSV: {csv_path}")

    # Read Filename as text so IDs are not coerced to numbers.
    try:
        df = pd.read_csv(csv_path, dtype={'Filename': str})
    except Exception as e:
        print(f"读取 CSV 失败: {e}")
        return

    # Skip unrelated CSVs instead of aborting the whole batch with KeyError.
    if 'Filename' not in df.columns:
        print(f"读取 CSV 失败: 缺少 'Filename' 列 ({csv_path})")
        return

    # The directory is the same for every row, so resolve it once.
    # Single anion: <CSM root>/S/123.dat
    # Mixed anion:  <CSM root>/S+O/S/123.dat
    # NOTE(review): this layout must match whatever produced the .dat files.
    if group_name == anion_name:
        dat_dir = os.path.join(CSM_ROOT_DIR, group_name)
    else:
        dat_dir = os.path.join(CSM_ROOT_DIR, group_name, anion_name)

    tet_ratios = []
    for raw_id in tqdm(df['Filename'], total=df.shape[0], desc="Updating Occupancy"):
        material_id = str(raw_id)
        # Strip only a trailing ".0" (numeric ID rendered as a float);
        # a ".0" elsewhere in the ID must be preserved.
        if material_id.endswith('.0'):
            material_id = material_id[:-2]

        dat_path = os.path.join(dat_dir, f"{material_id}.dat")
        tet_ratios.append(calculate_tet_ratio_from_dat(dat_path))

    # Add the column, or overwrite it if it already exists.
    df['Tet_Li_Ratio'] = tet_ratios

    df.to_csv(csv_path, index=False)
    print(f" - 已保存更新后的数据到: {csv_path}")


def run_update():
    """Walk ``CSV_ROOT_DIR`` and update every CSV located in a group folder.

    The group and anion names passed to ``process_single_csv`` come from the
    CSV's directory relative to ``CSV_ROOT_DIR``:

    * ``S/x.csv``     -> group ``S``, anion ``S``
    * ``S+O/S/x.csv`` -> group ``S+O``, anion ``S``

    CSV files directly inside ``CSV_ROOT_DIR`` belong to no group and are
    skipped. If ``CSV_ROOT_DIR`` does not exist a message is printed and the
    function returns.
    """
    if not os.path.exists(CSV_ROOT_DIR):
        print(f"CSV 目录不存在: {CSV_ROOT_DIR}")
        return

    for root, _dirs, files in os.walk(CSV_ROOT_DIR):
        rel_root = os.path.relpath(root, CSV_ROOT_DIR)
        # relpath() yields "." for the root itself; there is no group name
        # to derive there, so those files are not processed.
        if rel_root == os.curdir:
            continue

        path_parts = rel_root.split(os.sep)
        group_name = path_parts[0]
        # One level deep: the group doubles as the anion name.
        anion_name = path_parts[1] if len(path_parts) >= 2 else group_name

        for file in files:
            if file.endswith(".csv"):
                csv_path = os.path.join(root, file)
                process_single_csv(csv_path, group_name, anion_name)


# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    run_update()
Reference in New Issue
Block a user