Files
screen/py/CS_catulate.py
2025-12-07 20:08:19 +08:00

118 lines
4.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import pandas as pd
from pymatgen.core import Structure
# Make sure the utils folder sits under the py directory and contains CS_analyse.py
from utils.CS_analyse import CS_catulate, check_only_corner_sharing
from tqdm import tqdm
# Path configuration. Both paths are relative, so they resolve against the
# current working directory at run time.
# Root directory that run_cs_analysis() walks looking for *.csv files.
CSV_ROOT_DIR = "../output"
# Root directory under which get_cif_path() locates the CIF files.
DATA_SOURCE_DIR = "../data/after_step1"
def get_cif_path(group_name, anion_name, material_id):
    """Return the absolute path of the CIF file belonging to one CSV row.

    The file is looked up under ``DATA_SOURCE_DIR`` using one of two layouts:

    * single-anion group (``group_name == anion_name``):
      ``<group>/<id>/<id>.cif``, e.g. ``S/123/123.cif``
    * mixed-anion group:
      ``<group>/<anion>/<id>/<id>.cif``, e.g. ``S+O/S/123/123.cif``
    """
    # A single-anion group has no separate anion sub-directory.
    if group_name == anion_name:
        parent_dirs = (group_name,)
    else:
        parent_dirs = (group_name, anion_name)

    cif_name = f"{material_id}.cif"
    relative = os.path.join(DATA_SOURCE_DIR, *parent_dirs, material_id, cif_name)
    return os.path.abspath(relative)
def _normalize_material_id(raw_value):
    """Return the ID as text, dropping a single trailing ``.0`` if present."""
    text = str(raw_value)
    # Strip only a *trailing* ".0" (left over when an integer ID was stored as
    # a float). A blanket str.replace would also corrupt IDs such as "10.05".
    return text[:-2] if text.endswith('.0') else text


def _corner_sharing_flag(cif_path, anions):
    """Load one CIF file and return check_only_corner_sharing()'s verdict."""
    struct = Structure.from_file(cif_path)
    sharing_details = CS_catulate(struct, sp='Li', anion=anions)
    return check_only_corner_sharing(sharing_details)


def process_single_csv(csv_path, group_name, anion_name):
    """Add or refresh the ``Is_Only_Corner_Sharing`` column of one CSV file.

    For every row, the CIF file is located through ``get_cif_path`` using the
    row's ``Filename`` value, analysed, and the outcome is stored in the new
    column. The CSV file is then overwritten in place.

    Per-row values written to the column:

    * the return value of ``check_only_corner_sharing`` on success,
    * ``"Error"`` when loading or analysing the structure raised,
    * ``"File_Not_Found"`` when the CIF file does not exist.

    Args:
        csv_path: Path of the CSV file to read and overwrite.
        group_name: Top-level group directory name used to locate CIF files.
        anion_name: Anion directory name; equal to ``group_name`` for
            single-anion groups.

    Returns:
        None. The function returns early, leaving the file untouched, when the
        CSV cannot be read or has no ``Filename`` column.
    """
    print(f"正在处理 CSV: {csv_path}")

    # Read the IDs as strings so that values are not reinterpreted as numbers.
    try:
        df = pd.read_csv(csv_path, dtype={'Filename': str})
    except Exception as e:
        print(f"读取 CSV 失败: {e}")
        return

    # Without the ID column there is nothing to look up; skip instead of
    # crashing with a KeyError halfway through a batch run.
    if 'Filename' not in df.columns:
        print(f"  - 跳过: CSV 缺少 'Filename' 列: {csv_path}")
        return

    if 'Is_Only_Corner_Sharing' in df.columns:
        print("  - 'Is_Only_Corner_Sharing' 列已存在,将覆盖更新。")

    # Anions handed to CS_catulate; constant for the whole file, so built once.
    target_anions = ['O', 'S', 'Cl', 'F', 'Br', 'I', 'N', 'P']

    results = []
    for raw_id in tqdm(df['Filename'], total=df.shape[0], desc=f"Analyzing {anion_name}"):
        material_id = _normalize_material_id(raw_id)
        cif_path = get_cif_path(group_name, anion_name, material_id)

        if not os.path.exists(cif_path):
            print(f"  - 警告: 找不到 CIF 文件 {cif_path}")
            results.append("File_Not_Found")
            continue

        try:
            cs_result = _corner_sharing_flag(cif_path, target_anions)
        except Exception as e:
            # Best effort: one broken structure must not abort the whole CSV,
            # but the reason is reported rather than silently discarded.
            print(f"  - 计算出错 {material_id}: {e}")
            cs_result = "Error"
        results.append(cs_result)

    # Attach the results as a column and overwrite the original file.
    df['Is_Only_Corner_Sharing'] = results
    df.to_csv(csv_path, index=False)
    print(f"  - 已更新 CSV: {csv_path}")
def run_cs_analysis():
    """Walk ``CSV_ROOT_DIR`` and run ``process_single_csv`` on every CSV file.

    The directory of each CSV, relative to ``CSV_ROOT_DIR``, determines where
    its CIF files are looked up:

    * ``<group>/file.csv``            -> group and anion are both ``<group>``
    * ``<group>/<anion>/.../file.csv`` -> first two path components are used

    CSV files lying directly in ``CSV_ROOT_DIR`` have no group directory and
    are skipped with a notice.

    Returns:
        None. Prints a message and returns early when ``CSV_ROOT_DIR`` does
        not exist.
    """
    if not os.path.exists(CSV_ROOT_DIR):
        print(f"CSV 根目录不存在: {CSV_ROOT_DIR}")
        return

    for root, _dirs, files in os.walk(CSV_ROOT_DIR):
        csv_files = [name for name in files if name.endswith(".csv")]
        if not csv_files:
            continue

        # Group / anion depend only on the directory, so derive them once per
        # directory rather than once per file.
        rel_root = os.path.relpath(root, CSV_ROOT_DIR)
        if rel_root == os.curdir:
            # relpath yields "." for the root itself; treating "." as a group
            # name would point at non-existent CIF folders and overwrite the
            # CSV with useless results.
            for name in csv_files:
                print(f"  - 跳过根目录下的 CSV: {os.path.join(root, name)}")
            continue

        path_parts = rel_root.split(os.sep)
        group_name = path_parts[0]
        anion_name = path_parts[1] if len(path_parts) >= 2 else group_name

        for name in csv_files:
            process_single_csv(os.path.join(root, name), group_name, anion_name)
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_cs_analysis()