118 lines
4.0 KiB
Python
118 lines
4.0 KiB
Python
import os
|
||
import pandas as pd
|
||
from pymatgen.core import Structure
|
||
# Make sure the utils folder sits under the py directory and contains CS_analyse.py
|
||
from utils.CS_analyse import CS_catulate, check_only_corner_sharing
|
||
from tqdm import tqdm
|
||
|
||
# Path configuration (both paths are relative to the current working directory).
# CSV_ROOT_DIR: tree that run_cs_analysis walks looking for *.csv files.
CSV_ROOT_DIR = "../output"
# DATA_SOURCE_DIR: base directory that get_cif_path joins CIF locations onto.
DATA_SOURCE_DIR = "../data/after_step1"
|
||
|
||
|
||
def get_cif_path(group_name, anion_name, material_id, data_root=None):
    """Build the absolute path of a material's CIF file.

    The directory layout depends on whether the group is a single anion:

    * ``group_name == anion_name``: ``<root>/<group>/<id>/<id>.cif``
      (e.g. ``../data/after_step1/S/123/123.cif``)
    * otherwise: ``<root>/<group>/<anion>/<id>/<id>.cif``
      (e.g. ``../data/after_step1/S+O/S/123/123.cif``)

    Args:
        group_name: Top-level group directory name.
        anion_name: Anion sub-directory name; when it equals ``group_name``
            there is no extra directory level.
        material_id: Material identifier, used both as the directory name
            and as the file stem. Non-string values are converted with
            ``str`` so that ``os.path.join`` does not raise ``TypeError``.
        data_root: Root of the CIF tree. Defaults to ``DATA_SOURCE_DIR``.

    Returns:
        The absolute path as a string. Existence of the file is not checked.
    """
    root = DATA_SOURCE_DIR if data_root is None else data_root
    material_id = str(material_id)

    parts = [root, group_name]
    # Multi-anion groups carry one extra directory level for the anion.
    if group_name != anion_name:
        parts.append(anion_name)
    parts += [material_id, f"{material_id}.cif"]

    return os.path.abspath(os.path.join(*parts))
|
||
|
||
|
||
def _normalize_material_id(raw_value):
    """Return the ID as text, dropping one trailing ``.0`` if present."""
    material_id = str(raw_value)
    # Only a trailing ".0" is a float-formatting artefact; a blanket
    # str.replace('.0', '') would also eat ".0" inside IDs such as "1.05".
    if material_id.endswith('.0'):
        material_id = material_id[:-2]
    return material_id


def _corner_sharing_result(cif_path, material_id):
    """Compute the 'Is_Only_Corner_Sharing' cell value for one CIF file."""
    if not os.path.exists(cif_path):
        # tqdm.write keeps the message from clobbering the progress bar.
        tqdm.write(f" - 警告: 找不到 CIF 文件 {cif_path}")
        return "File_Not_Found"

    try:
        # 1. Load the structure.
        struct = Structure.from_file(cif_path)

        # 2. Compute the sharing relations for Li against a fixed anion list.
        target_anions = ['O', 'S', 'Cl', 'F', 'Br', 'I', 'N', 'P']
        sharing_details = CS_catulate(struct, sp='Li', anion=target_anions)

        # 3. Reduce to a single flag.
        # NOTE(review): presumably 0/1 or True/False - confirm against
        # utils.CS_analyse.check_only_corner_sharing.
        return check_only_corner_sharing(sharing_details)
    except Exception as e:
        # Best effort: one bad structure must not abort the whole CSV, but
        # report the cause instead of hiding it behind the "Error" marker.
        tqdm.write(f" - 计算出错 {material_id}: {e}")
        return "Error"


def process_single_csv(csv_path, group_name, anion_name):
    """Add or refresh the 'Is_Only_Corner_Sharing' column of one CSV file.

    Reads the CSV, resolves the CIF file of every row from its 'Filename'
    value, evaluates the corner-sharing flag and writes the CSV back in
    place (overwriting ``csv_path``).

    Args:
        csv_path: Path of the CSV file to update.
        group_name: Group directory name, forwarded to ``get_cif_path``.
        anion_name: Anion directory name, forwarded to ``get_cif_path`` and
            shown in the progress-bar label.

    Returns:
        None. The function returns early, leaving the file untouched, when
        the CSV cannot be read or has no 'Filename' column.

    Each cell of the new column holds the analysis result, ``"Error"`` when
    the analysis raised, or ``"File_Not_Found"`` when the CIF is missing.
    """
    print(f"正在处理 CSV: {csv_path}")

    # Read the CSV, forcing the ID column to text.
    try:
        df = pd.read_csv(csv_path, dtype={'Filename': str})
    except Exception as e:
        print(f"读取 CSV 失败: {e}")
        return

    # Without IDs there is nothing to analyse; skip instead of raising
    # KeyError and aborting the caller's walk over the remaining files.
    if 'Filename' not in df.columns:
        print(f" - 跳过: CSV 缺少 'Filename' 列: {csv_path}")
        return

    if 'Is_Only_Corner_Sharing' in df.columns:
        print(" - 'Is_Only_Corner_Sharing' 列已存在,将覆盖更新。")

    results = []
    for raw_name in tqdm(df['Filename'], total=len(df), desc=f"Analyzing {anion_name}"):
        material_id = _normalize_material_id(raw_name)
        cif_path = get_cif_path(group_name, anion_name, material_id)
        results.append(_corner_sharing_result(cif_path, material_id))

    # Store the results as a (new or overwritten) column and save in place.
    df['Is_Only_Corner_Sharing'] = results
    df.to_csv(csv_path, index=False)
    print(f" - 已更新 CSV: {csv_path}")
|
||
|
||
|
||
def run_cs_analysis(csv_root=None):
    """Walk the CSV tree and run the corner-sharing analysis on every CSV.

    The group and anion names used to locate CIF files are taken from the
    CSV's directory relative to the root:

    * ``<root>/<group>/x.csv``: group and anion are both ``<group>``
    * ``<root>/<group>/<anion>/.../x.csv``: first two path components

    CSV files lying directly in the root have no group directory and are
    skipped with a message.

    Args:
        csv_root: Directory to walk. Defaults to ``CSV_ROOT_DIR``.

    Returns:
        None.
    """
    if csv_root is None:
        csv_root = CSV_ROOT_DIR

    if not os.path.exists(csv_root):
        print(f"CSV 根目录不存在: {csv_root}")
        return

    for root, _dirs, files in os.walk(csv_root):
        csv_files = [name for name in files if name.endswith(".csv")]
        if not csv_files:
            continue

        # Derive Group and Anion (used to locate the CIF files).
        rel_root = os.path.relpath(root, csv_root)
        if rel_root == os.curdir:
            # relpath yields "." for the root itself; treating "." as a
            # group name would point at CIF directories that do not follow
            # the Group/Anion layout, so these files are skipped.
            for name in csv_files:
                print(f"跳过根目录下的 CSV (无法确定 Group/Anion): {os.path.join(root, name)}")
            continue

        path_parts = rel_root.split(os.sep)
        group_name = path_parts[0]
        # A single component means a single-anion group (Group == Anion).
        anion_name = path_parts[1] if len(path_parts) >= 2 else group_name

        for name in csv_files:
            process_single_csv(os.path.join(root, name), group_name, anion_name)
|
||
|
||
|
||
# Script entry point: run the full analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_cs_analysis()