#!/bin/bash

# ==========================================
# End-to-end automation pipeline (direct pass-through screening)
# ==========================================

# 1. Environment initialisation
echo "============ Stage 0: Initialization ============"
chmod -R u+w ../Screen
source $(conda info --base)/etc/profile.d/conda.sh

# Activate the "screen" conda environment
conda activate screen
cd py/
export PYTHONPATH=$(pwd):$PYTHONPATH

# 2. Pre-processing and file organisation (replaces the original Step 1)
echo "============ Stage 1: File Organization (Direct Pass) ============"
# Optional pre-processing to make sure the input folder is ready
python pre_process.py

# Direct pass-through organiser:
# reads input/, detects anions, copies each structure to after_step1/Anion/ID/ID.cif,
# skipping the slow check_basic style validations
python step1_direct.py

# Generate the Zeo++ driver scripts:
# walks after_step1 and emits analyze.sh plus the master sh_all.sh
python make_sh.py

# 3. Run the Zeo++ calculations
echo "============ Stage 2: Zeo++ Calculations ============"
conda deactivate
conda activate zeo

# Enter the data directory
cd ../data/after_step1
if [ -f "sh_all.sh" ]; then
    # Execute every generated calculation
    source sh_all.sh
    # Optionally clean up the master script
    # rm sh_all.sh
else
    echo "Error: sh_all.sh not found! Please check Stage 1."
    exit 1
fi

# 4. Data extraction and follow-up analysis
echo "============ Stage 3: Data Extraction & Analysis ============"
# Switch back to the "screen" environment
conda deactivate
conda activate screen
cd ../../py

# 3.1 Extract the basic Zeo++ data
#     Output: ../output/Anion/Anion.csv (Perc, Min_d, Max_node columns)
python extract_data.py

# 3.2 Corner-sharing analysis
#     Output: updates the CSVs with an Is_Only_Corner_Sharing column
echo "Running Corner Sharing Analysis..."
python analyze_cs.py

# 3.3 Combined screening
#     Reads the CSVs, applies the thresholds, populates ../data/after_screening
python step2_4_combined.py

# 3.4 CSM analysis (screened materials only)
#     Output: ../output/CSM/Anion/ID.dat
echo "Running CSM Analysis..."
python analyze_csm.py

# 3.5 Tetrahedral occupancy statistics
#     Reads the .dat files and adds a Tet_Li_Ratio column to the CSVs
echo "Updating Tetrahedral Li Ratio..."
python update_tet_occupancy.py

# 5. Done
echo "========================================================"
echo "All tasks completed!"
echo "Results stored in:"
echo " - CSV Data: ../output/"
echo " - Screened: ../data/after_screening/"
echo " - CSM Details: ../output/CSM/"
echo "========================================================"
import os
import pandas as pd
from pymatgen.core import Structure
# utils/CS_analyse.py must live next to this script (under py/utils/)
from utils.CS_analyse import CS_catulate, check_only_corner_sharing
from tqdm import tqdm

# Path configuration
CSV_ROOT_DIR = "../output"               # root holding the per-anion CSV files
DATA_SOURCE_DIR = "../data/after_step1"  # root holding the Group/Anion/ID/ID.cif trees


def _normalize_id(raw):
    """Return the material ID as a plain string.

    pandas sometimes parses numeric IDs as floats ("195819.0"); only a
    trailing ".0" is stripped so IDs such as "10.05" survive intact.
    (The previous `replace('.0', '')` corrupted any ID containing ".0".)
    """
    text = str(raw)
    return text[:-2] if text.endswith(".0") else text


def get_cif_path(group_name, anion_name, material_id):
    """Build the absolute CIF path for one CSV row.

    Single-anion layout (group == anion): after_step1/S/ID/ID.cif
    Mixed-anion layout  (group != anion): after_step1/S+O/S/ID/ID.cif
    """
    if group_name == anion_name:
        rel_path = os.path.join(DATA_SOURCE_DIR, group_name, material_id, f"{material_id}.cif")
    else:
        rel_path = os.path.join(DATA_SOURCE_DIR, group_name, anion_name, material_id, f"{material_id}.cif")
    return os.path.abspath(rel_path)


def process_single_csv(csv_path, group_name, anion_name):
    """Read a CSV, compute corner-sharing for every row, save it back in place.

    Adds (or overwrites) the Is_Only_Corner_Sharing column with
    1/0, "Error" or "File_Not_Found" per material.
    """
    print(f"正在处理 CSV: {csv_path}")

    # Force the ID column to string so leading zeros are not lost.
    try:
        df = pd.read_csv(csv_path, dtype={'Filename': str})
    except Exception as e:
        print(f"读取 CSV 失败: {e}")
        return

    if 'Is_Only_Corner_Sharing' in df.columns:
        print(" - 'Is_Only_Corner_Sharing' 列已存在,将覆盖更新。")

    results = []
    for _, row in tqdm(df.iterrows(), total=df.shape[0], desc=f"Analyzing {anion_name}"):
        material_id = _normalize_id(row['Filename'])
        cif_path = get_cif_path(group_name, anion_name, material_id)

        if not os.path.exists(cif_path):
            print(f" - 警告: 找不到 CIF 文件 {cif_path}")
            results.append("File_Not_Found")
            continue

        try:
            struct = Structure.from_file(cif_path)
            # Detect sharing of the Li polyhedra against the common anions.
            target_anions = ['O', 'S', 'Cl', 'F', 'Br', 'I', 'N', 'P']
            sharing_details = CS_catulate(struct, sp='Li', anion=target_anions)
            # 1 when Li polyhedra only corner-share, else 0.
            results.append(check_only_corner_sharing(sharing_details))
        except Exception:
            results.append("Error")

    df['Is_Only_Corner_Sharing'] = results
    df.to_csv(csv_path, index=False)
    print(f" - 已更新 CSV: {csv_path}")


def run_cs_analysis():
    """Walk CSV_ROOT_DIR and analyse every CSV found.

    The relative path of the CSV encodes the group/anion used to locate
    the CIF files (output/S/... or output/S+O/S/...).
    """
    if not os.path.exists(CSV_ROOT_DIR):
        print(f"CSV 根目录不存在: {CSV_ROOT_DIR}")
        return

    for root, _, files in os.walk(CSV_ROOT_DIR):
        for file in files:
            if not file.endswith(".csv"):
                continue
            csv_path = os.path.join(root, file)
            path_parts = os.path.relpath(root, CSV_ROOT_DIR).split(os.sep)
            if len(path_parts) == 1:
                group_name = anion_name = path_parts[0]
            elif len(path_parts) >= 2:
                group_name, anion_name = path_parts[0], path_parts[1]
            else:
                continue
            process_single_csv(csv_path, group_name, anion_name)


if __name__ == "__main__":
    run_cs_analysis()
import os
from pymatgen.core import Structure
from pymatgen.core.periodic_table import Element
# CSM helper routines live in py/utils/analyze_env_st.py
try:
    from utils.analyze_env_st import extract_sites, export_envs
except ImportError:
    print("Error: 找不到 utils.analyze_env_st 模块,请检查 utils 文件夹。")
    # The old bare exit() returned status 0, so batch drivers treated the
    # failure as success; SystemExit(1) reports the error properly.
    raise SystemExit(1)

from tqdm import tqdm

# ================= Configuration =================
# Input: only the screened materials are analysed.
INPUT_DIR = "../data/after_screening"
# Output root for the per-material .dat files.
OUTPUT_DIR = "../output/CSM"
# Analysis parameters.
TARGET_ELEMENT = 'Li'
ENV_TYPE = 'both'  # one of 'tet', 'oct', 'both'
# =================================================


def run_csm_analysis():
    """Compute CSM environments for every screened CIF.

    Walks INPUT_DIR, analyses the TARGET_ELEMENT environments and writes
    one .dat file per material under OUTPUT_DIR, mirroring the input
    directory layout (e.g. S/195819.cif -> CSM/S/195819.dat).
    """
    if not os.path.exists(INPUT_DIR):
        print(f"输入目录不存在: {INPUT_DIR},请先运行筛选步骤。")
        return

    # Collect every CIF below the screened tree.
    cif_files = []
    for root, _, files in os.walk(INPUT_DIR):
        for file in files:
            if file.endswith(".cif"):
                cif_files.append(os.path.join(root, file))

    print(f"开始进行 CSM 分析,共找到 {len(cif_files)} 个筛选后的材料...")

    for cif_path in tqdm(cif_files, desc="Calculating CSM"):
        try:
            # Mirror the relative location of the CIF under OUTPUT_DIR.
            rel_path = os.path.relpath(cif_path, INPUT_DIR)
            rel_dir = os.path.dirname(rel_path)
            file_base = os.path.splitext(os.path.basename(cif_path))[0]

            target_dir = os.path.join(OUTPUT_DIR, rel_dir)
            # exist_ok avoids the exists()/makedirs() race of the old code.
            os.makedirs(target_dir, exist_ok=True)
            target_dat_path = os.path.join(target_dir, f"{file_base}.dat")

            struct = Structure.from_file(cif_path)

            # Skip materials that do not contain the target element at all.
            if Element(TARGET_ELEMENT) not in struct.composition.elements:
                continue

            # extract_sites returns one environment dict per TARGET_ELEMENT site.
            env_list = extract_sites(struct, sp=TARGET_ELEMENT, envtype=ENV_TYPE)

            if env_list:
                export_envs(env_list, sp=TARGET_ELEMENT, envtype=ENV_TYPE, fname=target_dat_path)
            else:
                # Keep a stub so downstream steps can tell "analysed, nothing
                # found" apart from "never analysed".
                with open(target_dat_path, 'w') as f:
                    f.write("No environments found.")

        except Exception as e:
            print(f"处理出错 {cif_path}: {e}")

    print(f"CSM 分析完成,结果已保存至 {OUTPUT_DIR}")


if __name__ == "__main__":
    run_csm_analysis()
import os
import shutil
from pymatgen.core import Structure


def get_anion_type(structure):
    """Classify a structure by the anions it contains.

    Returns a string such as 'O', 'S' or 'S+O' (components sorted
    alphabetically, joined by '+'), or 'Unknown' when no known anion
    is present.
    """
    valid_anions = {'O', 'S', 'Se', 'Te', 'F', 'Cl', 'Br', 'I', 'N', 'P'}
    elements = {e.symbol for e in structure.composition.elements}
    found_anions = elements & valid_anions
    if not found_anions:
        return "Unknown"
    return "+".join(sorted(found_anions))


def organize_files_direct(input_folder, output_base):
    """Copy every CIF in *input_folder* into the after_step1 layout.

    Single anion : output_base/S/ID/ID.cif
    Mixed anions : one copy per component, e.g. output_base/S+O/S/ID/ID.cif
                   and output_base/S+O/O/ID/ID.cif, so the downstream
                   walkers (make_sh.py / extract_data.py / CS analysis,
                   which expect Group/Anion/ID/ID.cif) work unchanged.
    """
    if not os.path.exists(input_folder):
        print(f"输入文件夹不存在: {input_folder}")
        return

    cif_files = [f for f in os.listdir(input_folder) if f.endswith(".cif")]
    print(f"发现 {len(cif_files)} 个 CIF 文件,开始直接整理...")

    for filename in cif_files:
        file_path = os.path.join(input_folder, filename)
        try:
            # The structure is read only to classify the file.
            struct = Structure.from_file(file_path)
            anion_type = get_anion_type(struct)
            file_base_name = os.path.splitext(filename)[0]

            if "+" in anion_type:
                # Mixed anions: duplicate the file under every component so
                # that each per-anion pipeline sees it.
                for sub in anion_type.split("+"):
                    target_folder = os.path.join(output_base, anion_type, sub, file_base_name)
                    os.makedirs(target_folder, exist_ok=True)
                    shutil.copy(file_path, os.path.join(target_folder, filename))
                # The original printed a literal "(unknown)" placeholder here;
                # report the actual file being organised.
                print(f"整理: {filename} -> {anion_type} (已复制到各子类)")
            else:
                # Single anion: after_step1/S/ID/ID.cif
                target_folder = os.path.join(output_base, anion_type, file_base_name)
                os.makedirs(target_folder, exist_ok=True)
                shutil.copy(file_path, os.path.join(target_folder, filename))
                print(f"整理: {filename} -> {anion_type}")

        except Exception as e:
            print(f"处理 {filename} 失败: {e}")


if __name__ == "__main__":
    organize_files_direct("../data/input", "../data/after_step1")
calculate_tet_ratio_from_dat(dat_path): + """ + 解析 .dat 文件,计算四面体位 Li 的占比。 + 返回: float (0.0 - 1.0) 或 None (如果文件不存在或为空) + """ + if not os.path.exists(dat_path): + return None + + tet_count = 0 + total_count = 0 + + try: + with open(dat_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + + # 简单检查文件是否包含 "No environments found" + if len(lines) > 0 and "No environments found" in lines[0]: + return None + + for line in lines: + # 根据截图,每行是一个位点的信息 + # 简单字符串匹配,这比 eval 更安全且足够快 + if "'type': 'tet'" in line: + tet_count += 1 + total_count += 1 + elif "'type': 'oct'" in line: + total_count += 1 + # 如果还有其他类型,可以在这里加,或者只要是位点行都算进 total + + if total_count == 0: + return 0.0 + + return round(tet_count / total_count, 4) + + except Exception as e: + print(f"解析出错 {dat_path}: {e}") + return None + + +def process_single_csv(csv_path, group_name, anion_name): + """ + 读取 CSV -> 寻找对应的 CSM dat 文件 -> 计算比例 -> 更新 CSV + """ + print(f"正在更新 CSV: {csv_path}") + + # 读取 CSV,确保 ID 是字符串 + try: + df = pd.read_csv(csv_path, dtype={'Filename': str}) + except Exception as e: + print(f"读取 CSV 失败: {e}") + return + + tet_ratios = [] + + # 遍历 CSV 中的每一行 + for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Updating Occupancy"): + material_id = str(row['Filename']).replace('.0', '') + + # 构建对应的 .dat 文件路径 + # 路径逻辑: ../output/CSM/Group/Anion/ID.dat + # 注意: 这里的 Group/Anion 结构必须与 analyze_csm.py 生成的一致 + + if group_name == anion_name: + # 单一阴离子: ../output/CSM/S/123.dat + dat_rel_path = os.path.join(group_name, f"{material_id}.dat") + else: + # 混合阴离子: ../output/CSM/S+O/S/123.dat + dat_rel_path = os.path.join(group_name, anion_name, f"{material_id}.dat") + + dat_path = os.path.join(CSM_ROOT_DIR, dat_rel_path) + + # 计算比例 + ratio = calculate_tet_ratio_from_dat(dat_path) + tet_ratios.append(ratio) + + # 添加或更新列 + df['Tet_Li_Ratio'] = tet_ratios + + # 保存 + df.to_csv(csv_path, index=False) + print(f" - 已保存更新后的数据到: {csv_path}") + + +def run_update(): + """ + 主程序:遍历 output 目录下的 CSV + """ + if not 
from itertools import combinations
from typing import Dict, List

import numpy as np

# NOTE: pymatgen is imported lazily inside the functions that need it so this
# utils module stays importable (and unit-testable) without pulling in the
# heavy pymatgen dependency chain.


def check_real(nearest):
    """Keep only neighbor images whose fractional coordinates all lie inside
    the unit cell (0 <= x, y, z <= 1)."""
    kept = []
    for site in nearest:
        if np.all((site.frac_coords >= 0) & (site.frac_coords <= 1)):
            kept.append(site)
    return kept


def special_check_for_3(site, nearest):
    """Return the three neighbors closest to *site* in fractional space.

    Used for trigonally coordinated centers (B, N) whose Voronoi shell
    over-counts neighbors.
    """
    distances = [
        np.linalg.norm(np.array(site.frac_coords) - np.array(other.frac_coords))
        for other in nearest
    ]
    closest = np.argsort(distances)[:3]
    return [nearest[i] for i in closest]


def CS_catulate(
    struct,
    sp: str = 'Li',
    anion: List[str] = ('O',),
    tol: float = 0,
    cutoff: float = 3.0,
    notice: bool = False
) -> Dict[str, Dict[str, int]]:
    """Count corner/edge/face sharing between cation polyhedra.

    Three pair categories are tallied separately:
      1. target vs target       ("sp_vs_sp",     e.g. Li-Li)
      2. target vs other cation ("sp_vs_other",  e.g. Li-X)
      3. other vs other cation  ("other_vs_other", e.g. X-Y)

    Args:
        struct: pymatgen Structure.
        sp: target element symbol (default 'Li').
        anion: anion element symbols (default ('O',); a tuple default avoids
            the shared-mutable-default pitfall of the old list default).
        tol: VoronoiNN tolerance (0 works well for Li).
        cutoff: VoronoiNN cutoff distance (3.0 works well for Li).
        notice: when True, print each detected sharing pair.

    Returns:
        dict keyed by the three categories; each value is a histogram keyed
        by the number of shared anions as a string: "1" corner, "2" edge,
        "3" face, "4" four-anion sharing.
    """
    from pymatgen.analysis.local_env import VoronoiNN  # lazy heavy import

    voro_nn = VoronoiNN(tol=tol, cutoff=cutoff)
    anion = list(anion)

    # 1. Collect, per cation, the set of neighboring anion indices.
    target_sites_info = []
    other_cation_sites_info = []
    for index, site in enumerate(struct.sites):
        if site.species.chemical_system in anion:
            continue  # skip the anions themselves

        try:
            nn_info = voro_nn.get_nn_info(struct, index)
            nearest_anions = [
                nn["site"] for nn in nn_info
                if nn["site"].species.chemical_system in anion
            ]
        except Exception as e:
            print(f"Warning: Could not get neighbors for site {index} ({site.species_string}): {e}")
            continue

        if not nearest_anions:
            continue

        site_info = {
            'index': index,
            'element': site.species.chemical_system,
            'nearest_anion_indices': {nn.index for nn in nearest_anions},
        }
        if site.species.chemical_system == sp:
            target_sites_info.append(site_info)
        else:
            other_cation_sites_info.append(site_info)

    # 2. Histograms: key "1" = corner, "2" = edge, "3" = face.
    results = {
        "sp_vs_sp": {"1": 0, "2": 0, "3": 0, "4": 0},
        "sp_vs_other": {"1": 0, "2": 0, "3": 0, "4": 0},
        "other_vs_other": {"1": 0, "2": 0, "3": 0, "4": 0},
    }

    def _tally(pairs, bucket, label):
        # Count the anions two polyhedra have in common and bin the pair.
        for atom_i, atom_j in pairs:
            shared_anions = atom_i['nearest_anion_indices'].intersection(atom_j['nearest_anion_indices'])
            shared_count = len(shared_anions)
            if shared_count > 0 and str(shared_count) in results[bucket]:
                results[bucket][str(shared_count)] += 1
                if notice:
                    print(
                        f"[{label}] Atom {atom_i['index']} and {atom_j['index']} share {shared_count} anions: {shared_anions}")

    # 3. Tally the three categories with one shared helper (the original
    #    duplicated this loop body three times).
    _tally(combinations(target_sites_info, 2), "sp_vs_sp", "Li-Li")
    _tally(((a, b) for a in target_sites_info for b in other_cation_sites_info),
           "sp_vs_other", "Li-Other")
    _tally(combinations(other_cation_sites_info, 2), "other_vs_other", "Other-Other")

    return results


def CS_catulate_old(struct, sp='Li', anion=('O',), tol=0, cutoff=3.0, notice=False, ID=None):
    """Legacy sharing counter (kept for backward compatibility).

    Ignores *sp* polyhedra entirely and histograms how many anions each pair
    of the remaining cations shares (keys "2".."6"). Prefer CS_catulate.

    Returns:
        dict: histogram of shared-anion counts per cation pair.
    """
    from pymatgen.analysis.local_env import VoronoiNN  # lazy heavy import

    anion = list(anion)
    # Historical hard-coded Voronoi settings for Li-containing structures.
    if sp == 'Li':
        tol = 0
        cutoff = 3.0
    voro_nn = VoronoiNN(tol=tol, cutoff=cutoff)
    shared_count = {"2": 0, "3": 0, "4": 0, "5": 0, "6": 0}
    atom_dice = []

    for index, site in enumerate(struct.sites):
        # Skip anion sites and the mobile species itself.
        if site.species.chemical_system in anion:
            continue
        if site.species.chemical_system == sp:
            continue

        voro_info = voro_nn.get_voronoi_polyhedra(struct, index)
        nearest_anions = [
            nn_info["site"] for nn_info in voro_info.values()
            if nn_info["site"].species.chemical_system in anion
        ]
        if not nearest_anions:
            print(f"No nearest anions found for {ID} site {index}.")
            continue

        # Trigonal centers: keep only the three closest anions.
        if site.species.chemical_system == 'B' or site.species.chemical_system == 'N':
            nearest_anions = special_check_for_3(site, nearest_anions)
        nearest_anions = check_real(nearest_anions)

        atom_dice.append({
            'index': index,
            'nearest_index': [nn.index for nn in nearest_anions],
        })

    # Enumerate all cation pairs and bin by shared-anion count.
    for i, atom_i in enumerate(atom_dice):
        for atom_j in atom_dice[i + 1:]:
            nearest_i = set(atom_i['nearest_index'])
            nearest_j = set(atom_j['nearest_index'])
            shared_count_key = str(len(nearest_i & nearest_j))
            if shared_count_key in shared_count:
                shared_count[shared_count_key] += 1
                if notice:
                    if shared_count_key == '2':
                        print(f"{atom_j['index']}与{atom_i['index']}之间存在共线")
                        print(f"共线的阴离子为{nearest_i & nearest_j}")
                    if shared_count_key == '3':
                        print(f"{atom_j['index']}与{atom_i['index']}之间存在共面")
                        print(f"共面的阴离子为{nearest_i & nearest_j}")

    return shared_count


def CS_count(struct, sharing_results: Dict[str, Dict[str, int]], sp: str = 'Li') -> float:
    """Average number of shared anions per *sp* atom.

    Companion to CS_catulate (the old docstring referenced a nonexistent
    "calculate_polyhedra_sharing"). sp-sp pairs involve two target atoms
    (weight 2), sp-other pairs involve one (weight 1); other-other pairs do
    not touch sp and are ignored.

    Returns:
        float: weighted shared-anion total divided by the number of sp atoms,
        or 0.0 when the structure contains no sp atoms.
    """
    target_atom_count = sum(
        1 for site in struct.sites if site.species.chemical_system == sp
    )
    if target_atom_count == 0:
        return 0.0

    total_shared_anions = 0
    for num_shared_str, count in sharing_results.get("sp_vs_sp", {}).items():
        # Each sp-sp pair involves two target atoms.
        total_shared_anions += int(num_shared_str) * 2 * count
    for num_shared_str, count in sharing_results.get("sp_vs_other", {}).items():
        # Each sp-other pair involves a single target atom.
        total_shared_anions += int(num_shared_str) * 1 * count

    return total_shared_anions / target_atom_count


def CS_count_old(struct, shared_count, sp='Li'):
    """Legacy average for CS_catulate_old histograms (keys "2".."6").

    Returns the weighted sum of shared-anion counts divided by the number of
    sp atoms, or 0 when the structure has none.
    """
    atoms = sum(1 for site in struct.sites if site.species.chemical_system == sp)
    total = sum(shared_count[str(i)] * i for i in range(2, 7) if str(i) in shared_count)
    return total / atoms if atoms > 0 else 0


def check_only_corner_sharing(sharing_results: Dict[str, Dict[str, int]]) -> int:
    """Return 1 when every sharing involving sp is corner sharing.

    Companion to CS_catulate.

    Returns:
        1  when no edge/face sharing (shared count > 1) involves sp AND at
           least one corner share (count "1") exists;
        0  when any edge/face sharing exists, or when sp shares nothing.
    """
    sp_vs_sp_counts = sharing_results.get("sp_vs_sp", {})
    sp_vs_other_counts = sharing_results.get("sp_vs_other", {})

    # Any edge/face (or larger) sharing disqualifies immediately.
    for counts in (sp_vs_sp_counts, sp_vs_other_counts):
        for num_shared_str, count in counts.items():
            if int(num_shared_str) > 1 and count > 0:
                return 0

    # No edge/face sharing found: require at least one actual corner share —
    # "no sharing at all" is not a pass.
    if sp_vs_sp_counts.get("1", 0) > 0 or sp_vs_other_counts.get("1", 0) > 0:
        return 1
    return 0
#!/usr/bin/env python
# Extracts the coordination environment (CSM against perfect tet/oct) of every
# site of a chosen species in a structure file.
import os, sys
import numpy as np
import scipy
import argparse
from scipy.spatial import ConvexHull
from itertools import permutations
from pymatgen.core.structure import Structure
from pymatgen.core.periodic_table import *
from pymatgen.core.composition import *
from pymatgen.ext.matproj import MPRester
from pymatgen.io.vasp.outputs import *
from pymatgen.analysis.chemenv.coordination_environments.coordination_geometry_finder import LocalGeometryFinder
from pymatgen.analysis.chemenv.coordination_environments.structure_environments import LightStructureEnvironments
from pymatgen.analysis.chemenv.coordination_environments.chemenv_strategies import SimplestChemenvStrategy
from pymatgen.analysis.chemenv.coordination_environments.coordination_geometries import *

__author__ = "KyuJung Jun"
__version__ = "0.1"
__maintainer__ = "KyuJung Jun"
__email__ = "kjun@berkeley.edu"
__status__ = "Development"

'''
Input for the script : path to the structure file supported by Pymatgen
Structures with partial occupancy should be ordered or modified to full occupancy by Pymatgen.
'''


class HiddenPrints:
    '''
    Context manager redirecting stdout to os.devnull — silences the very
    chatty chemenv machinery.
    '''

    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stdout = self._original_stdout


def non_elements(struct, sp='Li'):
    """Return a copy of *struct* keeping only the framework anions (O, S, N).

    struct must be an ordered structure. sp is accepted for interface
    symmetry with the other helpers; the filter is purely anion-based.
    """
    anions_to_keep = {"O", "S", "N"}
    stripped = struct.copy()
    species_to_remove = [el.symbol for el in stripped.composition.elements
                         if el.symbol not in anions_to_keep]
    if species_to_remove:
        stripped.remove_species(species_to_remove)
    return stripped


def site_env(coord, struct, sp="Li", envtype='both'):
    '''
    coord : fractional coordinate of the target atom
    struct : ordered structure object from Pymatgen
    sp : the mobile specie
    envtype : reference perfect geometry. 'both' evaluates CSM against the
        perfect tetrahedron and octahedron and keeps the lower (better) one;
        'tet'/'oct' force a single reference.
    result : dict {'csm': float, 'vol': float, 'type': 'tet'|'oct'|'Non_<envtype>'}
    '''
    stripped = non_elements(struct)
    with_li = stripped.copy()
    # Insert the probe atom into the anion-only framework; it becomes site 0
    # after sorting because get_sorted_structure orders by electronegativity.
    with_li.append(sp, coord, coords_are_cartesian=False, validate_proximity=False)
    with_li = with_li.get_sorted_structure()
    tet_oct_competition = []

    if envtype == 'both' or envtype == 'tet':
        # Grow the neighbor shell until exactly 4 anions are inside.
        for dist in np.linspace(1, 4, 601):
            neigh = with_li.get_neighbors(with_li.sites[0], dist)
            if len(neigh) < 4:
                continue
            elif len(neigh) > 4:
                break
            neigh_coords = [i.coords for i in neigh]
            with HiddenPrints():
                lgf = LocalGeometryFinder(only_symbols=["T:4"])
                lgf.setup_structure(structure=with_li)
                lgf.setup_local_geometry(isite=0, coords=neigh_coords)
            try:
                site_volume = ConvexHull(neigh_coords).volume
                tet_env_list = []
                for _ in range(20):
                    # Sampled 20x: the APPROXIMATE_FALLBACK algorithm used by
                    # coordination_geometry_symmetry_measures_fallback_random
                    # is randomized with a small NRANDOM default, so keep the
                    # best of several draws (SEPARATION_PLANE is deterministic
                    # and unaffected).
                    with HiddenPrints():
                        csm_value = lgf.get_coordination_symmetry_measures()['T:4']['csm']
                    tet_env_list.append({'csm': csm_value, 'vol': site_volume, 'type': 'tet'})
                tet_oct_competition.append(min(tet_env_list, key=lambda x: x['csm']))
            except Exception as e:
                # NOTE(review): kept outside HiddenPrints so the diagnostics
                # actually reach stdout instead of os.devnull.
                print(e)
                print("This site cannot be recognized as tetrahedral site")
            if len(neigh) == 4:
                break

    if envtype == 'both' or envtype == 'oct':
        # Same scan, but for a 6-coordinate (octahedral) shell.
        for dist in np.linspace(1, 4, 601):
            neigh = with_li.get_neighbors(with_li.sites[0], dist)
            if len(neigh) < 6:
                continue
            elif len(neigh) > 6:
                break
            neigh_coords = [i.coords for i in neigh]
            with HiddenPrints():
                lgf = LocalGeometryFinder(only_symbols=["O:6"], permutations_safe_override=False)
                lgf.setup_structure(structure=with_li)
                lgf.setup_local_geometry(isite=0, coords=neigh_coords)
            try:
                site_volume = ConvexHull(neigh_coords).volume
                oct_env_list = []
                for _ in range(20):
                    # See the tetrahedral branch for why this is sampled 20x.
                    with HiddenPrints():
                        csm_value = lgf.get_coordination_symmetry_measures()['O:6']['csm']
                    oct_env_list.append({'csm': csm_value, 'vol': site_volume, 'type': 'oct'})
                tet_oct_competition.append(min(oct_env_list, key=lambda x: x['csm']))
            except Exception as e:
                print(e)
                print("This site cannot be recognized as octahedral site")
            if len(neigh) == 6:
                break

    if len(tet_oct_competition) == 0:
        # No 4- or 6-fold shell could be resolved around the probe.
        return {'csm': np.nan, 'vol': np.nan, 'type': 'Non_' + envtype}
    if len(tet_oct_competition) == 1:
        return tet_oct_competition[0]
    # Both references matched: keep the geometry with the lower (better) CSM;
    # min() returns the first (tet) entry on ties, matching the old code.
    return min(tet_oct_competition, key=lambda x: x['csm'])


def extract_sites(struct, sp="Li", envtype='both'):
    """Return one environment dict per *sp* site of *struct*.

    struct : pymatgen Structure (may be disordered; sites whose species
        include sp are analysed)
    sp : target element
    envtype : 'tet', 'oct' or 'both'

    Each entry carries 'frac_coords', 'type', 'csm' and 'volume'.
    """
    envlist = []

    # The sorted (ordered-friendly) copy is the same for every site, so it is
    # built once here instead of once per iteration as the old code did.
    # site_env needs it because non_elements/remove_species only accept
    # ordered structures.
    temp_ordered_struct = struct.get_sorted_structure()

    for site in struct:
        # A disordered site may host several species; analyse it whenever sp
        # is among them.
        site_elements = [el.symbol for el in site.species.elements]
        if sp not in site_elements:
            continue

        singleenv = site_env(site.frac_coords, temp_ordered_struct, sp, envtype)
        envlist.append({'frac_coords': site.frac_coords, 'type': singleenv['type'],
                        'csm': singleenv['csm'], 'volume': singleenv['vol']})

    if not envlist:
        print(f"警告: 在结构中未找到元素 {sp} 的占位。")

    return envlist


def export_envs(envlist, sp='Li', envtype='both', fname=None):
    '''
    envlist : list of environment dicts from extract_sites
    fname : output file name (defaults to
        extracted_environment_info_<sp>_<envtype>.dat)

    Writes a small header followed by one "Site index i: {...}" line per site;
    update_tet_occupancy.py parses these lines by substring matching.
    '''
    if not fname:
        fname = "extracted_environment_info" + "_" + sp + "_" + envtype + ".dat"
    with open(fname, 'w') as f:
        f.write('List of environment information\n')
        f.write('Species : ' + sp + "\n")
        f.write('Envtype : ' + envtype + "\n")
        for index, i in enumerate(envlist):
            f.write("Site index " + str(index) + ": " + str(i) + '\n')


if __name__ == "__main__":
    # Parsing argv at import time consumed the importing program's arguments
    # (csm.py does `from utils.analyze_env_st import ...`), so the CLI now
    # lives behind the main guard.
    parser = argparse.ArgumentParser()
    parser.add_argument('structure', help='path to the structure file supported by Pymatgen', nargs='?')
    parser.add_argument('envtype', help='both, tet, oct, choosing which perfect environment to reference to', nargs='?')
    args = parser.parse_args()
    if args.structure:
        chosen_envtype = args.envtype or 'both'
        site_info = extract_sites(Structure.from_file(args.structure), sp='Li', envtype=chosen_envtype)
        export_envs(site_info, sp='Li', envtype=chosen_envtype)