V1

2025-12-07 13:56:33 +08:00
parent 49f54b04cd
commit c83985cd02
22 changed files with 2732 additions and 0 deletions
--- a/py/Occupation.py
+++ b/py/Occupation.py
@@ -0,0 +1,329 @@
+from fontTools.misc.plistlib import end_string
+from pymatgen.core import Structure
+import spglib
+from fractions import Fraction
+import random
+from pymatgen.core.sites import PeriodicSite, Species,Element,Lattice
+import numpy as np
+import re
+def typejudge(number):
+    """Translate a space-group number into the name of its crystal system.
+
+    Args:
+        number: Space-group number; values 1-230 are recognised.
+
+    Returns:
+        One of "Triclinic", "Monoclinic", "Orthorhombic", "Tetragonal",
+        "Trigonal", "Hexagonal" or "Cubic", or "Unknown" when ``number``
+        matches none of the ranges.
+    """
+    # Triclinic is matched by membership rather than by range.
+    if number in [1, 2]:
+        return "Triclinic"
+
+    # (first, last, name) with both bounds inclusive.
+    system_ranges = (
+        (3, 15, "Monoclinic"),
+        (16, 74, "Orthorhombic"),
+        (75, 142, "Tetragonal"),
+        (143, 167, "Trigonal"),
+        (168, 194, "Hexagonal"),
+        (195, 230, "Cubic"),
+    )
+    for first, last, name in system_ranges:
+        if first <= number <= last:
+            return name
+    return "Unknown"
+
+
+def extract_oxi_state(species_string):
+    """Read the oxidation state out of a species string such as "Li+:0.645".
+
+    Only the first "<letters><charge>" match in the string is considered.
+
+    Args:
+        species_string: Text holding an element symbol followed by a charge
+            suffix, e.g. "Li+:0.645" or "Fe2-:0.5".
+
+    Returns:
+        int: The signed oxidation state ("+" -> 1, "2-" -> -2). 0 is returned
+        when no charge suffix is found or its magnitude is not an integer.
+    """
+    # Letters, then an optional sign, optional digits and a closing sign.
+    found = re.search(r"([A-Za-z]+)([+-]?\d*[+-])", species_string)
+    if found is None:
+        return 0  # no charge suffix: treat as neutral
+
+    charge = found.group(2)
+    magnitude_text, sign_char = charge[:-1], charge[-1]
+
+    # A bare "+" or "-" stands for a single unit of charge.
+    if not magnitude_text:
+        return 1 if sign_char == "+" else -1
+
+    try:
+        magnitude = int(magnitude_text)
+    except ValueError:
+        return 0  # e.g. "++": the part before the final sign is not a number
+    return magnitude if sign_char == "+" else -magnitude
+
+def process_cif_file(cif_file_path,explict_element):
+    """Group the partially occupied sites of a structure file.
+
+    Sites are grouped by (chemical system, total occupancy). Sites whose
+    chemical system is listed in ``explict_element`` are skipped.
+
+    Args:
+        cif_file_path: Path passed to ``Structure.from_file``.
+        explict_element: Container of chemical-system strings to leave out
+            (compared against ``site.species.chemical_system``).
+
+    Returns:
+        list[dict]: One entry per group with the keys "species" (chemical
+        system string), "number" (1-based site indices) and "occupancy"
+        (``site.species.element_composition.num_atoms`` of the group).
+    """
+    structure = Structure.from_file(cif_file_path)
+    result_list = []
+    for index, site in enumerate(structure.sites, start=1):
+        occupancy = site.species.element_composition.num_atoms
+        species = site.species.chemical_system
+        if species in explict_element:
+            # Skip only this site. The previous `break` ended the scan at the
+            # first excluded site and silently dropped every site after it.
+            continue
+        if occupancy >= 1.0:
+            continue
+        # Reuse the group with the same species and occupancy if there is one.
+        entry = next((r for r in result_list if
+                      r["species"] == species and r["occupancy"] == occupancy), None)
+        if entry:
+            entry["number"].append(index)
+        else:
+            result_list.append({
+                "species": species,
+                "number": [index],
+                "occupancy": occupancy
+            })
+    return result_list
+
+
+def factorize_to_three_factors(n,type_sym=None):
+    """List the ways of writing ``n`` as a product x * y * z of positive integers.
+
+    Args:
+        n: Integer to factorise.
+        type_sym: ``None`` keeps every ordered triple; "xyz" keeps only the
+            triples with x >= y >= z. Any other value yields an empty list.
+
+    Returns:
+        list[dict]: Dicts with the keys 'x', 'y' and 'z', sorted by
+        x + y + z in ascending order. Triples with equal sums keep the order
+        in which they were generated (x ascending, then y ascending).
+    """
+    if type_sym is None:
+        ordered_only = False
+    elif type_sym == "xyz":
+        ordered_only = True
+    else:
+        return []
+
+    triples = []
+    for x in range(1, n + 1):
+        if n % x:
+            continue
+        rest = n // x
+        for y in range(1, rest + 1):
+            if rest % y:
+                continue
+            z = rest // y
+            if ordered_only and not (z <= y <= x):
+                continue
+            triples.append({'x': x, 'y': y, 'z': z})
+
+    # list.sort is stable, so ties stay in generation order.
+    triples.sort(key=lambda triple: triple['x'] + triple['y'] + triple['z'])
+    return triples
+
+
+
+
+
+def calculate_supercell_factor(occupancy):
+    """Approximate an occupancy by a reduced fraction.
+
+    Args:
+        occupancy: Occupancy value accepted by ``fractions.Fraction``.
+
+    Returns:
+        tuple: ``(numerator, denominator)`` of the fraction obtained with
+        ``Fraction.limit_denominator()`` at its default denominator limit.
+    """
+    reduced = Fraction(occupancy).limit_denominator()
+    return reduced.numerator, reduced.denominator
+
+
+def mark_atoms_randomly(factors, atom_number):
+    """Randomly pick which supercell copies keep the atom.
+
+    Args:
+        factors: Supercell factors as a dict {'x': int, 'y': int, 'z': int}.
+        atom_number: Number of copies to mark as occupied.
+
+    Returns:
+        dict: Maps every copy index 0 .. x*y*z - 1 (in ascending order) to 1
+        when the copy is occupied and to 0 otherwise.
+
+    Raises:
+        ValueError: If ``atom_number`` exceeds the number of copies.
+    """
+    total_copies = factors['x'] * factors['y'] * factors['z']
+
+    if atom_number > total_copies:
+        raise ValueError(f"atom_number ({atom_number}) 不能超过扩胞总数 (x*y*z = {total_copies})")
+
+    copy_ids = list(range(total_copies))
+    occupied_ids = random.sample(copy_ids, atom_number)
+
+    # Start with every copy empty, then flag the sampled ones.
+    marks = dict.fromkeys(copy_ids, 0)
+    for copy_id in occupied_ids:
+        marks[copy_id] = 1
+    return marks
+
+
+def generate_random_list(total_elements, atom_number):
+    """Build a 0/1 list with ``atom_number`` randomly placed ones.
+
+    Args:
+        total_elements: Length of the returned list.
+        atom_number: How many positions are set to 1.
+
+    Returns:
+        list[int]: ``total_elements`` entries, exactly ``atom_number`` of
+        which are 1; all others are 0.
+
+    Raises:
+        ValueError: If ``atom_number`` is larger than ``total_elements``.
+    """
+    if atom_number > total_elements:
+        raise ValueError("atom_number cannot be greater than the total number of elements (x * y * z)")
+
+    # Draw the occupied positions, then emit the flags in positional order.
+    occupied = set(random.sample(range(total_elements), atom_number))
+    return [1 if position in occupied else 0 for position in range(total_elements)]
+
+
+def merge_structures(struct_copies, factors):
+    """Assemble per-cell copies of a structure into one x*y*z supercell.
+
+    Copy ``i`` is placed in the cell with integer offset
+    ``(i // (y*z), (i % (y*z)) // z, i % z)``.
+
+    Args:
+        struct_copies: List of structures, one per cell (length x*y*z). The
+            lattice of the first copy is taken as the unit-cell lattice.
+        factors: Supercell factors as a dict {"x": int, "y": int, "z": int}.
+
+    Returns:
+        The merged supercell structure. Sites whose species composition is
+        empty (falsy) are left out.
+
+    Raises:
+        ValueError: If the number of copies does not equal x*y*z.
+    """
+    x, y, z = factors["x"], factors["y"], factors["z"]
+    total_copies = x * y * z
+
+    if len(struct_copies) != total_copies:
+        raise ValueError("副本数量与扩胞因子不匹配")
+
+    scale = np.array([x, y, z])
+    original_lattice = struct_copies[0].lattice
+
+    # The rows of a pymatgen lattice matrix are the lattice vectors a, b, c,
+    # so the diagonal scaling has to multiply from the left. Multiplying from
+    # the right scales the Cartesian columns instead, which is only the same
+    # thing for axis-aligned orthogonal cells.
+    new_lattice = Lattice(np.dot(np.diag(scale), original_lattice.matrix))
+
+    merged_structure = Structure(
+        lattice=new_lattice,
+        species=[],
+        coords=[],
+        coords_are_cartesian=False
+    )
+
+    for copy_idx, cell_copy in enumerate(struct_copies):
+        # Integer position of this copy inside the supercell grid.
+        offset = np.array([
+            copy_idx // (y * z),
+            (copy_idx % (y * z)) // z,
+            copy_idx % z
+        ])
+
+        for site in cell_copy:
+            if site.species:  # skip vacancies (empty composition)
+                merged_structure.append(
+                    species=site.species,
+                    # Fractional coordinates are relative to the enlarged
+                    # lattice, so the shifted unit-cell coordinates must be
+                    # divided by the scaling factors; otherwise all copies
+                    # coincide modulo 1.
+                    coords=(site.frac_coords + offset) / scale,
+                    coords_are_cartesian=False,
+                    properties=site.properties
+                )
+
+    return merged_structure
+
+def expand_structure(structure, factors, atom_indices, atom_number):
+    """Build a supercell in which selected sites are randomly filled or emptied.
+
+    One copy of ``structure`` is made per supercell cell. For every index in
+    ``atom_indices`` a fresh random draw decides in which ``atom_number``
+    copies the site is fully occupied; in the remaining copies it is replaced
+    by a site with occupancy 0.0. The copies are then combined with
+    ``merge_structures``.
+
+    Args:
+        structure: Unit-cell structure to expand.
+        factors: Supercell factors as a dict {'x': int, 'y': int, 'z': int}.
+        atom_indices: 1-based indices of the sites to redistribute.
+        atom_number: Number of copies in which each listed site stays occupied.
+
+    Returns:
+        The merged supercell structure returned by ``merge_structures``.
+
+    Raises:
+        ValueError: If an index in ``atom_indices`` is outside
+            1 .. len(structure.sites).
+    """
+    x, y, z = factors['x'], factors['y'], factors['z']
+    total_copies = x * y * z
+    if not all(1 <= idx <= len(structure.sites) for idx in atom_indices):
+        raise ValueError("atom_indices包含无效原子索引")
+
+    # Independent copies, one per cell of the supercell.
+    struct_copies = [structure.copy() for _ in range(total_copies)]
+
+    for atom_idx in atom_indices:
+        site_pos = atom_idx - 1  # indices are 1-based on the outside
+        original_site = structure.sites[site_pos]
+        element = original_site.species.chemical_system
+        # The oxidation state depends only on the original site, so it is
+        # parsed once instead of once per copy.
+        oxi_state = extract_oxi_state(original_site.species_string)
+
+        # {copy index: 1 if occupied else 0}
+        occupancy_dict = mark_atoms_randomly(factors,atom_number)
+        for copy_idx, occupy in occupancy_dict.items():
+            target = struct_copies[copy_idx]
+            target.remove_sites([site_pos])
+            ion = Species(element, oxi_state)
+            new_site = PeriodicSite(
+                # Occupied: the pure species; empty: the same species at 0.0.
+                species=ion if occupy else {ion: 0.0},
+                coords=original_site.frac_coords,
+                lattice=target.lattice,
+                to_unit_cell=True,
+                label=original_site.label
+            )
+            # Re-insert at the same position so later indices stay valid.
+            target.sites.insert(site_pos, new_site)
+
+    # merge_structures does the assembly; the former inline merge here was
+    # computed and immediately overwritten by this call.
+    return merge_structures(struct_copies,factors)
+
+
+def process_occupation(input_file,output_file,explict_element = None,expect_cifnumber = 10,random_time=1):
+    """Generate ordered supercells for the partially occupied sites of a file.
+
+    For each group of partially occupied sites the occupancy is turned into a
+    fraction, the denominator is factorised into supercell sizes and, for each
+    size, ``random_time`` random realisations are built and written out.
+
+    Args:
+        input_file: Structure file to read.
+        output_file: Path every generated structure is written to.
+        explict_element: Chemical systems to leave out of the search;
+            ``None`` means ``["Li"]``.
+        expect_cifnumber: Currently unused.
+        random_time: Number of random realisations per supercell size.
+    """
+    # Avoid a shared mutable default list.
+    if explict_element is None:
+        explict_element = ["Li"]
+
+    struct = Structure.from_file(input_file)
+    # get_space_group_info() already returns (symbol, international number),
+    # so the number does not need to be looked up again by symbol.
+    space_group_symbol, space_group_number = struct.get_space_group_info()
+    space_type = typejudge(space_group_number)
+    print(f"当前空间群符号为{space_group_symbol},序号为{space_group_number},对应的晶体体系为{space_type}")
+    occupation_list = process_cif_file(cif_file_path=input_file,explict_element=explict_element)
+    print(occupation_list)
+
+    if not occupation_list:
+        # Only report "nothing found" when there really was nothing to do; the
+        # former for/else printed this after every run.
+        print(f"不存在除{explict_element}以外的共占位原子")
+        return
+
+    for occupation in occupation_list:
+        atom_number, target_multiplier=calculate_supercell_factor(occupation["occupancy"])
+        divides = []
+        if space_type == "Hexagonal":
+            # NOTE(review): the message says hexagonal cells are not handled,
+            # yet they fall through to the generic branch below and are
+            # processed like any non-cubic system. Confirm the intent.
+            print('当前为六方晶系，暂不处理')
+        if space_type == "Cubic":
+            print("当前为立方晶体，三个方向同步")
+            divides = factorize_to_three_factors(target_multiplier,"xyz")
+        else:
+            print("为其他晶系，假设三个方向不同")
+            divides = factorize_to_three_factors(target_multiplier,)
+        print(divides)
+        for it in divides:
+            for _ in range(random_time):
+                expand_struct=expand_structure(struct,it,occupation["number"],atom_number)
+                # NOTE(review): every realisation is written to the same
+                # path, so only the last one written remains on disk.
+                expand_struct.to_file(output_file)
+            print(it)
+if __name__ == "__main__":
+    # Run the sample conversion only when this file is executed as a script,
+    # so that importing the module does not read or write any files.
+    process_occupation("../data/input_pre/ICSD_1234.cif", "haha.cif", explict_element=[], expect_cifnumber=1, random_time=1)
+
--- a/py/call_analyze.py
+++ b/py/call_analyze.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+import argparse
+import subprocess
+
+
+def run_analysis_with_subprocess(cif_file, input_file, output_file, filters=None):
+    """Run ``../tool/analyze_voronoi_nodes.py`` in a child process and print its output.
+
+    Args:
+        cif_file: CIF file passed as the positional argument.
+        input_file: Value for the ``-i`` option.
+        output_file: Value for the ``-o`` option.
+        filters: Iterable of filter names for ``-f``; ``None`` selects the
+            default list below.
+    """
+    if filters is None:
+        filters = ["Ordered", "PropOxi", "VoroPerco", "Coulomb", "VoroBV", "VoroInfo", "MergeSite"]
+
+    # Argument list (no shell), so file names are passed through verbatim.
+    command = ['python', '../tool/analyze_voronoi_nodes.py', cif_file, '-i', input_file, '-o', output_file, '-f'] + list(filters)
+
+    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    # Wait for the child to finish and collect both streams.
+    stdout, stderr = process.communicate()
+
+    # The pipes deliver bytes. Testing for `bytes` is correct on Python 3 and
+    # on Python 2 (where bytes is str); the former `isinstance(..., str)` test
+    # never matched on Python 3, so output was printed as a b'...' repr.
+    # Undecodable bytes are replaced rather than aborting the report.
+    if isinstance(stdout, bytes):
+        stdout = stdout.decode('utf-8', 'replace')
+    if isinstance(stderr, bytes):
+        stderr = stderr.decode('utf-8', 'replace')
+
+    print(stdout)
+    if stderr:
+        print(stderr)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the arguments and hand them to
+    # run_analysis_with_subprocess.
+    all_filters = ["Ordered", "PropOxi", "VoroPerco", "Coulomb", "VoroBV", "VoroInfo", "MergeSite"]
+
+    cli = argparse.ArgumentParser(description='Run Voronoi analysis using analyze.py script.')
+    cli.add_argument('cif_file', type=str, help='CIF file to analyze')
+    cli.add_argument('-i', '--input_file', required=True, type=str, help='Input YAML file')
+    cli.add_argument('-o', '--output_file', required=True, type=str,
+                     help='Output file to save the results')
+    cli.add_argument('-f', '--filters', nargs='+', default=all_filters,
+                     help='List of filters to apply (default is all filters)')
+
+    options = cli.parse_args()
+
+    run_analysis_with_subprocess(
+        options.cif_file,
+        options.input_file,
+        options.output_file,
+        options.filters,
+    )
--- a/py/crystal_2.py
+++ b/py/crystal_2.py
@@ -0,0 +1,363 @@
+from pymatgen.core import Structure
+from pymatgen.core.periodic_table import Element, Specie
+from pymatgen.analysis.local_env import CrystalNN
+from pymatgen.analysis.structure_matcher import StructureMatcher
+from pymatgen.io.cif import CifParser
+import numpy as np
+class crystal:
+    """Screening checks for one structure loaded from a file.
+
+    The structure is read once in ``__init__``. Each ``check_*`` method
+    returns ``True`` when the structure passes that screen and ``False``
+    otherwise; a passing check also stores ``True`` in the matching
+    ``check_*_result`` attribute where one is set below.
+    """
+
+    def __init__(self, file_path, element_positive='Na', mixed_anions=None):
+        """Load the structure and initialise the result flags.
+
+        Args:
+            file_path: Path passed to ``Structure.from_file``.
+            element_positive: Symbol of the cation the checks focus on.
+            mixed_anions: Collection of ``frozenset`` anion combinations that
+                are accepted when more than one anion is present; ``None``
+                selects the built-in combinations.
+        """
+        if mixed_anions is None:
+            mixed_anions = {
+                frozenset({'S', 'O'}),
+                frozenset({'Cl', 'Br'}),
+                frozenset({'Cl', 'O'}),
+                frozenset({'S', 'Cl'}),
+            }
+        self.structure = Structure.from_file(file_path)
+        self.file_path = file_path
+        self.element_positive = element_positive
+        self.check_all = False
+        self.check_basic_result = False
+        self.check_high_cn_and_face_sharing_result = False
+        self.check_percolation_radius_result = False
+        self.check_practical_result = False
+        self.anion = ""
+        self.anions = ""
+        self.mixed_anions = mixed_anions
+
+    def initialize(self):
+        """Placeholder hook; currently only prints a marker."""
+        print("e")
+
+    def check_practical(self):
+        """Reject chemistries that are excluded from the search.
+
+        Returns:
+            bool: ``False`` for excluded ternary Li-X-O / Li-P-S systems,
+            Li-O systems with a listed transition metal, any system with a
+            listed excluded element, or a Li site shared with other species;
+            ``True`` otherwise (also stored in ``check_practical_result``).
+        """
+        structure = self.structure
+        chemical_system_set = structure.chemical_system_set
+        has_li_and_o = "Li" in chemical_system_set and "O" in chemical_system_set
+
+        # Ternary Li-X-O with an excluded X, and ternary Li-P-S.
+        excluded_X_elements = {'S', 'I', 'Si', 'C', 'P', 'Al', 'Ge', 'Se', 'B', 'Cl'}
+        if len(chemical_system_set) == 3:
+            if has_li_and_o and any(el in chemical_system_set for el in excluded_X_elements):
+                return False
+            if all(el in chemical_system_set for el in ("Li", "P", "S")):
+                return False
+
+        # Li-O systems that contain one of these transition metals.
+        excluded_transition_metals = {'Fe', 'Mn', 'Ni', 'Ti', 'Mo', 'V', 'Co'}
+        if has_li_and_o and any(el in chemical_system_set for el in excluded_transition_metals):
+            return False
+
+        # NOTE(review): the original comment also named Eu, but Eu has never
+        # been part of this set; confirm whether it should be.
+        excluded_elements = {'N', 'Re', 'Ho', 'Hf', 'Ru', 'Lu'}
+        if any(el in chemical_system_set for el in excluded_elements):
+            return False
+
+        # A Li site that hosts more than one species counts as shared.
+        try:
+            for site in structure.sites:
+                if 'Li' in site.species_string and len(site.species) > 1:
+                    return False
+        except Exception as e:
+            print(f"Error during site sharing check: {e}")
+            return False
+
+        self.check_practical_result = True
+        return True
+
+    def check_basic(self):
+        """Run the basic composition and occupancy screens.
+
+        The screens are applied in order and the first failure returns
+        ``False``: binary compound, anion identification, site count,
+        radioactive elements, shared cation sites, partially occupied anions,
+        water molecules, and availability of oxidation-state/ionic-radius
+        data. A step that raises is reported on stdout and counts as a
+        failure.
+
+        Returns:
+            bool: ``True`` when every screen passes (also stored in
+            ``check_basic_result``), else ``False``.
+        """
+        structure = self.structure
+        # Binary compounds are rejected outright.
+        if len(structure.types_of_specie) == 2:
+            return False
+
+        # Identify the anion(s) among the supported ones.
+        anions = {'O', 'S','Br','Cl'}
+        try:
+            found_anions = set()
+            for site in structure.sites:
+                if site.species.chemical_system not in anions:
+                    continue
+                try:
+                    found_anions.add(site.specie.symbol)
+                except AttributeError:
+                    # `specie` is unavailable on this site (pymatgen raises
+                    # for sites that are not ordered); such anion sites are
+                    # rejected by the partial-occupancy screen further down.
+                    continue
+
+            if not found_anions:
+                if not self.mixed_anions:
+                    print("不是所选阴离子化合物")
+                else:
+                    print("未找到任何预定义的阴离子")
+                return False
+
+            if len(found_anions) == 1:
+                self.anion = next(iter(found_anions))
+                print(f"发现单一阴离子: {self.anion}")
+            elif frozenset(found_anions) in self.mixed_anions:
+                # Several anions: accepted only as a predefined combination.
+                self.anions = found_anions
+                self.anion = "+".join(sorted(found_anions))  # e.g. "Cl+S"
+                print(f"发现匹配的混合阴离子组合: {self.anion}")
+            else:
+                print(f"发现的阴离子组合 {found_anions} 不在预定义的混合阴离子列表中")
+                return False
+        except Exception as e:
+            print(f"处理阴离子时出错: {e}")
+            return False
+
+        # Site-count limit: 300 without mixed anions, 900 with.
+        try:
+            max_sites = 900 if self.mixed_anions else 300
+            if structure.num_sites > max_sites:
+                return False
+        except Exception:
+            print("原子个数判断失败")
+            return False
+
+        # Radioactive elements.
+        radioactive_elements = {'U', 'Th', 'Pu', 'Ra', 'Rn', 'Po', 'Np', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No',
+                                'Lr'}
+        try:
+            for site in structure:
+                if site.species.chemical_system in radioactive_elements:
+                    return False
+        except Exception:
+            print("放射性元素判断失败")
+            return False
+
+        # The cation must not share a site with another species.
+        try:
+            for site in structure.sites:
+                symbols = [specie.symbol for specie in site.species.keys()]
+                if self.element_positive in symbols and len(site.species) > 1:
+                    return False
+        except Exception:
+            print("共占位判断失败")
+            return False
+
+        # Anions must be fully occupied.
+        try:
+            for site in structure.sites:
+                for specie, occupancy in site.species.items():
+                    if specie.symbol in anions and occupancy < 1:
+                        return False
+        except Exception:
+            print("无序或部分占位的阴离子判断失败")
+            return False
+
+        # Water: an O site with exactly two H sites closer than 1.2.
+        try:
+            oxygen_sites = [site for site in structure.sites if site.species.chemical_system == "O"]
+            hydrogen_sites = [site for site in structure.sites if site.species.chemical_system == "H"]
+
+            for o_site in oxygen_sites:
+                nearby_hydrogens = [h_site for h_site in hydrogen_sites if o_site.distance(h_site) < 1.2]
+                if len(nearby_hydrogens) == 2:
+                    return False
+        except Exception:
+            print("水分子判断失败")
+            return False
+
+        # Every element needs common oxidation states and an ionic radius for
+        # its highest common oxidation state.
+        try:
+            for site in structure.sites:
+                for specie in site.species.keys():
+                    element = Element(specie.symbol)
+
+                    if not element.common_oxidation_states:
+                        return False
+
+                    try:
+                        _ = Specie(element.symbol, max(element.common_oxidation_states)).ionic_radius
+                    except Exception:
+                        return False
+        except Exception:
+            print("标准信息判断失败")
+            return False
+
+        # Charge neutrality is not checked here.
+        self.check_basic_result = True
+        return True
+
+    def check_high_cn_and_face_sharing(self,cut_distance = 3.1):
+        """Check cation sites with coordination number >= 5 and their cation neighbours.
+
+        Args:
+            cut_distance: Maximum distance for a cation neighbour to be
+                considered.
+
+        Returns:
+            bool: ``False`` when no cation site has a CrystalNN coordination
+            number of at least 5, when one of those sites fails
+            ``_check_face_sharing``, or when either step raises; ``True``
+            otherwise (also stored in
+            ``check_high_cn_and_face_sharing_result``).
+        """
+        structure = self.structure
+        nn_finder = CrystalNN()
+
+        # Collect the cation sites with coordination number >= 5.
+        high_cn_ep_sites = []
+        try:
+            for i, site in enumerate(structure):
+                # Compare symbols, as the neighbour filter below does, so that
+                # species carrying an oxidation state are recognised too.
+                if site.specie.symbol == self.element_positive:
+                    if nn_finder.get_cn(structure, i) >= 5:
+                        high_cn_ep_sites.append(i)
+            if not high_cn_ep_sites:
+                return False
+        except Exception:
+            print("高配位Na离子判断失败")
+            return False
+
+        try:
+            for i in high_cn_ep_sites:
+                neighbors = nn_finder.get_nn_info(structure, i)
+                # Cation neighbours within the cut-off distance.
+                x_neighbors = [
+                    neighbor["site_index"]
+                    for neighbor in neighbors
+                    if structure[neighbor["site_index"]].specie.symbol == self.element_positive
+                    and neighbor["site"].distance(structure[i]) <= cut_distance
+                ]
+
+                if not self._check_face_sharing(i, x_neighbors):
+                    print(f"Na site {i} does not share a face with other high-CN Na sites.")
+                    return False
+
+            print("All high-CN Na sites are face-sharing.")
+        except Exception:
+            print("共面判断失败")
+            # A failed check must not count as a pass; every other screen in
+            # this class returns False on error as well.
+            return False
+
+        self.check_high_cn_and_face_sharing_result = True
+        return True
+
+    def _check_face_sharing(self,site_index,neighbor_indices):
+        """Return True if any listed neighbour yields a direction vector.
+
+        This is a simplified stand-in: a neighbour qualifies as soon as
+        ``_calculate_face_normal`` returns something, i.e. its Cartesian
+        position differs from that of ``site_index``.
+        """
+        site_coords = self.structure[site_index].coords
+
+        for neighbor_index in neighbor_indices:
+            neighbor_coords = self.structure[neighbor_index].coords
+            shared_face_normal = self._calculate_face_normal(site_coords, neighbor_coords)
+            if shared_face_normal is not None:
+                return True
+
+        return False
+
+    def _calculate_face_normal(self, coords1, coords2):
+        """Return the unit vector from ``coords1`` to ``coords2``.
+
+        Returns ``None`` when the two points are closer than 1e-6.
+        """
+        vector = coords2 - coords1
+        norm = np.linalg.norm(vector)
+
+        if norm < 1e-6:
+            return None
+
+        return vector / norm
+
+    def check_percolation_radius(self):
+        """Placeholder check; always passes."""
+        return True
+def group_structures_by_framework(structures):
+    """Partition structures into groups that match under ``StructureMatcher``.
+
+    Each structure is compared with the first member of every existing group,
+    in order, and joins the first group it fits; otherwise it starts a new
+    group.
+
+    Args:
+        structures: Iterable of structures to group.
+
+    Returns:
+        list[list]: The groups, in order of first appearance.
+    """
+    matcher = StructureMatcher()
+    groups = []
+
+    for candidate in structures:
+        for members in groups:
+            # The first member acts as the representative of its group.
+            if matcher.fit(candidate, members[0]):
+                members.append(candidate)
+                break
+        else:
+            # No existing group matched.
+            groups.append([candidate])
+
+    return groups
--- a/py/expansion.py
+++ b/py/expansion.py
@@ -0,0 +1,499 @@
+from distutils.dir_util import remove_tree
+
+from pymatgen.core import Structure, Lattice,Species,PeriodicSite
+import numpy as np
+from collections import defaultdict
+import math
+import spglib
+from functools import reduce
+from fractions import Fraction
+import random
+import re
+import os
+
+def mark_atoms_randomly(numerator,denominator):
+    """
+    Randomly decide which of ``denominator`` copies of a site are occupied.
+
+    Args:
+        numerator: number of copies to mark as occupied.
+        denominator: total number of copies; they are indexed 0 .. denominator-1.
+
+    Returns:
+        dict {copy_index: 1 or 0} with one entry per copy; exactly
+        ``numerator`` of the values are 1.
+
+    Raises:
+        ValueError: if ``numerator`` is larger than ``denominator`` (raised
+            here) or negative (raised by ``random.sample``).
+    """
+    if numerator > denominator:
+        raise ValueError(f"atom_number ({numerator}) 不能超过扩胞总数 (x*y*z = {denominator})")
+
+    # A set keeps the membership test below O(1) per copy.
+    selected_atoms = set(random.sample(range(denominator), numerator))
+
+    return {atom: 1 if atom in selected_atoms else 0 for atom in range(denominator)}
+def extract_oxi_state(species_str,element):
+    """
+    Extract the oxidation state of ``element`` from a species string.
+
+    Args:
+        species_str: species string such as "Li+:0.689, Sc3+:0.311" or "Cl-".
+        element: bare element symbol to look for, e.g. "Sc".
+
+    Returns:
+        Signed oxidation state of the first entry whose element symbol equals
+        ``element`` (Sc3+ -> 3, Sc- -> -1, Sc3- -> -3). Whole values are
+        returned as int, fractional ones (e.g. "Fe2.5+") as float. Returns 0
+        when the element is absent or carries no charge.
+    """
+    for part in species_str.split(","):
+        # Keep only the "symbol + charge" token in front of the occupancy.
+        token = part.split(":")[0].strip()
+        parsed = re.match(r"([A-Z][a-z]?)(.*)$", token)
+        # Compare whole symbols: a substring test would let "S" match "Sc3+"
+        # and then slice the charge at the wrong position.
+        if parsed is None or parsed.group(1) != element:
+            continue
+
+        charge_part = parsed.group(2)
+        magnitude = re.search(r"\d+(?:\.\d+)?", charge_part)
+
+        if magnitude is None:
+            # A bare sign means a unit charge; no sign at all means neutral.
+            if "+" in charge_part:
+                return 1
+            if "-" in charge_part:
+                return -1
+            return 0
+
+        sign = -1 if "-" in charge_part else 1
+        value = float(magnitude.group(0))
+        return sign * (int(value) if value.is_integer() else value)
+
+    return 0  # element not present
+def factorize_to_three_factors(n,type_sym=None,keep_module=None):
+    """
+    Enumerate the ways of writing ``n`` as a product x * y * z of positive integers.
+
+    Args:
+        n: integer to factorise; values below 1 give an empty result.
+        type_sym: ``None`` keeps every ordered triple; ``"xyz"`` keeps only
+            triples with x >= y >= z. Any other value yields no triples.
+        keep_module: ``'random'`` returns the triples in shuffled order;
+            anything else returns them sorted by (x + y + z, z, y, x).
+
+    Returns:
+        list of dicts of the form ``{'x': int, 'y': int, 'z': int}``.
+    """
+    factors = []
+
+    if type_sym is None or type_sym == "xyz":
+        ordered_only = type_sym == "xyz"
+        for x in range(1, n + 1):
+            if n % x:
+                continue
+            remaining_n = n // x
+            for y in range(1, remaining_n + 1):
+                if remaining_n % y:
+                    continue
+                z = remaining_n // y
+                # "xyz" mode drops permutations by requiring x >= y >= z.
+                if ordered_only and not (x >= y >= z):
+                    continue
+                factors.append({'x': x, 'y': y, 'z': z})
+
+    if keep_module == 'random':
+        # Shuffle a copy so the enumeration order itself stays untouched.
+        shuffled_factors = factors.copy()
+        random.shuffle(shuffled_factors)
+        return shuffled_factors
+
+    # Smallest sum first; ties broken by z, then y, then x.
+    return sorted(
+        factors,
+        key=lambda item: (item['x'] + item['y'] + item['z'], item['z'], item['y'], item['x']),
+    )
+def typejudge(number):
+    """Map a space-group number (1-230) to the name of its crystal system.
+
+    Returns "Unknown" for anything outside the listed ranges.
+    """
+    if number in (1, 2):
+        return "Triclinic"
+
+    # Inclusive (first, last, name) bands for the remaining crystal systems.
+    bands = (
+        (3, 15, "Monoclinic"),
+        (16, 74, "Orthorhombic"),
+        (75, 142, "Tetragonal"),
+        (143, 167, "Trigonal"),
+        (168, 194, "Hexagonal"),
+        (195, 230, "Cubic"),
+    )
+    for first, last, name in bands:
+        if first <= number <= last:
+            return name
+    return "Unknown"
+def strategy_divide(struct,total,keep_module=None):
+    """
+    Propose supercell shapes (x, y, z) whose product equals ``total``.
+
+    Args:
+        struct: structure exposing ``get_space_group_info()``.
+        total: number of unit-cell copies the supercell must contain.
+        keep_module: forwarded to ``factorize_to_three_factors`` to choose
+            between sorted and shuffled output.
+
+    Returns:
+        list of ``{'x', 'y', 'z'}`` dicts. Cubic cells only get triples with
+        x >= y >= z; every other system, hexagonal included, gets all
+        ordered triples.
+    """
+    # get_space_group_info() already returns (symbol, international number),
+    # so no lookup through spglib's Hall-number table is needed; the old
+    # symbol-string comparison silently fell back to P1 when nothing matched.
+    space_group_symbol, space_group_number = struct.get_space_group_info()
+    space_type = typejudge(space_group_number)
+    print(f"当前空间群符号为{space_group_symbol},序号为{space_group_number},对应的晶体体系为{space_type}")
+    if space_type == "Hexagonal":
+        # Only a notice: hexagonal cells still go through the generic branch below.
+        print('当前为六方晶系，暂不处理')
+    if space_type == "Cubic":
+        print("当前为立方晶体，三个方向同步")
+        divides = factorize_to_three_factors(total, "xyz",keep_module=keep_module)
+    else:
+        print("为其他晶系，假设三个方向不同")
+        divides = factorize_to_three_factors(total,keep_module=keep_module)
+    return divides
+def get_first_non_explicit_element(species_str, explict_element= ["Li","Li+"]):
+    """
+    Return the first element symbol in ``species_str`` that is not excluded.
+
+    Args:
+        species_str: species string such as "Li+:0.689, Sc3+:0.311".
+        explict_element: symbols to skip, e.g. ["Li"].
+
+    Returns:
+        The letters of the first species whose letters-only form is not in
+        ``explict_element`` (e.g. "Sc"), or "" when there is none.
+    """
+    if not species_str.strip():
+        return ""
+
+    # Reduce each "symbol+charge:occupancy" chunk to its alphabetic characters.
+    symbols = (
+        "".join(filter(str.isalpha, chunk.split(":")[0]))
+        for chunk in species_str.split(",")
+        if chunk.strip()
+    )
+    return next((symbol for symbol in symbols if symbol not in explict_element), "")
+def calculate_expansion_factor(Occupation_list,calculate_type='high'):
+    """
+    Turn every occupancy into a fraction and bring all of them to a common denominator.
+
+    Args:
+        Occupation_list: list of dicts, each holding at least
+            {
+                "occupation": value accepted by ``fractions.Fraction``,
+                "atom_serial": List[int],
+                "numerator": None,
+                "denominator": None
+            }
+            The dicts are updated in place.
+        calculate_type: precision mode
+            - 'high': ``Fraction.limit_denominator()`` with its default bound
+            - 'normal': closest fraction with denominator <= 100
+            - 'low': closest fraction with denominator <= 10
+            - 'very low': closest fraction with denominator <= 5
+
+    Returns:
+        (lcm, Occupation_list): the least common multiple of all denominators
+        and the same list with "numerator"/"denominator" rescaled to it.
+        An empty input returns ``(1, [])``.
+
+    Raises:
+        ValueError: if ``calculate_type`` is not one of the modes above.
+    """
+    if not Occupation_list:
+        return 1, []
+
+    # Largest denominator per mode; None selects Fraction's own default.
+    max_denominators = {'high': None, 'normal': 100, 'low': 10, 'very low': 5}
+    if calculate_type not in max_denominators:
+        raise ValueError("calculate_type必须是'high', 'normal', 'low'或'very low'")
+    limit = max_denominators[calculate_type]
+
+    # Step 1: approximate each occupancy by a fraction.
+    for entry in Occupation_list:
+        exact = Fraction(entry["occupation"])
+        fraction = exact.limit_denominator() if limit is None else exact.limit_denominator(limit)
+        entry["numerator"] = fraction.numerator
+        entry["denominator"] = fraction.denominator
+
+    # Step 2: least common multiple of all denominators.
+    denominators = [entry["denominator"] for entry in Occupation_list]
+    lcm = reduce(lambda a, b: a * b // math.gcd(a, b), denominators, 1)
+
+    # Step 3: rescale every fraction to the common denominator.
+    for entry in Occupation_list:
+        entry["numerator"] = entry["numerator"] * (lcm // entry["denominator"])
+        entry["denominator"] = lcm
+
+    return lcm, Occupation_list
+def get_occu(s_str,explict_element):
+    """
+    Return the occupancy of the first species that is not in ``explict_element``.
+
+    Args:
+        s_str: species string made of "species:occupancy" items, e.g.
+            "Li+:0.689, Sc3+:0.311".
+        explict_element: species tokens to skip, written exactly as they
+            appear in ``s_str`` (e.g. "Li+").
+
+    Returns:
+        float: the occupancy of the first non-skipped species. 1.0 is
+        returned when the string is blank, has no "species:occupancy" items,
+        or every item is skipped or has an unparsable number.
+    """
+    if not s_str.strip():
+        return 1.0
+
+    for species, occu in re.findall(r'([A-Za-z0-9+-]+):([0-9.]+)', s_str):
+        if species in explict_element:
+            continue
+        try:
+            # Return a number, not the matched text, so callers can compare it with 1.0.
+            return float(occu)
+        except ValueError:
+            continue  # the pattern also admits strings like "1.2.3"
+
+    return 1.0
+def process_cif_file(struct, explict_element=["Li", "Li+"]):
+    """
+    Collect the sites whose occupancy differs from 1.0, grouped by occupancy value.
+
+    Args:
+        struct: pymatgen ``Structure``.
+        explict_element: species/chemical systems that are left out.
+
+    Returns:
+        List[Dict], one dict per distinct occupancy value:
+            {
+                "occupation": value returned by ``get_occu`` for the sites,
+                "atom_serial": List[int],  # 1-based site numbers
+                "numerator": None,         # filled in later
+                "denominator": None,       # filled in later
+                "split": List[str]         # element symbols of the last site
+                                           # seen with this occupancy
+            }
+
+    Raises:
+        TypeError: if ``struct`` is not a ``Structure``.
+    """
+    if not isinstance(struct, Structure):
+        raise TypeError("输入必须为pymatgen的Structure对象")
+
+    # An element symbol is one capital letter plus an optional lowercase one.
+    symbol_re = re.compile(r'([A-Z][a-z]?)')
+    serials_by_occu = defaultdict(list)
+    elements_by_occu = {}
+
+    for serial, site in enumerate(struct, start=1):
+        occu = get_occu(site.species_string, explict_element)
+        # Skip fully occupied sites and sites made only of excluded species.
+        if occu == 1.0 or site.species.chemical_system in explict_element:
+            continue
+
+        serials_by_occu[occu].append(serial)
+
+        label = site.species_string
+        if ':' in label:
+            # e.g. 'S:0.494, Cl:0.506' or 'S2-:0.494, Cl-:0.506'
+            tokens = [piece.strip().split(':')[0].strip() for piece in label.split(',')]
+        else:
+            # single species, possibly with a valence suffix
+            tokens = [label]
+        hits = (symbol_re.match(token) for token in tokens)
+        elements_by_occu[occu] = [hit.group(1) for hit in hits if hit]
+
+    Occupation_list = []
+    for occu, serials in serials_by_occu.items():
+        Occupation_list.append({
+            "occupation": occu,
+            "atom_serial": serials,
+            "numerator": None,
+            "denominator": None,
+            "split": elements_by_occu.get(occu, []),
+        })
+    return Occupation_list
+def merge_structures(structure_list, merge_dict):
+    """
+    Tile several structures sharing one lattice into a single larger cell.
+
+    Args:
+        structure_list: List[Structure]; structure ``i`` is placed at the grid
+            position derived from ``i`` with z varying fastest, then y, then x.
+        merge_dict: Dict[str, int] with the number of cells along "x", "y"
+            and "z" (missing keys count as 1), e.g. {"x": 1, "y": 1, "z": 2}.
+
+    Returns:
+        Structure: the merged structure. Its lattice is rebuilt from the
+        scaled lengths and the original angles.
+
+    Raises:
+        ValueError: if the list is empty, the lattices differ, or the number
+            of structures is not x * y * z.
+    """
+    if not structure_list:
+        raise ValueError("结构列表不能为空")
+
+    ref_lattice = structure_list[0].lattice
+    for other in structure_list[1:]:
+        if not np.allclose(other.lattice.matrix, ref_lattice.matrix):
+            raise ValueError("所有结构的晶格必须相同")
+
+    nx = merge_dict.get("x", 1)
+    ny = merge_dict.get("y", 1)
+    nz = merge_dict.get("z", 1)
+    total_merge = nx * ny * nz
+    if len(structure_list) != total_merge:
+        raise ValueError(f"结构数量({len(structure_list)})与合并次数({total_merge})不匹配")
+
+    a, b, c = ref_lattice.abc
+    alpha, beta, gamma = ref_lattice.angles
+    new_lattice = Lattice.from_parameters(a * nx, b * ny, c * nz, alpha, beta, gamma)
+
+    counts = (nx, ny, nz)
+    species_list = []
+    coords_list = []
+    for i, structure in enumerate(structure_list):
+        # Grid position of this copy inside the merged cell.
+        shifts = ((i // (ny * nz)) % nx, (i // nz) % ny, i % nz)
+        for site in structure:
+            coords = site.frac_coords.copy()
+            # Shift into the copy's cell, then rescale to the enlarged lattice.
+            for axis in range(3):
+                coords[axis] = (coords[axis] + shifts[axis]) / counts[axis]
+            species_list.append(site.species)
+            coords_list.append(coords)
+
+    return Structure(new_lattice, species_list, coords_list)
+def generate_structure_list(base_structure,occupation_list,explict_element=["Li","Li+"]):
+    """
+    Build one ordered copy of ``base_structure`` per unit of the common denominator.
+
+    For every listed site, ``mark_atoms_randomly`` picks which copies keep the
+    site's first non-excluded element. In the remaining copies the site is
+    replaced by a zero-occupancy Li+ entry (when ``entry["split"]`` has one
+    element) or by the second element of ``entry["split"]``, which is given
+    the oxidation state extracted for the first element.
+
+    Args:
+        base_structure: structure to copy; only ``copy()`` is used when
+            ``occupation_list`` is empty.
+        occupation_list: entries carrying "numerator", "denominator",
+            "atom_serial" (1-based site numbers) and "split".
+        explict_element: species ignored when choosing the site's element.
+
+    Returns:
+        list of structures; its length is the "denominator" of the first
+        entry, or 1 when ``occupation_list`` is empty.
+    """
+    if not occupation_list:
+        return [base_structure.copy()]
+
+    copies = [base_structure.copy() for _ in range(occupation_list[0]["denominator"])]
+
+    for entry in occupation_list:
+        numerator = entry["numerator"]
+        denominator = entry["denominator"]
+        for atom_idx in entry["atom_serial"]:
+            slot = atom_idx - 1  # serials are 1-based, site lists 0-based
+            occupancy_dict = mark_atoms_randomly(numerator=numerator,denominator=denominator)
+            original_site = base_structure.sites[slot]
+            element = get_first_non_explicit_element(original_site.species_string,explict_element)
+
+            for copy_idx, occupy in occupancy_dict.items():
+                target = copies[copy_idx]
+                # Drop the disordered site; an ordered one goes back at the same index.
+                target.remove_sites([slot])
+                oxi_state = extract_oxi_state(original_site.species_string,element)
+                single_species = len(entry["split"]) == 1
+
+                if occupy:
+                    replacement = Species(element, oxi_state)
+                elif single_species:
+                    # Vacant copy of a single-species site: zero-occupancy Li+ placeholder.
+                    replacement = {Species("Li", 1.0):0.0}
+                else:
+                    replacement = Species(entry['split'][1], oxi_state)
+
+                new_site = PeriodicSite(
+                    species=replacement,
+                    coords=original_site.frac_coords,
+                    lattice=target.lattice,
+                    to_unit_cell=True,
+                    label=original_site.label
+                )
+                target.sites.insert(slot, new_site)
+
+    return copies
+def expansion(input_file,output_folder,keep_number,calculate_type='high',keep_module=None):
+    """
+    Expand a partially occupied CIF into ordered supercells and write them as CIF files.
+
+    Args:
+        input_file: path of the CIF file to read.
+        output_folder: folder receiving the generated CIF files; it is
+            created when missing.
+        keep_number: maximum number of supercell shapes to write.
+        calculate_type: precision mode passed to ``calculate_expansion_factor``.
+        keep_module: ``'classify'`` writes "<name>.cif" and returns right
+            after the first shape; ``'random'`` writes "<name>-x?y?z?.cif"
+            and collects the results; anything else only writes
+            "<name>-x?y?z?.cif".
+
+    Returns:
+        In ``'classify'`` mode, the first merged structure (if any shape was
+        processed). Otherwise the tuple ``(mergeds, names)``, which is filled
+        only in ``'random'`` mode and stays ``([], [])`` in the other cases.
+    """
+    structure_origin = Structure.from_file(input_file)
+    lmp,oc_list = calculate_expansion_factor(process_cif_file(structure_origin),calculate_type=calculate_type)
+    strategy = strategy_divide(structure_origin,lmp,keep_module)
+    st_list = generate_structure_list(structure_origin,oc_list)
+    # File name without directory and extension.
+    base_name = os.path.splitext(os.path.basename(input_file))[0]
+
+    # Writing the CIF fails when the target folder does not exist yet.
+    if output_folder:
+        os.makedirs(output_folder, exist_ok=True)
+
+    mergeds=[]
+    names=[]
+    for index in range(min(keep_number, len(strategy))):
+        factors = strategy[index]
+        merged = merge_structures(st_list, factors)
+
+        suffix = "x{}y{}z{}".format(factors["x"], factors["y"], factors["z"])
+        if keep_module in ('classify', 'random'):
+            print(f"{base_name}采用扩展方式为{suffix}")
+        if keep_module=='classify':
+            output_filename = f"{base_name}.cif"
+        else:
+            output_filename = f"{base_name}-{suffix}.cif"
+        output_path = os.path.join(output_folder, output_filename)
+
+        merged.to(filename=output_path, fmt="cif")
+
+        print(f"Saved: {output_path}")
+        if keep_module=='classify':
+            # Only one structure is wanted in this mode.
+            return merged
+        if keep_module=='random':
+            mergeds.append(merged)
+            names.append(output_filename)
+    return mergeds,names
+
+
+if __name__ == "__main__":
+    # Manual run: expand one sample CIF in 'random' mode, keeping at most three shapes.
+    # The commented lines below are alternative invocations kept for reference.
+    #expansion("../data/tmp/36.cif","../data/tmp",1,calculate_type='low')
+    expansion("../data/input_ClBr_set/36.cif", "../data/tmp", 3, calculate_type='low',keep_module='random')
+    #expansion("../data/input/1234.cif", "../data/input/output", 1, calculate_type='low',keep_module='classify')
+    # s1 = Structure.from_file("../data/input_pre/mp-6783.cif")
+    # s2 = Structure.from_file("../data/input_pre/ICSD_1234.cif")
+    # print(process_cif_file(s2))
+    # lmp,oc_list=calculate_expansion_factor(process_cif_file(s2))
+    # print(oc_list)
+    # strategy = strategy_divide(s2,lmp)
+    # print(strategy)
+    # st_list=generate_structure_list(s2,oc_list)
+    # merged = merge_structures(st_list,strategy[0])
+    # # merged = merge_structures([s1, s2], {"x": 1, "y": 1, "z": 2})
+    # merged.to("merged.cif", "cif")  # save the merged structure
--- a/py/make_sh.py
+++ b/py/make_sh.py
@@ -0,0 +1,156 @@
+import os
+
+
+def creat_sh(input_folder, anion, sh_file_path='analyze.sh'):
+    """
+    Write a shell script with one analysis command per matching CIF file.
+
+    Only two kinds of file names in ``input_folder`` are picked up:
+    1. all-digit names (e.g. 123.cif)
+    2. digit-plus-shape names (e.g. 123-x1y2z3.cif)
+
+    Args:
+        input_folder: folder that is scanned (not recursively).
+        anion: anion name; selects the "<anion>.yaml" file in the command.
+        sh_file_path: path of the script to write (overwritten if present).
+    """
+    import re
+
+    wanted = (
+        re.compile(r'^\d+\.cif$'),  # <digits>.cif
+        re.compile(r'^\d+-x\d+y\d+z\d+\.cif$'),  # <digits>-x<d>y<d>z<d>.cif
+    )
+
+    with open(sh_file_path, 'w') as script:
+        script.write('#!/bin/bash\n')
+
+        for entry in os.listdir(input_folder):
+            # Regular files only; directories are ignored.
+            if not os.path.isfile(os.path.join(input_folder, entry)):
+                continue
+            if not any(rx.match(entry) for rx in wanted):
+                continue
+            script.write(
+                f"python ../../../tool/analyze_voronoi_nodes.py {entry} -i ../../../tool/{anion}.yaml > {entry}.txt\n"
+            )
+
+    print(f"SH脚本已生成：{sh_file_path}")
+
+
+import os
+
+
+def create_sh_recursive(base_folder, tool_path="tool", relative_depth=2):
+    """
+    Walk a folder tree and write an analyze.sh into every folder holding .cif files.
+
+    A sh_all.sh that enters each of those folders and runs its analyze.sh is
+    written into ``base_folder`` and made executable. Folders whose name
+    contains "+" get one sub-folder per "+"-separated part (created when
+    missing), and those sub-folders are visited as well.
+
+    Args:
+        base_folder: root folder of the walk.
+        tool_path: tool directory name appended after the "../" prefix.
+        relative_depth: number of "../" used at the root; one more is added
+            for each level below it.
+    """
+    # Folders (relative to base_folder) that received an analyze.sh.
+    script_dirs = []
+
+    def visit(folder, depth=0):
+        print(f"处理文件夹: {folder}")
+
+        # The folder name doubles as the anion name in the generated commands.
+        name = os.path.basename(folder)
+
+        holds_cif = any(
+            entry.endswith('.cif')
+            for entry in os.listdir(folder)
+            if os.path.isfile(os.path.join(folder, entry))
+        )
+        if holds_cif:
+            # One extra "../" for every level below the base folder.
+            tool_dir = f"{'../' * (relative_depth + depth)}{tool_path}"
+            script_path = os.path.join(folder, "analyze.sh")
+
+            with open(script_path, 'w') as script:
+                script.write('#!/bin/bash\n')
+                script.writelines(
+                    f"python {tool_dir}/analyze_voronoi_nodes.py {entry} -i {tool_dir}/{name}.yaml > {entry}.txt\n"
+                    for entry in os.listdir(folder)
+                    if os.path.isfile(os.path.join(folder, entry)) and entry.endswith('.cif')
+                )
+
+            script_dirs.append(os.path.relpath(folder, base_folder))
+            print(f"生成脚本: {script_path} (工具路径: {tool_dir})")
+
+        children = [
+            entry for entry in os.listdir(folder)
+            if os.path.isdir(os.path.join(folder, entry))
+        ]
+
+        # Mixed-anion folder ("A+B"): make sure a sub-folder exists for each part.
+        if "+" in name:
+            for part in name.split("+"):
+                part_dir = os.path.join(folder, part)
+                if not os.path.exists(part_dir):
+                    os.makedirs(part_dir)
+                    print(f"创建子文件夹: {part_dir}")
+                if part not in children:
+                    children.append(part)
+
+        for child in children:
+            visit(os.path.join(folder, child), depth + 1)
+
+    visit(base_folder)
+
+    # Driver script: enter each folder, run its analyze.sh, come back.
+    sh_all_path = os.path.join(base_folder, "sh_all.sh")
+    with open(sh_all_path, 'w') as sh_all:
+        sh_all.write('#!/bin/bash\n\n')
+        sh_all.write('# process all analyze.sh\n\n')
+        sh_all.write('# remember current dir\n')
+        sh_all.write('INITIAL_DIR=$(pwd)\n\n')
+
+        for rel_dir in script_dirs:
+            sh_all.write(f'echo "process {rel_dir}/analyze.sh"\n')
+            sh_all.write(f'cd "{rel_dir}"\n')
+            sh_all.write('bash analyze.sh\n')
+            sh_all.write('cd "$INITIAL_DIR"\n\n')
+
+        sh_all.write('echo "done!"\n')
+
+    # Make the driver script executable.
+    os.chmod(sh_all_path, 0o755)
+    print(f"生成总执行脚本: {sh_all_path}")
+    print("所有脚本生成完成！")
+# 示例调用
+# create_sh_recursive("../data/after_step1")
+
+if __name__ == '__main__':
+    # Older per-anion invocations, kept for reference; the recursive walk below is what runs.
+    # creat_sh("../data/after_step1/O","O","../data/after_step1/O/analyze.sh")
+    # creat_sh("../data/after_step1/S","S","../data/after_step1/S/analyze.sh")
+    # creat_sh("../data/after_step1/Cl","Cl","../data/after_step1/Cl/analyze.sh")
+    # creat_sh("../data/after_step1/Br","Br","../data/after_step1/Br/analyze.sh")
+    create_sh_recursive("../data/after_step1")
--- a/py/pre_process.py
+++ b/py/pre_process.py
@@ -0,0 +1,131 @@
+import re
+import os
+from pymatgen.core.structure import Structure
+from pymatgen.core.periodic_table import Element
+import yaml
+from pymatgen.core.periodic_table import Specie
+from expansion import expansion
+from expansion import process_cif_file
+def generate_valence_yaml(output_yaml_path):
+    """
+    Write a YAML mapping from element symbol to one representative oxidation state.
+
+    Elements without common oxidation states are left out. Metals and
+    metalloids get the largest of their common states, all other elements
+    the smallest.
+
+    Parameters:
+        output_yaml_path (str): Path of the YAML file to write.
+    """
+    valences = {}
+    for element in Element:
+        states = element.common_oxidation_states
+        if not states:
+            continue
+        # Highest state for metals/metalloids, lowest (most negative) otherwise.
+        pick = max if (element.is_metalloid or element.is_metal) else min
+        valences[element.symbol] = pick(states)
+
+    with open(output_yaml_path, "w") as file:
+        yaml.dump(valences, file, default_flow_style=False)
+
+
+def apply_oxidation_states_to_cif(input_cif_path, valence_yaml_path, output_cif_path,calculate_type='low',output_folder = None):
+    """
+    Write oxidation-state-decorated CIF output for one input CIF.
+
+    If ``process_cif_file`` reports partially occupied sites, the structure
+    is first expanded in 'random' mode (up to three supercell shapes) and
+    every expanded structure is written to ``output_folder`` under the file
+    name chosen by ``expansion``. Otherwise the structure itself is written
+    to ``output_cif_path``. Oxidation states from the YAML file are added
+    only when a structure contains species that lack them.
+
+    Args:
+        input_cif_path: CIF file to read.
+        valence_yaml_path: YAML file mapping element symbol to oxidation state.
+        output_cif_path: target file for the fully occupied case.
+        calculate_type: precision mode forwarded to ``expansion``.
+        output_folder: target folder for expanded structures; defaults to
+            the directory of ``output_cif_path`` when ``None``.
+    """
+    structure = Structure.from_file(input_cif_path)
+    oxi = process_cif_file(structure)
+
+    if not oxi:
+        _ensure_oxidation_states(structure, valence_yaml_path)
+        structure.to(filename=output_cif_path)
+        return
+
+    if output_folder is None:
+        # os.path.join(None, name) would raise TypeError; write next to output_cif_path.
+        output_folder = os.path.dirname(output_cif_path)
+
+    # NOTE(review): the intermediate folder for the expansion step is hard-coded.
+    structures,names = expansion(input_cif_path,'../data/input_oxidation',3,calculate_type=calculate_type,keep_module='random')
+    for expanded, name in zip(structures, names):
+        _ensure_oxidation_states(expanded, valence_yaml_path)
+        expanded.to(filename=os.path.join(output_folder, name))
+
+
+def _ensure_oxidation_states(structure, valence_yaml_path):
+    """Add YAML-defined oxidation states in place unless every species already is a ``Specie``."""
+    has_oxidation = all(
+        all(isinstance(sp, Specie) for sp in site.species.keys())
+        for site in structure.sites
+    )
+    if not has_oxidation:
+        # The YAML file is read only when it is actually needed.
+        with open(valence_yaml_path, "r") as file:
+            valences = yaml.safe_load(file)
+        structure.add_oxidation_state_by_element(valences)
+def data_add_state(input_folder, valence_yaml_path, output_folder,output_occupatition_folder=None,calculate_type='normal'):
+    """
+    Run ``apply_oxidation_states_to_cif`` on every ``.cif`` file of a folder.
+
+    The output file is named after the first run of digits in the input file
+    name ("<digits>.cif"); files without digits are skipped. A failure on one
+    file is reported and does not stop the batch.
+
+    Args:
+        input_folder: folder with the input CIF files; nothing happens
+            (apart from a message) when it does not exist.
+        valence_yaml_path: YAML file with the valences to apply.
+        output_folder: folder for the results; created when missing.
+        output_occupatition_folder: accepted but not used by this function.
+        calculate_type: precision mode forwarded to the per-file step.
+    """
+    if not os.path.exists(input_folder):
+        print(f"{input_folder} 文件夹不存在")
+        return
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+        print(f"目录 {output_folder} 已创建")
+
+    for filename in os.listdir(input_folder):
+        if not filename.endswith(".cif"):
+            continue
+        file_path = os.path.join(input_folder, filename)
+
+        # The first run of digits in the name becomes the output file name.
+        match = re.search(r'\d+', filename)
+        if not match:
+            print(f"文件名 {filename} 中未找到数字部分，跳过处理")
+            continue
+        output_cif_path = os.path.join(output_folder, match.group(0) + ".cif")
+
+        print(f"正在处理{filename}")
+        try:
+            apply_oxidation_states_to_cif(file_path, valence_yaml_path, output_cif_path,calculate_type=calculate_type,output_folder=output_folder)
+        except Exception as e:
+            # Best-effort batch: report the cause and continue with the next file.
+            print(f"{filename}出现问题！{type(e).__name__}: {e}")
+        else:
+            # Reported only when the file really was processed.
+            print(f"{filename} 已完成")
+
+
+
+if __name__ == "__main__":
+    # Example usage:
+    # Step 1: Path of the valence YAML file. It is only referenced here;
+    # generate_valence_yaml() is not called, so the file has to exist already.
+    valence_yaml = "../tool/valence_states.yaml"
+
+    # Step 2: Process CIF files in the input folder
+    data_add_state("../data/input_pre", valence_yaml, "../data/input","../data/input_oxidation")
+    # Step 3: Process Occupation (disabled)
+    # data_process_Occupatiton("")
--- a/py/step1.py
+++ b/py/step1.py
@@ -0,0 +1,54 @@
+from pymatgen.core import Structure
+from pymatgen.core.periodic_table import Element, Specie
+from pymatgen.io.cif import CifWriter
+
+from crystal_2 import crystal
+import crystal_2
+import os
+import shutil
+from pymatgen.io.cif import CifWriter
+
+def read_files_check_basic(folder_path, output_root="../data/after_step1"):
+    """
+    Run the basic check on every file of a folder and copy the files that pass.
+
+    A passing file is copied to "<output_root>/<anion>/". When the anion
+    string contains "+", it is copied once per part to
+    "<output_root>/<anion>/<part>/" instead.
+
+    Args:
+        folder_path: folder whose regular files are loaded with ``crystal``.
+        output_root: root folder receiving the copies.
+
+    Returns:
+        list of the ``crystal`` objects that could be constructed; empty when
+        the folder does not exist.
+    """
+    file_contents = []
+
+    if not os.path.exists(folder_path):
+        print(f"{folder_path} 文件夹不存在")
+        return file_contents
+
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+        if not os.path.isfile(file_path):
+            continue
+
+        try:
+            temp = crystal(file_path)
+        except Exception as e:
+            # Skip unreadable files; carrying on would reuse the previous
+            # file's object (or hit an unbound name on the first file).
+            print(e)
+            continue
+        file_contents.append(temp)
+
+        print(f"正在处理{filename}")
+        temp.check_basic()
+        if not temp.check_basic_result:
+            continue
+
+        anion_root = os.path.join(output_root, f"{temp.anion}")
+        if "+" in temp.anion:
+            # Mixed anions: one copy per individual anion sub-folder.
+            target_folders = [os.path.join(anion_root, anion) for anion in temp.anion.split("+")]
+        else:
+            target_folders = [anion_root]
+
+        for target_folder in target_folders:
+            os.makedirs(target_folder, exist_ok=True)
+            shutil.copy(file_path, os.path.join(target_folder, filename))
+            print(f"文件 {filename}通过基本筛选，已复制到 {target_folder}")
+
+    return file_contents
+# NOTE(review): there is no __main__ guard, so this call also runs when the module is imported.
+read_files_check_basic("../data/input")
--- a/py/step2-5-file_process.py
+++ b/py/step2-5-file_process.py
@@ -0,0 +1,121 @@
+from step2 import process_files as step2_process
+from step3 import process_files as step3_process
+from step4 import process_files as step4_process
+from step5 import read_files_check_partical as step5_process
+import os
+import shutil
+
+import os
+import shutil
+
+
+def create_empty_directory_structure(source_dir, target_dir):
+    """Mirror the folder tree of ``source_dir`` under ``target_dir``.
+
+    Only directories are recreated; files in the source tree are ignored.
+    Errors raised while walking a directory are printed and stop the
+    processing of that directory only.
+
+    Args:
+        source_dir (str): Root of the tree to mirror.
+        target_dir (str): Root under which the empty folders are created.
+
+    Returns:
+        int: Number of folders created by this call (``target_dir`` itself
+        is counted when it had to be created).
+
+    Raises:
+        FileNotFoundError: If ``source_dir`` does not exist.
+    """
+    if not os.path.exists(source_dir):
+        raise FileNotFoundError(f"源文件夹不存在: {source_dir}")
+
+    created_count = 0
+
+    # Make sure the root of the mirror exists before descending.
+    if not os.path.exists(target_dir):
+        print(f"目标文件夹不存在，正在创建: {target_dir}")
+        os.makedirs(target_dir)
+        created_count += 1
+
+    def mirror(src, dst):
+        """Recreate the sub-folders of ``src`` inside ``dst``, depth first."""
+        nonlocal created_count
+
+        try:
+            for entry in os.listdir(src):
+                src_child = os.path.join(src, entry)
+                if not os.path.isdir(src_child):
+                    # Plain files are skipped: only the layout is wanted.
+                    continue
+
+                dst_child = os.path.join(dst, entry)
+                if not os.path.exists(dst_child):
+                    os.makedirs(dst_child)
+                    created_count += 1
+                    print(f"创建文件夹: {dst_child}")
+                mirror(src_child, dst_child)
+        except PermissionError:
+            print(f"无权限访问目录: {src}")
+        except Exception as e:
+            print(f"处理目录 {src} 时出错: {str(e)}")
+
+    try:
+        mirror(source_dir, target_dir)
+        print(f"已成功在 {target_dir} 中创建 {created_count} 个文件夹，复制了完整的目录结构")
+    except Exception as e:
+        print(f"整体操作失败: {str(e)}")
+    return created_count
+if __name__ == "__main__":
+    # Pre-create the per-anion folder layout of step 1 for stages 2 to 6.
+    for stage in range(2, 7):
+        create_empty_directory_structure("../data/after_step1", f"../data/after_step{stage}")
+
+    # Each entry of after_step1 is named after an anion ("S") or a
+    # "+"-joined combination of anions ("S+O").
+    for anion_dir in os.listdir("../data/after_step1"):
+        step1_dir = os.path.join("../data/after_step1", anion_dir)
+        step2_dir = os.path.join("../data/after_step2", anion_dir)
+        print('-------------------')
+
+        if "+" not in anion_dir:
+            # Single-anion folder: the files live directly inside it.
+            print(f"处理单一元素文件：{anion_dir}")
+            step3_dir = os.path.join("../data/after_step3", anion_dir)
+            step4_dir = os.path.join("../data/after_step4", anion_dir)
+            step5_dir = os.path.join("../data/after_step5", anion_dir)
+            print('正在做第二步筛选')
+            step2_process(step1_dir, step2_dir, anion_dir)
+            print('正在做第三步筛选')
+            step3_process(step1_dir, step2_dir, step3_dir, anion_dir)
+            print('正在做第四步筛选')
+            step4_process(step1_dir, step3_dir, step4_dir, anion_dir)
+            print('正在做第五步筛选')
+            step5_process(step4_dir, step5_dir)
+            continue
+
+        # Multi-anion folder: one sub-folder per element, each run through
+        # the same chain with that element's thresholds.
+        elements = anion_dir.split("+")
+        print(f"处理多元素文件 {anion_dir}，拆分为：{elements}")
+        for element in elements:
+            print(element)
+            element_step1 = os.path.join(step1_dir, element)
+            element_step2 = os.path.join(step2_dir, element)
+            element_step3 = os.path.join(f"../data/after_step3/{anion_dir}", element)
+            element_step4 = os.path.join(f"../data/after_step4/{anion_dir}", element)
+            element_step5 = os.path.join(f"../data/after_step5/{anion_dir}", element)
+            print('正在做第二步筛选')
+            step2_process(element_step1, element_step2, element)
+            print('正在做第三步筛选')
+            step3_process(element_step1, element_step2, element_step3, element)
+            print('正在做第四步筛选')
+            step4_process(element_step1, element_step3, element_step4, element)
+            print('正在做第五步筛选')
+            step5_process(element_step4, element_step5)
+
+
+
+
+
+
--- a/py/step2.py
+++ b/py/step2.py
@@ -0,0 +1,80 @@
+import os
+import re
+import shutil
+
+
+def process_files(cif_folder, output_folder, anion):
+    """Copy files whose reported percolation diameter exceeds the anion threshold.
+
+    Every ``*.txt`` file in ``cif_folder`` is searched for
+    ``Percolation diameter (A): <number>``.  The first match is compared with
+    the threshold of ``anion``; on success the file named like the txt file
+    minus its ``.txt`` suffix is copied from ``cif_folder`` to
+    ``output_folder`` (if it exists).
+
+    Args:
+        cif_folder: Folder holding both the txt reports and the files to copy.
+        output_folder: Destination folder; created when missing.
+        anion: One of ``"O"``, ``"S"``, ``"Br"``, ``"Cl"``.  Any other value
+            never passes, so nothing is copied.
+    """
+    # Lower bound of the percolation diameter (A) for each supported anion.
+    thresholds = {"O": 0.5, "S": 0.55, "Br": 0.45, "Cl": 0.45}
+    pattern = re.compile(r"Percolation diameter \(A\): (\d+\.\d+)")
+
+    # Make sure the destination exists before anything is copied.
+    os.makedirs(output_folder, exist_ok=True)
+
+    for report_name in os.listdir(cif_folder):
+        if not report_name.endswith('.txt'):
+            continue
+
+        with open(os.path.join(cif_folder, report_name), 'r', encoding='utf-8') as handle:
+            found = pattern.findall(handle.read())
+        if not found or anion not in thresholds:
+            continue
+
+        # Report name without the ".txt" suffix is the name of the file to copy.
+        base_name = os.path.splitext(report_name)[0]
+        diameter = found[0]
+        print(f"{base_name}的perconlation diameter为{diameter}A")
+        if not float(diameter) > thresholds[anion]:
+            print("不符合要求")
+            continue
+        print("符合要求")
+
+        cif_path = os.path.join(cif_folder, base_name)
+        if os.path.exists(cif_path):
+            shutil.copy(cif_path, os.path.join(output_folder, base_name))
+            print(f"Copied {base_name} to {output_folder}")
+
+
+def work_py(input_folder, output_folder):
+    """Run ``process_files`` for every anion folder found in ``input_folder``.
+
+    The folder name is used as the anion label.  A name containing ``+``
+    (for example ``"S+O"``) is treated as a multi-anion folder: each element
+    is processed from its own sub-folder with that element as the label.
+    Entries that are not directories are skipped.
+
+    Args:
+        input_folder: Folder whose sub-folders are named after anions.
+        output_folder: Root folder receiving one sub-folder per anion.
+    """
+    if not os.path.exists(output_folder):
+        print("not exists")
+    for filename in os.listdir(input_folder):
+        from_folder = os.path.join(input_folder, filename)
+        if not os.path.isdir(from_folder):
+            # process_files lists its input folder, so a plain file would fail.
+            continue
+        target_folder = os.path.join(output_folder, filename)
+        if "+" in filename:
+            for element in filename.split("+"):
+                process_files(
+                    os.path.join(from_folder, element),
+                    os.path.join(target_folder, element),
+                    element,
+                )
+        else:
+            # process_files requires the anion label; it is the folder name.
+            process_files(from_folder, target_folder, filename)
+
+if __name__ == "__main__":
+    # Run step 2 for the Cl and Br folders; each folder is screened with the
+    # threshold of its own anion.  The O, S and whole-tree runs are kept
+    # below, disabled.
+    # work_py("../data/after_step1","../data/after_step2" )
+    # process_files("../data/after_step1/O", "../data/after_step2/O", "O")
+    # process_files("../data/after_step1/S", "../data/after_step2/S", "S")
+    process_files("../data/after_step1/Cl", "../data/after_step2/Cl", "Cl")
+    process_files("../data/after_step1/Br", "../data/after_step2/Br", "Br")
--- a/py/step3.py
+++ b/py/step3.py
@@ -0,0 +1,72 @@
+import os
+import re
+import shutil
+
+
+def process_files(cif_folder,input_folder,output_folder, anion):
+    """Copy files whose reported minimum distance is below 3.
+
+    Every ``*.txt`` file in ``cif_folder`` is searched for
+    ``the minium of d <number> #``.  When the first match is smaller than 3
+    and ``anion`` is one of the supported labels, the file named like the txt
+    file minus its ``.txt`` suffix is copied from ``input_folder`` to
+    ``output_folder`` (if it exists there).
+
+    Args:
+        cif_folder: Folder containing the txt reports.
+        input_folder: Folder from which passing files are copied.
+        output_folder: Destination folder; created when missing.
+        anion: One of ``"O"``, ``"S"``, ``"Cl"``, ``"Br"``.  Any other value
+            never passes, so nothing is copied.
+    """
+    supported_anions = ("O", "S", "Cl", "Br")
+    distance_limit = 3  # same upper bound for every supported anion
+    pattern = re.compile(r"the minium of d\s+([\d\.]+)\s*#")
+
+    # Make sure the destination exists before anything is copied.
+    os.makedirs(output_folder, exist_ok=True)
+
+    for report_name in os.listdir(cif_folder):
+        if not report_name.endswith('.txt'):
+            continue
+
+        with open(os.path.join(cif_folder, report_name), 'r', encoding='utf-8') as handle:
+            found = pattern.findall(handle.read())
+        if not found or anion not in supported_anions:
+            continue
+
+        # Report name without the ".txt" suffix is the name of the file to copy.
+        base_name = os.path.splitext(report_name)[0]
+        distance = found[0]
+        print(f"{base_name}的最短距离为{distance}A")
+        if not float(distance) < distance_limit:
+            print("不符合要求")
+            continue
+        print("符合要求")
+
+        cif_path = os.path.join(input_folder, base_name)
+        if os.path.exists(cif_path):
+            shutil.copy(cif_path, os.path.join(output_folder, base_name))
+            print(f"Copied {base_name} to {output_folder}")
+
+
+
+
+
+if __name__ == '__main__':
+    # Step 3 is currently run for Cl and Br only; "O" and "S" use the same
+    # folder layout and can be added to the tuple to include them.
+    for label in ("Cl", "Br"):
+        process_files(
+            f"../data/after_step1/{label}",
+            f"../data/after_step2/{label}",
+            f"../data/after_step3/{label}",
+            label,
+        )
--- a/py/step4.py
+++ b/py/step4.py
@@ -0,0 +1,72 @@
+import os
+import re
+import shutil
+
+
+def process_files(cif_folder,input_folder,output_folder, anion):
+    """Copy files whose reported maximum node length exceeds the anion threshold.
+
+    Every ``*.txt`` file in ``cif_folder`` is searched for
+    ``Maximum node length detected: <number> A``.  The first match is compared
+    with the threshold of ``anion``; on success the file named like the txt
+    file minus its ``.txt`` suffix is copied from ``input_folder`` to
+    ``output_folder`` (if it exists there).
+
+    Args:
+        cif_folder: Folder containing the txt reports.
+        input_folder: Folder from which passing files are copied.
+        output_folder: Destination folder; created when missing.
+        anion: One of ``"O"``, ``"S"``, ``"Cl"``, ``"Br"``.  Any other value
+            never passes, so nothing is copied.
+    """
+    # Lower bound of the reported length (A) for each supported anion.
+    thresholds = {"O": 2.2, "S": 2.2, "Cl": 2, "Br": 2}
+    pattern = re.compile(r"Maximum node length detected: (\d+\.\d+) A")
+
+    # Make sure the destination exists before anything is copied.
+    os.makedirs(output_folder, exist_ok=True)
+
+    for report_name in os.listdir(cif_folder):
+        if not report_name.endswith('.txt'):
+            continue
+
+        with open(os.path.join(cif_folder, report_name), 'r', encoding='utf-8') as handle:
+            found = pattern.findall(handle.read())
+        if not found or anion not in thresholds:
+            continue
+
+        # Report name without the ".txt" suffix is the name of the file to copy.
+        base_name = os.path.splitext(report_name)[0]
+        length = found[0]
+        print(f"{base_name}的扩大锂离子直径为{length}A")
+        if not float(length) > thresholds[anion]:
+            print("不符合要求")
+            continue
+        print("符合要求")
+
+        cif_path = os.path.join(input_folder, base_name)
+        if os.path.exists(cif_path):
+            shutil.copy(cif_path, os.path.join(output_folder, base_name))
+            print(f"Copied {base_name} to {output_folder}")
+
+
+
+
+
+if __name__ == "__main__":
+    # Step 4 is currently run for Cl and Br only; "O" and "S" use the same
+    # folder layout and can be added to the tuple to include them.
+    for label in ("Cl", "Br"):
+        process_files(
+            f"../data/after_step1/{label}",
+            f"../data/after_step3/{label}",
+            f"../data/after_step4/{label}",
+            label,
+        )
--- a/py/step5.py
+++ b/py/step5.py
@@ -0,0 +1,57 @@
+from pymatgen.core import Structure
+from pymatgen.core.periodic_table import Element, Specie
+from pymatgen.io.cif import CifWriter
+
+from crystal_2 import crystal
+import crystal_2
+import os
+import shutil
+from pymatgen.io.cif import CifWriter
+
+
+def read_files_check_partical(input_folder,output_folder):
+    """Copy the files of ``input_folder`` that pass the practical check.
+
+    Each regular file is loaded with ``crystal(path)`` and screened with its
+    ``check_practical()`` method; when ``check_practical_result`` is truthy
+    the file is copied into ``output_folder`` (created on demand).  Any
+    exception raised while handling a file is printed and that file is
+    skipped.
+
+    Args:
+        input_folder: Folder with the candidate files.
+        output_folder: Folder receiving the files that pass.
+
+    Returns:
+        An empty list when ``input_folder`` does not exist, otherwise ``None``.
+    """
+    loaded = []
+    if not os.path.exists(input_folder):
+        print(f"{input_folder} 文件夹不存在")
+        return loaded
+
+    for entry in os.listdir(input_folder):
+        source_file = os.path.join(input_folder, entry)
+        if not os.path.isfile(source_file):
+            continue
+
+        try:
+            candidate = crystal(source_file)
+            loaded.append(candidate)
+            print(f"正在处理{entry}")
+            candidate.check_practical()
+            if not candidate.check_practical_result:
+                continue
+
+            if not os.path.exists(output_folder):
+                os.makedirs(output_folder)
+
+            # Copy the file into the output folder under its original name.
+            shutil.copy(source_file, os.path.join(output_folder, entry))
+            print(f"文件 {entry}通过实际筛选，已复制到 {output_folder}")
+        except Exception as e:
+            print(e)
+
+
+
+
+
+def work_py(input_folder, output_folder):
+    """Run ``read_files_check_partical`` for every entry of ``input_folder``.
+
+    Each entry is processed from ``input_folder/<entry>`` into
+    ``output_folder/<entry>``.  A missing ``output_folder`` is only reported;
+    processing continues.
+    """
+    if not os.path.exists(output_folder):
+        print("not exists")
+    for entry in os.listdir(input_folder):
+        read_files_check_partical(
+            os.path.join(input_folder, entry),
+            os.path.join(output_folder, entry),
+        )
+
+if __name__ == "__main__":
+    # Step 5 is currently run for Cl and Br only; "O" and "S" use the same
+    # folder layout and can be added to the tuple to include them.
+    for label in ("Cl", "Br"):
+        read_files_check_partical(
+            f"../data/after_step4/{label}",
+            f"../data/after_step5/{label}",
+        )
--- a/py/step6.py
+++ b/py/step6.py
@@ -0,0 +1,75 @@
+from pymatgen.core import Structure
+from pymatgen.analysis.structure_matcher import StructureMatcher
+import os
+import shutil
+
+# 定义函数，用于将 CIF 文件进行分类并输出
+
+def structure_classify(input_folder, output_folder):
+    """Group the CIF files of ``input_folder`` by matching structure.
+
+    Every ``.cif`` file is loaded with ``Structure.from_file``.  A structure
+    joins the first existing group whose first member it fits according to
+    ``StructureMatcher().fit``; otherwise it starts a new group.  Group ``i``
+    is written to ``output_folder/group_<i>``: each member is exported as
+    ``structure_<j>.cif`` and its original file is copied next to it.
+
+    :param input_folder: Folder containing the ``.cif`` files.
+    :param output_folder: Folder receiving the ``group_<i>`` sub-folders.
+    """
+    if not os.path.exists(input_folder):
+        print(f"输入文件夹 {input_folder} 不存在")
+        return
+
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
+    # (original filename, structure) pairs, in directory listing order.
+    loaded = []
+    for filename in os.listdir(input_folder):
+        if not filename.endswith(".cif"):
+            continue
+        try:
+            parsed = Structure.from_file(os.path.join(input_folder, filename))
+        except Exception as e:
+            print(f"无法读取文件 {filename}：{e}")
+        else:
+            loaded.append((filename, parsed))
+
+    if not loaded:
+        print("未找到有效的 CIF 文件")
+        return
+
+    matcher = StructureMatcher()
+    groups = []
+    for item in loaded:
+        for group in groups:
+            # Compare against the first member of the group only.
+            if matcher.fit(item[1], group[0][1]):
+                group.append(item)
+                break
+        else:
+            groups.append([item])
+
+    for group_index, group in enumerate(groups):
+        group_folder = os.path.join(output_folder, f"group_{group_index}")
+        os.makedirs(group_folder, exist_ok=True)
+
+        for structure_index, (original_filename, member) in enumerate(group):
+            member.to(filename=os.path.join(group_folder, f"structure_{structure_index}.cif"))
+            # Keep the untouched source file alongside the exported one.
+            shutil.copy(os.path.join(input_folder, original_filename), group_folder)
+
+    print(f"处理完成，分类后的结构已保存到 {output_folder}")
+
+if __name__ == "__main__":
+    # Example run: classify the step-5 results of S, then of O.
+    for label in ("S", "O"):
+        structure_classify(f"../data/after_step5/{label}", f"../data/after_step6/{label}")
--- a/py/utils.py
+++ b/py/utils.py
@@ -0,0 +1,9 @@
+import os
+
+
+def work_py(input_folder, output_folder):
+    """Walk the entries of ``input_folder`` and derive their paired paths.
+
+    Prints ``not exists`` when ``output_folder`` is missing.  For each entry
+    the destination and source paths are computed but not used further.
+    """
+    output_missing = not os.path.exists(output_folder)
+    if output_missing:
+        print("not exists")
+    for entry in os.listdir(input_folder):
+        target_folder = os.path.join(output_folder, entry)
+        from_folder = os.path.join(input_folder, entry)