from fontTools.misc.plistlib import end_string from pymatgen.core import Structure import spglib from fractions import Fraction import random from pymatgen.core.sites import PeriodicSite, Species,Element,Lattice import numpy as np import re def typejudge(number): if number in [1, 2]: return "Triclinic" elif 3 <= number <= 15: return "Monoclinic" elif 16 <= number <= 74: return "Orthorhombic" elif 75 <= number <= 142: return "Tetragonal" elif 143 <= number <= 167: return "Trigonal" elif 168 <= number <= 194: return "Hexagonal" elif 195 <= number <= 230: return "Cubic" else: return "Unknown" def extract_oxi_state(species_string): """ 从 species_string(如 "Li+:0.645")提取氧化态 Args: species_string: 包含元素和氧化态的字符串(如 "Li+:0.645"、"Fe2-:0.5") Returns: int: 氧化态数值(如 "+" -> 1, "2-" -> -2),默认返回 0 如果解析失败 """ # 使用正则表达式匹配化学符号和氧化态(如 Li+, Fe2-) match = re.search(r"([A-Za-z]+)([+-]?\d*[+-])", species_string) if not match: return 0 # 默认中性 # 提取氧化态部分(如 "+", "2-", "3+") oxi_str = match.group(2) # 处理单符号情况(如 "+" -> 1, "-" -> -1) if oxi_str in ("+", "-"): return 1 if oxi_str == "+" else -1 # 处理多数字情况(如 "2+" -> 2, "3-" -> -3) try: num = int(oxi_str[:-1]) # 提取数字部分 sign = 1 if oxi_str[-1] == "+" else -1 return num * sign except (ValueError, IndexError): return 0 # 解析失败时返回中性 def process_cif_file(cif_file_path,explict_element): structure = Structure.from_file(cif_file_path) result_list = [] for index, site in enumerate(structure.sites, start=1): occupancy = site.species.element_composition.num_atoms species = site.species.chemical_system if species in explict_element: break if occupancy < 1.0: entry = next((r for r in result_list if r["species"] == species and r["occupancy"] == occupancy), None) if entry: entry["number"].append(index) else: result_list.append({ "species": species, "number": [index], "occupancy": occupancy }) return result_list def factorize_to_three_factors(n,type_sym=None): factors = [] # 遍历可能的x值 if type_sym == None: for x in range(1, n + 1): if n % x == 0: remaining_n = n // x # 遍历可能的y值 for y in range(1, remaining_n + 1): if remaining_n % y == 0: z = remaining_n // y factors.append({'x': x, 'y': y, 'z': z}) if type_sym == "xyz": for x in range(1, n + 1): if n % x == 0: remaining_n = n // x # 遍历可能的y值 for y in range(1, remaining_n + 1): if remaining_n % y == 0 and y <= x: z = remaining_n // y if z <= y: factors.append({'x': x, 'y': y, 'z': z}) def sum_score(factor): x, y, z = factor['x'], factor['y'], factor['z'] return x + y + z # 按照sum_score从小到大排序 sorted_factors = sorted(factors, key=sum_score) return sorted_factors def calculate_supercell_factor(occupancy): # 将浮点数转换为分数形式 fraction = Fraction(occupancy).limit_denominator() # 获取分子和分母 numerator = fraction.numerator denominator = fraction.denominator return numerator,denominator def mark_atoms_randomly(factors, atom_number): """ 根据扩胞因子和占据数量生成随机占据字典 Args: factors: 扩胞因子字典 {'x': int, 'y': int, 'z': int} atom_number: 需要占据的副本数量 Returns: 字典 {0: 1或0, 1: 1或0, ..., total_copies-1: 1或0} """ x, y, z = factors['x'], factors['y'], factors['z'] total_copies = x * y * z if atom_number > total_copies: raise ValueError(f"atom_number ({atom_number}) 不能超过扩胞总数 (x*y*z = {total_copies})") # 生成所有副本索引 [0, 1, 2, ..., total_copies-1] atom_dice = list(range(total_copies)) # 随机选择 atom_number 个副本占据 selected_atoms = random.sample(atom_dice, atom_number) # 创建结果字典 {0: 1或0, 1: 1或0, ...} result = {atom: 1 if atom in selected_atoms else 0 for atom in atom_dice} return result def generate_random_list(total_elements, atom_number): # 确保 atom_number 不超过 total_elements if atom_number > total_elements: raise ValueError("atom_number cannot be greater than the total number of elements (x * y * z)") # 创建一个全0的列表 result = [0] * total_elements # 随机选择 atom_number 个位置,并将这些位置的值设为1 indices = random.sample(range(total_elements), atom_number) for index in indices: result[index] = 1 return result def merge_structures(struct_copies, factors): """ 将多个副本结构按三维顺序合并为扩胞后的结构 Args: struct_copies: 副本结构列表(长度 = x*y*z) factors: 扩胞因子字典 {"x": int, "y": int, "z": int} Returns: 合并后的扩胞结构 """ x, y, z = factors["x"], factors["y"], factors["z"] total_copies = x * y * z if len(struct_copies) != total_copies: raise ValueError("副本数量与扩胞因子不匹配") # 获取原结构的晶格 original_lattice = struct_copies[0].lattice # 创建扩胞后的新晶格(直接按倍数缩放) new_lattice_matrix = np.dot(original_lattice.matrix, np.diag([x, y, z])) new_lattice = Lattice(new_lattice_matrix) # 初始化合并后的结构 merged_structure = Structure( lattice=new_lattice, species=[], coords=[], coords_are_cartesian=False ) # 按三维顺序填充每个副本的原子 for copy_idx in range(total_copies): # 计算当前副本的分数坐标偏移量 offset = np.array([ copy_idx // (y * z), # x方向偏移 (copy_idx % (y * z)) // z, # y方向偏移 copy_idx % z # z方向偏移 ]) # 将当前副本的原子添加到合并结构中(考虑偏移) for site in struct_copies[copy_idx]: if site.species: # 跳过空位 merged_structure.append( species=site.species, coords=site.frac_coords + offset, coords_are_cartesian=False, properties=site.properties ) return merged_structure def expand_structure(structure, factors, atom_indices, atom_number): # 参数检查 x, y, z = factors['x'], factors['y'], factors['z'] total_copies = x * y * z if not all(1 <= idx <= len(structure.sites) for idx in atom_indices): raise ValueError("atom_indices包含无效原子索引") # 生成独立副本 struct_copies = [structure.copy() for _ in range(total_copies)] atom_dice = list(range(total_copies)) # 所有副本索引 [0,1,2,...] # 处理每个目标原子 for atom_idx in atom_indices: original_site = structure.sites[atom_idx - 1] element = original_site.species.chemical_system # 生成当前原子的占据字典(如{0:1, 1:0, 2:1,...}) occupancy_dict = mark_atoms_randomly(factors,atom_number) # 修改每个副本 for copy_idx, occupy in occupancy_dict.items(): # 或者方法2:使用remove/insert struct_copies[copy_idx].remove_sites([atom_idx - 1]) oxi_state = extract_oxi_state(original_site.species_string) if occupy: new_site = PeriodicSite( species=Species(element, oxi_state), coords=original_site.frac_coords, lattice=struct_copies[copy_idx].lattice, to_unit_cell=True, label=original_site.label ) struct_copies[copy_idx].sites.insert(atom_idx - 1, new_site) else: species_dict = {Species(element, oxi_state): 0.0} new_site = PeriodicSite( species = species_dict, coords=original_site.frac_coords, lattice=struct_copies[copy_idx].lattice, to_unit_cell=True, label=original_site.label ) struct_copies[copy_idx].sites.insert(atom_idx - 1, new_site) # 合并副本 expanded_structure = Structure( lattice=np.dot(structure.lattice.matrix, np.diag([x, y, z])), species=[], coords=[], coords_are_cartesian=False ) for copy_idx in range(total_copies): offset = np.array([ copy_idx // (y * z), (copy_idx % (y * z)) // z, copy_idx % z ]) for site in struct_copies[copy_idx]: if site.species: # 只添加非空位 expanded_structure.append( species=site.species, coords=site.frac_coords + offset, coords_are_cartesian=False, properties=site.properties ) expanded_structure = merge_structures(struct_copies,factors) return expanded_structure def process_occupation(input_file,output_file,explict_element = ["Li"],expect_cifnumber = 10,random_time=1): struct = Structure.from_file(input_file) space_group_info = struct.get_space_group_info() space_group_symbol = space_group_info[0] all_spacegroup_symbols = [spglib.get_spacegroup_type(i) for i in range(1, 531)] symbol = all_spacegroup_symbols[0] for symbol_i in all_spacegroup_symbols: if space_group_symbol == symbol_i.international_short: symbol = symbol_i space_type = typejudge(symbol.number) print(f"当前空间群符号为{space_group_symbol},序号为{symbol.number},对应的晶体体系为{space_type}") occupation_list = process_cif_file(cif_file_path=input_file,explict_element=explict_element) print(occupation_list) for occupation in occupation_list: atom_number, target_multiplier=calculate_supercell_factor(occupation["occupancy"]) divides = [] if space_type == "Hexagonal": print('当前为六方晶系,暂不处理') if space_type == "Cubic": print("当前为立方晶体,三个方向同步") divides = factorize_to_three_factors(target_multiplier,"xyz") else: print("为其他晶系,假设三个方向不同") divides = factorize_to_three_factors(target_multiplier,) print(divides) for it in divides: end_str = f'x{it["x"]}y{it["y"]}z{it["z"]}' for i in range(random_time): expand_struct=expand_structure(struct,it,occupation["number"],atom_number) expand_struct.to_file(output_file) print(it) else: print(f"不存在除{explict_element}以外的共占位原子") process_occupation("../data/input_pre/ICSD_1234.cif", "haha.cif", explict_element=[], expect_cifnumber=1, random_time=1)