V1

2025-12-07 13:56:33 +08:00
parent 49f54b04cd
commit c83985cd02
22 changed files with 2732 additions and 0 deletions
--- a/py/Occupation.py
+++ b/py/Occupation.py
@@ -0,0 +1,329 @@
+from fontTools.misc.plistlib import end_string
+from pymatgen.core import Structure
+import spglib
+from fractions import Fraction
+import random
+from pymatgen.core.sites import PeriodicSite, Species,Element,Lattice
+import numpy as np
+import re
+def typejudge(number):
+    if number in [1, 2]:
+        return "Triclinic"
+    elif 3 <= number <= 15:
+        return "Monoclinic"
+    elif 16 <= number <= 74:
+        return "Orthorhombic"
+    elif 75 <= number <= 142:
+        return "Tetragonal"
+    elif 143 <= number <= 167:
+        return "Trigonal"
+    elif 168 <= number <= 194:
+        return "Hexagonal"
+    elif 195 <= number <= 230:
+        return "Cubic"
+    else:
+        return "Unknown"
+
+
+def extract_oxi_state(species_string):
+    """
+    从 species_string（如 "Li+:0.645"）提取氧化态
+
+    Args:
+        species_string: 包含元素和氧化态的字符串（如 "Li+:0.645"、"Fe2-:0.5"）
+
+    Returns:
+        int: 氧化态数值（如 "+" -> 1, "2-" -> -2），默认返回 0 如果解析失败
+    """
+    # 使用正则表达式匹配化学符号和氧化态（如 Li+, Fe2-）
+    match = re.search(r"([A-Za-z]+)([+-]?\d*[+-])", species_string)
+    if not match:
+        return 0  # 默认中性
+
+    # 提取氧化态部分（如 "+", "2-", "3+"）
+    oxi_str = match.group(2)
+
+    # 处理单符号情况（如 "+" -> 1, "-" -> -1）
+    if oxi_str in ("+", "-"):
+        return 1 if oxi_str == "+" else -1
+
+    # 处理多数字情况（如 "2+" -> 2, "3-" -> -3）
+    try:
+        num = int(oxi_str[:-1])  # 提取数字部分
+        sign = 1 if oxi_str[-1] == "+" else -1
+        return num * sign
+    except (ValueError, IndexError):
+        return 0  # 解析失败时返回中性
+
+def process_cif_file(cif_file_path,explict_element):
+    structure = Structure.from_file(cif_file_path)
+    result_list = []
+    for index, site in enumerate(structure.sites, start=1):
+        occupancy = site.species.element_composition.num_atoms
+        species = site.species.chemical_system
+        if species in explict_element:
+            break
+        if occupancy < 1.0:
+            entry = next((r for r in result_list if
+                          r["species"] == species and r["occupancy"] == occupancy), None)
+            if entry:
+                entry["number"].append(index)
+            else:
+                result_list.append({
+                    "species": species,
+                    "number": [index],
+                    "occupancy": occupancy
+                })
+    return result_list
+
+
+def factorize_to_three_factors(n,type_sym=None):
+    factors = []
+
+    # 遍历可能的x值
+    if type_sym == None:
+        for x in range(1, n + 1):
+            if n % x == 0:
+                remaining_n = n // x
+                # 遍历可能的y值
+                for y in range(1, remaining_n + 1):
+                    if remaining_n % y == 0:
+                        z = remaining_n // y
+                        factors.append({'x': x, 'y': y, 'z': z})
+    if type_sym == "xyz":
+        for x in range(1, n + 1):
+            if n % x == 0:
+                remaining_n = n // x
+                # 遍历可能的y值
+                for y in range(1, remaining_n + 1):
+                    if remaining_n % y == 0 and y <= x:
+                        z = remaining_n // y
+                        if z <= y:
+                            factors.append({'x': x, 'y': y, 'z': z})
+
+    def sum_score(factor):
+        x, y, z = factor['x'], factor['y'], factor['z']
+        return x + y + z
+
+    # 按照sum_score从小到大排序
+    sorted_factors = sorted(factors, key=sum_score)
+    return sorted_factors
+
+
+
+
+
+def calculate_supercell_factor(occupancy):
+    # 将浮点数转换为分数形式
+    fraction = Fraction(occupancy).limit_denominator()
+
+    # 获取分子和分母
+    numerator = fraction.numerator
+    denominator = fraction.denominator
+
+    return numerator,denominator
+
+
+def mark_atoms_randomly(factors, atom_number):
+    """
+    根据扩胞因子和占据数量生成随机占据字典
+
+    Args:
+        factors: 扩胞因子字典 {'x': int, 'y': int, 'z': int}
+        atom_number: 需要占据的副本数量
+
+    Returns:
+        字典 {0: 1或0, 1: 1或0, ..., total_copies-1: 1或0}
+    """
+    x, y, z = factors['x'], factors['y'], factors['z']
+    total_copies = x * y * z
+
+    if atom_number > total_copies:
+        raise ValueError(f"atom_number ({atom_number}) 不能超过扩胞总数 (x*y*z = {total_copies})")
+
+    # 生成所有副本索引 [0, 1, 2, ..., total_copies-1]
+    atom_dice = list(range(total_copies))
+
+    # 随机选择 atom_number 个副本占据
+    selected_atoms = random.sample(atom_dice, atom_number)
+
+    # 创建结果字典 {0: 1或0, 1: 1或0, ...}
+    result = {atom: 1 if atom in selected_atoms else 0 for atom in atom_dice}
+
+    return result
+
+
+def generate_random_list(total_elements, atom_number):
+    # 确保 atom_number 不超过 total_elements
+    if atom_number > total_elements:
+        raise ValueError("atom_number cannot be greater than the total number of elements (x * y * z)")
+
+    # 创建一个全0的列表
+    result = [0] * total_elements
+
+    # 随机选择 atom_number 个位置，并将这些位置的值设为1
+    indices = random.sample(range(total_elements), atom_number)
+    for index in indices:
+        result[index] = 1
+
+    return result
+
+
+def merge_structures(struct_copies, factors):
+    """
+    将多个副本结构按三维顺序合并为扩胞后的结构
+
+    Args:
+        struct_copies: 副本结构列表（长度 = x*y*z）
+        factors: 扩胞因子字典 {"x": int, "y": int, "z": int}
+
+    Returns:
+        合并后的扩胞结构
+    """
+    x, y, z = factors["x"], factors["y"], factors["z"]
+    total_copies = x * y * z
+
+    if len(struct_copies) != total_copies:
+        raise ValueError("副本数量与扩胞因子不匹配")
+
+    # 获取原结构的晶格
+    original_lattice = struct_copies[0].lattice
+
+    # 创建扩胞后的新晶格（直接按倍数缩放）
+    new_lattice_matrix = np.dot(original_lattice.matrix, np.diag([x, y, z]))
+    new_lattice = Lattice(new_lattice_matrix)
+
+    # 初始化合并后的结构
+    merged_structure = Structure(
+        lattice=new_lattice,
+        species=[],
+        coords=[],
+        coords_are_cartesian=False
+    )
+
+    # 按三维顺序填充每个副本的原子
+    for copy_idx in range(total_copies):
+        # 计算当前副本的分数坐标偏移量
+        offset = np.array([
+            copy_idx // (y * z),  # x方向偏移
+            (copy_idx % (y * z)) // z,  # y方向偏移
+            copy_idx % z  # z方向偏移
+        ])
+
+        # 将当前副本的原子添加到合并结构中（考虑偏移）
+        for site in struct_copies[copy_idx]:
+            if site.species:  # 跳过空位
+                merged_structure.append(
+                    species=site.species,
+                    coords=site.frac_coords + offset,
+                    coords_are_cartesian=False,
+                    properties=site.properties
+                )
+
+    return merged_structure
+
+def expand_structure(structure, factors, atom_indices, atom_number):
+    # 参数检查
+    x, y, z = factors['x'], factors['y'], factors['z']
+    total_copies = x * y * z
+    if not all(1 <= idx <= len(structure.sites) for idx in atom_indices):
+        raise ValueError("atom_indices包含无效原子索引")
+
+    # 生成独立副本
+    struct_copies = [structure.copy() for _ in range(total_copies)]
+    atom_dice = list(range(total_copies))  # 所有副本索引 [0,1,2,...]
+
+    # 处理每个目标原子
+    for atom_idx in atom_indices:
+        original_site = structure.sites[atom_idx - 1]
+        element = original_site.species.chemical_system
+
+
+        # 生成当前原子的占据字典（如{0:1, 1:0, 2:1,...}）
+        occupancy_dict = mark_atoms_randomly(factors,atom_number)
+        # 修改每个副本
+        for copy_idx, occupy in occupancy_dict.items():
+            # 或者方法2：使用remove/insert
+            struct_copies[copy_idx].remove_sites([atom_idx - 1])
+            oxi_state = extract_oxi_state(original_site.species_string)
+            if occupy:
+                new_site = PeriodicSite(
+                    species=Species(element, oxi_state),
+                    coords=original_site.frac_coords,
+                    lattice=struct_copies[copy_idx].lattice,
+                    to_unit_cell=True,
+                    label=original_site.label
+                )
+                struct_copies[copy_idx].sites.insert(atom_idx - 1, new_site)
+            else:
+                species_dict = {Species(element, oxi_state): 0.0}
+                new_site = PeriodicSite(
+                    species = species_dict,
+                    coords=original_site.frac_coords,
+                    lattice=struct_copies[copy_idx].lattice,
+                    to_unit_cell=True,
+                    label=original_site.label
+                )
+                struct_copies[copy_idx].sites.insert(atom_idx - 1, new_site)
+
+    # 合并副本
+    expanded_structure = Structure(
+        lattice=np.dot(structure.lattice.matrix, np.diag([x, y, z])),
+        species=[],
+        coords=[],
+        coords_are_cartesian=False
+    )
+
+    for copy_idx in range(total_copies):
+        offset = np.array([
+            copy_idx // (y * z),
+            (copy_idx % (y * z)) // z,
+            copy_idx % z
+        ])
+        for site in struct_copies[copy_idx]:
+            if site.species:  # 只添加非空位
+                expanded_structure.append(
+                    species=site.species,
+                    coords=site.frac_coords + offset,
+                    coords_are_cartesian=False,
+                    properties=site.properties
+                )
+    expanded_structure = merge_structures(struct_copies,factors)
+    return expanded_structure
+
+
+def process_occupation(input_file,output_file,explict_element = ["Li"],expect_cifnumber = 10,random_time=1):
+    struct = Structure.from_file(input_file)
+    space_group_info = struct.get_space_group_info()
+    space_group_symbol = space_group_info[0]
+    all_spacegroup_symbols = [spglib.get_spacegroup_type(i) for i in range(1, 531)]
+    symbol = all_spacegroup_symbols[0]
+    for symbol_i in all_spacegroup_symbols:
+        if space_group_symbol == symbol_i.international_short:
+            symbol = symbol_i
+    space_type = typejudge(symbol.number)
+    print(f"当前空间群符号为{space_group_symbol},序号为{symbol.number},对应的晶体体系为{space_type}")
+    occupation_list = process_cif_file(cif_file_path=input_file,explict_element=explict_element)
+    print(occupation_list)
+    for occupation in occupation_list:
+        atom_number, target_multiplier=calculate_supercell_factor(occupation["occupancy"])
+        divides = []
+        if space_type == "Hexagonal":
+            print('当前为六方晶系，暂不处理')
+        if space_type == "Cubic":
+            print("当前为立方晶体，三个方向同步")
+            divides = factorize_to_three_factors(target_multiplier,"xyz")
+        else:
+            print("为其他晶系，假设三个方向不同")
+            divides = factorize_to_three_factors(target_multiplier,)
+        print(divides)
+        for it in divides:
+            end_str = f'x{it["x"]}y{it["y"]}z{it["z"]}'
+            for i in range(random_time):
+                expand_struct=expand_structure(struct,it,occupation["number"],atom_number)
+                expand_struct.to_file(output_file)
+            print(it)
+    else:
+        print(f"不存在除{explict_element}以外的共占位原子")
+process_occupation("../data/input_pre/ICSD_1234.cif", "haha.cif", explict_element=[], expect_cifnumber=1, random_time=1)
+