# solidstate-tools/dpgen/supercell_make_p3ma.py

import pymatgen.core as mg
from pymatgen.io.cif import CifParser
from pymatgen.transformations.standard_transformations import SupercellTransformation
import random
import os


def create_ordered_structure_from_disordered(disordered_structure):
    """
    Convert a structure with partially occupied sites into an ordered one.

    Three families of sites are thinned out at random so that the fraction of
    sites kept in each family equals its occupancy.  The families are selected
    by element and fractional z coordinate (window of +/- 0.05):

    * Y  at z ~ 0.488            -> keep 75 %
    * Y  at z ~ -0.065 (or 0.935) -> keep 25 %
    * Li at z ~ 0.5              -> keep 50 %

    NOTE(review): these z values and occupancies are taken from the original
    comments, which attribute them to ``model3.cif``; confirm against the CIF.

    Any site that is still disordered afterwards is replaced by its
    highest-occupancy species at full occupancy.

    Args:
        disordered_structure: pymatgen structure to order.  It is copied, not
            modified.

    Returns:
        A new structure in which every site is ordered.  The selection uses
        the global ``random`` state, so results differ between runs unless the
        caller seeds ``random``.
    """
    z_tolerance = 0.05
    # (element symbol, fraction of sites to keep, accepted z centres).
    # Order matters twice: a site joins the first family it matches, and the
    # random draws below happen in this order.
    partial_families = (
        ("Y", 0.75, (0.488,)),
        ("Y", 0.25, (-0.065, 1 - 0.065)),
        ("Li", 0.50, (0.5,)),
    )

    s = disordered_structure.copy()

    family_indices = [[] for _ in partial_families]
    for idx, site in enumerate(s.sites):
        # Compare element symbols rather than ``site.species_string``: for a
        # partially occupied site the latter looks like "Y:0.75" (and "Y3+"
        # when oxidation states are present), so a plain == "Y" test would
        # skip exactly the sites this function is meant to process.
        symbols = {el.symbol for el in site.species.elements}
        if len(symbols) != 1:
            # Mixed-species sites are left to the final cleanup pass.
            continue
        symbol = next(iter(symbols))
        z = site.frac_coords[2]
        for bucket, (family_symbol, _, z_centres) in zip(family_indices, partial_families):
            if symbol == family_symbol and any(abs(z - c) < z_tolerance for c in z_centres):
                bucket.append(idx)
                break

    # Randomly pick which sites survive in each family; everything else goes.
    to_remove_indices = []
    for bucket, (_, keep_fraction, _) in zip(family_indices, partial_families):
        num_to_keep = int(round(len(bucket) * keep_fraction))
        kept = set(random.sample(bucket, num_to_keep))
        to_remove_indices.extend(i for i in bucket if i not in kept)

    # All indices are removed in a single call, so they cannot shift under us.
    s.remove_sites(sorted(to_remove_indices, reverse=True))

    # Final cleanup: promote every remaining disordered site to its dominant
    # species.  Iterate over a snapshot because ``replace`` edits the site list.
    for idx, site in enumerate(list(s.sites)):
        if not site.is_ordered:
            species_dict = site.species.as_dict()
            main_specie = max(species_dict.items(), key=lambda item: item[1])[0]
            s.replace(idx, main_specie)

    return s


def create_supercells_from_file(cif_path, output_path="."):
    """
    Build three supercells from a CIF file and write them as POSCAR files.

    The CIF is parsed, ordered with ``create_ordered_structure_from_disordered``
    and expanded along c.  Files written into ``output_path``:

    * ``POSCAR_60_no_defect``        - 1x1x2 supercell, no defect
    * ``POSCAR_60_antisite_defect``  - 1x1x2 supercell with one Li<->Y swap
    * ``POSCAR_90_antisite_defect``  - 1x1x3 supercell with one Li<->Y swap

    The "60"/"90" labels are nominal; the actual atom count is printed.

    Args:
        cif_path: Path of the CIF file to read.
        output_path: Directory for the POSCAR files; created if missing.

    Returns:
        None.  If ``cif_path`` does not exist an error is printed and nothing
        is written.
    """
    if not os.path.exists(cif_path):
        print(f"错误: 文件 '{cif_path}' 不存在。")
        return

    print(f"正在从 {cif_path} 读取结构...")
    raw_structure = CifParser(cif_path).parse_structures(primitive=False)[0]

    structure = create_ordered_structure_from_disordered(raw_structure)
    print(f"成功将无序结构转换为一个包含 {len(structure)} 个原子的有序单胞。")

    os.makedirs(output_path, exist_ok=True)

    def indices_of(cell, symbol):
        # Site indices in ``cell`` whose species string equals ``symbol``.
        return [idx for idx, site in enumerate(cell.sites) if site.species_string == symbol]

    def swap_one_pair(cell, li_sites, y_sites):
        # Pick one Li and one Y site at random (Li drawn first) and exchange them.
        li_pick = random.choice(li_sites)
        y_pick = random.choice(y_sites)
        cell.replace(li_pick, "Y")
        cell.replace(y_pick, "Li")

    # Task 1: defect-free 1x1x2 supercell.
    print("\n--- 正在生成 60原子无缺陷超胞 (1x1x2) ---")
    double_c = SupercellTransformation([[1, 0, 0], [0, 1, 0], [0, 0, 2]])
    pristine_60 = double_c.apply_transformation(structure)
    formula_60 = pristine_60.composition.reduced_formula
    print(f"原子总数: {len(pristine_60)}, 化学式: {formula_60}")
    pristine_60_path = os.path.join(output_path, "POSCAR_60_no_defect")
    pristine_60.to(fmt="poscar", filename=pristine_60_path)
    print(f"已保存文件: {pristine_60_path}")

    # Task 2: 1x1x2 supercell with a single antisite pair.
    print("\n--- 正在生成 60原子含一对反位缺陷超胞 ---")
    defect_60 = pristine_60.copy()
    li_sites = indices_of(defect_60, 'Li')
    y_sites = indices_of(defect_60, 'Y')

    if li_sites and y_sites:
        swap_one_pair(defect_60, li_sites, y_sites)
        percent_60 = 2 / (len(li_sites) + len(y_sites)) * 100
        print(f"成功引入一对反位缺陷。浓度: {percent_60:.2f}%")
        defect_60_path = os.path.join(output_path, "POSCAR_60_antisite_defect")
        defect_60.to(fmt="poscar", filename=defect_60_path)
        print(f"已保存文件: {defect_60_path}")

    # Task 3: 1x1x3 supercell with a single antisite pair.
    print("\n--- 正在生成 90原子含一对反位缺陷超胞 ---")
    triple_c = SupercellTransformation([[1, 0, 0], [0, 1, 0], [0, 0, 3]])
    defect_90 = triple_c.apply_transformation(structure).copy()
    li_sites = indices_of(defect_90, 'Li')
    y_sites = indices_of(defect_90, 'Y')

    if li_sites and y_sites:
        swap_one_pair(defect_90, li_sites, y_sites)
        percent_90 = 2 / (len(li_sites) + len(y_sites)) * 100
        print(f"原子总数: {len(defect_90)}, 浓度: {percent_90:.2f}%")
        defect_90_path = os.path.join(output_path, "POSCAR_90_antisite_defect")
        defect_90.to(fmt="poscar", filename=defect_90_path)
        print(f"已保存文件: {defect_90_path}")


def create_ordered_p3ma_structure(disordered_structure):
    """
    Turn the disordered P3ma structure (partial Y2, Y3, Li2 sites) into an
    ordered one.

    Sites are selected by element and fractional z coordinate (+/- 0.05) and a
    random subset matching the occupancy is kept:

    * Y2:  Y  at z ~ 0.488             -> keep 75 %
    * Y3:  Y  at z ~ -0.065 (or 0.935) -> keep 25 %
    * Li2: Li at z ~ 0.5               -> keep 50 %

    NOTE(review): z values and occupancies come from the original comments,
    which attribute them to ``model3.cif``; confirm against the CIF.

    Any site still disordered afterwards is replaced by its highest-occupancy
    species at full occupancy.

    Args:
        disordered_structure: pymatgen structure to order.  It is copied, not
            modified.

    Returns:
        A new, fully ordered structure.  Uses the global ``random`` state.
    """
    s = disordered_structure.copy()

    def near(value, centre, tol=0.05):
        return abs(value - centre) < tol

    y2_indices, y3_indices, li2_indices = [], [], []

    for i, site in enumerate(s.sites):
        # Use element symbols instead of ``site.species_string``: a partially
        # occupied site reports e.g. "Y:0.75" (or "Y3+" with oxidation
        # states), which never equals "Y", so the partial sites would be
        # skipped and later promoted to full occupancy.
        symbols = {el.symbol for el in site.species.elements}
        z = site.frac_coords[2]
        if symbols == {"Y"}:
            if near(z, 0.488):
                y2_indices.append(i)
            elif near(z, -0.065) or near(z, 1 - 0.065):
                # z may be stored either as -0.065 or wrapped into [0, 1).
                y3_indices.append(i)
        elif symbols == {"Li"}:
            if near(z, 0.5):
                li2_indices.append(i)

    def choose_keep(indices, keep_fraction):
        # Random subset whose size is the rounded occupancy share.
        num_to_keep = int(round(len(indices) * keep_fraction))
        return set(random.sample(indices, num_to_keep))

    keep_y2 = choose_keep(y2_indices, 0.75)
    keep_y3 = choose_keep(y3_indices, 0.25)
    keep_li2 = choose_keep(li2_indices, 0.50)

    # Everything in a family that was not chosen to stay is removed.
    to_remove_indices = [i for i in y2_indices if i not in keep_y2]
    to_remove_indices.extend(i for i in y3_indices if i not in keep_y3)
    to_remove_indices.extend(i for i in li2_indices if i not in keep_li2)

    s.remove_sites(sorted(to_remove_indices, reverse=True))

    # Final cleanup: make every remaining site ordered.  Iterate over a
    # snapshot because ``replace`` edits the structure's site list.
    for i, site in enumerate(list(s.sites)):
        if not site.is_ordered:
            species_dict = site.species.as_dict()
            main_specie = max(species_dict.items(), key=lambda item: item[1])[0]
            s.replace(i, main_specie)

    return s


def create_multiple_p3ma_supercells(cif_path, num_configs=5, output_path="."):
    """
    Generate several antisite-defect configurations per supercell size.

    The P3ma CIF is parsed and ordered with ``create_ordered_p3ma_structure``.
    For a 1x1x2 ("60") and a 1x1x3 ("90") supercell, up to ``num_configs``
    copies are written, each with one Li<->Y swap on a distinct (Li, Y) index
    pair.  Files are named
    ``POSCAR_P3ma_{60|90}_approx_antisite_defect_{i}``.

    Args:
        cif_path: Path of the CIF file to read.
        num_configs: Number of defect configurations wanted per size.
        output_path: Directory for the POSCAR files; created if missing.

    Returns:
        None.  If ``cif_path`` does not exist an error is printed and nothing
        is written.  If a supercell offers fewer distinct pairs than
        ``num_configs``, all of them are written and a warning is printed.
    """
    if not os.path.exists(cif_path):
        print(f"错误: 文件 '{cif_path}' 不存在。")
        return

    print(f"正在从 {cif_path} 读取P3ma结构...")
    parser = CifParser(cif_path)
    disordered_structure = parser.parse_structures(primitive=False)[0]

    structure = create_ordered_p3ma_structure(disordered_structure)
    print(f"成功将无序P3ma结构转换为一个包含 {len(structure)} 个原子的有序单胞。")

    os.makedirs(output_path, exist_ok=True)

    # (nominal atom count used in names/messages, repeat count along c)
    targets = ((60, 2), (90, 3))
    for size, c_repeat in targets:
        print(f"\n--- 正在为约 {size} 原子的版本生成 {num_configs} 个不同构型 ---")

        # 1. Build the defect-free reference supercell.
        tf = SupercellTransformation([[1, 0, 0], [0, 1, 0], [0, 0, c_repeat]])
        filename_suffix = f"{size}_approx"

        base_supercell = tf.apply_transformation(structure)
        print(f"已生成基准超胞，实际原子数: {len(base_supercell)}")

        li_indices = [i for i, site in enumerate(base_supercell.sites) if site.species_string == 'Li']
        y_indices = [i for i, site in enumerate(base_supercell.sites) if site.species_string == 'Y']

        if not li_indices or not y_indices:
            print("错误：在超胞中未找到足够的Li或Y原子来引入缺陷。")
            continue

        # 2. Draw distinct (Li, Y) pairs without replacement.  Sampling from
        #    the full list of pairs guarantees uniqueness and never gives up
        #    while unused pairs remain, unlike retrying random picks.
        candidate_pairs = [(li, y) for li in li_indices for y in y_indices]
        num_available = min(max(num_configs, 0), len(candidate_pairs))
        chosen_pairs = random.sample(candidate_pairs, num_available)

        for i, (li_swap_idx, y_swap_idx) in enumerate(chosen_pairs):
            defect_supercell = base_supercell.copy()

            # Introduce the antisite pair.
            defect_supercell.replace(li_swap_idx, "Y")
            defect_supercell.replace(y_swap_idx, "Li")

            print(f"  配置 {i}: 成功引入一对反位缺陷 (Li at index {li_swap_idx} <-> Y at index {y_swap_idx})。")

            # 3. Save as a numbered POSCAR file.
            poscar_filename = f"POSCAR_P3ma_{filename_suffix}_antisite_defect_{i}"
            poscar_path = os.path.join(output_path, poscar_filename)
            defect_supercell.to(fmt="poscar", filename=poscar_path)
            print(f"  已保存文件: {poscar_path}")

        if num_available < num_configs:
            # Fewer distinct pairs exist than were requested.
            print(f"  警告: 未能找到更多独特的交换对，已停止在第 {num_available} 个构型。")

if __name__ == '__main__':
    # --- Usage ---
    # 1. Save your CIF file somewhere on disk.
    # 2. Point ``source_cif`` at it and choose an output directory.
    source_cif = "data/P3ma/model3.cif"  # change to your CIF file
    target_dir = "raw/P3ma/output"  # directory that receives the POSCAR files

    # Alternative entry point (one configuration per size):
    #   create_supercells_from_file(source_cif, target_dir)
    create_multiple_p3ma_supercells(cif_path=source_cif, output_path=target_dir)
    print("所有任务完成！")