import random from collections import defaultdict, Counter from pymatgen.core import Structure, Element from pymatgen.io.lammps.data import LammpsData # ASE 兜底(可选) try: from ase.io import write as ase_write from ase import Atoms as ASEAtoms HAS_ASE = True except Exception: HAS_ASE = False # ===== 用户参数 ===== cif_filename = "data/P3ma/origin.cif" # 你的输入 CIF(含部分占位)[5] supercell_matrix = [[1, 0, 0], [0, 1, 0], [0, 0, 2]] # 2×2×4 超胞(总复制数=16)[3] out_lammps = "lyc_P3m1_1x1x2_from_cif_ordered.vasp" # 输出 LAMMPS raw seed = 2025 strict_count = True # 严格配额:每个父位点在超胞内按占位概率分配整数个原子/空位 # ==================== random.seed(seed) def species_to_probs(site): """ 将站点的物种占位转换为 [(species or None(vac), prob)] 列表,prob 归一化为和=1。 若总占位 < 1,补一个 vacancy(None)。 去掉氧化态,仅保留元素。 """ sp_items = site.species.items() total = 0.0 pairs = [] for spc, occ in sp_items: # 转成 Element(剔除氧化态) try: e = Element(spc.symbol) if hasattr(spc, "symbol") else Element(str(spc)) except Exception: e = Element(str(spc)) pairs.append((e, float(occ))) total += float(occ) if total < 1.0 - 1e-10: pairs.append((None, 1.0 - total)) # vacancy total = 1.0 # 归一化 if abs(total - 1.0) > 1e-10: pairs = [(e, p / total) for (e, p) in pairs] return pairs def draw_counts_from_probs(n, probs): """ 给定复制数 n 和概率 probs,返回 {species/None: count},使计数和为 n。 先按四舍五入,再用残差修正到总和= n。 """ # 初分配 counts = {sp: int(round(p * n)) for sp, p in probs} s = sum(counts.values()) if s == n: return counts # 残差排序:需要增加则按概率大的优先加;需要减少则按概率小的优先减 if n > s: need = n - s probs_sorted = sorted(probs, key=lambda x: x[1], reverse=True) for i in range(need): sp = probs_sorted[i % len(probs_sorted)][0] counts[sp] = counts.get(sp, 0) + 1 else: need = s - n probs_sorted = sorted(probs, key=lambda x: x[1]) # 先减概率小的 idx = 0 while need > 0 and idx < 50 * len(probs_sorted): sp = probs_sorted[idx % len(probs_sorted)][0] if counts.get(sp, 0) > 0: counts[sp] -= 1 need -= 1 idx += 1 return counts def collapse_disorder_to_ordered_supercell(struct, M, strict=True): """ 处理步骤: 1) 给原胞每个位点打 parent_id 2) 扩胞到 M 3) 以父位点为组,在组内(复制数 n)按占位概率分配整数个 species/空位到每个复制位点 - 有序位点:所有复制直接保留 - 无序位点/部分占位:严格配额或独立抽样 4) 返回完全有序的超胞 Structure """ s0 = struct.copy() s0.add_site_property("parent_id", list(range(s0.num_sites))) sc = s0.copy() sc.make_supercell(M) # 按父位点分组 groups = defaultdict(list) for i, site in enumerate(sc.sites): pid = sc.site_properties["parent_id"][i] groups[pid].append(i) new_species = [] new_fracs = [] new_lat = sc.lattice for pid, idx_list in groups.items(): # 用该组第一个复制的站点定义占位 site0 = sc[idx_list[0]] # 有序站点:直接全部保留(只有一种元素,且占位为1) if site0.is_ordered: species_elem = list(site0.species.keys())[0] for i in idx_list: new_species.append(species_elem) new_fracs.append(sc[i].frac_coords) continue # 无序/部分占位:概率分配 probs = species_to_probs(site0) n = len(idx_list) if strict: counts = draw_counts_from_probs(n, probs) # 构造分配池并打乱 pool = [] for sp, c in counts.items(): pool += [sp] * c random.shuffle(pool) # 分配到每个复制位点 for i, sp in zip(idx_list, pool): if sp is None: continue # vacancy -> 删除该位点 new_species.append(sp) new_fracs.append(sc[i].frac_coords) else: # 独立抽样 import bisect species_list = [sp for sp, p in probs] cum = [] ssum = 0.0 for _, p in probs: ssum += p cum.append(ssum) for i in idx_list: r = random.random() j = bisect.bisect_left(cum, r) sp = species_list[j] if sp is None: continue new_species.append(sp) new_fracs.append(sc[i].frac_coords) ordered_sc = Structure(new_lat, new_species, new_fracs, to_unit_cell=True, coords_are_cartesian=False) # 去除可能残留的氧化态(LAMMPS atomic 不需要) try: ordered_sc.remove_oxidation_states() except Exception: pass return ordered_sc # 1) 读取 CIF(含部分占位) s_in = Structure.from_file(cif_filename, primitive=False) print(f"读入: {cif_filename}, 原胞位点: {s_in.num_sites}, 有序?: {s_in.is_ordered}") # 2) 在 2×2×4 超胞上固化部分占位 -> 完全有序超胞 ordered_sc = collapse_disorder_to_ordered_supercell(s_in, supercell_matrix, strict=strict_count) print(f"生成有序超胞: 位点数={ordered_sc.num_sites}, 有序?: {ordered_sc.is_ordered}") # 3) 打印元素计数,核对化学计量 elem_count = Counter([sp.symbol for sp in ordered_sc.species]) print("元素计数:", dict(elem_count)) # 4) 写 LAMMPS raw(pymatgen,失败则 ASE 兜底) wrote = False try: ldata = LammpsData.from_structure(ordered_sc, atom_style="atomic") ldata.write_file(out_lammps) wrote = True print(f"已写出 LAMMPS raw: {out_lammps} (pymatgen)") except Exception as e: print("pymatgen 写 LAMMPS raw 失败:", e) if not wrote and HAS_ASE: try: ase_atoms = ASEAtoms( symbols=[sp.symbol for sp in ordered_sc.species], positions=ordered_sc.cart_coords, cell=ordered_sc.lattice.matrix, pbc=True ) ase_write(out_lammps, ase_atoms, format="lammps-raw", atom_style="atomic") wrote = True print(f"已写出 LAMMPS raw: {out_lammps} (ASE)") except Exception as e: print("ASE 写 LAMMPS raw 也失败:", e) if not wrote: print("写 LAMMPS raw 失败,请把错误信息发我。")