一些小修改

2025-11-19 12:23:17 +08:00
parent 95d719cc1e
commit 80ae03c8c1
25 changed files with 2291 additions and 17 deletions
--- a/dpgen/supercell_make_wangshuo.py
+++ b/dpgen/supercell_make_wangshuo.py
@@ -0,0 +1,197 @@
+import random
+from collections import defaultdict, Counter
+
+from pymatgen.core import Structure, Element
+from pymatgen.io.lammps.data import LammpsData
+
+# ASE 兜底（可选）
+try:
+    from ase.io import write as ase_write
+    from ase import Atoms as ASEAtoms
+    HAS_ASE = True
+except Exception:
+    HAS_ASE = False
+
+# ===== 用户参数 =====
+cif_filename = "data/P3ma/origin.cif"                # 你的输入 CIF（含部分占位）[5]
+supercell_matrix = [[1, 0, 0], [0, 1, 0], [0, 0, 2]]  # 2×2×4 超胞（总复制数=16）[3]
+out_lammps = "lyc_P3m1_1x1x2_from_cif_ordered.vasp"  # 输出 LAMMPS raw
+seed = 2025
+strict_count = True  # 严格配额：每个父位点在超胞内按占位概率分配整数个原子/空位
+# ====================
+
+random.seed(seed)
+
+def species_to_probs(site):
+    """
+    将站点的物种占位转换为 [(species or None(vac), prob)] 列表，prob 归一化为和=1。
+    若总占位 < 1，补一个 vacancy(None)。
+    去掉氧化态，仅保留元素。
+    """
+    sp_items = site.species.items()
+    total = 0.0
+    pairs = []
+    for spc, occ in sp_items:
+        # 转成 Element（剔除氧化态）
+        try:
+            e = Element(spc.symbol) if hasattr(spc, "symbol") else Element(str(spc))
+        except Exception:
+            e = Element(str(spc))
+        pairs.append((e, float(occ)))
+        total += float(occ)
+    if total < 1.0 - 1e-10:
+        pairs.append((None, 1.0 - total))  # vacancy
+        total = 1.0
+    # 归一化
+    if abs(total - 1.0) > 1e-10:
+        pairs = [(e, p / total) for (e, p) in pairs]
+    return pairs
+
+def draw_counts_from_probs(n, probs):
+    """
+    给定复制数 n 和概率 probs，返回 {species/None: count}，使计数和为 n。
+    先按四舍五入，再用残差修正到总和= n。
+    """
+    # 初分配
+    counts = {sp: int(round(p * n)) for sp, p in probs}
+    s = sum(counts.values())
+    if s == n:
+        return counts
+
+    # 残差排序：需要增加则按概率大的优先加；需要减少则按概率小的优先减
+    if n > s:
+        need = n - s
+        probs_sorted = sorted(probs, key=lambda x: x[1], reverse=True)
+        for i in range(need):
+            sp = probs_sorted[i % len(probs_sorted)][0]
+            counts[sp] = counts.get(sp, 0) + 1
+    else:
+        need = s - n
+        probs_sorted = sorted(probs, key=lambda x: x[1])  # 先减概率小的
+        idx = 0
+        while need > 0 and idx < 50 * len(probs_sorted):
+            sp = probs_sorted[idx % len(probs_sorted)][0]
+            if counts.get(sp, 0) > 0:
+                counts[sp] -= 1
+                need -= 1
+            idx += 1
+    return counts
+
+def collapse_disorder_to_ordered_supercell(struct, M, strict=True):
+    """
+    处理步骤：
+    1) 给原胞每个位点打 parent_id
+    2) 扩胞到 M
+    3) 以父位点为组，在组内（复制数 n）按占位概率分配整数个 species/空位到每个复制位点
+       - 有序位点：所有复制直接保留
+       - 无序位点/部分占位：严格配额或独立抽样
+    4) 返回完全有序的超胞 Structure
+    """
+    s0 = struct.copy()
+    s0.add_site_property("parent_id", list(range(s0.num_sites)))
+
+    sc = s0.copy()
+    sc.make_supercell(M)
+
+    # 按父位点分组
+    groups = defaultdict(list)
+    for i, site in enumerate(sc.sites):
+        pid = sc.site_properties["parent_id"][i]
+        groups[pid].append(i)
+
+    new_species = []
+    new_fracs = []
+    new_lat = sc.lattice
+
+    for pid, idx_list in groups.items():
+        # 用该组第一个复制的站点定义占位
+        site0 = sc[idx_list[0]]
+        # 有序站点：直接全部保留（只有一种元素，且占位为1）
+        if site0.is_ordered:
+            species_elem = list(site0.species.keys())[0]
+            for i in idx_list:
+                new_species.append(species_elem)
+                new_fracs.append(sc[i].frac_coords)
+            continue
+
+        # 无序/部分占位：概率分配
+        probs = species_to_probs(site0)
+        n = len(idx_list)
+
+        if strict:
+            counts = draw_counts_from_probs(n, probs)
+            # 构造分配池并打乱
+            pool = []
+            for sp, c in counts.items():
+                pool += [sp] * c
+            random.shuffle(pool)
+            # 分配到每个复制位点
+            for i, sp in zip(idx_list, pool):
+                if sp is None:
+                    continue  # vacancy -> 删除该位点
+                new_species.append(sp)
+                new_fracs.append(sc[i].frac_coords)
+        else:
+            # 独立抽样
+            import bisect
+            species_list = [sp for sp, p in probs]
+            cum = []
+            ssum = 0.0
+            for _, p in probs:
+                ssum += p
+                cum.append(ssum)
+            for i in idx_list:
+                r = random.random()
+                j = bisect.bisect_left(cum, r)
+                sp = species_list[j]
+                if sp is None:
+                    continue
+                new_species.append(sp)
+                new_fracs.append(sc[i].frac_coords)
+
+    ordered_sc = Structure(new_lat, new_species, new_fracs, to_unit_cell=True, coords_are_cartesian=False)
+    # 去除可能残留的氧化态（LAMMPS atomic 不需要）
+    try:
+        ordered_sc.remove_oxidation_states()
+    except Exception:
+        pass
+    return ordered_sc
+
+# 1) 读取 CIF（含部分占位）
+s_in = Structure.from_file(cif_filename, primitive=False)
+print(f"读入: {cif_filename}, 原胞位点: {s_in.num_sites}, 有序?: {s_in.is_ordered}")
+
+# 2) 在 2×2×4 超胞上固化部分占位 -> 完全有序超胞
+ordered_sc = collapse_disorder_to_ordered_supercell(s_in, supercell_matrix, strict=strict_count)
+print(f"生成有序超胞: 位点数={ordered_sc.num_sites}, 有序?: {ordered_sc.is_ordered}")
+
+# 3) 打印元素计数，核对化学计量
+elem_count = Counter([sp.symbol for sp in ordered_sc.species])
+print("元素计数:", dict(elem_count))
+
+# 4) 写 LAMMPS raw（pymatgen，失败则 ASE 兜底）
+wrote = False
+try:
+    ldata = LammpsData.from_structure(ordered_sc, atom_style="atomic")
+    ldata.write_file(out_lammps)
+    wrote = True
+    print(f"已写出 LAMMPS raw: {out_lammps} (pymatgen)")
+except Exception as e:
+    print("pymatgen 写 LAMMPS raw 失败：", e)
+
+if not wrote and HAS_ASE:
+    try:
+        ase_atoms = ASEAtoms(
+            symbols=[sp.symbol for sp in ordered_sc.species],
+            positions=ordered_sc.cart_coords,
+            cell=ordered_sc.lattice.matrix,
+            pbc=True
+        )
+        ase_write(out_lammps, ase_atoms, format="lammps-raw", atom_style="atomic")
+        wrote = True
+        print(f"已写出 LAMMPS raw: {out_lammps} (ASE)")
+    except Exception as e:
+        print("ASE 写 LAMMPS raw 也失败：", e)
+
+if not wrote:
+    print("写 LAMMPS raw 失败，请把错误信息发我。")