Files
solidstate-tools/dpgen/supercell_make_wangshuo.py
2025-11-19 12:23:17 +08:00

197 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import random
from collections import defaultdict, Counter
from pymatgen.core import Structure, Element
from pymatgen.io.lammps.data import LammpsData
# ASE 兜底(可选)
try:
from ase.io import write as ase_write
from ase import Atoms as ASEAtoms
HAS_ASE = True
except Exception:
HAS_ASE = False
# ===== 用户参数 =====
cif_filename = "data/P3ma/origin.cif" # 你的输入 CIF含部分占位[5]
supercell_matrix = [[1, 0, 0], [0, 1, 0], [0, 0, 2]] # 2×2×4 超胞(总复制数=16[3]
out_lammps = "lyc_P3m1_1x1x2_from_cif_ordered.vasp" # 输出 LAMMPS raw
seed = 2025
strict_count = True # 严格配额:每个父位点在超胞内按占位概率分配整数个原子/空位
# ====================
random.seed(seed)
def species_to_probs(site):
"""
将站点的物种占位转换为 [(species or None(vac), prob)] 列表prob 归一化为和=1。
若总占位 < 1补一个 vacancy(None)。
去掉氧化态,仅保留元素。
"""
sp_items = site.species.items()
total = 0.0
pairs = []
for spc, occ in sp_items:
# 转成 Element剔除氧化态
try:
e = Element(spc.symbol) if hasattr(spc, "symbol") else Element(str(spc))
except Exception:
e = Element(str(spc))
pairs.append((e, float(occ)))
total += float(occ)
if total < 1.0 - 1e-10:
pairs.append((None, 1.0 - total)) # vacancy
total = 1.0
# 归一化
if abs(total - 1.0) > 1e-10:
pairs = [(e, p / total) for (e, p) in pairs]
return pairs
def draw_counts_from_probs(n, probs):
"""
给定复制数 n 和概率 probs返回 {species/None: count},使计数和为 n。
先按四舍五入,再用残差修正到总和= n。
"""
# 初分配
counts = {sp: int(round(p * n)) for sp, p in probs}
s = sum(counts.values())
if s == n:
return counts
# 残差排序:需要增加则按概率大的优先加;需要减少则按概率小的优先减
if n > s:
need = n - s
probs_sorted = sorted(probs, key=lambda x: x[1], reverse=True)
for i in range(need):
sp = probs_sorted[i % len(probs_sorted)][0]
counts[sp] = counts.get(sp, 0) + 1
else:
need = s - n
probs_sorted = sorted(probs, key=lambda x: x[1]) # 先减概率小的
idx = 0
while need > 0 and idx < 50 * len(probs_sorted):
sp = probs_sorted[idx % len(probs_sorted)][0]
if counts.get(sp, 0) > 0:
counts[sp] -= 1
need -= 1
idx += 1
return counts
def collapse_disorder_to_ordered_supercell(struct, M, strict=True):
"""
处理步骤:
1) 给原胞每个位点打 parent_id
2) 扩胞到 M
3) 以父位点为组,在组内(复制数 n按占位概率分配整数个 species/空位到每个复制位点
- 有序位点:所有复制直接保留
- 无序位点/部分占位:严格配额或独立抽样
4) 返回完全有序的超胞 Structure
"""
s0 = struct.copy()
s0.add_site_property("parent_id", list(range(s0.num_sites)))
sc = s0.copy()
sc.make_supercell(M)
# 按父位点分组
groups = defaultdict(list)
for i, site in enumerate(sc.sites):
pid = sc.site_properties["parent_id"][i]
groups[pid].append(i)
new_species = []
new_fracs = []
new_lat = sc.lattice
for pid, idx_list in groups.items():
# 用该组第一个复制的站点定义占位
site0 = sc[idx_list[0]]
# 有序站点直接全部保留只有一种元素且占位为1
if site0.is_ordered:
species_elem = list(site0.species.keys())[0]
for i in idx_list:
new_species.append(species_elem)
new_fracs.append(sc[i].frac_coords)
continue
# 无序/部分占位:概率分配
probs = species_to_probs(site0)
n = len(idx_list)
if strict:
counts = draw_counts_from_probs(n, probs)
# 构造分配池并打乱
pool = []
for sp, c in counts.items():
pool += [sp] * c
random.shuffle(pool)
# 分配到每个复制位点
for i, sp in zip(idx_list, pool):
if sp is None:
continue # vacancy -> 删除该位点
new_species.append(sp)
new_fracs.append(sc[i].frac_coords)
else:
# 独立抽样
import bisect
species_list = [sp for sp, p in probs]
cum = []
ssum = 0.0
for _, p in probs:
ssum += p
cum.append(ssum)
for i in idx_list:
r = random.random()
j = bisect.bisect_left(cum, r)
sp = species_list[j]
if sp is None:
continue
new_species.append(sp)
new_fracs.append(sc[i].frac_coords)
ordered_sc = Structure(new_lat, new_species, new_fracs, to_unit_cell=True, coords_are_cartesian=False)
# 去除可能残留的氧化态LAMMPS atomic 不需要)
try:
ordered_sc.remove_oxidation_states()
except Exception:
pass
return ordered_sc
# 1) 读取 CIF含部分占位
s_in = Structure.from_file(cif_filename, primitive=False)
print(f"读入: {cif_filename}, 原胞位点: {s_in.num_sites}, 有序?: {s_in.is_ordered}")
# 2) 在 2×2×4 超胞上固化部分占位 -> 完全有序超胞
ordered_sc = collapse_disorder_to_ordered_supercell(s_in, supercell_matrix, strict=strict_count)
print(f"生成有序超胞: 位点数={ordered_sc.num_sites}, 有序?: {ordered_sc.is_ordered}")
# 3) 打印元素计数,核对化学计量
elem_count = Counter([sp.symbol for sp in ordered_sc.species])
print("元素计数:", dict(elem_count))
# 4) 写 LAMMPS rawpymatgen失败则 ASE 兜底)
wrote = False
try:
ldata = LammpsData.from_structure(ordered_sc, atom_style="atomic")
ldata.write_file(out_lammps)
wrote = True
print(f"已写出 LAMMPS raw: {out_lammps} (pymatgen)")
except Exception as e:
print("pymatgen 写 LAMMPS raw 失败:", e)
if not wrote and HAS_ASE:
try:
ase_atoms = ASEAtoms(
symbols=[sp.symbol for sp in ordered_sc.species],
positions=ordered_sc.cart_coords,
cell=ordered_sc.lattice.matrix,
pbc=True
)
ase_write(out_lammps, ase_atoms, format="lammps-raw", atom_style="atomic")
wrote = True
print(f"已写出 LAMMPS raw: {out_lammps} (ASE)")
except Exception as e:
print("ASE 写 LAMMPS raw 也失败:", e)
if not wrote:
print("写 LAMMPS raw 失败,请把错误信息发我。")