对比学习法增改

This commit is contained in:
2025-10-29 11:39:30 +08:00
parent 1f8667ae51
commit 95d719cc1e
5 changed files with 466 additions and 0 deletions

151
dpgen/plus.py Normal file
View File

@@ -0,0 +1,151 @@
import random
from typing import List
from pymatgen.core import Structure
from pymatgen.io.vasp import Poscar
def _is_close_frac(z, target, tol=2e-2):
t = target % 1.0
return min(abs(z - t), abs(z - (t + 1)), abs(z - (t - 1))) < tol
def make_model3_poscar_from_cif(cif_path: str,
                                out_poscar: str = "POSCAR_model3_supercell",
                                seed: int = 42,
                                tol: float = 2e-2):
    """Write an ordered supercell POSCAR built from the model3 CIF.

    The structure read from ``cif_path`` is expanded with the scaling matrix
    ``[[3, 0, 0], [2, 4, 0], [0, 0, 6]]`` (the original author's note quotes
    2160 atoms for model3.cif).  The partially occupied Y2, Y3 and Li2 sites
    are then made explicit: a random 75 % / 25 % / 50 % of each group is kept
    at full occupancy and the rest are deleted.  Any site that is still
    disordered afterwards is forced to its majority species so that the
    POSCAR can be written.

    Sites are assigned to a group by element and by their fractional z
    coordinate *in the original unit cell* (Y2: z ~ 0.488, Y3: z ~ -0.065,
    Li2: z ~ 0.5).

    Args:
        cif_path: Path of the input CIF file.
        out_poscar: Path of the POSCAR file to write.
        seed: Seed for the random choice of which sites stay occupied.
        tol: Tolerance on the unit-cell fractional z used for classification.
    """
    # Private generator: yields the same draws as seeding the module-level
    # RNG with ``seed`` but does not clobber global random state.
    rng = random.Random(seed)

    # 1) Read the CIF and remember the unit-cell lattice for classification.
    s = Structure.from_file(cif_path)
    unit_lattice = s.lattice

    # 2) Expand the cell: a_s = 3*a0, b_s = 2*a0 + 4*b0, c_s = 6*c0.
    T = [[3, 0, 0],
         [2, 4, 0],
         [0, 0, 6]]
    s.make_supercell(T)

    # 3) Identify the three groups of sites that need explicit ordering.
    #    The z targets are unit-cell coordinates; after the expansion the
    #    supercell z is (z0 + n) / 6, so each site is mapped back to the
    #    unit-cell lattice (modulo 1) before it is compared.
    y2_idx: List[int] = []
    y3_idx: List[int] = []
    li2_idx: List[int] = []
    for i, site in enumerate(s.sites):
        # Compatibility with pymatgen versions lacking ``site.species``.
        try:
            el = site.species.elements[0].symbol
        except (AttributeError, IndexError):
            ss = site.species_string
            el = next((sym for sym in ("Li", "Y", "Cl") if ss.startswith(sym)), ss)
        z = unit_lattice.get_fractional_coords(site.coords)[2] % 1.0
        if el == "Y":
            if _is_close_frac(z, 0.488, tol):
                y2_idx.append(i)
            # -0.065 and 0.935 are the same coordinate modulo 1.
            elif _is_close_frac(z, -0.065, tol):
                y3_idx.append(i)
        elif el == "Li":
            if _is_close_frac(z, 0.5, tol):
                li2_idx.append(i)

    def choose_keep(idxs, frac_keep):
        """Split *idxs* into (set to keep, list to drop), keeping round(n * frac_keep)."""
        n = len(idxs)
        k = min(max(int(round(n * frac_keep)), 0), n)
        # Only draw from the RNG when a real choice has to be made.
        if 0 < k < n:
            keep = set(rng.sample(idxs, k))
        else:
            keep = set(idxs) if k == n else set()
        drop = [i for i in idxs if i not in keep]
        return keep, drop

    keep_y2, drop_y2 = choose_keep(y2_idx, 0.75)
    keep_y3, drop_y3 = choose_keep(y3_idx, 0.25)
    keep_li2, drop_li2 = choose_keep(li2_idx, 0.50)

    # 4) Kept sites get occupancy 1; the others are deleted.  Replacement
    #    happens first, while the collected indices are still valid.
    for i in keep_y2 | keep_y3:
        s.replace(i, "Y")
    for i in keep_li2:
        s.replace(i, "Li")
    to_remove = drop_y2 + drop_y3 + drop_li2
    if to_remove:
        s.remove_sites(to_remove)

    # 5) Final clean-up: any remaining disordered site is replaced by its
    #    majority species so that writing the POSCAR does not fail.
    disordered = [i for i, site in enumerate(s.sites) if not site.is_ordered]
    for i in disordered:
        occ = s[i].species.as_dict()  # e.g. {'Li': 0.5} or {'Li': 0.5, 'Y': 0.5}
        s.replace(i, max(occ, key=occ.get))

    # 6) Sort by species (Li, Y, Cl, then anything else) and write the POSCAR.
    order = {"Li": 0, "Y": 1, "Cl": 2}
    s = s.get_sorted_structure(key=lambda site: order.get(site.species.elements[0].symbol, 99))
    Poscar(s).write_file(out_poscar)

    # Report what was written.
    comp = {k: int(v) for k, v in s.composition.as_dict().items()}
    print(f"写出 {out_poscar};总原子数 = {len(s)}")
    print(f"Y2识别={len(y2_idx)}Y3识别={len(y3_idx)}Li2识别={len(li2_idx)};组成={comp}")
import random
from typing import List
from pymatgen.core import Structure
from pymatgen.io.vasp import Poscar
def make_pnma_poscar_from_cif(cif_path: str,
                              out_poscar: str = "POSCAR_pnma_supercell",
                              seed: int = 42,
                              supercell=(3, 3, 6),
                              tol: float = 1e-6,
                              keep_fraction: float = 0.75):
    """Write an ordered supercell POSCAR built from a Pnma CIF.

    The structure read from ``cif_path`` (e.g. origin.cif) is expanded by
    ``supercell`` and its partially occupied Li sites are made explicit: a
    random ``keep_fraction`` of them is kept at full occupancy and the rest
    are deleted (left as vacancies).  Any site that is still disordered
    afterwards is forced to its majority species.

    With the default ``(3, 3, 6)`` scaling the volume grows by a factor of
    54; the original author's note quotes 40 atoms per cell x 54 = 2160.

    Args:
        cif_path: Path of the input CIF file.
        out_poscar: Path of the POSCAR file to write.
        seed: Seed for the random choice of which Li sites stay occupied.
        supercell: Scaling passed to ``Structure.make_supercell``.
        tol: A Li site counts as partial when its occupancy is below ``1 - tol``.
        keep_fraction: Fraction of partial Li sites to keep (the Li occupancy
            being realised); defaults to 0.75.

    Raises:
        ValueError: If ``keep_fraction`` is outside [0, 1].
    """
    if not 0.0 <= keep_fraction <= 1.0:
        raise ValueError(f"keep_fraction must be within [0, 1], got {keep_fraction!r}")

    # Private generator: yields the same draws as seeding the module-level
    # RNG with ``seed`` but does not clobber global random state.
    rng = random.Random(seed)

    s = Structure.from_file(cif_path)
    # The Pnma cell is already orthogonal, so a diagonal scaling is applied directly.
    s.make_supercell(supercell)

    # Collect every disordered site whose majority species is Li with
    # occupancy below 1.
    partial_li_idx: List[int] = []
    for i, site in enumerate(s.sites):
        if site.is_ordered:
            continue
        occ = site.species.as_dict()  # e.g. {'Li': 0.75}
        m_elem, m_occ = max(occ.items(), key=lambda kv: kv[1])
        if m_elem == "Li" and m_occ < 1 - tol:
            partial_li_idx.append(i)

    # Randomly keep round(n * keep_fraction) of those sites; the RNG is only
    # consulted when a real choice has to be made.
    n = len(partial_li_idx)
    k = int(round(n * keep_fraction))
    if 0 < k < n:
        keep = set(rng.sample(partial_li_idx, k))
    else:
        keep = set(partial_li_idx) if k == n else set()
    drop = [i for i in partial_li_idx if i not in keep]

    # Kept sites get occupancy 1 first (indices still valid), then the
    # dropped sites are removed in a single call.
    for i in keep:
        s.replace(i, "Li")
    if drop:
        s.remove_sites(drop)

    # Fallback: force any remaining disordered site to its majority species.
    disordered = [i for i, site in enumerate(s.sites) if not site.is_ordered]
    for i in disordered:
        occ = s[i].species.as_dict()
        s.replace(i, max(occ, key=occ.get))

    # Sort by species (Li, Y, Cl, then anything else) and write the POSCAR.
    order = {"Li": 0, "Y": 1, "Cl": 2}
    s = s.get_sorted_structure(key=lambda site: order.get(site.species.elements[0].symbol, 99))
    Poscar(s).write_file(out_poscar)
    comp = {k: int(v) for k, v in s.composition.as_dict().items()}
    print(f"写出 {out_poscar};总原子数 = {len(s)};组成 = {comp}")
if __name__ == "__main__":
    # Script entry point: build the Pnma supercell POSCAR.
    # The model3 variant is kept here for reference but is not run:
    # make_model3_poscar_from_cif("data/P3ma/model3.cif","data/P3ma/supercell_model4.poscar")
    make_pnma_poscar_from_cif(
        cif_path="data/Pnma/origin.cif",
        out_poscar="data/Pnma/supercell_pnma.poscar",
        seed=42,
    )