增加扩胞逻辑
This commit is contained in:
@@ -1,10 +1,39 @@
|
||||
"""
|
||||
结构检查器:对单个CIF文件进行深度分析
|
||||
结构检查器:对单个CIF文件进行深度分析(含扩胞需求判断)
|
||||
"""
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Set, Dict, List, Optional, Tuple
|
||||
from pymatgen.core import Structure
|
||||
from pymatgen.core.periodic_table import Element, Specie
|
||||
from collections import defaultdict
|
||||
from fractions import Fraction
|
||||
from functools import reduce
|
||||
import math
|
||||
import re
|
||||
import os
|
||||
|
||||
|
||||
@dataclass
class OccupancyInfo:
    """Shared-occupancy record for one occupancy value."""

    # Occupancy value.
    occupation: float
    # Atom serial numbers.
    atom_serials: List[int] = field(default_factory=list)
    # Elements involved.
    elements: List[str] = field(default_factory=list)
    # Numerator of the occupancy expressed as a fraction.
    numerator: int = 0
    # Denominator of the occupancy expressed as a fraction.
    denominator: int = 1
    # Whether the target cation is involved.
    involves_target_cation: bool = False
    # Whether an anion is involved.
    involves_anion: bool = False
|
||||
|
||||
|
||||
@dataclass
class ExpansionInfo:
    """Supercell-expansion information for a structure."""

    # Whether expansion is needed.
    needs_expansion: bool = False
    # Expansion factor (least common multiple).
    expansion_factor: int = 1
    # Shared-occupancy details.
    occupancy_details: List[OccupancyInfo] = field(default_factory=list)
    # Number of problematic sites.
    problematic_sites: int = 0
    # Whether the structure can be handled by expansion.
    can_expand: bool = True
    # Reason why expansion is not possible.
    skip_reason: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -20,29 +49,34 @@ class StructureInfo:
|
||||
# 元素组成
|
||||
elements: Set[str] = field(default_factory=set)
|
||||
num_sites: int = 0
|
||||
formula: str = ""
|
||||
|
||||
# 阳离子/阴离子信息
|
||||
contains_target_cation: bool = False
|
||||
anion_types: Set[str] = field(default_factory=set) # 找到的目标阴离子
|
||||
anion_types: Set[str] = field(default_factory=set)
|
||||
anion_mode: str = "" # "single", "mixed", "none"
|
||||
|
||||
# 数据质量标记
|
||||
has_oxidation_states: bool = False
|
||||
has_partial_occupancy: bool = False # 是否有共占位
|
||||
cation_has_partial_occupancy: bool = False # 目标阳离子是否共占位
|
||||
anion_has_partial_occupancy: bool = False # 阴离子是否共占位
|
||||
has_water_molecule: bool = False
|
||||
has_radioactive_elements: bool = False
|
||||
is_binary_compound: bool = False
|
||||
|
||||
# 共占位详细分析(新增)
|
||||
cation_has_partial_occupancy: bool = False # 目标阳离子共占位
|
||||
anion_has_partial_occupancy: bool = False # 阴离子共占位
|
||||
other_has_partial_occupancy: bool = False # 其他元素共占位(需扩胞)
|
||||
expansion_info: ExpansionInfo = field(default_factory=ExpansionInfo)
|
||||
|
||||
# 可处理性
|
||||
needs_expansion: bool = False # 需要扩胞
|
||||
can_process: bool = False # 可以直接处理
|
||||
skip_reason: str = "" # 跳过原因
|
||||
needs_expansion: bool = False
|
||||
can_process: bool = False
|
||||
skip_reason: str = ""
|
||||
|
||||
|
||||
class StructureInspector:
|
||||
"""结构检查器"""
|
||||
"""结构检查器(含扩胞分析)"""
|
||||
|
||||
# 预定义的阴离子集合
|
||||
VALID_ANIONS = {'O', 'S', 'Cl', 'Br'}
|
||||
@@ -53,16 +87,38 @@ class StructureInspector:
|
||||
'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'
|
||||
}
|
||||
|
||||
def __init__(self, target_cation: str = "Li", target_anions: Set[str] = None):
|
||||
# 扩胞精度模式
|
||||
PRECISION_LIMITS = {
|
||||
'high': None, # 精确分数
|
||||
'normal': 100, # 分母≤100
|
||||
'low': 10, # 分母≤10
|
||||
'very_low': 5 # 分母≤5
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
target_cation: str = "Li",
|
||||
target_anions: Set[str] = None,
|
||||
expansion_precision: str = "low"
|
||||
):
|
||||
"""
|
||||
初始化检查器
|
||||
|
||||
Args:
|
||||
target_cation: 目标阳离子 (如 "Li", "Na")
|
||||
target_anions: 目标阴离子集合 (如 {"O", "S"})
|
||||
expansion_precision: 扩胞计算精度 ('high', 'normal', 'low', 'very_low')
|
||||
"""
|
||||
self.target_cation = target_cation
|
||||
self.target_anions = target_anions or self.VALID_ANIONS
|
||||
self.expansion_precision = expansion_precision
|
||||
|
||||
# 目标阳离子的各种可能表示形式
|
||||
self.target_cation_variants = {
|
||||
target_cation,
|
||||
f"{target_cation}+",
|
||||
f"{target_cation}1+",
|
||||
}
|
||||
|
||||
def inspect(self, file_path: str) -> StructureInfo:
|
||||
"""
|
||||
@@ -74,7 +130,6 @@ class StructureInspector:
|
||||
Returns:
|
||||
StructureInfo: 分析结果
|
||||
"""
|
||||
import os
|
||||
info = StructureInfo(
|
||||
file_path=file_path,
|
||||
file_name=os.path.basename(file_path)
|
||||
@@ -91,9 +146,10 @@ class StructureInspector:
|
||||
# 基础信息
|
||||
info.elements = {str(el) for el in structure.composition.elements}
|
||||
info.num_sites = structure.num_sites
|
||||
info.formula = structure.composition.reduced_formula
|
||||
|
||||
# 检查是否为二元化合物
|
||||
info.is_binary_compound = len(structure.types_of_specie) == 2
|
||||
info.is_binary_compound = len(structure.composition.elements) == 2
|
||||
|
||||
# 检查是否含有目标阳离子
|
||||
info.contains_target_cation = self.target_cation in info.elements
|
||||
@@ -110,8 +166,8 @@ class StructureInspector:
|
||||
# 检查氧化态
|
||||
info.has_oxidation_states = self._check_oxidation_states(structure)
|
||||
|
||||
# 检查共占位
|
||||
self._check_partial_occupancy(structure, info)
|
||||
# 检查共占位(核心分析)
|
||||
self._analyze_partial_occupancy(structure, info)
|
||||
|
||||
# 检查水分子
|
||||
info.has_water_molecule = self._check_water_molecule(structure)
|
||||
@@ -121,9 +177,6 @@ class StructureInspector:
|
||||
info.elements.intersection(self.RADIOACTIVE_ELEMENTS)
|
||||
)
|
||||
|
||||
# 判断是否需要扩胞
|
||||
info.needs_expansion = info.has_partial_occupancy and not info.cation_has_partial_occupancy
|
||||
|
||||
# 判断可处理性
|
||||
self._evaluate_processability(info)
|
||||
|
||||
@@ -140,48 +193,172 @@ class StructureInspector:
|
||||
except:
|
||||
return False
|
||||
|
||||
def _check_partial_occupancy(self, structure: Structure, info: StructureInfo):
|
||||
"""检查共占位情况"""
|
||||
try:
|
||||
for site in structure.sites:
|
||||
if len(site.species) > 1:
|
||||
def _get_element_from_species_string(self, species_str: str) -> str:
|
||||
"""从物种字符串提取纯元素符号"""
|
||||
match = re.match(r'([A-Z][a-z]?)', species_str)
|
||||
return match.group(1) if match else ""
|
||||
|
||||
def _get_occupancy_from_species_string(self, species_str: str, exclude_elements: Set[str]) -> Optional[float]:
|
||||
"""
|
||||
从物种字符串获取非目标元素的占据率
|
||||
格式如: "Li+:0.689, Sc3+:0.311"
|
||||
"""
|
||||
if ':' not in species_str:
|
||||
return None
|
||||
|
||||
parts = [p.strip() for p in species_str.split(',')]
|
||||
for part in parts:
|
||||
if ':' in part:
|
||||
element_part, occu_part = part.split(':')
|
||||
element = self._get_element_from_species_string(element_part.strip())
|
||||
if element and element not in exclude_elements:
|
||||
try:
|
||||
return float(occu_part.strip())
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
def _analyze_partial_occupancy(self, structure: Structure, info: StructureInfo):
|
||||
"""
|
||||
分析共占位情况(核心逻辑)
|
||||
|
||||
关键规则:
|
||||
- 目标阳离子(Li)的共占位 → 不可处理
|
||||
- 阴离子的共占位 → 需要扩胞,但通常不处理
|
||||
- 其他阳离子的共占位 → 需要扩胞处理
|
||||
"""
|
||||
occupancy_dict = defaultdict(list) # {occupation: [site_indices]}
|
||||
occupancy_elements = {} # {occupation: [elements]}
|
||||
|
||||
for i, site in enumerate(structure.sites):
|
||||
site_species = site.species
|
||||
species_string = str(site.species)
|
||||
|
||||
# 检查是否有多个物种占据同一位点
|
||||
if len(site_species) > 1:
|
||||
info.has_partial_occupancy = True
|
||||
|
||||
# 提取各元素符号
|
||||
elements_at_site = []
|
||||
for sp in site_species.keys():
|
||||
elem = sp.symbol if hasattr(sp, 'symbol') else str(sp)
|
||||
elem = self._get_element_from_species_string(elem)
|
||||
if elem:
|
||||
elements_at_site.append(elem)
|
||||
|
||||
# 判断是否涉及目标阳离子
|
||||
if self.target_cation in elements_at_site:
|
||||
info.cation_has_partial_occupancy = True
|
||||
|
||||
# 判断是否涉及阴离子
|
||||
if any(elem in self.target_anions for elem in elements_at_site):
|
||||
info.anion_has_partial_occupancy = True
|
||||
|
||||
# 判断是否涉及其他元素(需要扩胞处理的情况)
|
||||
other_elements = [e for e in elements_at_site
|
||||
if e != self.target_cation and e not in self.target_anions]
|
||||
if other_elements:
|
||||
info.other_has_partial_occupancy = True
|
||||
|
||||
# 获取占据率(取非目标阳离子的占据率)
|
||||
occu = self._get_occupancy_from_species_string(
|
||||
species_string,
|
||||
self.target_cation_variants
|
||||
)
|
||||
if occu is not None and occu != 1.0:
|
||||
occupancy_dict[occu].append(i)
|
||||
occupancy_elements[occu] = elements_at_site
|
||||
|
||||
# 检查单一物种的部分占据
|
||||
for specie, occupancy in site_species.items():
|
||||
if occupancy < 1.0:
|
||||
info.has_partial_occupancy = True
|
||||
elem = specie.symbol if hasattr(specie, 'symbol') else str(specie)
|
||||
elem = self._get_element_from_species_string(elem)
|
||||
|
||||
# 检查是否涉及目标阳离子
|
||||
species_symbols = [str(sp.symbol) if hasattr(sp, 'symbol') else str(sp)
|
||||
for sp in site.species.keys()]
|
||||
|
||||
if self.target_cation in species_symbols:
|
||||
if elem == self.target_cation:
|
||||
info.cation_has_partial_occupancy = True
|
||||
|
||||
# 检查是否涉及阴离子
|
||||
if any(sym in self.target_anions for sym in species_symbols):
|
||||
elif elem in self.target_anions:
|
||||
info.anion_has_partial_occupancy = True
|
||||
else:
|
||||
info.other_has_partial_occupancy = True
|
||||
occupancy_dict[occupancy].append(i)
|
||||
occupancy_elements[occupancy] = [elem]
|
||||
|
||||
# 检查单一物种的部分占据
|
||||
for specie, occupancy in site.species.items():
|
||||
if occupancy < 1.0:
|
||||
info.has_partial_occupancy = True
|
||||
symbol = str(specie.symbol) if hasattr(specie, 'symbol') else str(specie)
|
||||
# 计算扩胞信息
|
||||
self._calculate_expansion_info(info, occupancy_dict, occupancy_elements)
|
||||
|
||||
if symbol == self.target_cation:
|
||||
info.cation_has_partial_occupancy = True
|
||||
if symbol in self.target_anions:
|
||||
info.anion_has_partial_occupancy = True
|
||||
except Exception as e:
|
||||
pass
|
||||
def _calculate_expansion_info(
    self,
    info: StructureInfo,
    occupancy_dict: Dict[float, List[int]],
    occupancy_elements: Dict[float, List[str]]
):
    """Compute the expansion information and store it on ``info``.

    Args:
        info: Object that receives ``expansion_info`` and, when
            ``occupancy_dict`` is non-empty, ``needs_expansion``.
        occupancy_dict: Maps an occupancy value to the 0-based site indices
            recorded for it.
        occupancy_elements: Maps an occupancy value to the element symbols
            recorded for it.
    """
    result = ExpansionInfo()

    # Nothing recorded: store the default (no expansion needed) and stop.
    if not occupancy_dict:
        info.expansion_info = result
        return

    # At least one occupancy was recorded, so expansion is needed.
    result.needs_expansion = True
    result.problematic_sites = sum(map(len, occupancy_dict.values()))

    # A missing or None limit falls back to Fraction.limit_denominator()'s
    # default cap of 1000000.
    # NOTE(review): with a small cap, occupancies close to 0 or 1 can round
    # to 0/1 or 1/1 (denominator 1) - confirm that this is acceptable.
    max_denominator = self.PRECISION_LIMITS.get(self.expansion_precision) or 1000000

    details = []
    for occupation, site_indices in occupancy_dict.items():
        site_elements = occupancy_elements.get(occupation, [])
        approx = Fraction(occupation).limit_denominator(max_denominator)
        details.append(
            OccupancyInfo(
                occupation=occupation,
                # Convert the 0-based indices to 1-based serial numbers.
                atom_serials=[index + 1 for index in site_indices],
                elements=site_elements,
                numerator=approx.numerator,
                denominator=approx.denominator,
                involves_target_cation=self.target_cation in site_elements,
                involves_anion=any(sym in self.target_anions for sym in site_elements),
            )
        )
    result.occupancy_details = details

    # Expansion factor: least common multiple of all denominators.
    factor = 1
    for detail in details:
        factor = factor * detail.denominator // math.gcd(factor, detail.denominator)
    result.expansion_factor = factor

    # A factor above 64 is treated as not expandable.
    if factor > 64:
        result.can_expand = False
        result.skip_reason = f"扩胞因子过大({factor})"

    info.expansion_info = result
    info.needs_expansion = result.needs_expansion and result.can_expand
|
||||
|
||||
def _check_water_molecule(self, structure: Structure) -> bool:
|
||||
"""检查是否含有水分子"""
|
||||
try:
|
||||
oxygen_sites = [site for site in structure.sites
|
||||
if 'O' in str(site.species)]
|
||||
hydrogen_sites = [site for site in structure.sites
|
||||
if 'H' in str(site.species)]
|
||||
oxygen_sites = []
|
||||
hydrogen_sites = []
|
||||
|
||||
for site in structure.sites:
|
||||
species_str = str(site.species)
|
||||
if 'O' in species_str:
|
||||
oxygen_sites.append(site)
|
||||
if 'H' in species_str:
|
||||
hydrogen_sites.append(site)
|
||||
|
||||
for o_site in oxygen_sites:
|
||||
nearby_h = [h for h in hydrogen_sites
|
||||
if o_site.distance(h) < 1.2]
|
||||
nearby_h = [h for h in hydrogen_sites if o_site.distance(h) < 1.2]
|
||||
if len(nearby_h) >= 2:
|
||||
return True
|
||||
return False
|
||||
@@ -207,15 +384,21 @@ class StructureInspector:
|
||||
if info.has_radioactive_elements:
|
||||
skip_reasons.append("含放射性元素")
|
||||
|
||||
# 关键:目标阳离子共占位是不可处理的
|
||||
if info.cation_has_partial_occupancy:
|
||||
skip_reasons.append(f"{self.target_cation}存在共占位")
|
||||
|
||||
# 阴离子共占位通常也不处理
|
||||
if info.anion_has_partial_occupancy:
|
||||
skip_reasons.append("阴离子存在共占位")
|
||||
|
||||
if info.has_water_molecule:
|
||||
skip_reasons.append("含水分子")
|
||||
|
||||
# 扩胞因子过大
|
||||
if info.expansion_info.needs_expansion and not info.expansion_info.can_expand:
|
||||
skip_reasons.append(info.expansion_info.skip_reason)
|
||||
|
||||
if skip_reasons:
|
||||
info.can_process = False
|
||||
info.skip_reason = "; ".join(skip_reasons)
|
||||
|
||||
Reference in New Issue
Block a user