重构预处理制作
This commit is contained in:
223
src/analysis/structure_inspector.py
Normal file
223
src/analysis/structure_inspector.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""
|
||||
结构检查器:对单个CIF文件进行深度分析
|
||||
"""
|
||||
import logging
import os
from dataclasses import dataclass, field
from typing import Set, Dict, List, Optional, Tuple

from pymatgen.core import Structure
from pymatgen.core.periodic_table import Element, Specie
|
||||
|
||||
|
||||
@dataclass
class StructureInfo:
    """Analysis result for a single structure file.

    Only ``file_path`` and ``file_name`` are required; every other field
    starts at a neutral default (``False``, ``0``, ``""`` or an empty set)
    and is meant to be filled in by whoever performs the inspection.
    """

    file_path: str
    file_name: str

    # --- Parsing outcome ---
    is_valid: bool = False
    error_message: str = ""

    # --- Composition ---
    elements: Set[str] = field(default_factory=set)
    num_sites: int = 0

    # --- Cation / anion information ---
    contains_target_cation: bool = False
    # Target anions that were found in the structure.
    anion_types: Set[str] = field(default_factory=set)
    # One of "single", "mixed", "none" (empty string until evaluated).
    anion_mode: str = ""

    # --- Data-quality flags ---
    has_oxidation_states: bool = False
    # True when any site is partially / jointly occupied.
    has_partial_occupancy: bool = False
    # True when the target cation sits on such a site.
    cation_has_partial_occupancy: bool = False
    # True when a target anion sits on such a site.
    anion_has_partial_occupancy: bool = False
    has_water_molecule: bool = False
    has_radioactive_elements: bool = False
    is_binary_compound: bool = False

    # --- Processability ---
    # The structure needs supercell expansion before processing.
    needs_expansion: bool = False
    # The structure can be processed directly.
    can_process: bool = False
    # Why the structure is skipped (empty when it is not skipped).
    skip_reason: str = ""
|
||||
|
||||
|
||||
class StructureInspector:
    """Inspect a single CIF file and decide whether it can be processed.

    The inspector records composition, anion mode, data-quality flags
    (oxidation states, partial occupancy, water, radioactive elements) and
    finally a processability verdict on a ``StructureInfo`` object.
    """

    # Anions used when the caller does not supply ``target_anions``.
    VALID_ANIONS = {'O', 'S', 'Cl', 'Br'}

    # Elements treated as radioactive; a structure containing any of them is
    # skipped.
    # NOTE(review): Tc, Pm, At, Fr, Ac and Pa are not listed - confirm that
    # this is intentional.
    RADIOACTIVE_ELEMENTS = {
        'U', 'Th', 'Pu', 'Ra', 'Rn', 'Po', 'Np', 'Am',
        'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'
    }

    # An O site with at least two H sites closer than this is treated as
    # water. Same length unit as ``site.distance`` (presumably angstrom).
    _OH_DISTANCE_CUTOFF = 1.2

    _logger = logging.getLogger(__name__)

    def __init__(self, target_cation: str = "Li", target_anions: Optional[Set[str]] = None):
        """Create an inspector.

        Args:
            target_cation: Symbol of the target cation (e.g. "Li", "Na").
            target_anions: Symbols of the target anions (e.g. {"O", "S"}).
                ``None`` or an empty collection selects ``VALID_ANIONS``.
        """
        self.target_cation = target_cation
        # Copy so that later mutation of the instance attribute can never
        # leak into the class-level default (or into the caller's set).
        self.target_anions = set(target_anions) if target_anions else set(self.VALID_ANIONS)

    @staticmethod
    def _symbol_of(specie) -> str:
        """Return the bare element symbol of a species-like object.

        Objects exposing ``.symbol`` (elements, and species carrying an
        oxidation state) yield that symbol; anything else is stringified.
        """
        return str(getattr(specie, "symbol", specie))

    def inspect(self, file_path: str) -> "StructureInfo":
        """Analyse a single CIF file.

        Args:
            file_path: Path of the CIF file.

        Returns:
            StructureInfo: The analysis result. When the file cannot be
            parsed, ``is_valid`` stays ``False``, ``error_message`` holds the
            parser error and no further field is filled in.
        """
        info = StructureInfo(
            file_path=file_path,
            file_name=os.path.basename(file_path),
        )

        # Parsing may fail in many different ways, so any exception marks
        # the file as invalid instead of aborting the caller's batch.
        try:
            structure = Structure.from_file(file_path)
        except Exception as exc:
            info.error_message = str(exc)
            return info
        info.is_valid = True

        # Bare element symbols: stringifying a species that carries an
        # oxidation state would yield e.g. "Li+" and defeat every
        # symbol-based membership test below.
        info.elements = {self._symbol_of(sp) for sp in structure.composition.elements}
        info.num_sites = structure.num_sites

        # Binary means exactly two distinct elements; the same element in
        # two oxidation states must not be counted twice.
        info.is_binary_compound = len(info.elements) == 2

        info.contains_target_cation = self.target_cation in info.elements

        info.anion_types = info.elements.intersection(self.target_anions)
        info.anion_mode = {0: "none", 1: "single"}.get(len(info.anion_types), "mixed")

        info.has_oxidation_states = self._check_oxidation_states(structure)
        self._check_partial_occupancy(structure, info)
        info.has_water_molecule = self._check_water_molecule(structure)
        info.has_radioactive_elements = bool(
            info.elements.intersection(self.RADIOACTIVE_ELEMENTS)
        )

        # Expansion is flagged when some site is partially occupied but the
        # target cation itself is not involved.
        info.needs_expansion = info.has_partial_occupancy and not info.cation_has_partial_occupancy

        self._evaluate_processability(info)
        return info

    def _check_oxidation_states(self, structure: "Structure") -> bool:
        """Return True if any species on any site carries an oxidation state.

        The test is duck-typed on ``oxi_state`` rather than an ``isinstance``
        check against the legacy ``Specie`` name, so it does not depend on
        whether that name is an alias or a subclass of the species class.
        Best effort: any failure is logged and reported as ``False``.
        """
        try:
            return any(
                getattr(specie, "oxi_state", None) is not None
                for site in structure.sites
                for specie in site.species.keys()
            )
        except Exception as exc:
            self._logger.warning("Oxidation-state check failed: %s", exc, exc_info=True)
            return False

    def _check_partial_occupancy(self, structure: "Structure", info: "StructureInfo"):
        """Set the partial-occupancy flags on ``info`` (mutated in place).

        A site counts as partially occupied when it hosts more than one
        species (every species on it is affected) or when its single species
        has an occupancy below 1.0. Best effort: a failure is logged and the
        flags set so far are kept.
        """
        try:
            for site in structure.sites:
                if len(site.species) > 1:
                    affected = {self._symbol_of(sp) for sp in site.species.keys()}
                else:
                    affected = {
                        self._symbol_of(sp)
                        for sp, occupancy in site.species.items()
                        if occupancy < 1.0
                    }
                if not affected:
                    continue

                info.has_partial_occupancy = True
                if self.target_cation in affected:
                    info.cation_has_partial_occupancy = True
                if not affected.isdisjoint(self.target_anions):
                    info.anion_has_partial_occupancy = True
        except Exception as exc:
            self._logger.warning("Partial-occupancy check failed: %s", exc, exc_info=True)

    def _check_water_molecule(self, structure: "Structure") -> bool:
        """Return True if some O site has at least two H sites within the cutoff.

        Sites are selected by exact element symbol; a substring test on the
        stringified species would also match Hf, Hg, Ho, He or Os.
        Best effort: any failure is logged and reported as ``False``.
        """
        try:
            oxygen_sites = []
            hydrogen_sites = []
            for site in structure.sites:
                symbols = {self._symbol_of(sp) for sp in site.species.keys()}
                if 'O' in symbols:
                    oxygen_sites.append(site)
                if 'H' in symbols:
                    hydrogen_sites.append(site)

            for o_site in oxygen_sites:
                close_h = sum(
                    1 for h_site in hydrogen_sites
                    if o_site.distance(h_site) < self._OH_DISTANCE_CUTOFF
                )
                if close_h >= 2:
                    return True
            return False
        except Exception as exc:
            self._logger.warning("Water-molecule check failed: %s", exc, exc_info=True)
            return False

    def _evaluate_processability(self, info: "StructureInfo"):
        """Derive ``can_process`` / ``skip_reason`` from the flags on ``info``.

        Every failing criterion contributes one reason; the reasons are
        joined with "; " in the fixed order below. ``skip_reason`` is left
        untouched when nothing fails.
        """
        # (criterion failed?, reason reported to the user)
        checks = [
            (not info.is_valid, "无法解析CIF文件"),
            (not info.contains_target_cation, f"不含{self.target_cation}"),
            (info.anion_mode == "none", "不含目标阴离子"),
            (info.is_binary_compound, "二元化合物"),
            (info.has_radioactive_elements, "含放射性元素"),
            (info.cation_has_partial_occupancy, f"{self.target_cation}存在共占位"),
            (info.anion_has_partial_occupancy, "阴离子存在共占位"),
            (info.has_water_molecule, "含水分子"),
        ]
        skip_reasons = [reason for failed, reason in checks if failed]

        info.can_process = not skip_reasons
        if skip_reasons:
            info.skip_reason = "; ".join(skip_reasons)
|
||||
Reference in New Issue
Block a user