增加扩胞逻辑

This commit is contained in:
2025-12-14 16:52:14 +08:00
parent f27fd3e3ce
commit 72cf0a79e1
2 changed files with 253 additions and 46 deletions

View File

@@ -97,6 +97,30 @@ class ReportGenerator:
print("\n" + "=" * 70) print("\n" + "=" * 70)
# 扩胞分析(新增)
print("\n" + "-" * 70)
print("【5. 扩胞需求分析】")
print("-" * 70)
exp = report.expansion_stats
total_processable = report.directly_processable + report.needs_preprocessing
if total_processable > 0:
print(f" 无需扩胞: {exp['no_expansion_needed']:6d} "
f"({exp['no_expansion_needed'] / total_processable:.1%})")
print(f" 扩胞因子=2: {exp['expansion_factor_2']:6d}")
print(f" 扩胞因子=3: {exp['expansion_factor_3']:6d}")
print(f" 扩胞因子=4~8: {exp['expansion_factor_4_8']:6d}")
print(f" 扩胞因子>8: {exp['expansion_factor_large']:6d}")
print(f" 无法扩胞(因子过大): {exp['cannot_expand']:6d}")
# Detailed distribution: one histogram row per expansion factor, smallest first.
if detailed and report.expansion_factor_distribution:
    print("\n 扩胞因子分布:")
    for factor, count in sorted(report.expansion_factor_distribution.items()):
        # The bar glyph had been lost, leaving `"" * n`, which is always an
        # empty string.  The bar is capped at 30 cells so that large counts
        # still fit on one output line.
        bar = "█" * min(count, 30)
        print(f" {factor:3d}x: {count:5d} {bar}")
@staticmethod @staticmethod
def export_to_csv(report: DatabaseReport, output_path: str): def export_to_csv(report: DatabaseReport, output_path: str):
"""导出详细结果到CSV""" """导出详细结果到CSV"""

View File

@@ -1,10 +1,39 @@
""" """
结构检查器对单个CIF文件进行深度分析 结构检查器对单个CIF文件进行深度分析(含扩胞需求判断)
""" """
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Set, Dict, List, Optional, Tuple from typing import Set, Dict, List, Optional, Tuple
from pymatgen.core import Structure from pymatgen.core import Structure
from pymatgen.core.periodic_table import Element, Specie from pymatgen.core.periodic_table import Element, Specie
from collections import defaultdict
from fractions import Fraction
from functools import reduce
import math
import re
import os
@dataclass
class OccupancyInfo:
    """Record for one occupancy value shared by a group of sites.

    ``numerator`` / ``denominator`` hold a rational form of ``occupation``;
    they default to 0/1 until the creator of the record fills them in.
    """

    # Occupancy value of the sites described by this record.
    occupation: float
    # Serial numbers of the atoms (sites) that have this occupancy.
    atom_serials: List[int] = field(default_factory=list)
    # Element symbols involved at those sites.
    elements: List[str] = field(default_factory=list)
    # Numerator of the rational form of the occupancy.
    numerator: int = 0
    # Denominator of the rational form of the occupancy.
    denominator: int = 1
    # True when the target cation is among the involved elements.
    involves_target_cation: bool = False
    # True when an anion is among the involved elements.
    involves_anion: bool = False
@dataclass
class ExpansionInfo:
    """Supercell-expansion summary for a single structure.

    A default-constructed instance means "no expansion needed":
    factor 1, no occupancy details, and expansion considered possible.
    """

    # Whether the structure needs a supercell expansion.
    needs_expansion: bool = False
    # Expansion factor (least common multiple of the occupancy denominators).
    expansion_factor: int = 1
    # Per-occupancy details.  The element type is quoted so the annotation
    # does not require OccupancyInfo to be bound when this class is created.
    occupancy_details: List["OccupancyInfo"] = field(default_factory=list)
    # Number of sites that have a problematic (partial) occupancy.
    problematic_sites: int = 0
    # Whether the expansion can actually be carried out.
    can_expand: bool = True
    # Why the expansion cannot be carried out; empty when it can.
    skip_reason: str = ""
@dataclass @dataclass
@@ -20,29 +49,34 @@ class StructureInfo:
# 元素组成 # 元素组成
elements: Set[str] = field(default_factory=set) elements: Set[str] = field(default_factory=set)
num_sites: int = 0 num_sites: int = 0
formula: str = ""
# 阳离子/阴离子信息 # 阳离子/阴离子信息
contains_target_cation: bool = False contains_target_cation: bool = False
anion_types: Set[str] = field(default_factory=set) # 找到的目标阴离子 anion_types: Set[str] = field(default_factory=set)
anion_mode: str = "" # "single", "mixed", "none" anion_mode: str = "" # "single", "mixed", "none"
# 数据质量标记 # 数据质量标记
has_oxidation_states: bool = False has_oxidation_states: bool = False
has_partial_occupancy: bool = False # 是否有共占位 has_partial_occupancy: bool = False # 是否有共占位
cation_has_partial_occupancy: bool = False # 目标阳离子是否共占位
anion_has_partial_occupancy: bool = False # 阴离子是否共占位
has_water_molecule: bool = False has_water_molecule: bool = False
has_radioactive_elements: bool = False has_radioactive_elements: bool = False
is_binary_compound: bool = False is_binary_compound: bool = False
# 共占位详细分析(新增)
cation_has_partial_occupancy: bool = False # 目标阳离子共占位
anion_has_partial_occupancy: bool = False # 阴离子共占位
other_has_partial_occupancy: bool = False # 其他元素共占位(需扩胞)
expansion_info: ExpansionInfo = field(default_factory=ExpansionInfo)
# 可处理性 # 可处理性
needs_expansion: bool = False # 需要扩胞 needs_expansion: bool = False
can_process: bool = False # 可以直接处理 can_process: bool = False
skip_reason: str = "" # 跳过原因 skip_reason: str = ""
class StructureInspector: class StructureInspector:
"""结构检查器""" """结构检查器(含扩胞分析)"""
# 预定义的阴离子集合 # 预定义的阴离子集合
VALID_ANIONS = {'O', 'S', 'Cl', 'Br'} VALID_ANIONS = {'O', 'S', 'Cl', 'Br'}
@@ -53,16 +87,38 @@ class StructureInspector:
'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr' 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'
} }
def __init__(self, target_cation: str = "Li", target_anions: Set[str] = None): # 扩胞精度模式
PRECISION_LIMITS = {
'high': None, # 精确分数
'normal': 100, # 分母≤100
'low': 10, # 分母≤10
'very_low': 5 # 分母≤5
}
def __init__(
self,
target_cation: str = "Li",
target_anions: Set[str] = None,
expansion_precision: str = "low"
):
""" """
初始化检查器 初始化检查器
Args: Args:
target_cation: 目标阳离子 (如 "Li", "Na") target_cation: 目标阳离子 (如 "Li", "Na")
target_anions: 目标阴离子集合 (如 {"O", "S"}) target_anions: 目标阴离子集合 (如 {"O", "S"})
expansion_precision: 扩胞计算精度 ('high', 'normal', 'low', 'very_low')
""" """
self.target_cation = target_cation self.target_cation = target_cation
self.target_anions = target_anions or self.VALID_ANIONS self.target_anions = target_anions or self.VALID_ANIONS
self.expansion_precision = expansion_precision
# 目标阳离子的各种可能表示形式
self.target_cation_variants = {
target_cation,
f"{target_cation}+",
f"{target_cation}1+",
}
def inspect(self, file_path: str) -> StructureInfo: def inspect(self, file_path: str) -> StructureInfo:
""" """
@@ -74,7 +130,6 @@ class StructureInspector:
Returns: Returns:
StructureInfo: 分析结果 StructureInfo: 分析结果
""" """
import os
info = StructureInfo( info = StructureInfo(
file_path=file_path, file_path=file_path,
file_name=os.path.basename(file_path) file_name=os.path.basename(file_path)
@@ -91,9 +146,10 @@ class StructureInspector:
# 基础信息 # 基础信息
info.elements = {str(el) for el in structure.composition.elements} info.elements = {str(el) for el in structure.composition.elements}
info.num_sites = structure.num_sites info.num_sites = structure.num_sites
info.formula = structure.composition.reduced_formula
# 检查是否为二元化合物 # 检查是否为二元化合物
info.is_binary_compound = len(structure.types_of_specie) == 2 info.is_binary_compound = len(structure.composition.elements) == 2
# 检查是否含有目标阳离子 # 检查是否含有目标阳离子
info.contains_target_cation = self.target_cation in info.elements info.contains_target_cation = self.target_cation in info.elements
@@ -110,8 +166,8 @@ class StructureInspector:
# 检查氧化态 # 检查氧化态
info.has_oxidation_states = self._check_oxidation_states(structure) info.has_oxidation_states = self._check_oxidation_states(structure)
# 检查共占位 # 检查共占位(核心分析)
self._check_partial_occupancy(structure, info) self._analyze_partial_occupancy(structure, info)
# 检查水分子 # 检查水分子
info.has_water_molecule = self._check_water_molecule(structure) info.has_water_molecule = self._check_water_molecule(structure)
@@ -121,9 +177,6 @@ class StructureInspector:
info.elements.intersection(self.RADIOACTIVE_ELEMENTS) info.elements.intersection(self.RADIOACTIVE_ELEMENTS)
) )
# 判断是否需要扩胞
info.needs_expansion = info.has_partial_occupancy and not info.cation_has_partial_occupancy
# 判断可处理性 # 判断可处理性
self._evaluate_processability(info) self._evaluate_processability(info)
@@ -140,48 +193,172 @@ class StructureInspector:
except: except:
return False return False
def _check_partial_occupancy(self, structure: Structure, info: StructureInfo): def _get_element_from_species_string(self, species_str: str) -> str:
"""检查共占位情况""" """从物种字符串提取纯元素符号"""
match = re.match(r'([A-Z][a-z]?)', species_str)
return match.group(1) if match else ""
def _get_occupancy_from_species_string(self, species_str: str, exclude_elements: Set[str]) -> Optional[float]:
"""
从物种字符串获取非目标元素的占据率
格式如: "Li+:0.689, Sc3+:0.311"
"""
if ':' not in species_str:
return None
parts = [p.strip() for p in species_str.split(',')]
for part in parts:
if ':' in part:
element_part, occu_part = part.split(':')
element = self._get_element_from_species_string(element_part.strip())
if element and element not in exclude_elements:
try: try:
for site in structure.sites: return float(occu_part.strip())
if len(site.species) > 1: except ValueError:
continue
return None
def _analyze_partial_occupancy(self, structure: Structure, info: StructureInfo):
"""
分析共占位情况(核心逻辑)
关键规则:
- 目标阳离子(Li)的共占位 → 不可处理
- 阴离子的共占位 → 需要扩胞,但通常不处理
- 其他阳离子的共占位 → 需要扩胞处理
"""
occupancy_dict = defaultdict(list) # {occupation: [site_indices]}
occupancy_elements = {} # {occupation: [elements]}
for i, site in enumerate(structure.sites):
site_species = site.species
species_string = str(site.species)
# 检查是否有多个物种占据同一位点
if len(site_species) > 1:
info.has_partial_occupancy = True info.has_partial_occupancy = True
# 检查是否涉及目标阳离子 # 提取各元素符号
species_symbols = [str(sp.symbol) if hasattr(sp, 'symbol') else str(sp) elements_at_site = []
for sp in site.species.keys()] for sp in site_species.keys():
elem = sp.symbol if hasattr(sp, 'symbol') else str(sp)
elem = self._get_element_from_species_string(elem)
if elem:
elements_at_site.append(elem)
if self.target_cation in species_symbols: # 判断是否涉及目标阳离子
if self.target_cation in elements_at_site:
info.cation_has_partial_occupancy = True info.cation_has_partial_occupancy = True
# 检查是否涉及阴离子 # 判断是否涉及阴离子
if any(sym in self.target_anions for sym in species_symbols): if any(elem in self.target_anions for elem in elements_at_site):
info.anion_has_partial_occupancy = True info.anion_has_partial_occupancy = True
# 判断是否涉及其他元素(需要扩胞处理的情况)
other_elements = [e for e in elements_at_site
if e != self.target_cation and e not in self.target_anions]
if other_elements:
info.other_has_partial_occupancy = True
# 获取占据率(取非目标阳离子的占据率)
occu = self._get_occupancy_from_species_string(
species_string,
self.target_cation_variants
)
if occu is not None and occu != 1.0:
occupancy_dict[occu].append(i)
occupancy_elements[occu] = elements_at_site
# 检查单一物种的部分占据 # 检查单一物种的部分占据
for specie, occupancy in site.species.items(): for specie, occupancy in site_species.items():
if occupancy < 1.0: if occupancy < 1.0:
info.has_partial_occupancy = True info.has_partial_occupancy = True
symbol = str(specie.symbol) if hasattr(specie, 'symbol') else str(specie) elem = specie.symbol if hasattr(specie, 'symbol') else str(specie)
elem = self._get_element_from_species_string(elem)
if symbol == self.target_cation: if elem == self.target_cation:
info.cation_has_partial_occupancy = True info.cation_has_partial_occupancy = True
if symbol in self.target_anions: elif elem in self.target_anions:
info.anion_has_partial_occupancy = True info.anion_has_partial_occupancy = True
except Exception as e: else:
pass info.other_has_partial_occupancy = True
occupancy_dict[occupancy].append(i)
occupancy_elements[occupancy] = [elem]
# 计算扩胞信息
self._calculate_expansion_info(info, occupancy_dict, occupancy_elements)
def _calculate_expansion_info(
    self,
    info: StructureInfo,
    occupancy_dict: Dict[float, List[int]],
    occupancy_elements: Dict[float, List[str]]
):
    """Derive the supercell-expansion summary and store it on ``info``.

    Each distinct occupancy value is approximated by a fraction; the
    expansion factor is the least common multiple of the denominators.

    Args:
        info: Result object; ``info.expansion_info`` is always assigned.
            ``info.needs_expansion`` is assigned only when
            ``occupancy_dict`` is non-empty.
        occupancy_dict: Occupancy value -> 0-based site indices.
        occupancy_elements: Occupancy value -> element symbols at those
            sites; a missing key is treated as an empty list.

    Returns:
        None. The outcome is written to ``info``.
    """
    expansion_info = ExpansionInfo()

    # No recorded partial occupancies: keep the "no expansion" defaults.
    if not occupancy_dict:
        info.expansion_info = expansion_info
        return

    expansion_info.needs_expansion = True
    expansion_info.problematic_sites = sum(len(v) for v in occupancy_dict.values())

    # The denominator cap is the same for every occupancy, so look it up
    # once.  A precision name missing from PRECISION_LIMITS yields None and
    # therefore takes the same uncapped branch as a None entry.
    limit = self.PRECISION_LIMITS.get(self.expansion_precision)

    occupancy_list = []
    for occu, serials in occupancy_dict.items():
        elements = occupancy_elements.get(occu, [])

        # NOTE(review): with a small cap, an occupancy close to 0 or 1 can
        # round to 0/1 or 1/1 -- confirm this is acceptable downstream.
        if limit:
            fraction = Fraction(occu).limit_denominator(limit)
        else:
            # No cap configured: limit_denominator() uses its own default bound.
            fraction = Fraction(occu).limit_denominator()

        occupancy_list.append(
            OccupancyInfo(
                occupation=occu,
                atom_serials=[s + 1 for s in serials],  # convert to 1-based
                elements=elements,
                numerator=fraction.numerator,
                denominator=fraction.denominator,
                involves_target_cation=self.target_cation in elements,
                involves_anion=any(e in self.target_anions for e in elements),
            )
        )

    expansion_info.occupancy_details = occupancy_list

    # Least common multiple of all denominators.  occupancy_list is
    # non-empty here, so no emptiness guard is needed.
    lcm = reduce(
        lambda a, b: a * b // math.gcd(a, b),
        (occ.denominator for occ in occupancy_list),
        1,
    )
    expansion_info.expansion_factor = lcm

    # A factor above 64 is treated as not feasible to expand.
    if lcm > 64:
        expansion_info.can_expand = False
        expansion_info.skip_reason = f"扩胞因子过大({lcm})"

    info.expansion_info = expansion_info
    info.needs_expansion = expansion_info.needs_expansion and expansion_info.can_expand
def _check_water_molecule(self, structure: Structure) -> bool: def _check_water_molecule(self, structure: Structure) -> bool:
"""检查是否含有水分子""" """检查是否含有水分子"""
try: try:
oxygen_sites = [site for site in structure.sites oxygen_sites = []
if 'O' in str(site.species)] hydrogen_sites = []
hydrogen_sites = [site for site in structure.sites
if 'H' in str(site.species)] for site in structure.sites:
species_str = str(site.species)
if 'O' in species_str:
oxygen_sites.append(site)
if 'H' in species_str:
hydrogen_sites.append(site)
for o_site in oxygen_sites: for o_site in oxygen_sites:
nearby_h = [h for h in hydrogen_sites nearby_h = [h for h in hydrogen_sites if o_site.distance(h) < 1.2]
if o_site.distance(h) < 1.2]
if len(nearby_h) >= 2: if len(nearby_h) >= 2:
return True return True
return False return False
@@ -207,15 +384,21 @@ class StructureInspector:
if info.has_radioactive_elements: if info.has_radioactive_elements:
skip_reasons.append("含放射性元素") skip_reasons.append("含放射性元素")
# 关键:目标阳离子共占位是不可处理的
if info.cation_has_partial_occupancy: if info.cation_has_partial_occupancy:
skip_reasons.append(f"{self.target_cation}存在共占位") skip_reasons.append(f"{self.target_cation}存在共占位")
# 阴离子共占位通常也不处理
if info.anion_has_partial_occupancy: if info.anion_has_partial_occupancy:
skip_reasons.append("阴离子存在共占位") skip_reasons.append("阴离子存在共占位")
if info.has_water_molecule: if info.has_water_molecule:
skip_reasons.append("含水分子") skip_reasons.append("含水分子")
# 扩胞因子过大
if info.expansion_info.needs_expansion and not info.expansion_info.can_expand:
skip_reasons.append(info.expansion_info.skip_reason)
if skip_reasons: if skip_reasons:
info.can_process = False info.can_process = False
info.skip_reason = "; ".join(skip_reasons) info.skip_reason = "; ".join(skip_reasons)