增加扩胞逻辑
This commit is contained in:
@@ -97,6 +97,30 @@ class ReportGenerator:
|
|||||||
|
|
||||||
print("\n" + "=" * 70)
|
print("\n" + "=" * 70)
|
||||||
|
|
||||||
|
# 扩胞分析(新增)
|
||||||
|
print("\n" + "-" * 70)
|
||||||
|
print("【5. 扩胞需求分析】")
|
||||||
|
print("-" * 70)
|
||||||
|
|
||||||
|
exp = report.expansion_stats
|
||||||
|
total_processable = report.directly_processable + report.needs_preprocessing
|
||||||
|
|
||||||
|
if total_processable > 0:
|
||||||
|
print(f" 无需扩胞: {exp['no_expansion_needed']:6d} "
|
||||||
|
f"({exp['no_expansion_needed'] / total_processable:.1%})")
|
||||||
|
print(f" 扩胞因子=2: {exp['expansion_factor_2']:6d}")
|
||||||
|
print(f" 扩胞因子=3: {exp['expansion_factor_3']:6d}")
|
||||||
|
print(f" 扩胞因子=4~8: {exp['expansion_factor_4_8']:6d}")
|
||||||
|
print(f" 扩胞因子>8: {exp['expansion_factor_large']:6d}")
|
||||||
|
print(f" 无法扩胞(因子过大): {exp['cannot_expand']:6d}")
|
||||||
|
|
||||||
|
# 详细分布
|
||||||
|
if detailed and report.expansion_factor_distribution:
|
||||||
|
print("\n 扩胞因子分布:")
|
||||||
|
for factor in sorted(report.expansion_factor_distribution.keys()):
|
||||||
|
count = report.expansion_factor_distribution[factor]
|
||||||
|
bar = "█" * min(count, 30)
|
||||||
|
print(f" {factor:3d}x: {count:5d} {bar}")
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def export_to_csv(report: DatabaseReport, output_path: str):
|
def export_to_csv(report: DatabaseReport, output_path: str):
|
||||||
"""导出详细结果到CSV"""
|
"""导出详细结果到CSV"""
|
||||||
|
|||||||
@@ -1,10 +1,39 @@
|
|||||||
"""
|
"""
|
||||||
结构检查器:对单个CIF文件进行深度分析
|
结构检查器:对单个CIF文件进行深度分析(含扩胞需求判断)
|
||||||
"""
|
"""
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Set, Dict, List, Optional, Tuple
|
from typing import Set, Dict, List, Optional, Tuple
|
||||||
from pymatgen.core import Structure
|
from pymatgen.core import Structure
|
||||||
from pymatgen.core.periodic_table import Element, Specie
|
from pymatgen.core.periodic_table import Element, Specie
|
||||||
|
from collections import defaultdict
|
||||||
|
from fractions import Fraction
|
||||||
|
from functools import reduce
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class OccupancyInfo:
    """
    Description of one group of sites that share the same occupancy value.

    Attributes:
        occupation: Occupancy value of the group.
        atom_serials: Serial numbers of the atoms (sites) in the group.
        elements: Element symbols involved in the group.
        numerator: Numerator of the occupancy expressed as a fraction.
        denominator: Denominator of the occupancy expressed as a fraction.
        involves_target_cation: Whether the target cation is involved.
        involves_anion: Whether an anion is involved.
    """

    occupation: float
    atom_serials: List[int] = field(default_factory=list)
    elements: List[str] = field(default_factory=list)
    numerator: int = 0
    denominator: int = 1
    involves_target_cation: bool = False
    involves_anion: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExpansionInfo:
    """
    Supercell-expansion summary for one structure.

    Attributes:
        needs_expansion: Whether a supercell expansion is required.
        expansion_factor: Expansion factor (least common multiple).
        occupancy_details: Per-occupancy details of the shared sites.
        problematic_sites: Number of problematic sites.
        can_expand: Whether the structure can be handled by expansion.
        skip_reason: Why expansion is not possible (empty when it is).
    """

    needs_expansion: bool = False
    expansion_factor: int = 1
    occupancy_details: List["OccupancyInfo"] = field(default_factory=list)
    problematic_sites: int = 0
    can_expand: bool = True
    skip_reason: str = ""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -20,29 +49,34 @@ class StructureInfo:
|
|||||||
# 元素组成
|
# 元素组成
|
||||||
elements: Set[str] = field(default_factory=set)
|
elements: Set[str] = field(default_factory=set)
|
||||||
num_sites: int = 0
|
num_sites: int = 0
|
||||||
|
formula: str = ""
|
||||||
|
|
||||||
# 阳离子/阴离子信息
|
# 阳离子/阴离子信息
|
||||||
contains_target_cation: bool = False
|
contains_target_cation: bool = False
|
||||||
anion_types: Set[str] = field(default_factory=set) # 找到的目标阴离子
|
anion_types: Set[str] = field(default_factory=set)
|
||||||
anion_mode: str = "" # "single", "mixed", "none"
|
anion_mode: str = "" # "single", "mixed", "none"
|
||||||
|
|
||||||
# 数据质量标记
|
# 数据质量标记
|
||||||
has_oxidation_states: bool = False
|
has_oxidation_states: bool = False
|
||||||
has_partial_occupancy: bool = False # 是否有共占位
|
has_partial_occupancy: bool = False # 是否有共占位
|
||||||
cation_has_partial_occupancy: bool = False # 目标阳离子是否共占位
|
|
||||||
anion_has_partial_occupancy: bool = False # 阴离子是否共占位
|
|
||||||
has_water_molecule: bool = False
|
has_water_molecule: bool = False
|
||||||
has_radioactive_elements: bool = False
|
has_radioactive_elements: bool = False
|
||||||
is_binary_compound: bool = False
|
is_binary_compound: bool = False
|
||||||
|
|
||||||
|
# 共占位详细分析(新增)
|
||||||
|
cation_has_partial_occupancy: bool = False # 目标阳离子共占位
|
||||||
|
anion_has_partial_occupancy: bool = False # 阴离子共占位
|
||||||
|
other_has_partial_occupancy: bool = False # 其他元素共占位(需扩胞)
|
||||||
|
expansion_info: ExpansionInfo = field(default_factory=ExpansionInfo)
|
||||||
|
|
||||||
# 可处理性
|
# 可处理性
|
||||||
needs_expansion: bool = False # 需要扩胞
|
needs_expansion: bool = False
|
||||||
can_process: bool = False # 可以直接处理
|
can_process: bool = False
|
||||||
skip_reason: str = "" # 跳过原因
|
skip_reason: str = ""
|
||||||
|
|
||||||
|
|
||||||
class StructureInspector:
|
class StructureInspector:
|
||||||
"""结构检查器"""
|
"""结构检查器(含扩胞分析)"""
|
||||||
|
|
||||||
# 预定义的阴离子集合
|
# 预定义的阴离子集合
|
||||||
VALID_ANIONS = {'O', 'S', 'Cl', 'Br'}
|
VALID_ANIONS = {'O', 'S', 'Cl', 'Br'}
|
||||||
@@ -53,16 +87,38 @@ class StructureInspector:
|
|||||||
'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'
|
'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, target_cation: str = "Li", target_anions: Set[str] = None):
|
# 扩胞精度模式
|
||||||
|
PRECISION_LIMITS = {
|
||||||
|
'high': None, # 精确分数
|
||||||
|
'normal': 100, # 分母≤100
|
||||||
|
'low': 10, # 分母≤10
|
||||||
|
'very_low': 5 # 分母≤5
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
target_cation: str = "Li",
|
||||||
|
target_anions: Set[str] = None,
|
||||||
|
expansion_precision: str = "low"
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
初始化检查器
|
初始化检查器
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
target_cation: 目标阳离子 (如 "Li", "Na")
|
target_cation: 目标阳离子 (如 "Li", "Na")
|
||||||
target_anions: 目标阴离子集合 (如 {"O", "S"})
|
target_anions: 目标阴离子集合 (如 {"O", "S"})
|
||||||
|
expansion_precision: 扩胞计算精度 ('high', 'normal', 'low', 'very_low')
|
||||||
"""
|
"""
|
||||||
self.target_cation = target_cation
|
self.target_cation = target_cation
|
||||||
self.target_anions = target_anions or self.VALID_ANIONS
|
self.target_anions = target_anions or self.VALID_ANIONS
|
||||||
|
self.expansion_precision = expansion_precision
|
||||||
|
|
||||||
|
# 目标阳离子的各种可能表示形式
|
||||||
|
self.target_cation_variants = {
|
||||||
|
target_cation,
|
||||||
|
f"{target_cation}+",
|
||||||
|
f"{target_cation}1+",
|
||||||
|
}
|
||||||
|
|
||||||
def inspect(self, file_path: str) -> StructureInfo:
|
def inspect(self, file_path: str) -> StructureInfo:
|
||||||
"""
|
"""
|
||||||
@@ -74,7 +130,6 @@ class StructureInspector:
|
|||||||
Returns:
|
Returns:
|
||||||
StructureInfo: 分析结果
|
StructureInfo: 分析结果
|
||||||
"""
|
"""
|
||||||
import os
|
|
||||||
info = StructureInfo(
|
info = StructureInfo(
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
file_name=os.path.basename(file_path)
|
file_name=os.path.basename(file_path)
|
||||||
@@ -91,9 +146,10 @@ class StructureInspector:
|
|||||||
# 基础信息
|
# 基础信息
|
||||||
info.elements = {str(el) for el in structure.composition.elements}
|
info.elements = {str(el) for el in structure.composition.elements}
|
||||||
info.num_sites = structure.num_sites
|
info.num_sites = structure.num_sites
|
||||||
|
info.formula = structure.composition.reduced_formula
|
||||||
|
|
||||||
# 检查是否为二元化合物
|
# 检查是否为二元化合物
|
||||||
info.is_binary_compound = len(structure.types_of_specie) == 2
|
info.is_binary_compound = len(structure.composition.elements) == 2
|
||||||
|
|
||||||
# 检查是否含有目标阳离子
|
# 检查是否含有目标阳离子
|
||||||
info.contains_target_cation = self.target_cation in info.elements
|
info.contains_target_cation = self.target_cation in info.elements
|
||||||
@@ -110,8 +166,8 @@ class StructureInspector:
|
|||||||
# 检查氧化态
|
# 检查氧化态
|
||||||
info.has_oxidation_states = self._check_oxidation_states(structure)
|
info.has_oxidation_states = self._check_oxidation_states(structure)
|
||||||
|
|
||||||
# 检查共占位
|
# 检查共占位(核心分析)
|
||||||
self._check_partial_occupancy(structure, info)
|
self._analyze_partial_occupancy(structure, info)
|
||||||
|
|
||||||
# 检查水分子
|
# 检查水分子
|
||||||
info.has_water_molecule = self._check_water_molecule(structure)
|
info.has_water_molecule = self._check_water_molecule(structure)
|
||||||
@@ -121,9 +177,6 @@ class StructureInspector:
|
|||||||
info.elements.intersection(self.RADIOACTIVE_ELEMENTS)
|
info.elements.intersection(self.RADIOACTIVE_ELEMENTS)
|
||||||
)
|
)
|
||||||
|
|
||||||
# 判断是否需要扩胞
|
|
||||||
info.needs_expansion = info.has_partial_occupancy and not info.cation_has_partial_occupancy
|
|
||||||
|
|
||||||
# 判断可处理性
|
# 判断可处理性
|
||||||
self._evaluate_processability(info)
|
self._evaluate_processability(info)
|
||||||
|
|
||||||
@@ -140,48 +193,172 @@ class StructureInspector:
|
|||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _check_partial_occupancy(self, structure: Structure, info: StructureInfo):
|
def _get_element_from_species_string(self, species_str: str) -> str:
|
||||||
"""检查共占位情况"""
|
"""从物种字符串提取纯元素符号"""
|
||||||
try:
|
match = re.match(r'([A-Z][a-z]?)', species_str)
|
||||||
for site in structure.sites:
|
return match.group(1) if match else ""
|
||||||
if len(site.species) > 1:
|
|
||||||
|
def _get_occupancy_from_species_string(self, species_str: str, exclude_elements: Set[str]) -> Optional[float]:
|
||||||
|
"""
|
||||||
|
从物种字符串获取非目标元素的占据率
|
||||||
|
格式如: "Li+:0.689, Sc3+:0.311"
|
||||||
|
"""
|
||||||
|
if ':' not in species_str:
|
||||||
|
return None
|
||||||
|
|
||||||
|
parts = [p.strip() for p in species_str.split(',')]
|
||||||
|
for part in parts:
|
||||||
|
if ':' in part:
|
||||||
|
element_part, occu_part = part.split(':')
|
||||||
|
element = self._get_element_from_species_string(element_part.strip())
|
||||||
|
if element and element not in exclude_elements:
|
||||||
|
try:
|
||||||
|
return float(occu_part.strip())
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _analyze_partial_occupancy(self, structure: Structure, info: StructureInfo):
    """
    Analyse shared / partial site occupancy and collect expansion input.

    Flags set on ``info``:
    - ``has_partial_occupancy``: a site holds several species, or a species
      has occupancy below 1.
    - ``cation_has_partial_occupancy``: the target cation is involved.
    - ``anion_has_partial_occupancy``: a target anion is involved.
    - ``other_has_partial_occupancy``: any other element is involved.

    Sites are grouped by occupancy value and the groups are handed to
    ``_calculate_expansion_info``.

    Occupancies are read directly from ``site.species`` instead of being
    parsed back out of ``str(site.species)``: that string has no
    ``"<species>:<occupancy>"`` entries, so parsing it never produced a
    value and shared sites were silently left unrecorded.

    Args:
        structure: Structure whose sites are inspected.
        info: Result object updated in place.
    """
    occupancy_dict = defaultdict(list)  # {occupancy: [0-based site indices]}
    occupancy_elements = {}  # {occupancy: [element symbols]}

    def record(occupancy, site_index, elements):
        """Register a site under an occupancy without duplicating entries."""
        indices = occupancy_dict[occupancy]
        # Sites are visited in order, so a repeat can only be the last entry.
        if not indices or indices[-1] != site_index:
            indices.append(site_index)
        # Merge rather than overwrite, so elements recorded by earlier sites
        # with the same occupancy are kept.
        known = occupancy_elements.setdefault(occupancy, [])
        for element in elements:
            if element not in known:
                known.append(element)

    for i, site in enumerate(structure.sites):
        # (element symbol, occupancy) for every species on this site.
        species_at_site = []
        for specie, occupancy in site.species.items():
            label = specie.symbol if hasattr(specie, 'symbol') else str(specie)
            species_at_site.append(
                (self._get_element_from_species_string(label), occupancy)
            )

        # Several species sharing one site.
        if len(species_at_site) > 1:
            info.has_partial_occupancy = True
            elements_at_site = [elem for elem, _ in species_at_site if elem]

            if self.target_cation in elements_at_site:
                info.cation_has_partial_occupancy = True

            if any(elem in self.target_anions for elem in elements_at_site):
                info.anion_has_partial_occupancy = True

            if any(elem != self.target_cation and elem not in self.target_anions
                   for elem in elements_at_site):
                info.other_has_partial_occupancy = True

            # Occupancy of the first species that is not the target cation.
            shared_occupancy = next(
                (occ for elem, occ in species_at_site
                 if elem and elem != self.target_cation),
                None,
            )
            if shared_occupancy is not None and shared_occupancy != 1.0:
                record(shared_occupancy, i, elements_at_site)

        # Any individual species that is only partially occupied.
        for elem, occupancy in species_at_site:
            if occupancy < 1.0:
                info.has_partial_occupancy = True
                if elem == self.target_cation:
                    info.cation_has_partial_occupancy = True
                elif elem in self.target_anions:
                    info.anion_has_partial_occupancy = True
                else:
                    info.other_has_partial_occupancy = True
                    record(occupancy, i, [elem])

    # Derive the expansion information from the collected groups.
    self._calculate_expansion_info(info, occupancy_dict, occupancy_elements)
|
||||||
if occupancy < 1.0:
|
|
||||||
info.has_partial_occupancy = True
|
|
||||||
symbol = str(specie.symbol) if hasattr(specie, 'symbol') else str(specie)
|
|
||||||
|
|
||||||
if symbol == self.target_cation:
|
def _calculate_expansion_info(
    self,
    info: StructureInfo,
    occupancy_dict: Dict[float, List[int]],
    occupancy_elements: Dict[float, List[str]]
):
    """
    Build the ``ExpansionInfo`` for a structure and store it on ``info``.

    Each occupancy is approximated by a fraction whose denominator is
    bounded by the limit configured for ``self.expansion_precision``; the
    expansion factor is the least common multiple of those denominators.
    A factor above 64 marks the structure as not expandable.

    Args:
        info: Result object; ``expansion_info`` is always set, and
            ``needs_expansion`` is set when ``occupancy_dict`` is non-empty.
        occupancy_dict: Site indices (0-based) grouped by occupancy.
        occupancy_elements: Element symbols recorded per occupancy.
    """
    summary = ExpansionInfo()

    # Nothing recorded: store the default summary and leave the rest alone.
    if not occupancy_dict:
        info.expansion_info = summary
        return

    max_denominator = self.PRECISION_LIMITS.get(self.expansion_precision)

    details = []
    site_total = 0
    lcm = 1
    for occupancy, site_indices in occupancy_dict.items():
        exact = Fraction(occupancy)
        # No configured limit means the library's default bound is used.
        if max_denominator:
            ratio = exact.limit_denominator(max_denominator)
        else:
            ratio = exact.limit_denominator()

        involved = occupancy_elements.get(occupancy, [])
        details.append(
            OccupancyInfo(
                occupation=occupancy,
                atom_serials=[index + 1 for index in site_indices],  # 1-based
                elements=involved,
                numerator=ratio.numerator,
                denominator=ratio.denominator,
                involves_target_cation=self.target_cation in involved,
                involves_anion=any(e in self.target_anions for e in involved)
            )
        )
        site_total += len(site_indices)
        # Running least common multiple of the denominators.
        lcm = lcm * ratio.denominator // math.gcd(lcm, ratio.denominator)

    summary.needs_expansion = True
    summary.problematic_sites = site_total
    summary.occupancy_details = details
    summary.expansion_factor = lcm

    # Factors beyond 64 are treated as not feasible.
    if lcm > 64:
        summary.can_expand = False
        summary.skip_reason = f"扩胞因子过大({lcm})"

    info.expansion_info = summary
    info.needs_expansion = summary.needs_expansion and summary.can_expand
|
||||||
|
|
||||||
def _check_water_molecule(self, structure: Structure) -> bool:
|
def _check_water_molecule(self, structure: Structure) -> bool:
|
||||||
"""检查是否含有水分子"""
|
"""检查是否含有水分子"""
|
||||||
try:
|
try:
|
||||||
oxygen_sites = [site for site in structure.sites
|
oxygen_sites = []
|
||||||
if 'O' in str(site.species)]
|
hydrogen_sites = []
|
||||||
hydrogen_sites = [site for site in structure.sites
|
|
||||||
if 'H' in str(site.species)]
|
for site in structure.sites:
|
||||||
|
species_str = str(site.species)
|
||||||
|
if 'O' in species_str:
|
||||||
|
oxygen_sites.append(site)
|
||||||
|
if 'H' in species_str:
|
||||||
|
hydrogen_sites.append(site)
|
||||||
|
|
||||||
for o_site in oxygen_sites:
|
for o_site in oxygen_sites:
|
||||||
nearby_h = [h for h in hydrogen_sites
|
nearby_h = [h for h in hydrogen_sites if o_site.distance(h) < 1.2]
|
||||||
if o_site.distance(h) < 1.2]
|
|
||||||
if len(nearby_h) >= 2:
|
if len(nearby_h) >= 2:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@@ -207,15 +384,21 @@ class StructureInspector:
|
|||||||
if info.has_radioactive_elements:
|
if info.has_radioactive_elements:
|
||||||
skip_reasons.append("含放射性元素")
|
skip_reasons.append("含放射性元素")
|
||||||
|
|
||||||
|
# 关键:目标阳离子共占位是不可处理的
|
||||||
if info.cation_has_partial_occupancy:
|
if info.cation_has_partial_occupancy:
|
||||||
skip_reasons.append(f"{self.target_cation}存在共占位")
|
skip_reasons.append(f"{self.target_cation}存在共占位")
|
||||||
|
|
||||||
|
# 阴离子共占位通常也不处理
|
||||||
if info.anion_has_partial_occupancy:
|
if info.anion_has_partial_occupancy:
|
||||||
skip_reasons.append("阴离子存在共占位")
|
skip_reasons.append("阴离子存在共占位")
|
||||||
|
|
||||||
if info.has_water_molecule:
|
if info.has_water_molecule:
|
||||||
skip_reasons.append("含水分子")
|
skip_reasons.append("含水分子")
|
||||||
|
|
||||||
|
# 扩胞因子过大
|
||||||
|
if info.expansion_info.needs_expansion and not info.expansion_info.can_expand:
|
||||||
|
skip_reasons.append(info.expansion_info.skip_reason)
|
||||||
|
|
||||||
if skip_reasons:
|
if skip_reasons:
|
||||||
info.can_process = False
|
info.can_process = False
|
||||||
info.skip_reason = "; ".join(skip_reasons)
|
info.skip_reason = "; ".join(skip_reasons)
|
||||||
|
|||||||
Reference in New Issue
Block a user