From 72cf0a79e12a46fd6eacaf567f00c9a9e97fc463 Mon Sep 17 00:00:00 2001 From: koko <1429659362@qq.com> Date: Sun, 14 Dec 2025 16:52:14 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=89=A9=E8=83=9E=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/analysis/report_generator.py | 24 +++ src/analysis/structure_inspector.py | 275 +++++++++++++++++++++++----- 2 files changed, 253 insertions(+), 46 deletions(-) diff --git a/src/analysis/report_generator.py b/src/analysis/report_generator.py index 398b067..dd29ed1 100644 --- a/src/analysis/report_generator.py +++ b/src/analysis/report_generator.py @@ -97,6 +97,30 @@ class ReportGenerator: print("\n" + "=" * 70) + # 扩胞分析(新增) + print("\n" + "-" * 70) + print("【5. 扩胞需求分析】") + print("-" * 70) + + exp = report.expansion_stats + total_processable = report.directly_processable + report.needs_preprocessing + + if total_processable > 0: + print(f" 无需扩胞: {exp['no_expansion_needed']:6d} " + f"({exp['no_expansion_needed'] / total_processable:.1%})") + print(f" 扩胞因子=2: {exp['expansion_factor_2']:6d}") + print(f" 扩胞因子=3: {exp['expansion_factor_3']:6d}") + print(f" 扩胞因子=4~8: {exp['expansion_factor_4_8']:6d}") + print(f" 扩胞因子>8: {exp['expansion_factor_large']:6d}") + print(f" 无法扩胞(因子过大): {exp['cannot_expand']:6d}") + + # 详细分布 + if detailed and report.expansion_factor_distribution: + print("\n 扩胞因子分布:") + for factor in sorted(report.expansion_factor_distribution.keys()): + count = report.expansion_factor_distribution[factor] + bar = "█" * min(count, 30) + print(f" {factor:3d}x: {count:5d} {bar}") @staticmethod def export_to_csv(report: DatabaseReport, output_path: str): """导出详细结果到CSV""" diff --git a/src/analysis/structure_inspector.py b/src/analysis/structure_inspector.py index 2099a22..76535fa 100644 --- a/src/analysis/structure_inspector.py +++ b/src/analysis/structure_inspector.py @@ -1,10 +1,39 @@ """ -结构检查器:对单个CIF文件进行深度分析 +结构检查器:对单个CIF文件进行深度分析(含扩胞需求判断) """ from dataclasses import dataclass, field from typing import Set, Dict, List, Optional, Tuple from pymatgen.core import Structure from pymatgen.core.periodic_table import Element, Specie +from collections import defaultdict +from fractions import Fraction +from functools import reduce +import math +import re +import os + + +@dataclass +class OccupancyInfo: + """共占位信息""" + occupation: float # 占据率 + atom_serials: List[int] = field(default_factory=list) # 原子序号 + elements: List[str] = field(default_factory=list) # 涉及的元素 + numerator: int = 0 # 分子 + denominator: int = 1 # 分母 + involves_target_cation: bool = False # 是否涉及目标阳离子 + involves_anion: bool = False # 是否涉及阴离子 + + +@dataclass +class ExpansionInfo: + """扩胞信息""" + needs_expansion: bool = False # 是否需要扩胞 + expansion_factor: int = 1 # 扩胞因子(最小公倍数) + occupancy_details: List[OccupancyInfo] = field(default_factory=list) # 共占位详情 + problematic_sites: int = 0 # 问题位点数 + can_expand: bool = True # 是否可以扩胞处理 + skip_reason: str = "" # 无法扩胞的原因 @dataclass @@ -20,29 +49,34 @@ class StructureInfo: # 元素组成 elements: Set[str] = field(default_factory=set) num_sites: int = 0 + formula: str = "" # 阳离子/阴离子信息 contains_target_cation: bool = False - anion_types: Set[str] = field(default_factory=set) # 找到的目标阴离子 + anion_types: Set[str] = field(default_factory=set) anion_mode: str = "" # "single", "mixed", "none" # 数据质量标记 has_oxidation_states: bool = False has_partial_occupancy: bool = False # 是否有共占位 - cation_has_partial_occupancy: bool = False # 目标阳离子是否共占位 - anion_has_partial_occupancy: bool = False # 阴离子是否共占位 has_water_molecule: bool = False has_radioactive_elements: bool = False is_binary_compound: bool = False + # 共占位详细分析(新增) + cation_has_partial_occupancy: bool = False # 目标阳离子共占位 + anion_has_partial_occupancy: bool = False # 阴离子共占位 + other_has_partial_occupancy: bool = False # 其他元素共占位(需扩胞) + expansion_info: ExpansionInfo = field(default_factory=ExpansionInfo) + # 可处理性 - needs_expansion: bool = False # 需要扩胞 - can_process: bool = False # 可以直接处理 - skip_reason: str = "" # 跳过原因 + needs_expansion: bool = False + can_process: bool = False + skip_reason: str = "" class StructureInspector: - """结构检查器""" + """结构检查器(含扩胞分析)""" # 预定义的阴离子集合 VALID_ANIONS = {'O', 'S', 'Cl', 'Br'} @@ -53,16 +87,38 @@ class StructureInspector: 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr' } - def __init__(self, target_cation: str = "Li", target_anions: Set[str] = None): + # 扩胞精度模式 + PRECISION_LIMITS = { + 'high': None, # 精确分数 + 'normal': 100, # 分母≤100 + 'low': 10, # 分母≤10 + 'very_low': 5 # 分母≤5 + } + + def __init__( + self, + target_cation: str = "Li", + target_anions: Set[str] = None, + expansion_precision: str = "low" + ): """ 初始化检查器 Args: target_cation: 目标阳离子 (如 "Li", "Na") target_anions: 目标阴离子集合 (如 {"O", "S"}) + expansion_precision: 扩胞计算精度 ('high', 'normal', 'low', 'very_low') """ self.target_cation = target_cation self.target_anions = target_anions or self.VALID_ANIONS + self.expansion_precision = expansion_precision + + # 目标阳离子的各种可能表示形式 + self.target_cation_variants = { + target_cation, + f"{target_cation}+", + f"{target_cation}1+", + } def inspect(self, file_path: str) -> StructureInfo: """ @@ -74,7 +130,6 @@ class StructureInspector: Returns: StructureInfo: 分析结果 """ - import os info = StructureInfo( file_path=file_path, file_name=os.path.basename(file_path) @@ -91,9 +146,10 @@ class StructureInspector: # 基础信息 info.elements = {str(el) for el in structure.composition.elements} info.num_sites = structure.num_sites + info.formula = structure.composition.reduced_formula # 检查是否为二元化合物 - info.is_binary_compound = len(structure.types_of_specie) == 2 + info.is_binary_compound = len(structure.composition.elements) == 2 # 检查是否含有目标阳离子 info.contains_target_cation = self.target_cation in info.elements @@ -110,8 +166,8 @@ class StructureInspector: # 检查氧化态 info.has_oxidation_states = self._check_oxidation_states(structure) - # 检查共占位 - self._check_partial_occupancy(structure, info) + # 检查共占位(核心分析) + self._analyze_partial_occupancy(structure, info) # 检查水分子 info.has_water_molecule = self._check_water_molecule(structure) @@ -121,9 +177,6 @@ class StructureInspector: info.elements.intersection(self.RADIOACTIVE_ELEMENTS) ) - # 判断是否需要扩胞 - info.needs_expansion = info.has_partial_occupancy and not info.cation_has_partial_occupancy - # 判断可处理性 self._evaluate_processability(info) @@ -140,48 +193,172 @@ class StructureInspector: except: return False - def _check_partial_occupancy(self, structure: Structure, info: StructureInfo): - """检查共占位情况""" - try: - for site in structure.sites: - if len(site.species) > 1: + def _get_element_from_species_string(self, species_str: str) -> str: + """从物种字符串提取纯元素符号""" + match = re.match(r'([A-Z][a-z]?)', species_str) + return match.group(1) if match else "" + + def _get_occupancy_from_species_string(self, species_str: str, exclude_elements: Set[str]) -> Optional[float]: + """ + 从物种字符串获取非目标元素的占据率 + 格式如: "Li+:0.689, Sc3+:0.311" + """ + if ':' not in species_str: + return None + + parts = [p.strip() for p in species_str.split(',')] + for part in parts: + if ':' in part: + element_part, occu_part = part.split(':') + element = self._get_element_from_species_string(element_part.strip()) + if element and element not in exclude_elements: + try: + return float(occu_part.strip()) + except ValueError: + continue + return None + + def _analyze_partial_occupancy(self, structure: Structure, info: StructureInfo): + """ + 分析共占位情况(核心逻辑) + + 关键规则: + - 目标阳离子(Li)的共占位 → 不可处理 + - 阴离子的共占位 → 需要扩胞,但通常不处理 + - 其他阳离子的共占位 → 需要扩胞处理 + """ + occupancy_dict = defaultdict(list) # {occupation: [site_indices]} + occupancy_elements = {} # {occupation: [elements]} + + for i, site in enumerate(structure.sites): + site_species = site.species + species_string = str(site.species) + + # 检查是否有多个物种占据同一位点 + if len(site_species) > 1: + info.has_partial_occupancy = True + + # 提取各元素符号 + elements_at_site = [] + for sp in site_species.keys(): + elem = sp.symbol if hasattr(sp, 'symbol') else str(sp) + elem = self._get_element_from_species_string(elem) + if elem: + elements_at_site.append(elem) + + # 判断是否涉及目标阳离子 + if self.target_cation in elements_at_site: + info.cation_has_partial_occupancy = True + + # 判断是否涉及阴离子 + if any(elem in self.target_anions for elem in elements_at_site): + info.anion_has_partial_occupancy = True + + # 判断是否涉及其他元素(需要扩胞处理的情况) + other_elements = [e for e in elements_at_site + if e != self.target_cation and e not in self.target_anions] + if other_elements: + info.other_has_partial_occupancy = True + + # 获取占据率(取非目标阳离子的占据率) + occu = self._get_occupancy_from_species_string( + species_string, + self.target_cation_variants + ) + if occu is not None and occu != 1.0: + occupancy_dict[occu].append(i) + occupancy_elements[occu] = elements_at_site + + # 检查单一物种的部分占据 + for specie, occupancy in site_species.items(): + if occupancy < 1.0: info.has_partial_occupancy = True + elem = specie.symbol if hasattr(specie, 'symbol') else str(specie) + elem = self._get_element_from_species_string(elem) - # 检查是否涉及目标阳离子 - species_symbols = [str(sp.symbol) if hasattr(sp, 'symbol') else str(sp) - for sp in site.species.keys()] - - if self.target_cation in species_symbols: + if elem == self.target_cation: info.cation_has_partial_occupancy = True - - # 检查是否涉及阴离子 - if any(sym in self.target_anions for sym in species_symbols): + elif elem in self.target_anions: info.anion_has_partial_occupancy = True + else: + info.other_has_partial_occupancy = True + occupancy_dict[occupancy].append(i) + occupancy_elements[occupancy] = [elem] - # 检查单一物种的部分占据 - for specie, occupancy in site.species.items(): - if occupancy < 1.0: - info.has_partial_occupancy = True - symbol = str(specie.symbol) if hasattr(specie, 'symbol') else str(specie) + # 计算扩胞信息 + self._calculate_expansion_info(info, occupancy_dict, occupancy_elements) - if symbol == self.target_cation: - info.cation_has_partial_occupancy = True - if symbol in self.target_anions: - info.anion_has_partial_occupancy = True - except Exception as e: - pass + def _calculate_expansion_info( + self, + info: StructureInfo, + occupancy_dict: Dict[float, List[int]], + occupancy_elements: Dict[float, List[str]] + ): + """计算扩胞相关信息""" + expansion_info = ExpansionInfo() + + if not occupancy_dict: + info.expansion_info = expansion_info + return + + # 需要扩胞(有非目标阳离子的共占位) + expansion_info.needs_expansion = True + expansion_info.problematic_sites = sum(len(v) for v in occupancy_dict.values()) + + # 转换为OccupancyInfo列表 + occupancy_list = [] + for occu, serials in occupancy_dict.items(): + elements = occupancy_elements.get(occu, []) + + # 根据精度计算分数 + limit = self.PRECISION_LIMITS.get(self.expansion_precision) + if limit: + fraction = Fraction(occu).limit_denominator(limit) + else: + fraction = Fraction(occu).limit_denominator() + + occ_info = OccupancyInfo( + occupation=occu, + atom_serials=[s + 1 for s in serials], # 转为1-based + elements=elements, + numerator=fraction.numerator, + denominator=fraction.denominator, + involves_target_cation=self.target_cation in elements, + involves_anion=any(e in self.target_anions for e in elements) + ) + occupancy_list.append(occ_info) + + expansion_info.occupancy_details = occupancy_list + + # 计算最小公倍数(扩胞因子) + denominators = [occ.denominator for occ in occupancy_list] + if denominators: + lcm = reduce(lambda a, b: a * b // math.gcd(a, b), denominators, 1) + expansion_info.expansion_factor = lcm + + # 判断是否可以扩胞(因子过大则不可处理) + if lcm > 64: # 扩胞超过64倍通常不可行 + expansion_info.can_expand = False + expansion_info.skip_reason = f"扩胞因子过大({lcm})" + + info.expansion_info = expansion_info + info.needs_expansion = expansion_info.needs_expansion and expansion_info.can_expand def _check_water_molecule(self, structure: Structure) -> bool: """检查是否含有水分子""" try: - oxygen_sites = [site for site in structure.sites - if 'O' in str(site.species)] - hydrogen_sites = [site for site in structure.sites - if 'H' in str(site.species)] + oxygen_sites = [] + hydrogen_sites = [] + + for site in structure.sites: + species_str = str(site.species) + if 'O' in species_str: + oxygen_sites.append(site) + if 'H' in species_str: + hydrogen_sites.append(site) for o_site in oxygen_sites: - nearby_h = [h for h in hydrogen_sites - if o_site.distance(h) < 1.2] + nearby_h = [h for h in hydrogen_sites if o_site.distance(h) < 1.2] if len(nearby_h) >= 2: return True return False @@ -207,15 +384,21 @@ class StructureInspector: if info.has_radioactive_elements: skip_reasons.append("含放射性元素") + # 关键:目标阳离子共占位是不可处理的 if info.cation_has_partial_occupancy: skip_reasons.append(f"{self.target_cation}存在共占位") + # 阴离子共占位通常也不处理 if info.anion_has_partial_occupancy: skip_reasons.append("阴离子存在共占位") if info.has_water_molecule: skip_reasons.append("含水分子") + # 扩胞因子过大 + if info.expansion_info.needs_expansion and not info.expansion_info.can_expand: + skip_reasons.append(info.expansion_info.skip_reason) + if skip_reasons: info.can_process = False info.skip_reason = "; ".join(skip_reasons)