"""Assign oxidation states to CIF structures and write them back to disk."""

import os
import re
from typing import Optional

import yaml
from pymatgen.core.periodic_table import Element, Specie
from pymatgen.core.structure import Structure

from expansion import expansion
from expansion import process_cif_file


def generate_valence_yaml(output_yaml_path: str) -> None:
    """
    Generate a YAML file mapping element symbols to one common oxidation state.

    Elements with no common oxidation states are left out of the mapping.

    Parameters:
        output_yaml_path (str): Path to save the generated YAML file.
    """
    valences = {}
    for element in Element:
        common_oxidation_states = element.common_oxidation_states
        if not common_oxidation_states:
            continue
        # Metals/metalloids: take the maximum oxidation state.
        # Non-metals: take the minimum (most negative) oxidation state.
        if element.is_metalloid or element.is_metal:
            valences[element.symbol] = max(common_oxidation_states)
        else:
            valences[element.symbol] = min(common_oxidation_states)

    with open(output_yaml_path, "w") as file:
        yaml.dump(valences, file, default_flow_style=False)


def _has_oxidation_states(structure) -> bool:
    """Return True when every species on every site is a ``Specie`` instance."""
    return all(
        isinstance(sp, Specie)
        for site in structure.sites
        for sp in site.species.keys()
    )


def _load_valences(valence_yaml_path: str) -> dict:
    """Read the element -> oxidation state mapping from the YAML file."""
    with open(valence_yaml_path, "r") as file:
        return yaml.safe_load(file)


def apply_oxidation_states_to_cif(
    input_cif_path: str,
    valence_yaml_path: str,
    output_cif_path: str,
    calculate_type: str = 'low',
    output_folder: Optional[str] = None,
) -> None:
    """
    Write a copy of a CIF file whose species carry oxidation states.

    Oxidation states are taken from the YAML mapping, unless every site of the
    structure already holds ``Specie`` objects, in which case the structure is
    written unchanged.

    When ``process_cif_file`` returns a truthy value for the structure, the
    CIF is passed to ``expansion`` and every returned structure is written to
    ``output_folder`` under the name returned alongside it. Otherwise the
    single loaded structure is written to ``output_cif_path``.

    Parameters:
        input_cif_path (str): CIF file to read.
        valence_yaml_path (str): YAML file mapping element symbols to
            oxidation states. Only read when a structure lacks them.
        output_cif_path (str): Destination for the non-expanded case.
        calculate_type (str): Forwarded unchanged to ``expansion``.
        output_folder (str | None): Destination directory for expanded
            structures. Defaults to the directory of ``output_cif_path``.
    """
    structure = Structure.from_file(input_cif_path)
    oxi = process_cif_file(structure)

    # Parsed lazily and at most once, even when many structures need it.
    valences = None

    if oxi:
        # NOTE(review): the expansion directory is hard-coded here; confirm
        # whether it should come from the caller instead.
        structures, names = expansion(
            input_cif_path,
            '../data/input_oxidation',
            3,
            calculate_type=calculate_type,
            keep_module='random',
        )
        # os.path.join(None, name) raises TypeError, so fall back to the
        # directory of the single-file output path.
        if output_folder is not None:
            target_dir = output_folder
        else:
            target_dir = os.path.dirname(output_cif_path)

        for expanded, name in zip(structures, names):
            if not _has_oxidation_states(expanded):
                if valences is None:
                    valences = _load_valences(valence_yaml_path)
                expanded.add_oxidation_state_by_element(valences)
            expanded.to(filename=os.path.join(target_dir, name))
    else:
        if not _has_oxidation_states(structure):
            valences = _load_valences(valence_yaml_path)
            structure.add_oxidation_state_by_element(valences)
        structure.to(filename=output_cif_path)


def data_add_state(
    input_folder: str,
    valence_yaml_path: str,
    output_folder: str,
    output_occupatition_folder: Optional[str] = None,
    calculate_type: str = 'normal',
) -> None:
    """
    Apply oxidation states to every ``.cif`` file found in ``input_folder``.

    Each output file is named after the first run of digits in the input file
    name plus ``.cif``; files without digits in their name are skipped. A
    failure on one file is reported and does not stop the remaining files.

    Parameters:
        input_folder (str): Directory scanned (non-recursively) for CIF files.
        valence_yaml_path (str): YAML file of oxidation states per element.
        output_folder (str): Directory for the results; created if missing.
        output_occupatition_folder (str | None): Accepted for interface
            compatibility; not used by this function.
        calculate_type (str): Forwarded to ``apply_oxidation_states_to_cif``.
    """
    if not os.path.exists(input_folder):
        print(f"{input_folder} 文件夹不存在")
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder, exist_ok=True)
        print(f"目录 {output_folder} 已创建")

    for filename in os.listdir(input_folder):
        if not filename.endswith(".cif"):
            continue

        file_path = os.path.join(input_folder, filename)

        # The output name is the first run of digits in the input name.
        match = re.search(r'\d+', filename)
        if not match:
            print(f"文件名 {filename} 中未找到数字部分,跳过处理")
            continue
        new_filename = match.group(0) + ".cif"
        output_cif_path = os.path.join(output_folder, new_filename)

        print(f"正在处理{filename}")
        try:
            apply_oxidation_states_to_cif(
                file_path,
                valence_yaml_path,
                output_cif_path,
                calculate_type=calculate_type,
                output_folder=output_folder,
            )
        except Exception as e:
            # Best-effort batch: report the cause and move on to the next file.
            print(f"{filename}出现问题!{type(e).__name__}: {e}")
        else:
            # Only report completion when the file was actually processed.
            print(f"{filename} 已完成")


if __name__ == "__main__":
    # Example usage:
    # Step 1: Path of the valence YAML file (see generate_valence_yaml).
    valence_yaml = "../tool/valence_states.yaml"

    # Step 2: Process CIF files in the input folder.
    data_add_state("../data/input_pre", valence_yaml, "../data/input", "../data/input_oxidation")

    # Step 3: Process occupation (no call is wired up here).