Compare commits
2 Commits: 71f6ae8928 ... 76105a631d

| Author | SHA1 | Date |
|---|---|---|
| | 76105a631d | |
| | 28c2323ce8 | |
- MSD/data/320K/log.lammps (new file, 205087 lines): diff suppressed because it is too large
- MSD/data/320K/msd_li.dat (new file, 10002 lines): diff suppressed because it is too large
- MSD/data/340K/log.lammps (new file, 205087 lines): diff suppressed because it is too large
- MSD/data/340K/msd_li.dat (new file, 10002 lines): diff suppressed because it is too large
- MSD/data/log.lammps (35963 lines): diff suppressed because it is too large
- MSD/data/msd_li.dat (1545 lines): diff suppressed because it is too large
@@ -9,10 +9,11 @@ if __name__ == '__main__':
     # Call the new function
     # It automatically reads the temperature and volume from log.lammps
     results = calculate_conductivity_from_msd(
-        msd_file_path='data/msd_li.dat',
-        log_file_path='data/log.lammps',
+        msd_file_path='data/700K/msd_li.dat',
+        log_file_path='data/700K/log.lammps',
         ion_name='Li⁺',
         charge=1,
         num_ions=num_li_ions,
-        fit_fraction=0.5  # adjust this value based on the plot
+        fit_start_fraction=0.0,
+        fit_end_fraction=1.0
     )

@@ -63,7 +63,8 @@ def calculate_conductivity_from_msd(
         ion_name,
         charge,
         num_ions,
-        fit_fraction=0.5
+        fit_start_fraction=0.5,
+        fit_end_fraction=0.5
 ):
     """
     Calculate the conductivity from MSD data (v2).

@@ -102,9 +103,10 @@ def calculate_conductivity_from_msd(
     time_ps = timesteps * timestep_ps

     # --- 3. Linear fit to obtain the diffusion coefficient ---
-    fit_start_index = int(len(time_ps) * (1 - fit_fraction))
-    fit_time_ps = time_ps[fit_start_index:]
-    fit_msd_values = msd_values[fit_start_index:]
+    fit_start_index = int(len(time_ps) * fit_start_fraction)
+    fit_end_index = int(len(time_ps) * fit_end_fraction)
+    fit_time_ps = time_ps[fit_start_index:fit_end_index]
+    fit_msd_values = msd_values[fit_start_index:fit_end_index]

     if len(fit_time_ps) < 2:
         print("Error: not enough data points for the fit (fewer than 2); linear regression cannot be performed.")
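The change above replaces the single `fit_fraction` argument (which always fitted the trailing portion of the MSD) with an explicit `[fit_start_fraction, fit_end_fraction]` window. A minimal sketch of the new slicing semantics, using a hypothetical 10-point series that is not part of this repository:

```python
import numpy as np

# Hypothetical, evenly sampled MSD series used only to illustrate the indexing.
time_ps = np.linspace(0, 900, 10)
msd_values = 0.1 * time_ps

fit_start_fraction, fit_end_fraction = 0.2, 0.8
fit_start_index = int(len(time_ps) * fit_start_fraction)   # 2
fit_end_index = int(len(time_ps) * fit_end_fraction)       # 8
fit_time_ps = time_ps[fit_start_index:fit_end_index]       # samples 2..7
fit_msd_values = msd_values[fit_start_index:fit_end_index]

print(len(fit_time_ps))  # 6 points used for the linear fit
```

Note that with the new defaults (both fractions 0.5) the window is empty and the fewer-than-two-points check fires, so callers are evidently expected to pass explicit fractions, as the updated `__main__` call does.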
MSD/utils/con2.py (new file, 289 lines)
@@ -0,0 +1,289 @@
import os
import re
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Physical constants (SI units)
kB = 1.380649e-23        # Boltzmann constant (J/K)
q = 1.602176634e-19      # elementary charge (C, the charge of Li+)
ps_to_s = 1e-12          # picoseconds to seconds
angstrom2_to_m2 = 1e-20  # Å² to m²
angstrom3_to_m3 = 1e-30  # Å³ to m³


def extract_temperature(folder_name):
    """Extract the temperature (K) from a folder name such as 320K."""
    match = re.search(r'(\d+)K', folder_name)
    if match:
        return int(match.group(1))
    return None


def read_msd(msd_path):
    """Read msd_li.dat and return the time (ps) and the total MSD (Å²)."""
    try:
        data = np.loadtxt(msd_path, comments='#')
    except Exception as e:
        raise ValueError(f"Failed to read the MSD file: {e}")

    if data.size == 0:
        raise ValueError("The MSD file is empty")

    # Convert timesteps to real time (ps): TimeStep × 0.001
    time = data[:, 0] * 0.001  # first column is TimeStep; multiply by 0.001 to get ps
    msd_total = data[:, -1]    # last column is the total MSD

    # Print the time range (key debugging information)
    print(f" MSD time range: {time.min():.1f} ~ {time.max():.1f} ps")
    return time, msd_total


def calculate_diffusion(time, msd, t_start, t_end):
    """Compute the diffusion coefficient D from the MSD (units: m²/s)."""
    # Select the data inside the time window
    mask = (time >= t_start) & (time <= t_end)
    t_filtered = time[mask] * ps_to_s           # convert to seconds
    msd_filtered = msd[mask] * angstrom2_to_m2  # convert to m²

    # Check that the filtered data are not empty
    if len(t_filtered) < 2:
        raise ValueError(f"Not enough data points after filtering (only {len(t_filtered)}); adjust t_start/t_end")

    # Linear fit of MSD vs time; slope = 6D
    slope, _, r_value, _, _ = stats.linregress(t_filtered, msd_filtered)
    D = slope / 6

    # Print the fit information (for debugging)
    print(f" MSD fit: slope = {slope:.6e}, R² = {r_value**2:.4f}")
    return D


def parse_lammps_data(data_path):
    """Parse a LAMMPS data file and extract the box volume and the number of Li+ (the Li atom type is 1)."""
    with open(data_path, 'r') as f:
        lines = f.readlines()

    # Extract the total number of atoms
    atom_count_line = next((line for line in lines if 'atoms' in line), None)
    if not atom_count_line:
        raise ValueError("Could not find the total atom count in the data file")
    total_atoms = int(atom_count_line.split()[0])

    # Extract the box dimensions
    x_line = next((line for line in lines if 'xlo xhi' in line), None)
    y_line = next((line for line in lines if 'ylo yhi' in line), None)
    z_line = next((line for line in lines if 'zlo zhi' in line), None)

    if not (x_line and y_line and z_line):
        raise ValueError("Could not find the box dimensions in the data file")

    xlo, xhi = map(float, x_line.split()[:2])
    ylo, yhi = map(float, y_line.split()[:2])
    zlo, zhi = map(float, z_line.split()[:2])

    # Compute the box volume (Å³)
    volume_angstrom3 = (xhi - xlo) * (yhi - ylo) * (zhi - zlo)

    # Manually set the Li atom type to 1
    li_type = 1

    # Count the Li atoms
    atoms_section_start = next((i for i, line in enumerate(lines) if 'Atoms' in line), None)
    if atoms_section_start is None:
        raise ValueError("Could not find the Atoms section in the data file")

    li_count = 0
    for i in range(atoms_section_start + 1, atoms_section_start + total_atoms + 1):
        if i >= len(lines):
            break
        parts = lines[i].split()
        if len(parts) >= 2 and int(parts[1]) == li_type:
            li_count += 1

    if li_count == 0:
        raise ValueError(f"No Li atoms with atom type {li_type} were found")

    return li_count, volume_angstrom3


def calculate_conductivity(D, T, n):
    """Compute the ionic conductivity σ (S/m) from the Nernst-Einstein equation."""
    sigma = (n * q**2 * D) / (kB * T)
    return sigma


def arrhenius_plot(temperatures, D_list, sigma_list):
    """Draw the Arrhenius plot, fit separate lines below and above 400 K, and extrapolate the 300 K conductivity from the 320-400 K data."""
    plt.rcParams["axes.unicode_minus"] = False  # make sure minus signs render correctly
    temps = np.array(temperatures, dtype=float)
    sigmas = np.array(sigma_list, dtype=float)
    inv_T = 1000 / temps  # 1000/T (1/K)

    # Split the data: 320-400 K (fit and extrapolate) and >400 K (fit only)
    mask_below = (temps >= 320) & (temps <= 400)  # 320-400 K range
    mask_above = temps > 400                      # above 400 K

    # Check the amount of data in the low-temperature range
    if np.sum(mask_below) < 2:
        raise ValueError(f"Not enough data points in the 320-400 K range (only {np.sum(mask_below)}); cannot fit a line")

    # 1. Fit the 320-400 K range (used to extrapolate to 300 K)
    inv_T_below = inv_T[mask_below]
    ln_sigma_below = np.log(sigmas[mask_below])
    slope_below, intercept_below, r_below, _, _ = stats.linregress(inv_T_below, ln_sigma_below)
    Ea_below = -slope_below * kB * 1000  # activation energy (J)
    Ea_below_eV = Ea_below / q           # convert to eV

    # 2. Fit the >400 K range (if there is enough data)
    slope_above = None
    intercept_above = None
    Ea_above_eV = None
    r_above = None
    if np.sum(mask_above) >= 2:
        inv_T_above = inv_T[mask_above]
        ln_sigma_above = np.log(sigmas[mask_above])
        slope_above, intercept_above, r_above, _, _ = stats.linregress(inv_T_above, ln_sigma_above)
        Ea_above = -slope_above * kB * 1000
        Ea_above_eV = Ea_above / q

    # 3. Extrapolate the ionic conductivity at 300 K (from the 320-400 K fit)
    T_target = 300                  # target temperature 300 K
    inv_T_target = 1000 / T_target  # 1000/300 (1/K)
    ln_sigma_target = slope_below * inv_T_target + intercept_below  # extrapolate with the low-temperature fit line
    sigma_target = np.exp(ln_sigma_target)                          # convert back to a conductivity

    # 4. Plot
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Plot the data points
    ax1.scatter(inv_T_below, ln_sigma_below, label='320-400K Data', color='blue', s=60, zorder=3)
    if np.sum(mask_above) >= 1:
        ax1.scatter(inv_T[mask_above], np.log(sigmas[mask_above]), label='>400K Data', color='red', s=60, zorder=3)

    # Plot the fit line (low-temperature range)
    x_fit_below = np.linspace(min(inv_T_below), max(inv_T_below), 100)
    y_fit_below = slope_below * x_fit_below + intercept_below
    ax1.plot(x_fit_below, y_fit_below, '--', color='blue',
             label=f'320-400K Fit (Eₐ={Ea_below_eV:.3f} eV, R²={r_below**2:.3f})', zorder=2)

    # Plot the fit line (high-temperature range, if present)
    if slope_above is not None:
        x_fit_above = np.linspace(min(inv_T[mask_above]), max(inv_T[mask_above]), 100)
        y_fit_above = slope_above * x_fit_above + intercept_above
        ax1.plot(x_fit_above, y_fit_above, '--', color='red',
                 label=f'>400K Fit (Eₐ={Ea_above_eV:.3f} eV, R²={r_above**2:.3f})', zorder=2)

    # Mark the 300 K extrapolated point
    ax1.scatter([inv_T_target], [ln_sigma_target], color='green', marker='*', s=150,
                label=f'300K Extrapolated', zorder=4)

    # Axis settings
    ax1.set_xlabel('1000/T (1/K)')
    ax1.set_ylabel('ln(σ) (S/m)')
    ax1.set_title('Arrhenius Plot with Dual Fitting')
    ax1.legend()
    ax1.grid(alpha=0.3)

    # Secondary temperature axis on top
    ax2 = ax1.twiny()
    x1_min, x1_max = ax1.get_xlim()
    ax2.set_xlim(x1_min, x1_max)
    ax2.set_xticks(1000 / np.unique(temps))  # ticks at the temperatures present in the data
    ax2.set_xticklabels([f"{int(t)}" for t in np.unique(temps)])
    ax2.set_xlabel('Temperature (K)')

    plt.tight_layout()
    plt.savefig('arrhenius_dual_fit.png', dpi=300)
    plt.show()

    # Print the results
    print("\n===== Fit results =====")
    print(f"320-400K range:")
    print(f"  Activation energy Eₐ = {Ea_below_eV:.3f} eV (R² = {r_below**2:.3f})")
    if Ea_above_eV is not None:
        print(f">400K range:")
        print(f"  Activation energy Eₐ = {Ea_above_eV:.3f} eV (R² = {r_above**2:.3f})")

    print(f"\n===== 300K extrapolation (based on the 320-400K fit) =====")
    print(f"  Ionic conductivity σ = {sigma_target:.6e} S/m")
    print(f"  Converted units: σ = {sigma_target * 10:.6e} mS/cm")  # 1 S/m = 10 mS/cm, so multiply by 10


if __name__ == "__main__":
    # ------------ Parameters that must be adjusted to the actual data ------------
    base_dir = "../data"  # main data folder
    t_start = 100         # MSD fit start time (ps)
    t_end = 2000          # MSD fit end time (ps)
    # ------------------------------------------------------------------------------

    temperatures = []
    D_list = []
    sigma_list = []
    temp_folder_pattern = re.compile(r'^\d+K$')

    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path) or not temp_folder_pattern.match(folder):
            continue  # skip folders that are not temperature folders

        T = extract_temperature(folder)
        print(f"\nProcessing folder: {folder} (temperature {T} K)")
        if T is None:
            print(" Skipped: could not extract the temperature")
            continue

        # Check that the required files exist
        msd_path = os.path.join(folder_path, "msd_li.dat")
        data_path = os.path.join(folder_path, "LYC.data")
        if not os.path.exists(msd_path):
            print(f" Skipped: missing {msd_path}")
            continue
        if not os.path.exists(data_path):
            print(f" Skipped: missing {data_path}")
            continue

        # Parse the number of Li and the box volume
        try:
            li_count, volume_angstrom3 = parse_lammps_data(data_path)
            volume_m3 = volume_angstrom3 * angstrom3_to_m3
            n = li_count / volume_m3
            print(f" Number of Li+: {li_count}, box volume: {volume_angstrom3:.2f} Å³")
            print(f" Carrier concentration: {n:.6e} m⁻³")
        except Exception as e:
            print(f" Skipped: failed to parse the data file: {e}")
            continue

        # Read the MSD data
        try:
            time, msd = read_msd(msd_path)
        except Exception as e:
            print(f" Skipped: failed to read the MSD: {e}")
            continue

        # Compute the diffusion coefficient
        try:
            D = calculate_diffusion(time, msd, t_start, t_end)
            print(f" Diffusion coefficient D = {D:.6e} m²/s")
        except Exception as e:
            print(f" Skipped: failed to compute the diffusion coefficient: {e}")
            continue

        # Compute the ionic conductivity
        try:
            sigma = calculate_conductivity(D, T, n)
            print(f" Ionic conductivity σ = {sigma:.6e} S/m")
        except Exception as e:
            print(f" Skipped: failed to compute the conductivity: {e}")
            continue

        # Store the results
        temperatures.append(T)
        D_list.append(D)
        sigma_list.append(sigma)

    # Draw the Arrhenius plot (two fit lines)
    if len(temperatures) >= 2:
        print("\n===== Calculation finished, drawing the dual-fit Arrhenius plot =====")
        arrhenius_plot(temperatures, D_list, sigma_list)
    else:
        print(f"\n===== Error: not enough valid data points (only {len(temperatures)}); cannot plot =====")
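For reference, con2.py combines two relations: the Nernst-Einstein estimate σ = n·q²·D / (kB·T) in calculate_conductivity(), and an Arrhenius fit of ln(σ) against 1000/T in arrhenius_plot(), whose slope yields the activation energy. A minimal sanity-check sketch of both relations with purely hypothetical numbers (none of them come from the data committed here):

```python
import numpy as np

kB = 1.380649e-23    # Boltzmann constant (J/K)
q = 1.602176634e-19  # elementary charge (C)

# Hypothetical inputs, for illustration only
D = 1.0e-12          # diffusion coefficient (m^2/s)
n = 2.0e28           # Li+ carrier concentration (m^-3)
T = 320.0            # temperature (K)

# Nernst-Einstein conductivity, as in calculate_conductivity()
sigma = n * q**2 * D / (kB * T)

# Arrhenius form used by arrhenius_plot(): ln(sigma) = ln(sigma0) - Ea/(kB*T).
# With x = 1000/T as the fit variable, slope = -Ea/(1000*kB), so Ea = -slope*1000*kB.
slope = -5.0         # hypothetical slope of ln(sigma) vs 1000/T
Ea_eV = (-slope * 1000 * kB) / q

print(f"sigma = {sigma:.3e} S/m, Ea = {Ea_eV:.3f} eV")
```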
corner-sharing/utils/CS_analyse.py (new file, 134 lines)
@@ -0,0 +1,134 @@
from pymatgen.core.structure import Structure
from pymatgen.analysis.local_env import VoronoiNN
import numpy as np


def check_real(nearest):
    real_nearest = []
    for site in nearest:
        if np.all((site.frac_coords >= 0) & (site.frac_coords <= 1)):
            real_nearest.append(site)

    return real_nearest


def special_check_for_3(site, nearest):
    real_nearest = []
    distances = []
    for site2 in nearest:
        distance = np.linalg.norm(np.array(site.frac_coords) - np.array(site2.frac_coords))
        distances.append(distance)

    sorted_indices = np.argsort(distances)
    for index in sorted_indices[:3]:
        real_nearest.append(nearest[index])

    return real_nearest


def CS_catulate(struct, sp='Li', anion=['O'], tol=0, cutoff=3.0, notice=False, ID=None):
    """
    Compute the sharing relations between the target element's framework cations and their nearest anions in a structure.

    Parameters:
        struct (Structure): input structure.
        sp (str): target element symbol, default 'Li'.
        anion (list): list of anion symbols, default ['O'].
        tol (float): tolerance for VoronoiNN, default 0.
        cutoff (float): cutoff distance for VoronoiNN, default 3.0.

    Returns:
        dict: counts of polyhedra pairs keyed by the number of shared anions.
    """
    # Initialise the VoronoiNN object
    if sp == 'Li':
        tol = 0
        cutoff = 3.0
    voro_nn = VoronoiNN(tol=tol, cutoff=cutoff)
    # Dictionary used to count the sharing relations
    shared_count = {"2": 0, "3": 0, "4": 0, "5": 0, "6": 0}
    # List holding the per-site results
    atom_dice = []

    # Loop over every site in the structure
    for index, site in enumerate(struct.sites):
        # Skip anion sites
        if site.specie.symbol in anion:
            continue
        # Skip the mobile (Li) sites
        if site.specie.symbol == sp:
            continue
        # Get the Voronoi polyhedron information
        voro_info = voro_nn.get_voronoi_polyhedra(struct, index)

        # Find the nearest anion sites
        nearest_anions = [
            nn_info["site"] for nn_info in voro_info.values()
            if nn_info["site"].specie.symbol in anion
        ]

        # Skip if no nearest anions were found
        if not nearest_anions:
            print(f"No nearest anions found for {ID} site {index}.")
            continue
        if site.specie.symbol == 'B' or site.specie.symbol == 'N':
            nearest_anions = special_check_for_3(site, nearest_anions)
        nearest_anions = check_real(nearest_anions)
        # Append the result to the atom_dice list
        atom_dice.append({
            'index': index,
            'nearest_index': [nn.index for nn in nearest_anions]
        })


    # Enumerate all pairs of atoms in atom_dice
    for i, atom_i in enumerate(atom_dice):
        for j, atom_j in enumerate(atom_dice[i + 1:], start=i + 1):
            # Nearest-anion indices of the two atoms
            nearest_i = set(atom_i['nearest_index'])
            nearest_j = set(atom_j['nearest_index'])

            # Size of the intersection of the nearest anions
            shared_count_key = str(len(nearest_i & nearest_j))

            # Update the counts in the dictionary
            if shared_count_key in shared_count:
                shared_count[shared_count_key] += 1
                if notice:
                    if shared_count_key == '2':
                        print(f"Sites {atom_j['index']} and {atom_i['index']} share an edge")
                        print(f"Shared anions: {nearest_i & nearest_j}")
                    if shared_count_key == '3':
                        print(f"Sites {atom_j['index']} and {atom_i['index']} share a face")
                        print(f"Shared anions: {nearest_i & nearest_j}")

    # # Finally divide the values by 2, since each sharing relation is counted twice
    # for key in shared_count.keys():
    #     shared_count[key] //= 2

    return shared_count


def CS_count(struct, shared_count, sp='Li'):
    count = 0
    for site in struct.sites:
        if site.specie.symbol == sp:
            count += 1  # count the atoms of the target species

    CS_count = 0
    for i in range(2, 7):  # loop over the keys 2, 3, 4, 5, 6
        if str(i) in shared_count:  # check that the key exists
            CS_count += shared_count[str(i)] * i  # accumulate the weighted counts

    if count > 0:  # avoid division by zero
        CS_count /= count  # normalise by the number of target atoms
    else:
        CS_count = 0  # if count is 0, simply return 0

    return CS_count


structure = Structure.from_file("../data/0921/wjy_001.cif")
a = CS_catulate(structure, notice=True)
b = CS_count(structure, a)
print(f"{a}\n{b}")
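CS_count() turns the pair counts returned by CS_catulate() into a per-Li figure: every pair of polyhedra is weighted by how many anions it shares, and the sum is divided by the number of sp atoms. A small worked example with a hypothetical shared_count dictionary (not computed from any structure in this commit):

```python
# Hypothetical sharing counts for a structure containing 8 Li atoms.
shared_count = {"2": 4, "3": 2, "4": 0, "5": 0, "6": 0}
num_li = 8

# Same arithmetic as CS_count(): sum of (shared anions x pair count), normalised per Li.
weighted = sum(int(k) * v for k, v in shared_count.items())  # 4*2 + 2*3 = 14
cs_per_li = weighted / num_li                                # 14 / 8 = 1.75
print(cs_per_li)
```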
@@ -47,28 +47,19 @@ class HiddenPrints:


 def non_elements(struct, sp='Li'):
-    '''
-    struct : structure object from Pymatgen
+    """
+    struct : must be an ordered structure
     sp : the mobile specie
-    returns the structure with all of the mobile specie (Li) removed
-    '''
-    num_li = struct.species.count(Element(sp))
-    species = list(set(struct.species))
-    try:
-        species.remove(Element("O"))
-    except ValueError:
-        print("no O")
-    try:
-        species.remove(Element("S"))
-    except ValueError:
-        print("no S")
-    try:
-        species.remove(Element("N"))
-    except ValueError:
-        print("no N")
+    returns a new structure containing only the framework anions (O, S, N).
+    """
+    anions_to_keep = {"O", "S", "N"}
+
     stripped = struct.copy()
-    stripped.remove_species(species)
-    stripped = stripped.get_sorted_structure(reverse=True)
+    species_to_remove = [el.symbol for el in stripped.composition.elements
+                         if el.symbol not in anions_to_keep]
+
+    if species_to_remove:
+        stripped.remove_species(species_to_remove)
+
     return stripped

@@ -160,22 +151,43 @@ def site_env(coord, struct, sp="Li", envtype='both'):


 def extract_sites(struct, sp="Li", envtype='both'):
-    '''
+    """
     struct : structure object from Pymatgen
     envtype : 'tet', 'oct', or 'both'
     sp : target element to analyze environment
-    '''
+    """
+
     envlist = []
-    for i in range(len(struct.sites)):
-        if struct.sites[i].specie != Element(sp):
-            continue
-        site = struct.sites[i]
-        singleenv = site_env(site.frac_coords, struct, sp, envtype)
-        envlist.append({'frac_coords': site.frac_coords, 'type': singleenv['type'], 'csm': singleenv['csm'],
-                        'volume': singleenv['vol']})
-    return envlist
+
+    # --- Key change: iterate over the original structure directly, even if it is disordered ---
+    # get_sorted_structure() is no longer called up front
+    # Only sites that contain the target element sp are of interest
+
+    # Loop over every site
+    for i, site in enumerate(struct):
+        # Check whether the composition of this site (site.species) contains the element of interest (sp)
+        # site.species.elements returns the list of elements on this site, e.g. [Element Li, Element Fe]
+        # [el.symbol for el in site.species.elements] converts it to a list of symbols ['Li', 'Fe']
+        site_elements = [el.symbol for el in site.species.elements]
+
+        if sp in site_elements:
+            # If Li is found, analyse the environment of this site
+            # Note: the original, possibly disordered struct could be passed to site_env,
+            # because the functions inside it (such as LocalGeometryFinder) know how to handle it
+
+            # So that downstream functions (non_elements in particular) keep working,
+            # a one-off, temporary ordered copy of the structure is created here;
+            # this avoids all of the 'ordered structures only' errors seen before
+            temp_ordered_struct = struct.get_sorted_structure()
+
+            singleenv = site_env(site.frac_coords, temp_ordered_struct, sp, envtype)
+
+            envlist.append({'frac_coords': site.frac_coords, 'type': singleenv['type'], 'csm': singleenv['csm'],
+                            'volume': singleenv['vol']})
+
+    if not envlist:
+        print(f"Warning: no sites occupied by element {sp} were found in the structure.")
+
+    return envlist


 def export_envs(envlist, sp='Li', envtype='both', fname=None):
     '''

@@ -193,6 +205,6 @@ def export_envs(envlist, sp='Li', envtype='both', fname=None):
             f.write("Site index " + str(index) + ": " + str(i) + '\n')


-struct = Structure.from_file("../data/31960.cif")
-site_info = extract_sites(struct, envtype="both")
-export_envs(site_info, sp="Li", envtype="both")
+# struct = Structure.from_file("../data/0921/wjy_475.cif")
+# site_info = extract_sites(struct, envtype="both")
+# export_envs(site_info, sp="Li", envtype="both")
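The rewritten non_elements() above keeps only O, S and N framework sites instead of removing a hand-maintained species list. A quick sketch of that behaviour on a toy ordered structure (the lattice, species and coordinates below are made up purely for illustration):

```python
from pymatgen.core import Lattice, Structure

# Toy ordered structure with one Li, one Y, one O and one S site (hypothetical).
struct = Structure(
    Lattice.cubic(5.0),
    ["Li", "Y", "O", "S"],
    [[0, 0, 0], [0.5, 0.5, 0.5], [0.25, 0.25, 0.25], [0.75, 0.75, 0.75]],
)

anions_to_keep = {"O", "S", "N"}
stripped = struct.copy()
species_to_remove = [el.symbol for el in stripped.composition.elements
                     if el.symbol not in anions_to_keep]
if species_to_remove:
    stripped.remove_species(species_to_remove)

print(stripped.composition)  # only the O and S framework sites remain
```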
data_get/data_get.py (new file, 127 lines)
@@ -0,0 +1,127 @@
import pandas as pd
import os
import re


def extract_cif_from_xlsx(
    xlsx_path: str,
    output_dir: str,
    naming_mode: str = 'formula',
    name_col: int = 0,
    cif_col: int = 1,
    prefix: str = 'wjy'
):
    """
    Extract CIF data from an XLSX file and save each entry as a separate .cif file.

    Args:
        xlsx_path (str): path to the input XLSX file.
        output_dir (str): folder where the .cif files are written.
        naming_mode (str, optional): naming mode for the CIF files.
            Either 'formula' (use the name from the first column) or
            'auto' (use prefix + an auto-incrementing number).
            Defaults to 'formula'.
        name_col (int, optional): index of the column containing the file name (0-based). Defaults to 0.
        cif_col (int, optional): index of the column containing the CIF content (0-based). Defaults to 1.
        prefix (str, optional): file-name prefix used in 'auto' naming mode. Defaults to 'wjy'.

    Raises:
        FileNotFoundError: if xlsx_path does not exist.
        ValueError: if naming_mode is invalid.
        Exception: any other error raised during processing.
    """
    # --- 1. Validate the arguments and prepare ---
    if not os.path.exists(xlsx_path):
        raise FileNotFoundError(f"Error: input file not found -> {xlsx_path}")

    if naming_mode not in ['formula', 'auto']:
        raise ValueError(f"Error: 'naming_mode' must be 'formula' or 'auto', but got '{naming_mode}'")

    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)
    print(f"CIF files will be saved to: {output_dir}")

    try:
        # --- 2. Read the XLSX file ---
        # header=None means the first row is not treated as a header and is read as data
        df = pd.read_excel(xlsx_path, header=None)

        # Skip the header row of the original file ('formula', 'cif')
        if str(df.iloc[0, name_col]).strip().lower() == 'formula' and str(df.iloc[0, cif_col]).strip().lower() == 'cif':
            df = df.iloc[1:]
            print("Detected and skipped the header row.")

        # --- 3. Iterate over the rows and write the files ---
        success_count = 0
        for index, row in df.iterrows():
            # Get the file name and the CIF content
            formula_name = str(row[name_col])
            cif_content = str(row[cif_col])

            # Skip rows with empty content
            if pd.isna(row[name_col]) or pd.isna(row[cif_col]) or not cif_content.strip():
                print(f"Warning: row {index + 2} is incomplete and was skipped.")
                continue

            # --- 4. Determine the file name according to the naming mode ---
            if naming_mode == 'formula':
                # Sanitise the file name, replacing characters that are unsafe in file names
                # e.g. (PO4)3 becomes _PO4_3 and / becomes _
                safe_filename = re.sub(r'[\\/*?:"<>|()]', '_', formula_name)
                filename = f"{safe_filename}.cif"
            else:  # naming_mode == 'auto'
                # Use format to keep the numbering consistent, e.g. 001, 002
                filename = f"{prefix}_{success_count + 1:03d}.cif"

            # Build the full output path
            output_path = os.path.join(output_dir, filename)

            # --- 5. Write the CIF file ---
            try:
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(cif_content)
                success_count += 1
            except IOError as e:
                print(f"Error: could not write file {output_path}. Reason: {e}")

        print(f"\nDone! Successfully extracted and wrote {success_count} CIF files.")

    except Exception as e:
        print(f"An error occurred while processing the XLSX file: {e}")


# --- Example usage ---
if __name__ == '__main__':
    # The XLSX file is expected at 'input/cif_dataset.xlsx' relative to this script
    source_xlsx_file = 'input/cif_dataset.xlsx'

    # Check whether the example file exists; create one if it does not
    if not os.path.exists(source_xlsx_file):
        print(f"Example file '{source_xlsx_file}' not found; creating one...")
        example_data = {
            'formula': ['Li3Al0.3Ti1.7(PO4)3', 'Li6.5La3Zr1.75W0.25O12', 'Invalid/Name*Test'],
            'cif': ['# CIF Data for Li3Al0.3...\n_atom_site_type_symbol\n Li\n Al\n Ti\n P\n O',
                    '# CIF Data for Li6.5La3...\n_symmetry_space_group_name_H-M \'I a -3 d\'',
                    '# CIF Data for Invalid Name Test']
        }
        pd.DataFrame(example_data).to_excel(source_xlsx_file, index=False, header=True)
        print("Example file created.")

    # --- Example 1: name the files after the 'formula' column ---
    # print("\n--- Example 1: 'formula' naming mode ---")
    # output_folder_1 = 'cif_by_formula'
    # extract_cif_from_xlsx(
    #     xlsx_path=source_xlsx_file,
    #     output_dir=output_folder_1,
    #     naming_mode='formula'
    # )

    # --- Example 2: automatic 'wjy + number' naming ---
    print("\n--- Example 2: 'auto' naming mode ---")
    output_folder_2 = 'cif_by_auto'
    extract_cif_from_xlsx(
        xlsx_path=source_xlsx_file,
        output_dir=output_folder_2,
        naming_mode='auto',
        prefix='wjy'
    )
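In 'formula' naming mode the file name comes straight from the spreadsheet, so the sanitisation regex above is what keeps it filesystem-safe. A standalone check of that one step (the formula string is just an example):

```python
import re

formula_name = 'Li3Al0.3Ti1.7(PO4)3'
safe_filename = re.sub(r'[\\/*?:"<>|()]', '_', formula_name)
print(f"{safe_filename}.cif")  # Li3Al0.3Ti1.7_PO4_3.cif
```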
data_get/input/cif_dataset.xlsx (new binary file): binary file not shown