Source code for epdfsuite.pdf_extraction

from matplotlib import pyplot as plt
from .lobato_scattering import LobatoScatteringCalculator
import numpy as np
import re
from numpy.polynomial import Polynomial

# --------------------------------------------------
# Chemistry utilities
# --------------------------------------------------

[docs] def parse_formula(formula): """ Parse a chemical formula string into element symbols and molar fractions. Parameters ---------- formula : str Chemical formula, e.g. ``'SiO2'``, ``'Al2O3'``, ``'Fe0.5Ni0.5'``. Returns ------- elements : list of str Element symbols in the order they appear in the formula. ratios : list of float Molar fractions of each element (sum to 1). """ tokens = re.findall(r'([A-Z][a-z]*)([0-9.]+)?', formula) elements, counts = [], [] for elem, count in tokens: elements.append(elem) counts.append(float(count) if count else 1.0) counts = np.array(counts) ratios = counts / counts.sum() return elements, ratios.tolist()
[docs] def compute_avg_scattering_factor( formula, x_max, x_step, qvalues=True, xray=False, ): """ Compute the composition-averaged atomic scattering factor f_avg(q). The average is the weighted sum over all elements: ``f_avg(q) = sum_i x_i * f_i(q)`` where ``x_i`` are the molar fractions parsed from ``formula``. Parameters ---------- formula : str Chemical formula of the sample (e.g. ``'SiO2'``). x_max : float Upper limit of the scattering variable axis. Interpreted as q (Å⁻¹) if ``qvalues=True``, otherwise as s = q/(2π). x_step : float Sampling step of the scattering variable axis, same units as ``x_max``. qvalues : bool, optional If ``True`` (default), ``x_max`` and ``x_step`` are in q units (Å⁻¹). If ``False``, they are in s = q/(2π) units. xray : bool, optional If ``True``, use X-ray scattering factors instead of electron scattering factors. Default is ``False`` (electron factors). Returns ------- q : ndarray Momentum transfer axis in Å⁻¹. favg : ndarray Composition-averaged scattering factor f_avg(q). """ elements, ratios = parse_formula(formula) if qvalues: s_max = x_max / (2 * np.pi) s_step = x_step / (2 * np.pi) else: s_max, s_step = x_max, x_step parametrization = LobatoScatteringCalculator() name = "x_ray_scattering_factor" if xray else "scattering_factor" sf = parametrization.line_profiles( elements, cutoff=s_max, sampling=s_step, name=name, ) npts = sf.array.shape[1] s = np.arange(npts) * s_step q = 2 * np.pi * s favg = np.zeros(npts) for i in range(len(elements)): favg += ratios[i] * sf.array[i] return q, favg
[docs] def compute_f2avg( formula, x_max, x_step, qvalues=True, xray=False, ): """ Compute the composition-averaged squared scattering factor <f²>(q). The average is the weighted sum of squared individual factors: ``<f²>(q) = sum_i x_i * f_i²(q)`` where ``x_i`` are the molar fractions parsed from ``formula``. This quantity is used for the normalisation of the reduced structure function F(Q) in the PDFgetX3 formalism. Parameters ---------- formula : str Chemical formula of the sample (e.g. ``'SiO2'``). x_max : float Upper limit of the scattering variable axis. Interpreted as q (Å⁻¹) if ``qvalues=True``, otherwise as s = q/(2π). x_step : float Sampling step of the scattering variable axis, same units as ``x_max``. qvalues : bool, optional If ``True`` (default), ``x_max`` and ``x_step`` are in q units (Å⁻¹). If ``False``, they are in s = q/(2π) units. xray : bool, optional If ``True``, use X-ray scattering factors. Default is ``False`` (electron scattering factors). Returns ------- q : ndarray Momentum transfer axis in Å⁻¹. f2avg : ndarray Composition-averaged squared scattering factor <f²>(q). """ elements, ratios = parse_formula(formula) if qvalues: s_max = x_max / (2 * np.pi) s_step = x_step / (2 * np.pi) else: s_max, s_step = x_max, x_step parametrization = LobatoScatteringCalculator() name = "x_ray_scattering_factor" if xray else "scattering_factor" sf = parametrization.line_profiles( elements, cutoff=s_max, sampling=s_step, name=name, ) npts = sf.array.shape[1] s = np.arange(npts) * s_step q = 2 * np.pi * s f2avg = np.zeros(npts) for i in range(len(elements)): f2avg += ratios[i] * sf.array[i]**2 return q, f2avg
# -------------------------------------------------- # Polynomial background (PDFgetX3 style) # --------------------------------------------------
[docs] def fit_polynomial_background(q, Fm, rpoly=0.9, qmin=0.3, qmax=None): """ Fit and return a polynomial background to the reduced structure function F(Q). Follows the PDFgetX3 convention: the polynomial degree is determined by ``deg = round(rpoly * qmax / π)``, and the fit is performed on F(Q)/Q to enforce the correct low-Q behaviour. Parameters ---------- q : ndarray Momentum transfer axis in Å⁻¹. Fm : ndarray Reduced structure function F(Q) = Q * (I_norm / I_inf - 1). rpoly : float, optional Polynomial degree control parameter (PDFgetX3 convention). Default is 0.9. qmin : float, optional Lower bound of the fitting range in Å⁻¹. Default is 0.3. qmax : float, optional Upper bound of the fitting range in Å⁻¹. Defaults to ``q.max()``. Returns ------- background : ndarray Polynomial background evaluated on the full ``q`` grid, same shape as ``Fm``. """ if qmax is None: qmax = q.max() mask = (q >= qmin) & (q <= qmax) deg = int(round(rpoly * qmax / np.pi)) deg = max(1, min(deg, mask.sum() - 1)) y = Fm[mask] / q[mask] poly = Polynomial.fit(q[mask], y, deg=deg, domain=[qmin, qmax]) return q * poly(q)
# -------------------------------------------------- # PDFgetX3-like PDF (ELECTRONS) # --------------------------------------------------
[docs] def compute_ePDF( q, Iexp, composition, Iref=None, bgscale=1.0, qmin=0.3, qmax=None, qmaxinst=None, rmin=0.0, rmax=50.0, rstep=0.01, rpoly=1.4, Lorch=True, plot=False, ): """ Compute the electron Pair Distribution Function G(r) from a SAED intensity profile. Follows the PDFgetX3 formalism adapted for electron scattering: 1. Optional background subtraction: ``I = Iexp - bgscale * Iref`` 2. Normalisation by the composition-averaged squared scattering factor <f²>(Q) 3. Construction of the reduced structure function: ``F(Q) = Q * (I_norm / I_inf - 1)`` 4. Polynomial background removal (PDFgetX3 convention, controlled by ``rpoly``) 5. Optional Lorch modification function to suppress Fourier ripples 6. Sine Fourier transform to obtain G(r) Parameters ---------- q : ndarray Momentum transfer axis in Å⁻¹. Iexp : ndarray Experimental azimuthally averaged intensity profile. composition : str Chemical formula of the sample (e.g. ``'SiO2'``, ``'Al2O3'``). Iref : ndarray, optional Reference (background) intensity profile. If its length differs from ``Iexp``, it is interpolated onto the ``q`` grid. Default is ``None``. bgscale : float, optional Scaling factor applied to the reference before subtraction. Default is 1.0. qmin : float, optional Minimum Q used for the Fourier transform (Å⁻¹). Default is 0.3. qmax : float, optional Maximum Q used for the Fourier transform (Å⁻¹). Defaults to ``q.max()``. qmaxinst : float, optional Maximum Q used for the polynomial background fit. Defaults to ``qmax``. Useful when the data are noisy near ``qmax``. rmin : float, optional Minimum real-space distance r (Å). Default is 0.0. rmax : float, optional Maximum real-space distance r (Å). Default is 50.0. rstep : float, optional Step size in real space (Å). Default is 0.01. rpoly : float, optional Polynomial degree control for background removal (PDFgetX3 convention). Default is 1.4. Lorch : bool, optional If ``True`` (default), apply the Lorch modification function ``sinc(Q/Qmax)`` before the Fourier transform to reduce termination ripples. plot : bool, optional If ``True``, display diagnostic plots of the raw intensities, F(Q), and G(r). Default is ``False``. Returns ------- r : ndarray Real-space distance axis in Å. G : ndarray Reduced pair distribution function G(r) in Å⁻². Notes ----- The normalisation factor ``I_inf`` is estimated as the mean intensity in the top 10 % of the Q range (``q > 0.9 * qmax``). """ if qmax is None: qmax = q.max() if qmaxinst is None: qmaxinst = qmax Iraw= Iexp.copy() # Keep a copy of the raw intensity for plotting # --- Interpolate over NaN/Inf bins (from masked radial bins) --- finite_exp = np.isfinite(Iexp) if not np.all(finite_exp): Iexp = np.interp(q, q[finite_exp], Iexp[finite_exp]) Iraw = Iexp.copy() # --- Background subtraction --- # First, ensure Iref is on the same q-grid as Iexp by interpolation if needed if Iref is not None: finite_ref = np.isfinite(Iref) if not np.all(finite_ref) and finite_ref.any(): q_ref_full = np.linspace(q[0], q[-1], len(Iref)) Iref = np.interp(q, q_ref_full[finite_ref], Iref[finite_ref]) elif len(Iref) != len(Iexp): # Create a q-grid for the reference data based on its length q_ref = np.linspace(q[0], q[-1], len(Iref)) # Interpolate reference intensity to match the sample's q-grid Iref = np.interp(q, q_ref, Iref) # Then subtract the background if Iref is not None: Iexp = Iexp - bgscale * Iref qstep = q[1] - q[0] # --- Electron scattering normalization --- q_f2, f2avg = compute_f2avg( composition, x_max=qmax, x_step=qstep, qvalues=True, xray=False, ) f2avg = np.interp(q, q_f2, f2avg) mask_inf = q > 0.9 * qmax I_inf = np.mean(Iexp[mask_inf]) Inorm = Iexp / f2avg # --- Modified intensity F(Q) --- Fm = q * (Inorm / I_inf - 1) # --- Polynomial background (PDFgetX3 philosophy) --- background = fit_polynomial_background( q, Fm, rpoly=rpoly, qmin=qmin, qmax=qmaxinst ) Fc = Fm - background # NO Q-DAMPING # --- Fourier transform --- r = np.arange(rmin, rmax + rstep, rstep) mask = (q >= qmin) & (q <= qmax) qv = q[mask] if Lorch: Fv = Fc[mask] * np.sinc(qv / qmax) else: Fv = Fc[mask] integrand = Fv[None, :] * np.sin(np.outer(r, qv)) # Use np.trapezoid (NumPy >= 1.22) with fallback to np.trapz for older versions # G = (2 / np.pi) * np.trapz(integrand, qv, axis=1) # np.trapz is deprecated in NumPy 1.22+ trapz_func = getattr(np, 'trapezoid', np.trapz) G = (2 / np.pi) * trapz_func(integrand, qv, axis=1) # Optional diagnostic plots if plot: fig, ax = plt.subplots(3, figsize=(4, 6)) # Plot 1: Raw intensities ax[0].plot(q, Iraw, label="Iexp") if Iref is not None: ax[0].plot(q, bgscale * Iref, label="Ref*bgscale") ax[0].legend() ax[0].set_xlabel("Q ($\\AA^{-1}$)") ax[0].set_ylabel("Intensity") # set q limits to [qmin,qmax] mask_plot = (q >= qmin) & (q <= qmax) ax[0].set_xlim([qmin, qmax]) # set intensity limits to [min(Iexp), max(Iexp)] in the q range Iraw_valid = Iraw[mask_plot][np.isfinite(Iraw[mask_plot])] if len(Iraw_valid) > 0: ax[0].set_ylim([np.min(Iraw_valid), np.max(Iraw_valid)]) # Plot 2: Corrected structure factor ax[1].plot(q, Fc, label=f"rpoly={rpoly:.2f}") ax[1].legend() ax[1].set_xlabel("Q ($\\AA^{-1}$)") ax[1].set_ylabel("F(Q)") ax[1].set_xlim([qmin, qmax]) # Filter out NaN and Inf values before setting y limits Fc_valid = Fc[mask_plot][np.isfinite(Fc[mask_plot])] if len(Fc_valid) > 0: ax[1].set_ylim([np.min(Fc_valid), np.max(Fc_valid)]) else: ax[1].set_ylim([0, 1]) # Fallback to default limits if no valid values # Plot 3: Final PDF ax[2].plot(r, G, label=f"rpoly={rpoly:.2f}") ax[2].legend() ax[2].set_xlabel("r ($\\AA$)") ax[2].set_ylabel("G(r)") fig.tight_layout() plt.show() return r, G