Identifying active catalyst states

June 2022

Method for deconvoluting the spectro-electrochemical signal of iridium catalysts for water oxidation.

Redox-state kinetics in water-oxidation IrOx electrocatalysts measured by operando spectroelectrochemistry. ACS Catal. 2021, 11, 24, 15013–15025; ChemRxiv.

Spectroelectrochemistry of water oxidation kinetics in molecular versus heterogeneous oxide iridium electrocatalysts. J. Am. Chem. Soc. 2022, 144, 19, 8454–8459.

This approach deconvolutes the ultraviolet–visible absorption signal measured as a function of the applied potential. The oxidation states of the catalyst across the potential range are modeled as a sum of cumulative Gaussian distributions, and the absorption coefficient of each state is extracted from one of the measured spectra (its reference spectrum) following the Lambert–Beer law; a minimal sketch of this model is given after the list below. The interactive plots below allow you to explore the raw data, simulated spectra, fitting errors, and the extracted absorption coefficients and concentration gradients. You can further test the fitting process by:

  1. Modifying the initial guesses and boundaries of the Gaussian functions
  2. Editing the Python script directly
  3. Running the Python script
  4. Uploading your own two-dimensional dataset
  5. Downloading the results
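
As a minimal sketch of the underlying model (all potentials, band positions, and amplitudes below are illustrative placeholders, not fitted values): two redox states whose concentrations rise as cumulative Gaussians of the applied potential, and a simulated absorbance assembled from assumed absorption-coefficient spectra via the Lambert–Beer law.

import numpy as np

potentials = np.linspace(1.0, 1.8, 81)    # applied potential / V (hypothetical range)
wavelengths = np.linspace(400, 800, 201)  # wavelength / nm

# Each state: amplitude, mean (V) and width (V) of its Gaussian concentration gradient
states = [(1.0, 1.25, 0.05), (0.6, 1.55, 0.08)]

gradients = [a * np.exp(-0.5 * ((potentials - m) / s) ** 2) for a, m, s in states]
concentrations = [np.cumsum(g) for g in gradients]    # cumulative Gaussian profiles

# Assumed absorption-coefficient spectra: arbitrary Gaussian bands in wavelength
coefficients = [np.exp(-0.5 * ((wavelengths - c) / 60.0) ** 2) for c in (580, 680)]

# Lambert-Beer: absorbance(wavelength, potential) = sum over states of coefficient x concentration
absorbance = sum(np.outer(eps, c) for eps, c in zip(coefficients, concentrations))
print(absorbance.shape)   # (201, 81): one simulated spectrum per applied potential
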
Python script

Your script should read from data.csv and save out CSVs (e.g. results_*.csv). Any .csv files it writes will become downloadable.
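
If you want to test the upload with synthetic data, the snippet below sketches how a data.csv in the expected layout could be generated: the first two rows carry the axis labels, the third row the sample values (here, applied potentials), and the remaining rows hold one spectrum per column. The labels, the potential range, and the random absorbance values are placeholders; 87 potentials are used because the bundled script reads columns 2–88 of the file.

import numpy as np
import pandas as pd

wavelengths = np.linspace(400, 800, 201)   # first column, from the fourth row onwards
potentials = np.linspace(1.0, 1.8, 87)     # third row: one entry per measured spectrum
rng = np.random.default_rng(0)
absorbance = rng.random((len(wavelengths), len(potentials)))   # dummy measurement matrix

n_cols = len(potentials) + 1
rows = [
    ['', 'Absorbance'] + [''] * (n_cols - 2),                    # row 1: y label
    ['Wavelength (nm)', 'Potential (V)'] + [''] * (n_cols - 2),  # row 2: x label, sample label
    [''] + potentials.tolist(),                                  # row 3: sample values
]
rows += [[w] + a.tolist() for w, a in zip(wavelengths, absorbance)]
pd.DataFrame(rows).to_csv('data.csv', index=False, header=False)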

import pandas as pd
import numpy as np
from scipy.optimize import differential_evolution

# data.csv: spectra measured at each applied potential (no header row; see layout above)
raw_data = pd.read_csv('data.csv', header=None)
# gaussians.csv: initial guesses and boundaries for the Gaussian parameters
gaussian_data = pd.read_csv('gaussians.csv')

def closest_i(target, vector):
    """Return the index of the entry in a pandas Series closest to target."""
    closest_index = min(range(len(vector)), key=lambda i: abs(vector.iloc[i] - target))
    return closest_index

def sum_of_gaussians(data, sample_data, ref, *properties):
    """Build concentration profiles and absorption coefficients for each redox state.

    Each state is described by three parameters (amplitude, mean, standard deviation)
    of a Gaussian concentration gradient along the applied potential; the cumulative
    sum of that gradient gives the concentration profile of the state.
    """
    num_gaussians = len(properties) // 3
    concentrations = []
    gradients = []
    for i in range(num_gaussians):
        amp, mean, std = properties[3*i], properties[3*i+1], properties[3*i+2]
        gradient = amp * np.exp(-0.5 * ((sample_data - mean) / std) ** 2)
        gradients.append(gradient)
        concentrations.append(np.cumsum(gradient))

    # Extract the absorption coefficient spectrum of each state sequentially: the i-th
    # reference spectrum is assumed to contain contributions only from states 1..i,
    # so the previously extracted states are subtracted before dividing by the concentration.
    coefficients = []
    reference_spectrum = data.iloc[:, ref[0]].values
    for i in range(num_gaussians):
        if i == 0:
            coeff = reference_spectrum / concentrations[i][ref[0]]
        else:
            previous = sum(coefficients[j] * concentrations[j][ref[i]] for j in range(i))
            coeff = (data.iloc[:, ref[i]].values - previous) / concentrations[i][ref[i]]
        coefficients.append(coeff)
    return coefficients, concentrations, gradients

def simulation(data, sample_data, ref, *properties):
    """Simulate the full spectro-electrochemical data set via the Lambert-Beer law."""
    num_gaussians = len(properties) // 3
    coefficients, concentrations, gradients = sum_of_gaussians(data, sample_data, ref, *properties)
    # Absorbance = sum over states of (coefficient spectrum) x (concentration profile)
    simulated_data = sum(np.outer(coefficients[i], concentrations[i]) for i in range(num_gaussians))
    # Crude penalty: doubling the simulated signal inflates the fitting error whenever
    # a negative (unphysical) absorption coefficient is extracted
    if any(np.any(c < 0) for c in coefficients):
        simulated_data *= 2
    return simulated_data

def error(data, sample_data, ref, *properties):
    """Sum of squared residuals between the measured and simulated data."""
    simulated_data = simulation(data, sample_data, ref, *properties)
    diff = np.array(data.values) - simulated_data
    return np.sum(diff**2)

def optimize_properties(data, sample_data, ref, initial_guess, b):
    """Fit the Gaussian parameters by differential evolution within the boundaries b."""
    iteration = {'count': 0}

    def callback(xk, convergence):
        iteration['count'] += 1
        print(f"Iteration {iteration['count']}: Current error = {error(data, sample_data, ref, *xk):.6f}")

    result = differential_evolution(
        func=lambda props: error(data, sample_data, ref, *props),
        bounds=b,
        x0=initial_guess,   # seed the search with the initial guesses (x0 is supported in recent SciPy releases)
        strategy='best1bin',
        maxiter=200,
        popsize=20,
        mutation=(0.5, 1.0),
        recombination=0.7,
        tol=1e-5,
        callback=callback,
        polish=True
    )
    return result

# Parse the data layout: row 1 holds the y label, row 2 the x and sample labels,
# row 3 the sample values (e.g. applied potentials), and the data start in row 4.
x_label = raw_data.iloc[1, 0]
y_label = raw_data.iloc[0, 1]
sample_label = raw_data.iloc[1, 1]
x_data = raw_data.iloc[3:, 0].astype(float)
y_data = raw_data.iloc[3:, 1:88].astype(float)       # columns 2-88: the 87 spectra in this data set
sample_data = raw_data.iloc[2, 1:88].astype(float)
y_data = y_data.reset_index(drop=True)
x_data = x_data.reset_index(drop=True)
sample_data = sample_data.reset_index(drop=True)

# Three rows per Gaussian (amplitude, mean, standard deviation): collect the index of
# each reference spectrum plus the initial guesses and boundaries for every parameter.
n_gauss = len(gaussian_data) // 3
ref = []
initial_guess = []
boundaries = []
for i in range(n_gauss):
    r_val = float(gaussian_data['Reference Spectrum'].iloc[i*3])
    ref.append(closest_i(r_val, sample_data))
    for j in range(3):
        initial_guess.append(float(gaussian_data['Initial Guess'].iloc[i*3+j]))
        boundaries.append((float(gaussian_data['Lower Boundary'].iloc[i*3+j]),
                           float(gaussian_data['Upper Boundary'].iloc[i*3+j])))

# Run the fit, then rebuild the simulated data set and the residuals with the optimized parameters
optimized = optimize_properties(y_data, sample_data, ref, initial_guess, boundaries)
simulated_data = simulation(y_data, sample_data, ref, *optimized.x)
diff = np.array(y_data.astype(float).values) - simulated_data
coefficients, concentrations, gradients = sum_of_gaussians(y_data, sample_data, ref, *optimized.x)

# Rebuild the original header layout and save the simulated data set
header_row = [''] + [y_label] + [''] * (y_data.shape[1] - 1)
label_row = [x_label, sample_label] + [''] * (y_data.shape[1] - 1)
sample_row = [''] + sample_data.tolist()

data_rows = pd.concat([x_data, pd.DataFrame(simulated_data)], axis=1)
data_rows.columns = range(data_rows.shape[1])
simulated = pd.DataFrame([header_row, label_row, sample_row])
simulated = pd.concat([simulated, data_rows], ignore_index=True)
simulated.to_csv("simulated.csv", index=False, header=False)

# Save the fitting error (measured minus simulated) in the same layout
data_rows = pd.concat([x_data, pd.DataFrame(diff)], axis=1)
data_rows.columns = range(data_rows.shape[1])
error_df = pd.DataFrame([header_row, label_row, sample_row])   # avoid shadowing the error() function
error_df = pd.concat([error_df, data_rows], ignore_index=True)
error_df.to_csv("error.csv", index=False, header=False)

# Save the extracted absorption coefficient spectra, one column per Gaussian (redox state)
d = pd.DataFrame(coefficients).transpose()
s_label = list(range(1, n_gauss+1))
header_row = [''] + [y_label] + [''] * (d.shape[1] - 1)
label_row = [x_label, 'Gaussian number'] + [''] * (d.shape[1] - 1)
sample_row = [''] + s_label
data_rows = pd.concat([x_data, d], axis=1)
data_rows.columns = range(data_rows.shape[1])
coeff = pd.DataFrame([header_row, label_row, sample_row])
coeff = pd.concat([coeff, data_rows], ignore_index=True)
coeff.to_csv("coefficients.csv", index=False, header=False)

# Save the concentration gradients against the sample values (applied potential),
# reusing the Gaussian-number row from the coefficients file
d = pd.DataFrame(gradients).transpose()
header_row = [''] + ['Concentration gradient'] + [''] * (d.shape[1] - 1)
label_row = [sample_label, 'Gaussian number'] + [''] * (d.shape[1] - 1)
data_rows = pd.concat([sample_data, d], axis=1)
data_rows.columns = range(data_rows.shape[1])
grad = pd.DataFrame([header_row, label_row, sample_row])
grad = pd.concat([grad, data_rows], ignore_index=True)
grad.to_csv("gradients.csv", index=False, header=False)

# Save the optimized Gaussian parameters alongside the original parameter table
optimized_gaussians = gaussian_data[['Gaussian', 'Reference Spectrum', 'Variable']].copy()
optimized_gaussians['Optimized'] = optimized.x
optimized_gaussians.to_csv("optimized_gaussians.csv", index=False)
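
The Gaussian parameters are read from gaussians.csv. Based on how the script indexes that file, it expects three rows per Gaussian in the order amplitude, mean, standard deviation, with the columns Gaussian, Variable, Reference Spectrum, Initial Guess, Lower Boundary and Upper Boundary. The sketch below shows one way such a file could be generated; the Variable labels and all numerical values are placeholders, not recommended settings.

import pandas as pd

gaussians = pd.DataFrame({
    'Gaussian':           [1, 1, 1, 2, 2, 2],
    'Variable':           ['Amplitude', 'Mean', 'Std'] * 2,
    'Reference Spectrum': [1.30] * 3 + [1.70] * 3,   # potential (V) whose spectrum anchors each state
    'Initial Guess':      [1.0, 1.25, 0.05, 0.6, 1.55, 0.08],
    'Lower Boundary':     [0.0, 1.00, 0.01, 0.0, 1.30, 0.01],
    'Upper Boundary':     [5.0, 1.50, 0.20, 5.0, 1.80, 0.30],
})
gaussians.to_csv('gaussians.csv', index=False)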