Source code for spectral_denoising.noise

import random
import numpy as np
from . import spectral_operations as so
def generate_noise(pmz, lamda, n=100):
    """
    Generate synthetic electronic noise for spectral data.

    Noise m/z values are drawn uniformly between 50 and ``pmz``; noise
    intensities are Poisson-distributed counts divided by 100.

    Parameters:
        pmz (float): The upper bound for the mass range.
        lamda (float): The lambda parameter of the Poisson distribution
            (its mean and also its variance).
        n (int or float, optional): The number of random noise ions to
            generate. Non-integer values are rounded up. Defaults to 100.
    Returns:
        The synthetic noise spectrum produced by ``so.pack_spectrum`` from the
        generated masses and intensities (documented upstream as an np.array).
    Notes:
        Masses come from Python's ``random`` module while intensities come
        from ``numpy.random``, so both generators must be seeded to obtain
        reproducible output.
    """
    # range() and NumPy's ``size`` argument reject floats, including
    # whole-valued ones such as 100.0, so always coerce to a real int.
    n = int(np.ceil(n))
    # One uniformly distributed m/z value per noise ion.
    mass = [random.uniform(50, pmz) for _ in range(n)]
    # Poisson counts, scaled down by a factor of 100.
    intensity = np.random.poisson(lam=lamda, size=n) / 100
    return so.pack_spectrum(mass, intensity)
def add_noise(msms, noise):
    """
    Combine a mass spectrum with a noise spectrum and tidy up the result.

    The input spectrum is first passed through ``so.standardize_spectrum``,
    the noise is merged in with ``so.add_spectra``, and the merged spectrum is
    then passed through ``so.normalize_spectrum`` and ``so.sort_spectrum``.

    Args:
        msms (np.ndarray): The mass spectrum to which noise will be added.
        noise (np.ndarray): The noise spectrum to be added to the mass spectrum.
    Returns:
        np.ndarray: The noisy spectrum after normalization and sorting.
    Notes:
        - The noise spectrum is generated with intensity as a relative measure.
        - For that reason the mass spectrum is standardized before the noise
          is added to it.
    """
    standardized = so.standardize_spectrum(msms)
    combined = so.add_spectra(standardized, noise)
    normalized = so.normalize_spectrum(combined)
    return so.sort_spectrum(normalized)
def generate_chemical_noise(pmz, lamda, polarity, formula_db, n=100):
    """
    Generate chemical noise for a given mass-to-charge ratio (m/z) and other parameters.

    The m/z values of the chemical noise are sampled without replacement from
    a database of possible mass values. Details about this database can be
    found in the paper: LibGen: Generating High Quality Spectral Libraries of
    Natural Products for EAD-, UVPD-, and HCD-High Resolution Mass
    Spectrometers.

    Args:
        pmz (float): The target m/z value; used as the exclusive upper bound
            of the sampled mass window, which starts at 50.
        lamda (float): The lambda parameter of the Poisson distribution used
            to generate intensities (its mean and also its variance).
        polarity (str): The polarity of the adduct, either '+' or '-'.
        formula_db (pandas.DataFrame): A DataFrame containing a column 'mass'
            with possible mass values. The column must be sorted in ascending
            order because the mass window is located with ``searchsorted``.
        n (int or float, optional): The number of noise peaks to generate.
            Non-integer values are rounded up, and the count is reduced to the
            number of database masses available in the window. Default is 100.
    Returns:
        The synthetic chemical-noise spectrum produced by ``so.pack_spectrum``
        (documented upstream as an np.array), or an empty tuple when
        ``polarity`` is neither '+' nor '-'.
    Notes:
        No exception is raised for an invalid polarity: a message is printed
        and ``()`` is returned, so callers must check for the empty tuple.
    """
    # Electron mass in Da, negated: a '+' ion has lost an electron, a '-' ion
    # has gained one.
    mass_e = -0.00054858026
    if polarity == '+':
        coe = 1
    elif polarity == '-':
        coe = -1
    else:
        print('cannot determine adduct polarity!')
        return ()
    # NumPy's ``size`` argument rejects floats, including whole-valued ones
    # such as 100.0, so always coerce to a real int.
    n = int(np.ceil(n))
    all_possible_mass = np.array(formula_db['mass'])
    # Default side='left' on both bounds selects masses with 50 <= mass < pmz.
    idx_left, idx_right = all_possible_mass.searchsorted([50, pmz])
    all_allowed_mass = all_possible_mass[idx_left:idx_right]
    # Sampling without replacement cannot return more peaks than are available.
    n = min(n, int(idx_right - idx_left))
    mass = np.random.choice(all_allowed_mass, size=n, replace=False)
    # Shift the database masses by one electron mass according to polarity.
    mass = mass + coe * mass_e
    # Poisson counts, scaled down by a factor of 100.
    intensity = np.random.poisson(lam=lamda, size=n) / 100
    return so.pack_spectrum(mass, intensity)