# Source code for spectral_denoising.search_utils

import pandas as pd
import numpy as np
import numexpr



        # return data[data[column_name].to_numpy() != item]
def quick_search_sorted(data_raw: pd.DataFrame, column_name: str, value_start: float, value_end: float) -> pd.DataFrame:
    """
    Perform a quick search on a sorted column of a DataFrame to find rows within a specified range.

    The lookup is a binary search (``np.searchsorted``), so ``data_raw`` must already be
    sorted in ascending order by ``column_name``; on unsorted data the result is meaningless.
    Both bounds are inclusive.

    Parameters:
        data_raw (pd.DataFrame): The input DataFrame, sorted ascending by ``column_name``.
        column_name (str): The name of the column to search within. Its values must be convertible to float.
        value_start (float): The starting value of the range (inclusive).
        value_end (float): The ending value of the range (inclusive).
    Returns:
        pd.DataFrame: The contiguous positional slice of ``data_raw`` whose values in ``column_name``
        satisfy ``value_start <= value <= value_end``. The slice is empty when no row matches
        (including when ``value_start > value_end``). Index labels are left as they are in ``data_raw``.
    """
    search_array = data_raw[column_name].to_numpy(dtype="float")
    # side='left' lands on the first row >= value_start; side='right' lands just past the
    # last row <= value_end, so the half-open iloc slice below is inclusive at both ends.
    index_start = np.searchsorted(search_array, value_start, side="left")
    index_end = np.searchsorted(search_array, value_end, side="right")
    return data_raw.iloc[index_start:index_end]
# def quick_search_sorted(data_raw, column_name,value_start, value_end): # """ # Perform a quick search on a sorted column of a DataFrame to find rows within a specified range. # Parameters: # data_raw (pd.DataFrame): The input DataFrame containing the data to search. # column_name (str): The name of the column to search within. # value_start (float): The starting value of the range. # value_end (float): The ending value of the range. # Returns: # pd.DataFrame: A DataFrame containing the rows where the values in the specified column fall within the given range. # """ # search_array=data_raw[column_name].to_numpy(dtype="float") # index_start = np.searchsorted(search_array, value_start,side = 'left') # index_end = np.searchsorted(search_array, value_end,side = 'right') # return(data_raw.iloc[index_start:index_end])
def quick_search_values(data_raw: pd.DataFrame, column_name: str, value_start: float, value_end: float) -> pd.DataFrame:
    """
    Perform a quick search on a DataFrame to find rows where the values in a specified column fall within a given range.

    The input does not need to be sorted: a copy sorted ascending by ``column_name`` is made
    first, and ``quick_search_sorted`` then performs the range lookup on that copy.
    ``data_raw`` itself is not modified.

    Args:
        data_raw (pd.DataFrame): The raw DataFrame to search.
        column_name (str): The name of the column to search within.
        value_start (numeric): The starting value of the range (inclusive).
        value_end (numeric): The ending value of the range (inclusive).
    Returns:
        pd.DataFrame: A DataFrame containing rows where the values in the specified column are within
        the range [value_start, value_end]. Rows come back ordered by ``column_name``, and the index is
        not reset, so each row keeps the index label it had in ``data_raw``.
    """
    data_sorted = data_raw.sort_values(by=column_name)
    return quick_search_sorted(data_sorted, column_name, value_start, value_end)
# def num_search(data, column_name,number, direction, step = None,inclusion = False): # """ # Perform a numerical search on a specified column of a DataFrame based on given criteria. # Parameters: # data (pd.DataFrame): The DataFrame to search within. # column_name (str): The name of the column to perform the search on. # number (float or int): The reference number for the search condition. # direction (str): The direction of the comparison. Can be one of the following: '>', '<', '==', 'between'. # step (float or int, optional): The step value for the 'between' direction. Default is None. # inclusion (bool, optional): Whether to include the boundary values in the comparison. Default is False. # Returns: # pd.DataFrame: A DataFrame containing rows that match the search criteria. # Raises: # ValueError: If an invalid direction is provided. # Examples: # >>> num_search(df, 'age', 30, '>') # >>> num_search(df, 'age', 30, 'between', step=5, inclusion=True) # """ # x = data[column_name].values # if direction == ">": # if inclusion == False: # return(data[numexpr.evaluate('(x > number)')]) # else: # return(data[numexpr.evaluate('(x >= number)')]) # elif direction == '<': # if inclusion == False: # return(data[numexpr.evaluate('(x < number)')]) # else: # return(data[numexpr.evaluate('(x <= number)')]) # elif direction == '==': # return(data[numexpr.evaluate('(x == number)')]) # elif direction =='between' and step != None: # if inclusion == False: # temp = data[numexpr.evaluate('(x > number-step)')] # x = temp[column_name].values # return (temp[numexpr.evaluate('(x < number+step)')]) # else: # temp = data[numexpr.evaluate('(x >= number-step)')] # x = temp[column_name].values # return (temp[numexpr.evaluate('(x <= number+step)')]) # else: # print('the wrong method is passed')