from pathlib import Path
from typing import Optional, Tuple

from scipy.stats import circmean
import numpy as np
import pandas as pd

from agroecometrics import settings

# Column labels resolved from the user's settings; every function below
# looks column names up through this mapping.
LABELS = settings.get_labels()


#### UTIL FUNCTIONS ####
def csv_file_exists(file_path: Path):
    """
    Check whether a path to a csv file is valid and whether the file exists.

    Args:
        file_path: The Path to the csv file.

    Returns:
        True if something already exists at the given Path and False
        otherwise.

    Raises:
        TypeError: If file_path is not a Path object.
        ValueError: If the file extension is not '.csv' (case-insensitive).
        FileNotFoundError: If the parent directory does not exist.
    """
    # Guard clauses: reject invalid input before touching the filesystem.
    if not isinstance(file_path, Path):
        raise TypeError("file_path must be a pathlib.Path object.")
    if file_path.suffix.lower() != ".csv":
        raise ValueError("The filename must end with '.csv'.")

    parent_dir = file_path.parent
    if not parent_dir.exists():
        raise FileNotFoundError(f"The directory '{parent_dir}' does not exist.")

    return file_path.exists()
# Data File Functions
def load_data(
    file_path: Path,
    date_format: str = LABELS['date_format'],
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> pd.DataFrame:
    """
    Load a csv file into a DataFrame, filtered by date and cleaned.

    Rows whose timestamp lies between start_date and end_date (both bounds
    inclusive) are kept. If a bound is not given, the data is not limited on
    that side. Column names are stripped of surrounding whitespace and of
    single quotes.

    Three columns are added to the result:
      * the day of year counted from zero (January 1st = 0, December 31st of
        a non-leap year = 364),
      * the year as an integer,
      * the timestamp normalized to midnight.

    Note:
        The bounds are compared as full timestamps. A date-only end_date is
        parsed as midnight of that day, so rows later on that same day are
        not included.

    Args:
        file_path: The path to your csv data file.
        date_format: The format used to parse the date-time column.
        start_date: Optional. The date to start filtering from (inclusive).
        end_date: Optional. The date to stop filtering on (inclusive).

    Returns:
        A DataFrame with the information from the csv file filtered by the
        specified dates, with a fresh 0..n-1 index.

    Raises:
        TypeError: If file_path is not a Path object.
        ValueError: If the file extension is not '.csv', or if start_date is
            later than end_date.
        FileNotFoundError: If the file (or its directory) could not be found.
    """
    # Check parameters
    if not csv_file_exists(file_path):
        raise FileNotFoundError(
            f"The file, {file_path}, that you are trying to load does not exist."
        )

    # Parse each bound once and reuse it for validation and filtering.
    start = pd.to_datetime(start_date) if start_date else None
    end = pd.to_datetime(end_date) if end_date else None
    if start is not None and end is not None and start > end:
        raise ValueError(
            "The end date must be after the start date.\n"
            f"Start Date:\t{start_date}\nEnd Date:\t{end_date}"
        )

    # Read the data from the given csv file and make the formatting consistent
    df = pd.read_csv(file_path)
    df.columns = df.columns.str.strip().str.replace("'", "")
    date_col = LABELS['date_time']
    df[date_col] = pd.to_datetime(df[date_col], format=date_format)

    # Filter data using the start and end dates
    if start is not None:
        df = df[df[date_col] >= start]
    if end is not None:
        df = df[df[date_col] <= end]

    # reset_index returns a new frame, so the column assignments below do not
    # write into a slice of the originally loaded data.
    df = df.reset_index(drop=True)

    # Add the DOY, Year, and Normalized Date columns
    timestamps = df[date_col]
    df[LABELS['doy']] = timestamps.dt.dayofyear - 1
    df[LABELS['year']] = timestamps.dt.year
    df[LABELS['date_norm']] = timestamps.dt.normalize()
    return df
def interpolate_missing_data(
    df: pd.DataFrame,
    label_keys: Optional[list[str]] = None,
    method: str = "linear",
) -> None:
    """
    Interpolate missing data within a DataFrame, modifying it in place.

    When label_keys is given, each key is translated to a column name through
    LABELS and only those columns are interpolated. When label_keys is None,
    every numeric column of the DataFrame is interpolated.

    Args:
        df: The DataFrame to interpolate data on. It is updated in place.
        label_keys: A list of the label keys to interpolate data on, or None
            to interpolate all numeric columns.
        method: The pandas interpolation type to use on the data.

    Raises:
        ValueError: If label_keys is an empty list.
        KeyError: If a key in label_keys is not found in LABELS. No column is
            modified in that case.
    """
    if label_keys is None:
        # Interpolation is only meaningful on numeric data, so text and
        # date columns are left untouched.
        columns = list(df.select_dtypes(include="number").columns)
    elif not label_keys:
        raise ValueError("label_keys must contain a list of keys or be None")
    else:
        # Validate every key up front so a bad key cannot leave the frame
        # partially interpolated.
        for key in label_keys:
            if key not in LABELS:
                raise KeyError(key + " was not found in the available LABELS")
        columns = [LABELS[key] for key in label_keys]

    # Assign the result back: calling interpolate(inplace=True) on df[column]
    # acts on a temporary Series and is not guaranteed to update df.
    for column in columns:
        df[column] = df[column].interpolate(method=method)
def save_data(df: pd.DataFrame, file_path: Path) -> Path:
    """
    Save a DataFrame to the given csv file.

    If the file already exists, the user is asked on the console to confirm
    the overwrite. Only an answer of "y" (ignoring case and surrounding
    whitespace) allows the file to be replaced.

    Args:
        df: The DataFrame to be saved.
        file_path: A Path to where you would like to save the data.

    Returns:
        The Path the data was written to.

    Raises:
        TypeError: If file_path is not a Path object.
        ValueError: If the file extension is not '.csv'.
        FileNotFoundError: If the parent directory does not exist.
        FileExistsError: If the file already exists and the user selects not
            to overwrite it.
    """
    # Check parameters
    if csv_file_exists(file_path):
        print(f"The file {file_path} already exists.")
        answer = input("Are you sure you want OVERWRITE the file(y/n): ")
        if answer.strip().lower() != "y":
            raise FileExistsError(f"The file, {file_path}, already exists.")

    # Save DataFrame without the index column
    df.to_csv(file_path, index=False)
    return file_path