Source code for sheap.Utils.PathsFunctions

"""
Cross-matching spectra with metadata
====================================

This module provides utilities to cross-match lists of spectra
stored in a directory with metadata read from a CSV file into a
pandas DataFrame.
It supports both SDSS and DESI-style naming conventions.

Functions
---------
cross_pandas_spectra(path_dr16, path_data, name_csv)
    Cross-match SDSS-style spectra (PLATE-MJD-FIBERID) with metadata.

cross_pandas_spectra_desi(path_dr16, path_data, name_csv)
    Cross-match DESI-style spectra with metadata.
"""

__author__ = 'felavila'

__all__ = [
    "cross_pandas_spectra",
    "cross_pandas_spectra_desi",
]

import glob
import os
import pandas as pd


def cross_pandas_spectra_desi(path_dr16: str, path_data: str, name_csv: str):
    """
    Cross-match DESI spectra with entries in a metadata CSV.

    Every ``*.fits`` file found directly inside ``{path_dr16}/{path_data}``
    is compared, by file stem, against an identifier of the form
    ``PLATE-MJD-FIBERID`` (zero-padded to 4, 5 and 4 digits) built from each
    row of the CSV.

    NOTE(review): the identifier pattern is the same one used for SDSS
    spectra in this module -- confirm it matches the DESI file naming in use.

    Parameters
    ----------
    path_dr16 : str
        Base path to the DR16/DR data directory.
    path_data : str
        Subdirectory under `path_dr16` containing `.fits` spectra.
    name_csv : str
        CSV file (relative to `path_dr16`) with the metadata table.
        Must include the columns ``PLATE``, ``MJD`` and ``FIBERID``.

    Returns
    -------
    file_paths : list of str or None
        All spectrum file paths found in the directory, or ``None`` if no
        metadata row matched a spectrum.
    objs_panda_paths_filtered : pandas.DataFrame or None
        Metadata rows whose identifier matches a spectrum on disk, re-indexed
        from zero, with two added columns: ``dr_name`` (the identifier) and
        ``fit_path`` (the resolved file path). ``None`` if nothing matched.

    Raises
    ------
    ValueError
        If a ``PLATE``, ``MJD`` or ``FIBERID`` value cannot be converted to
        an integer (for example a missing value).
    """
    file_paths = glob.glob(f"{path_dr16}/{path_data}/*.fits")
    objs_panda = pd.read_csv(f"{path_dr16}/{name_csv}")

    # Cast to int before formatting: a float-typed column (e.g. 51602.0)
    # would otherwise make the ``d`` format code raise ValueError.
    objs_panda["dr_name"] = [
        f"{int(plate):04d}-{int(mjd):05d}-{int(fiberid):04d}"
        for plate, mjd, fiberid in zip(
            objs_panda["PLATE"], objs_panda["MJD"], objs_panda["FIBERID"]
        )
    ]

    # Strip only the trailing extension; str.replace would also remove any
    # ".fits" occurring inside the file name.
    available_stems = [
        os.path.splitext(os.path.basename(path))[0] for path in file_paths
    ]

    matched = objs_panda[
        objs_panda["dr_name"].isin(available_stems)
    ].reset_index(drop=True)

    if matched.empty:
        print("No matches found.")
        return None, None

    print(f"Cross-match found {len(matched)} entries.")
    matched["fit_path"] = [
        os.path.join(path_dr16, f"{path_data}/{stem}.fits")
        for stem in matched["dr_name"]
    ]
    return file_paths, matched
def cross_pandas_spectra(path_dr16: str, path_data: str, name_csv: str):
    """
    Cross-match SDSS spectra with entries in a metadata CSV.

    Every ``*.fits`` file found directly inside ``{path_dr16}/{path_data}``
    is compared, by file stem, against an identifier of the form
    ``PLATE-MJD-FIBERID`` (zero-padded to 4, 5 and 4 digits) built from each
    row of the CSV.

    Parameters
    ----------
    path_dr16 : str
        Base path to the DR16/DR data directory.
    path_data : str
        Subdirectory under `path_dr16` containing `.fits` spectra.
    name_csv : str
        CSV file (relative to `path_dr16`) with the metadata table.
        Must include the columns ``PLATE``, ``MJD`` and ``FIBERID``.

    Returns
    -------
    file_paths : list of str or None
        All spectrum file paths found in the directory, or ``None`` if no
        metadata row matched a spectrum.
    objs_panda_paths_filtered : pandas.DataFrame or None
        Metadata rows whose identifier matches a spectrum on disk, re-indexed
        from zero, with two added columns: ``dr_name`` (the identifier) and
        ``fit_path`` (the resolved file path). ``None`` if nothing matched.

    Raises
    ------
    ValueError
        If a ``PLATE``, ``MJD`` or ``FIBERID`` value cannot be converted to
        an integer (for example a missing value).
    """
    file_paths = glob.glob(f"{path_dr16}/{path_data}/*.fits")
    objs_panda = pd.read_csv(f"{path_dr16}/{name_csv}")

    # int() makes the ``d`` format code safe for float-typed columns.
    objs_panda["dr_name"] = [
        f"{int(plate):04d}-{int(mjd):05d}-{int(fiberid):04d}"
        for plate, mjd, fiberid in zip(
            objs_panda["PLATE"], objs_panda["MJD"], objs_panda["FIBERID"]
        )
    ]

    # Strip only the trailing extension. str.replace would also remove a
    # ".fits" inside the name (e.g. "x.fits.fits"), producing a false match
    # whose fit_path does not exist on disk.
    available_stems = [
        os.path.splitext(os.path.basename(path))[0] for path in file_paths
    ]

    matched = objs_panda[
        objs_panda["dr_name"].isin(available_stems)
    ].reset_index(drop=True)

    if matched.empty:
        print("No matches found.")
        return None, None

    print(f"Cross-match found {len(matched)} entries.")
    matched["fit_path"] = [
        os.path.join(path_dr16, f"{path_data}/{stem}.fits")
        for stem in matched["dr_name"]
    ]
    return file_paths, matched