Source code for smartclass.helpers.combine_csv_files

"""Combine multiple CSV files into a single CSV file."""

from __future__ import annotations

import polars

__all__ = [
    "combine_csv_files",
]


[docs] def combine_csv_files( input_files: list[str] | str, output_file: str, separator="\t" ) -> None: """ Combine multiple CSV files into a single CSV file. :param input_files: List of input CSV file paths or a single input CSV file path. :type input_files: Union[list[str], str] :param output_file: Output CSV file path where the combined data will be saved. :type output_file: str :param separator: Separator used in the CSV files (default is 'tab'). :type separator: str """ if isinstance(input_files, str): input_files = [input_files] # Ensure input_files is a list # Create an empty list to store DataFrames dfs = [] # Read each CSV file and store it in the list for file_path in input_files: df = polars.read_csv(file_path, separator=separator) dfs.append(df) # Create a merged DataFrame concatenated_df = dfs[0] # Iterate over the remaining DataFrames and concatenate them row-wise for df in dfs[1:]: concatenated_df = concatenated_df.vstack(df) # Export the merged DataFrame as a CSV file concatenated_df.write_csv(output_file, separator=separator)
if __name__ == "__main__":
    import logging

    logging.basicConfig(level=logging.INFO)

    # Per-representation exports that get merged into one table.
    source_paths = [
        "scratch/wikidata_classes_cxsmiles.tsv",
        "scratch/wikidata_classes_smarts.tsv",
        "scratch/wikidata_classes_smiles_canonical.tsv",
        "scratch/wikidata_classes_smiles_isomeric.tsv",
    ]
    merged_path = "scratch/wikidata_classes_full.tsv"

    combine_csv_files(input_files=source_paths, output_file=merged_path)

    # Report where the combined file was written.
    logging.info(merged_path)