Skip to content

parquet_loader

Classes:

ParquetLoader

Bases: BaseLoader

Load and save Parquet files using pandas.

Methods:

  • load

    Load a parquet file with optional configuration.

  • save

    Save a dataframe to parquet with optional configuration.

load

load(
    file_path: Path, loader_config: Optional[Dict] = None
) -> DataFrame

Load a parquet file with optional configuration.

Parameters:

  • file_path (Path) –

    Path to the parquet file

  • loader_config (Optional[Dict], default: None ) –

    Configuration options for pd.read_parquet

Returns:

  • DataFrame

    The loaded dataframe.

Source code in src/unibox/loaders/parquet_loader.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def load(self, file_path: Path, loader_config: Optional[Dict] = None) -> pd.DataFrame:
    """Read a parquet file into a dataframe.

    Only the entries of ``loader_config`` whose keys appear in
    ``self.SUPPORTED_LOAD_CONFIG`` are forwarded to ``pd.read_parquet``.
    The full config and the set of forwarded keys are then handed to
    ``self._warn_unused_config`` before the file is read.

    Args:
        file_path (Path): Path to the parquet file
        loader_config (Optional[Dict]): Configuration options for pd.read_parquet

    Returns:
        pd.DataFrame: The loaded dataframe
    """
    # A missing (or empty) config behaves like an empty dict.
    options = loader_config if loader_config else {}

    # Keep only the recognised options, in SUPPORTED_LOAD_CONFIG order.
    read_kwargs = {
        name: options[name]
        for name in self.SUPPORTED_LOAD_CONFIG
        if name in options
    }

    # Let the shared helper report whatever was not consumed above.
    self._warn_unused_config(options, set(read_kwargs), "ParquetLoader")

    return pd.read_parquet(file_path, **read_kwargs)

save

save(
    file_path: Path,
    data: DataFrame,
    loader_config: Optional[Dict] = None,
) -> None

Save a dataframe to parquet with optional configuration.

Parameters:

  • file_path (Path) –

    Where to save the parquet file

  • data (DataFrame) –

    DataFrame to save

  • loader_config (Optional[Dict], default: None ) –

    Configuration options for pd.DataFrame.to_parquet

Source code in src/unibox/loaders/parquet_loader.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def save(self, file_path: Path, data: pd.DataFrame, loader_config: Optional[Dict] = None) -> None:
    """Write a dataframe to a parquet file.

    Only the entries of ``loader_config`` whose keys appear in
    ``self.SUPPORTED_SAVE_CONFIG`` are forwarded to ``data.to_parquet``.
    The full config and the set of forwarded keys are then handed to
    ``self._warn_unused_config`` before the file is written.

    Args:
        file_path (Path): Where to save the parquet file
        data (pd.DataFrame): DataFrame to save
        loader_config (Optional[Dict]): Configuration options for to_parquet
    """
    # A missing (or empty) config behaves like an empty dict.
    options = loader_config if loader_config else {}

    # Keep only the recognised options, in SUPPORTED_SAVE_CONFIG order.
    write_kwargs = {
        name: options[name]
        for name in self.SUPPORTED_SAVE_CONFIG
        if name in options
    }

    # Let the shared helper report whatever was not consumed above.
    self._warn_unused_config(options, set(write_kwargs), "ParquetLoader")

    data.to_parquet(file_path, **write_kwargs)