
from __future__ import annotations

import warnings
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any

from polars._dependencies import _DELTALAKE_AVAILABLE, deltalake
from polars.datatypes import Null, Time
from polars.datatypes.convert import unpack_dtypes
from polars.io.cloud._utils import _get_path_scheme
from polars.io.parquet import scan_parquet
from polars.io.pyarrow_dataset.functions import scan_pyarrow_dataset
from polars.io.scan_options.cast_options import ScanCastOptions
from polars.schema import Schema

if TYPE_CHECKING:
    from typing import Literal

    from deltalake import DeltaTable

    from polars import DataFrame, DataType, LazyFrame
    from polars.io.cloud import CredentialProviderFunction


def read_delta(
    source: str | Path | DeltaTable,
    *,
    version: int | str | datetime | None = None,
    columns: list[str] | None = None,
    rechunk: bool = False,
    storage_options: dict[str, Any] | None = None,
    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
    delta_table_options: dict[str, Any] | None = None,
    use_pyarrow: bool = False,
    pyarrow_options: dict[str, Any] | None = None,
) -> DataFrame:
    """
    Reads into a DataFrame from a Delta lake table.

    Parameters
    ----------
    source
        DeltaTable or a Path or URI to the root of the Delta lake table.

        Note: For the local filesystem, absolute and relative paths are supported,
        but for the supported object storages (GCS, Azure and S3) a full URI must
        be provided.
    version
        Numerical version or timestamp version of the Delta lake table.

        Note: If `version` is not provided, the latest version of the Delta lake
        table is read.
    columns
        Columns to select. Accepts a list of column names.
    rechunk
        Make sure that all columns are contiguous in memory by
        aggregating the chunks into a single array.
    storage_options
        Extra options for the storage backends supported by `deltalake`.
        For cloud storages, this may include configurations for authentication etc.

        More info is available `here
        <https://delta-io.github.io/delta-rs/usage/loading-table/>`__.
    credential_provider
        Provide a function that can be called to provide cloud storage
        credentials. The function is expected to return a dictionary of
        credential keys along with an optional credential expiry time.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.
    delta_table_options
        Additional keyword arguments used when loading the Delta lake Table.
    use_pyarrow
        Flag to enable pyarrow dataset reads.
    pyarrow_options
        Keyword arguments used when converting the Delta lake Table to a pyarrow table.

    Returns
    -------
    DataFrame

    Examples
    --------
    Reads a Delta table from the local filesystem.
    Note: Since `version` is not provided, the latest version of the Delta table is read.

    >>> table_path = "/path/to/delta-table/"
    >>> pl.read_delta(table_path)  # doctest: +SKIP

    Reads a specific version of the Delta table from the local filesystem.
    Note: This will fail if the provided version of the delta table does not exist.

    >>> pl.read_delta(table_path, version=1)  # doctest: +SKIP
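
    Reads only a subset of columns. The column names used here are placeholders
    for columns that exist in your table.

    >>> pl.read_delta(table_path, columns=["foo", "bar"])  # doctest: +SKIP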

    Time travel a Delta table from the local filesystem using a timestamp version.

    >>> pl.read_delta(
    ...     table_path, version=datetime(2020, 1, 1, tzinfo=timezone.utc)
    ... )  # doctest: +SKIP

    Reads a Delta table from AWS S3.
    See a list of supported storage options for S3 `here
    <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants>`__.

    >>> table_path = "s3://bucket/path/to/delta-table/"
    >>> storage_options = {
    ...     "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
    ...     "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
    ... }
    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP
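
    Reads a Delta table from AWS S3 using a custom credential provider. The
    provider is expected to return a dictionary of credential keys together with
    an optional expiry time; the key names below are illustrative placeholders.

    >>> def my_credential_provider():
    ...     return {
    ...         "aws_access_key_id": "THE_AWS_ACCESS_KEY_ID",
    ...         "aws_secret_access_key": "THE_AWS_SECRET_ACCESS_KEY",
    ...     }, None
    >>> pl.read_delta(
    ...     table_path, credential_provider=my_credential_provider
    ... )  # doctest: +SKIP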

    Reads a Delta table from Google Cloud storage (GCS).
    See a list of supported storage options for GCS `here
    <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants>`__.

    >>> table_path = "gs://bucket/path/to/delta-table/"
    >>> storage_options = {"SERVICE_ACCOUNT": "SERVICE_ACCOUNT_JSON_ABSOLUTE_PATH"}
    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP

    Reads a Delta table from Azure.

    The following types of table paths are supported:

    * az://<container>/<path>
    * adl://<container>/<path>
    * abfs://<container>/<path>

    See a list of supported storage options for Azure `here
    <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants>`__.

    >>> table_path = "az://container/path/to/delta-table/"
    >>> storage_options = {
    ...     "AZURE_STORAGE_ACCOUNT_NAME": "AZURE_STORAGE_ACCOUNT_NAME",
    ...     "AZURE_STORAGE_ACCOUNT_KEY": "AZURE_STORAGE_ACCOUNT_KEY",
    ... }
    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP

    Reads a Delta table with additional Delta-specific options. In the example
    below, the `without_files` option is used, which loads the table without file
    tracking information.

    >>> table_path = "/path/to/delta-table/"
    >>> delta_table_options = {"without_files": True}
    >>> pl.read_delta(
    ...     table_path, delta_table_options=delta_table_options
    ...     table_path, delta_table_options=delta_table_options
    ... )  # doctest: +SKIP
    """
    df = scan_delta(
        source=source,
        version=version,
        storage_options=storage_options,
        credential_provider=credential_provider,
        delta_table_options=delta_table_options,
        use_pyarrow=use_pyarrow,
        pyarrow_options=pyarrow_options,
        rechunk=rechunk,
    )
    if columns is not None:
        df = df.select(columns)
    return df.collect()


def scan_delta(
    source: str | Path | DeltaTable,
    *,
    version: int | str | datetime | None = None,
    storage_options: dict[str, Any] | None = None,
    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
    delta_table_options: dict[str, Any] | None = None,
    use_pyarrow: bool = False,
    pyarrow_options: dict[str, Any] | None = None,
    rechunk: bool = False,
) -> LazyFrame:
    """
      S c c}w )aY  
    Lazily read from a Delta lake table.

    Parameters
    ----------
    source
        DeltaTable or a Path or URI to the root of the Delta lake table.

        Note: For the local filesystem, absolute and relative paths are supported,
        but for the supported object storages (GCS, Azure and S3) a full URI must
        be provided.
    version
        Numerical version or timestamp version of the Delta lake table.

        Note: If `version` is not provided, the latest version of the Delta lake
        table is read.
    storage_options
        Extra options for the storage backends supported by `deltalake`.
        For cloud storages, this may include configurations for authentication etc.

        More info is available `here
        <https://delta-io.github.io/delta-rs/usage/loading-table/>`__.
    credential_provider
        Provide a function that can be called to provide cloud storage
        credentials. The function is expected to return a dictionary of
        credential keys along with an optional credential expiry time.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.
    delta_table_options
        Additional keyword arguments used when loading the Delta lake Table.
    use_pyarrow
        Flag to enable pyarrow dataset reads.
    pyarrow_options
        Keyword arguments used when converting the Delta lake Table to a pyarrow table.
        Use this parameter when filtering on partitioned columns or to read
        from a 'fsspec' supported filesystem.
    rechunk
        Make sure that all columns are contiguous in memory by
        aggregating the chunks into a single array.

    Returns
    -------
    LazyFrame

    Examples
    --------
    Creates a scan for a Delta table from the local filesystem.
    Note: Since `version` is not provided, the latest version of the Delta table is read.

    >>> table_path = "/path/to/delta-table/"
    >>> pl.scan_delta(table_path).collect()  # doctest: +SKIP

    Creates a scan for a specific version of the Delta table from the local filesystem.
    Note: This will fail if the provided version of the delta table does not exist.

    >>> pl.scan_delta(table_path, version=1).collect()  # doctest: +SKIP

    Time travel a Delta table from the local filesystem using a timestamp version.

    >>> pl.scan_delta(
    ...     table_path, version=datetime(2020, 1, 1, tzinfo=timezone.utc)
    ... ).collect()  # doctest: +SKIP
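
    Since the scan is lazy, filters and projections are pushed into the scan so
    that only the required data is read. The column names used here are
    placeholders.

    >>> pl.scan_delta(table_path).filter(pl.col("foo") > 100).select(
    ...     "foo", "bar"
    ... ).collect()  # doctest: +SKIP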

    Creates a scan for a Delta table from AWS S3.
    See a list of supported storage options for S3 `here
    <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants>`__.

    >>> table_path = "s3://bucket/path/to/delta-table/"
    >>> storage_options = {
    ...     "AWS_REGION": "eu-central-1",
    ...     "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
    ...     "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
    ... }
    >>> pl.scan_delta(
    ...     table_path, storage_options=storage_options
    ... ).collect()  # doctest: +SKIP

    Creates a scan for a Delta table from Google Cloud storage (GCS).
    See a list of supported storage options for GCS `here
    <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants>`__.

    >>> table_path = "gs://bucket/path/to/delta-table/"
    >>> storage_options = {"SERVICE_ACCOUNT": "SERVICE_ACCOUNT_JSON_ABSOLUTE_PATH"}
    >>> pl.scan_delta(
    ...     table_path, storage_options=storage_options
    ... ).collect()  # doctest: +SKIP

    Creates a scan for a Delta table from Azure.
    Supported options for Azure are available `here
    <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants>`__.

    The following types of table paths are supported:

    * az://<container>/<path>
    * adl://<container>/<path>
    * abfs[s]://<container>/<path>

    >>> table_path = "az://container/path/to/delta-table/"
    >>> storage_options = {
    ...     "AZURE_STORAGE_ACCOUNT_NAME": "AZURE_STORAGE_ACCOUNT_NAME",
    ...     "AZURE_STORAGE_ACCOUNT_KEY": "AZURE_STORAGE_ACCOUNT_KEY",
    ... }
    >>> pl.scan_delta(
    ...     table_path, storage_options=storage_options
    ... ).collect()  # doctest: +SKIP
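
    Creates a scan with pyarrow dataset reads enabled and a pyarrow partition
    filter; the `partitions` entry is forwarded to deltalake when converting the
    table to a pyarrow dataset, and the partition column `year` is only an
    example.

    >>> table_path = "/path/to/delta-table/"
    >>> pl.scan_delta(
    ...     table_path,
    ...     use_pyarrow=True,
    ...     pyarrow_options={"partitions": [("year", "=", "2021")]},
    ... ).collect()  # doctest: +SKIP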

    Creates a scan for a Delta table with additional Delta-specific options.
    In the example below, the `without_files` option is used, which loads the
    table without file tracking information.

    >>> table_path = "/path/to/delta-table/"
    >>> delta_table_options = {"without_files": True}
    >>> pl.scan_delta(
    ...     table_path, delta_table_options=delta_table_options
    ... ).collect()  # doctest: +SKIP
    """
    _check_if_delta_available()

    from polars.io.cloud.credential_provider._builder import (
        _init_credential_provider_builder,
    )
    from polars.io.cloud.credential_provider._providers import (
        _get_credentials_from_provider_expiry_aware,
    )

    credential_provider_creds = {}
    credential_provider_builder = None

    if not isinstance(source, deltalake.DeltaTable):
        credential_provider_builder = _init_credential_provider_builder(
            credential_provider, source, storage_options, "scan_delta"
        )
    elif credential_provider is not None and credential_provider != "auto":
        msg = "cannot use credential_provider when passing a DeltaTable object"
        raise ValueError(msg)

    # Materialize credentials from the provider so that they can be forwarded to
    # deltalake, which cannot call the polars credential provider itself.
    if credential_provider_builder and (
        provider := credential_provider_builder.build_credential_provider()
    ):
        credential_provider_creds = (
            _get_credentials_from_provider_expiry_aware(provider) or {}
        )

    dl_tbl = _get_delta_lake_table(
        table_path=source,
        version=version,
        storage_options=(
            {**(storage_options or {}), **credential_provider_creds}
            if storage_options is not None or credential_provider_creds
            else None
        ),
        delta_table_options=delta_table_options,
    )

    if isinstance(source, deltalake.DeltaTable):
        storage_options = {
            **(source._storage_options or {}),
            **(storage_options or {}),
        }

    if use_pyarrow:
        pyarrow_options = pyarrow_options or {}
        pa_ds = dl_tbl.to_pyarrow_dataset(**pyarrow_options)
        return scan_pyarrow_dataset(pa_ds)

    if pyarrow_options is not None:
        msg = "To make use of pyarrow_options, set use_pyarrow to True"
        raise ValueError(msg)

    from deltalake.exceptions import DeltaProtocolError
    from deltalake.table import (
        MAX_SUPPORTED_READER_VERSION,
        NOT_SUPPORTED_READER_VERSION,
        SUPPORTED_READER_FEATURES,
    )

    table_protocol = dl_tbl.protocol()
    if (
        table_protocol.min_reader_version > MAX_SUPPORTED_READER_VERSION
        or table_protocol.min_reader_version == NOT_SUPPORTED_READER_VERSION
    ):
        msg = (
            f"The table's minimum reader version is "
            f"{table_protocol.min_reader_version} but polars delta scanner only "
            f"supports version 1 or {MAX_SUPPORTED_READER_VERSION} with these "
            f"reader features: {SUPPORTED_READER_FEATURES}"
        )
        raise DeltaProtocolError(msg)

    if (
        table_protocol.min_reader_version >= 3
        and table_protocol.reader_features is not None
    ):
        missing_features = {*table_protocol.reader_features}.difference(
            SUPPORTED_READER_FEATURES
        )
        if len(missing_features) > 0:
            msg = (
                f"The table has set these reader features: {missing_features} "
                "but these are not yet supported by the polars delta scanner."
            )
            raise DeltaProtocolError(msg)

    import pyarrow as pa

    from polars.convert import from_arrow

    # There is no direct deltalake -> polars schema conversion, so go through an
    # empty pyarrow table to obtain a polars schema for the Delta table.
    delta_schema = dl_tbl.schema().to_pyarrow(as_large_types=True)
    polars_schema = from_arrow(pa.Table.from_pylist([], schema=delta_schema)).schema
    partition_columns = dl_tbl.metadata().partition_columns

    def _split_schema(
        schema: Schema, partition_columns: list[str]
    ) -> tuple[Schema, Schema]:
        if len(partition_columns) == 0:
            return schema, Schema([])

        main_schema = []
        hive_schema = []

        for name, dtype in schema.items():
            if name in partition_columns:
                hive_schema.append((name, dtype))
            else:
                main_schema.append((name, dtype))

        return Schema(main_schema), Schema(hive_schema)

    # Partition (hive) columns are passed to scan_parquet separately from the
    # regular file columns.
    main_schema, hive_schema = _split_schema(polars_schema, partition_columns)

    file_uris = dl_tbl.file_uris()
    if dl_tbl.table_uri.startswith("lakefs://"):
        file_uris = [
            file_uri.replace("s3://", "lakefs://") for file_uri in file_uris
        ]

    return scan_parquet(
        file_uris,
        schema=main_schema,
        hive_schema=hive_schema if len(hive_schema) > 0 else None,
        cast_options=ScanCastOptions._default_iceberg(),
        missing_columns="insert",
        extra_columns="ignore",
        hive_partitioning=len(hive_schema) > 0,
        storage_options=storage_options,
        credential_provider=credential_provider,
        rechunk=rechunk,
    )


def _resolve_delta_lake_uri(table_uri: str | Path, *, strict: bool = True) -> str:
    resolved_uri = str(
        Path(table_uri).expanduser().resolve(strict)
        if _get_path_scheme(str(table_uri)) is None
        else table_uri
    )
    return resolved_uri


def _get_delta_lake_table(
    table_path: str | Path | DeltaTable,
    version: int | str | datetime | None = None,
    storage_options: dict[str, Any] | None = None,
    delta_table_options: dict[str, Any] | None = None,
) -> deltalake.DeltaTable:
    """
    Initialize a Delta lake table for use in read and scan operations.

    Notes
    -----
    Make sure to install deltalake>=0.8.0. Read the documentation
    `here <https://delta-io.github.io/delta-rs/usage/installation/>`_.
    """
    _check_if_delta_available()

    if isinstance(table_path, deltalake.DeltaTable):
        if any(
            x is not None for x in (version, storage_options, delta_table_options)
        ):
            warnings.warn(
                """When supplying a DeltaTable directly, `version`, `storage_options`, and `delta_table_options` are ignored.
                To silence this warning, don't supply those parameters.""",
                RuntimeWarning,
                stacklevel=1,
            )
        return table_path

    if delta_table_options is None:
        delta_table_options = {}

    resolved_uri = _resolve_delta_lake_uri(table_path)
    if not isinstance(version, (str, datetime)):
        dl_tbl = deltalake.DeltaTable(
            resolved_uri,
            version=version,
            storage_options=storage_options,
            **delta_table_options,
        )
    else:
        dl_tbl = deltalake.DeltaTable(
            resolved_uri,
            storage_options=storage_options,
            **delta_table_options,
        )
        dl_tbl.load_as_version(version)

    return dl_tbl


def _check_if_delta_available() -> None:
    if not _DELTALAKE_AVAILABLE:
        msg = "deltalake is not installed\n\nPlease run: pip install deltalake"
        raise ModuleNotFoundError(msg)


def _check_for_unsupported_types(dtypes: list[DataType]) -> None:
    schema_dtypes = unpack_dtypes(*dtypes)
    unsupported_types = {Null, Time}
    overlap = schema_dtypes & unsupported_types

    if overlap:
        msg = f"dataframe contains unsupported data types: {overlap}"
        raise TypeError(msg)