
    .hJ                       U d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZmZmZ d dlmZ erd dlZd dl Z!d dl"m#Z# d dl$m%Z%  G d d      Z& G d de      Z'e G d de'             Z(e G d de'             Z)d?dZ*	 	 	 	 d@dZ+i dddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=Z,de-d><   y)A    )annotationsN)ABCabstractmethod)	dataclass)partial)perf_counter)TYPE_CHECKINGAnyLiteral)eprintverbose)ComputeError)IcebergStatisticsLoader)IdentityTransformedPartitionValuesBuilder_scan_pyarrow_dataset_impl)ScanCastOptions)Table)	LazyFramec                      e Zd ZdZddddd	 	 	 	 	 	 	 	 	 	 	 ddZddZddZddddd	 	 	 	 	 	 	 	 	 dd	Zddddd	 	 	 	 	 	 	 	 	 dd
ZddZ	ddZ
ddZddZy)IcebergDatasetz Dataset interface for PyIceberg.NT)snapshot_idiceberg_storage_propertiesreader_overrideuse_metadata_statisticsc                   d | _         d | _        || _        || _        || _        || _        t        |t              r|| _         y || _        y N)_metadata_path_table_snapshot_id_iceberg_storage_properties_reader_override_use_metadata_statistics
isinstancestr)selfsourcer   r   r   r   s         W/var/www/html/ai-service/venv/lib/python3.12/site-packages/polars/io/iceberg/dataset.py__init__zIcebergDataset.__init__   sN     #'+E(GV(?%
 fc""(D DK    c                "    | j                         S )zFetch the schema of the table.)arrow_schemar%   s    r'   schemazIcebergDataset.schema;   s      ""r)   c                V    ddl m}  || j                         j                               S )z$Fetch the arrow schema of the table.r   schema_to_pyarrow)pyiceberg.io.pyarrowr0   tabler-   )r%   r0   s     r'   r+   zIcebergDataset.arrow_schema?   s    : !4!4!677r)   existing_resolved_version_keylimit
projectionfilter_columnsc               l    | j                  ||||      x}	 y|j                         |j                  fS )zConstruct a LazyFrame scan.r3   N)_to_dataset_scan_implto_lazyframesnapshot_id_key)r%   r4   r5   r6   r7   	scan_datas         r'   to_dataset_scanzIcebergDataset.to_dataset_scanE   sS     33.K%-	 4  I  %%')B)BBBr)   c               b
   ddl m} dd l}|j                  j                  j                         }|r.t        d| j                   d| d| d| d| j                   
       | j                         }|r"t        d|j                  j                          | j                  }	d }
|	c|j                  |	      }|d	|	 }t        |      |j                  }
|
d
|	 d}t        |      |j                         |
   }|j                    }nG|j#                         }|j                  j$                  }
|j'                         x}|j                    nd}|||k(  r|rt        d|d       y | j(                  xs t+        j,                  d      }|r|dvrd| d}t        |      |dk(  rdn|j.                  dk  sd|j.                   nd }|dn
t1        |      }|dk(  r|n |j2                  | }g }t5        ||      }| j                  r|t7        | |j2                  |       nd }i }|dk7  r|sddlm}m} |rt        d       t?               }|jA                  |	||      }d}tC        |jE                               D ]`  \  }}|jF                  jH                  |jJ                  k7  rd|jF                  jH                   } n|jL                  rg ||<   |jL                  D ]y  }|jN                  |jP                  k7  rd|jN                   } nO|jH                  |jJ                  k7  rd|jH                   } n%||   jS                  |jT                         |dz  }{ |r n|jW                  ||jF                  jX                  |jF                  jZ                         ||j]                  |jF                         |jS                  |jF                  jT                         c |rt?               |z
  } t        d | d!d"       |s|rAt_        |      dk(  rdnd#}!dk(  rdnd#}"t        d$t_        |       d%|! d&|	 d'|
 d(| d)|"         ||      }#|ja                         }$||ja                  t_        |      |$      nd }%| jb                  te        | jb                        nd }&tg        |||#|$||%||&|*	      S |d+k(  rd,| }ti        |      |rt        d-|        tk        tl        ||	||.      }' ||j#                               }(tn        jp                  js                  |(|'d/d/0      })tu        |)|1      S )2Nr   r/   z0IcebergDataset: to_dataset_scan(): snapshot ID: z	, limit: z, projection: z, filter_columns: z!, self._use_metadata_statistics: zEIcebergDataset: to_dataset_scan(): tbl.metadata.current_snapshot_id: ziceberg snapshot ID not found: z#IcebergDataset: requested snapshot z did not contain a schema ID zCIcebergDataset: to_dataset_scan(): early return (snapshot_id_key = )POLARS_ICEBERG_READER_OVERRIDE)native	pyicebergz-iceberg: unknown value for reader_override: 'z*', expected one of ('native', 'pyiceberg')rC   z"forced reader_override='pyiceberg'   z"unsupported table format version: )*)DataFileContent
FileFormatz7IcebergDataset: to_dataset_scan(): begin path expansion)r   r5   selected_fieldsznon-parquet format: z unsupported deletion file type: z"unsupported deletion file format:    )current_indexpartition_spec_idpartition_valuesz:IcebergDataset: to_dataset_scan(): finish path expansion (z.3fzs)sz:IcebergDataset: to_dataset_scan(): native scan_parquet(): z sourcez, snapshot ID: z, schema ID: z, z deletion file)	sourcesprojected_iceberg_schemacolumn_mappingdefault_valuesdeletion_filesmin_max_statisticsstatistics_loaderstorage_options_snapshot_id_keyrB   z)iceberg reader_override='native' failed: zGIcebergDataset: to_dataset_scan(): fallback to python[pyiceberg] scan: )r   n_rowswith_columnsT)pyarrowis_pure)lfrV   );r1   r0   polars._utils.logging_utilsloggingr   r   r   r"   r2   metadatacurrent_snapshot_idsnapshot_by_id
ValueError	schema_idschemasr   r-   current_schema_idcurrent_snapshotr!   osgetenvformat_versiontupleselectr   r   pyiceberg.manifestrF   rG   r   scan	enumerate
plan_filesfilefile_formatPARQUETdelete_filescontentPOSITION_DELETESappend	file_pathpush_partition_valuesspec_id	partitionpush_file_statisticslenfinishr    0_convert_iceberg_to_object_store_storage_options_NativeIcebergScanDatar   r   r   plr   _scan_python_function_PyIcebergScanData)*r%   r4   r5   r6   r7   r0   polarsr   tblr   rc   snapshotmsgiceberg_schemar;   vr   fallback_reasonrH   rO   rN   missing_field_defaultsrT   rR   rF   rG   
start_timerm   total_deletion_filesi	file_infodeletion_fileelapsedrM   s2rP   identity_transformed_valuesrS   rU   funcr+   r[   s*                                             r'   r9   z$IcebergDataset._to_dataset_scan_implZ   sF    	;$--''//1  $ 1 12 3 !)l +##1"2 32262O2O1PR jjl558\\5U5U4VX
 ''	"))+6H7}E o% **I 9+ G2 2  !o% [[]95N!)!5!5 6O ZZ\N66I -0,@,@,B'Bq&O1==/UW 
 *5-@+'+1.
  // 
299,4
 6MM#$$NP  S/! +- 1 %%* 6c6H6H5IJ 	 %/$6&E*<M &( &&&8 	! !J$"
 ,,1K $C)>)>)>)OP 	
 02k)/FPQ%J88' /  D $%  )$//*; < (99>>--1C1CC.y~~/I/I.JK $ ))(*N1%)2)?)? 2(00O4T4TT B#0#8#8"9!; , "(44
8J8JJ D#0#<#<"=!? , "&q)001H1HI,1,!2$ #&<<"#&/nn&<&<%.^^%=%= =  %0%::9>>Jy~~778Q(9T &.:5..5c]">
 g,!+B/14R#.7|nGA3 /$$/= 1""+B+,N2$@ /~>N*@*G*G*I' %0 "((W7RS  33? A44   *)A-:-#5"3 /!0
 
 (=o=NOCs##77F6GI
 &##
 )6\\//	 0 
 "R/JJr)   c                    | j                   8| j                  d}t        |      | j                         j                  | _         | j                   S )zFetch the metadata path.1impl error: both metadata_path and table are None)r   r   rb   r2   metadata_location)r%   r   s     r'   metadata_pathzIcebergDataset.metadata_path[  sG    &{{"I o%"&**,"@"@D"""r)   c                   | j                   q| j                  d}t        |      t               rt	        d| j                         ddlm} |j                  | j                  | j                  xs i       | _         | j                   S )z!Fetch the PyIceberg Table object.r   z;IcebergDataset: construct table from self._metadata_path = r   )StaticTable)r   
properties)	r   r   rb   r   r   pyiceberg.tabler   from_metadatar    )r%   r   r   s      r'   r2   zIcebergDataset.tablef  s    ;;""*I o%yUt?R?R>VWX3%33"&"5"5;;Ar 4 DK
 {{r)   c           
         | j                         | j                  | j                  | j                  d}t	               r>|d   }|d   x}d| dnd }t        |d         }|d   }t        d| d| d	| d
|        |S )N)r   r   r   r   r   r   'r   r   #IcebergDataset: getstate(): path: 'z', snapshot_id: z, iceberg_storage_properties: , reader_override: )r   r   r    r!   r   _redact_dict_valuesr   )r%   state	path_reprr   r   	keys_reprr   s          r'   __getstate__zIcebergDataset.__getstate__  s    !//1,,*.*J*J#44	
 9o.I,1-,@'@q&MAaS(SWK+E2N,OPI#$56O# %  +} -//8k :$$3#4	6 r)   c           
         t               r4|d   }|d   }t        |d         }|d   }t        d| d| d| d|        t        j	                  | |d   |d   |d   |d   	       y )
Nr   r   r   r   r   z', snapshot_id: 'z', iceberg_storage_properties: r   )r   r   r   )r   r   r   r   r(   )r%   r   r   r   r   r   s         r'   __setstate__zIcebergDataset.__setstate__  s    9o.I.K+E2N,OPI#$56O# %!!, .//8k :$$3#4	6 	/"m,',-I'J!"34 	  	
r)   )r&   zstr | Tabler   
int | Noner   zdict[str, Any] | Noner   z%Literal['native', 'pyiceberg'] | Noner   boolreturnNone)r   z	pa.schema)
r4   
str | Noner5   r   r6   list[str] | Noner7   r   r   ztuple[LazyFrame, str] | None)
r4   r   r5   r   r6   r   r7   r   r   z2_NativeIcebergScanData | _PyIcebergScanData | Noner   r$   )r   r   )r   dict[str, Any])r   r   r   r   )__name__
__module____qualname____doc__r(   r-   r+   r=   r9   r   r2   r   r    r)   r'   r   r      s   * #'<@AE(,!!  	!
 %:! ?! "&! 
!8#8 59 '++/C (2C 	C
 %C )C 
&C0 59 '++/{K (2{K 	{K
 %{K ){K 
<{KB	#40
r)   r   c                  :    e Zd Zedd       Zeedd              Zy)_ResolvedScanDataBasec                     y r   r   r,   s    r'   r:   z"_ResolvedScanDataBase.to_lazyframe  s    ,/r)   c                     y r   r   r,   s    r'   r;   z%_ResolvedScanDataBase.snapshot_id_key  s    &)r)   Nr   pl.LazyFramer   )r   r   r   r   r:   propertyr;   r   r)   r'   r   r     s&    / /)  )r)   r   c                      e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   ddZedd       Zy)r   z.Resolved parameters for a native Iceberg scan.z	list[str]rN   zpyiceberg.schema.SchemarO   z	pa.SchemarP   zdict[int, pl.Series | str]rQ   zdict[int, list[str]]rR   zpl.DataFrame | NonerS   zIcebergStatisticsLoader | NonerT   zdict[str, str] | NonerU   r$   rV   c                    ddl m}  || j                  t        j                         dd| j
                  d| j                  fd| j                  fd| j                  f| j                  	      S )	Nr   )scan_parquetinsertignoreziceberg-column-mappingicebergziceberg-position-delete)cast_optionsmissing_columnsextra_columnsrU   _column_mapping_default_values_deletion_files_table_statistics)
polars.io.parquet.functionsr   rN   r   _default_icebergrU   rP   rQ   rR   rS   )r%   r   s     r'   r:   z#_NativeIcebergScanData.to_lazyframe  sg    <LL(99;$" 005t7J7JK&(;(;<68K8KL"55

 
	
r)   c                    | j                   S r   rV   r,   s    r'   r;   z&_NativeIcebergScanData.snapshot_id_key      $$$r)   Nr   r   r   r   r   r   __annotations__r:   r   r;   r   r)   r'   r   r     sV    855..((++
 65**
 % %r)   r   c                  @    e Zd ZU dZded<   ded<   d	dZed
d       Zy)r   z.Resolved parameters for reading via PyIceberg.r   r[   r$   rV   c                    | j                   S r   )r[   r,   s    r'   r:   z_PyIcebergScanData.to_lazyframe  s    wwr)   c                    | j                   S r   r   r,   s    r'   r;   z"_PyIcebergScanData.snapshot_id_key  r   r)   Nr   r   r   r   r)   r'   r   r     s,    8 	 % %r)   r   c                    t        | t              r| j                         D ci c]  }|d c}S | dt        |       j                   dS dS c c}w )NREDACTED<z object>r   )r#   dictkeystyper   )objks     r'   r   r     s_     c4  !$
+1J+ ? c##$H-
 +s   
Adict[str, str]c                    i }| j                         D ]-  \  }}t        j                  |      x}	 |||<   $d|vs)|||<   / |S )N.)items&ICEBERG_TO_OBJECT_STORE_CONFIG_KEY_MAPget)r   rU   r   r   translated_keys        r'   r~   r~     sg     O*002 
#1DHHKKN /0ON+\
 "#OA
#  r)   zs3.endpointaws_endpoint_urlzs3.access-key-idaws_access_key_idzs3.secret-access-keyaws_secret_access_keyzs3.session-tokenaws_session_tokenz	s3.region
aws_regionzs3.proxy-uri	proxy_urlzs3.connect-timeoutconnect_timeoutzs3.request-timeouttimeoutzs3.force-virtual-addressing aws_virtual_hosted_style_requestzadls.account-nameazure_storage_account_namezadls.account-keyazure_storage_account_keyzadls.sas-tokenazure_storage_sas_keyzadls.tenant-idazure_storage_tenant_idzadls.client-idazure_storage_client_idzadls.client-secretazure_storage_client_secretzadls.account-hostazure_storage_authority_hostz
adls.tokenazure_storage_tokenbearer_tokentoken)zgcs.oauth2.tokenzhf.tokenr   )r   r
   r   r
   )r   r   r   r   ).
__future__r   rg   abcr   r   dataclassesr   	functoolsr   timer   typingr	   r
   r   polars._reexport	_reexportr   r\   r   r   polars.exceptionsr   polars.io.iceberg._utilsr   r   r   #polars.io.scan_options.cast_optionsr   rY   papyiceberg.schemarC   r   r   polars.lazyframe.framer   r   r   r   r   r   r~   r   r   r   r)   r'   <module>r     s   " 	 # !   . .  1 * 
 @%0Q
 Q
h*C * "%2 "% "%J %. % %  .4:%: +: 3	:
 +: : K: +: ): "#E: 5: 3: -: /:  /!:" 7#:$ 7%:& '':* '/: & r)   