
    iF                    6   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d dlZd d
lmZmZ d dlmZ d dlmZmZ d dlmZmZmZm Z  d dl!m"Z"m#Z#m$Z$ erd dlm%Z% e jL                  e jN                  e jP                  e jR                  e jT                  e jV                  e jV                  dZ,e jP                  ejZ                  dfe jV                  ej\                  efe jL                  ej^                  dfe jN                  ej^                  dfe jR                  ej^                  dfe j`                  ej\                  dfe jT                  ejb                  d fiZ2ej^                  dejZ                  dej\                  diZ3 G d de      Z4y)    )annotations)TYPE_CHECKINGAnyN)using_python_scalars)infer_dtype)iNaT)NoBufferPresent)cache_readonly)BaseMaskedDtype)
ArrowDtypeDatetimeTZDtype)is_string_dtype)PandasBufferPandasBufferPyarrow)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)Buffer)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                      e Zd ZdZdddZddZedd       Zedd       Z	ddZ
ed        Zed        Zedd	       Zedd
       ZddZdddZddZ	 	 ddZddZddZy)PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    c                    t        |t        j                        rt        d|j                   d      t        |t        j
                        st        dt        |       d      || _        || _	        y)zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zExpected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: zD. Please rename these columns before using the interchange protocol.zColumns of type  not handled yetN)

isinstancepd	DataFrame	TypeErrorcolumnsSeriesNotImplementedErrortype_col_allow_copy)selfcolumn
allow_copys      [/app/cer_product_mecsu/.venv/lib/python3.12/site-packages/pandas/core/interchange/column.py__init__zPandasColumn.__init__V   su    
 fbll+ !..) *22  &")),%(8fFV&WXX 	%    c                .    | j                   j                  S )z2
        Size of the column, in elements.
        )r-   sizer/   s    r2   r6   zPandasColumn.sizej   s     yy~~r4   c                     y)z7
        Offset of first element. Always zero.
        r    r7   s    r2   offsetzPandasColumn.offsetp   s     r4   c                   | j                   j                  }t        |t        j                        rb| j                   j
                  j                  }| j                  |j                        \  }}}}t        j                  ||t        j                  fS t        |      rMt        | j                         dv r+t        j                  dt        |      t        j                  fS t!        d      | j                  |      S )N)stringempty   z.Non-string object dtypes are not supported yet)r-   dtyper%   r&   CategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r+   )r/   r?   rB   _bitwidthc_arrow_dtype_f_strs         r2   r?   zPandasColumn.dtypex   s    		eR001II$$**E ,,U[[9# %%#!!	  U#499%)<<$$(/%%	  &&VWW//66r4   c                   t         j                  |j                  d      }|t        d| d      t	        |t
              r|j                  j                  }nZt	        |t              r|j                  j                  }n3t	        |t              r|j                  j                  }n|j                  }|dk(  r||j                  t        j                  |fS ||j                  dz  t        |      |fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolzbool[pyarrow]r>   )	_NP_KINDSgetkind
ValueErrorr%   r   numpy_dtype	byteorderr   baser   itemsizer   BOOLr   )r/   r?   rN   rQ   s       r2   rC   z$PandasColumn._dtype_from_pandasdtype   s     }}UZZ.<z%0VWXXeZ())33I/

,,I/))33IIO#   	  U^^a')=e)DiOOr4   c                   | j                   d   t        j                  k(  st        d      | j                  j
                  j                  dt        t        j                  | j                  j
                  j                              dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)
is_orderedis_dictionary
categories)r?   r   rD   r(   r-   catorderedr"   r&   r*   rX   r7   s    r2   describe_categoricalz!PandasColumn.describe_categorical   si    $ zz!}	 5 55U 
 ))--//!&ryy1I1I'JK
 	
r4   c                   t        | j                  j                  t              rt        j
                  }d}||fS t        | j                  j                  t              rb| j                  j                  j                  j                  d   j                         d   t        j                  d fS t        j                  dfS | j                  d   }	 t        |   \  }}||fS # t        $ r}t        d| d      |d }~ww xY w)N   r   rK   z not yet supported)r%   r-   r?   r   r   USE_BYTEMASKr   array	_pa_arraychunksbuffersNON_NULLABLEUSE_BITMASK_NULL_DESCRIPTIONKeyErrorr+   )r/   column_null_dtype
null_valuerN   nullvalueerrs          r2   describe_nullzPandasColumn.describe_null   s    diioo7 . ; ;J$j00diiooz2 yy((//2::<Q?G%22D88!--q00zz!}	V+D1KD% U{  	V%
4&8J&KLRUU	Vs   C! !	C?*C::C?c                    | j                   j                         j                         }t               s|j	                         }|S )zB
        Number of null elements. Should always be known.
        )r-   isnasumr   item)r/   results     r2   
null_countzPandasColumn.null_count   s3    
 !%%'#%[[]Fr4   c                2    d| j                   j                  iS )z8
        Store specific metadata of the column.
        zpandas.index)r-   indexr7   s    r2   metadatazPandasColumn.metadata   s    
 		00r4   c                     y)zE
        Return the number of chunks the column consists of.
        r]   r9   r7   s    r2   
num_chunkszPandasColumn.num_chunks   s     r4   Nc              #     K   |rt|dkD  rot        | j                        }||z  }||z  dk7  r|dz  }t        d||z  |      D ]4  }t        | j                  j                  |||z    | j
                         6 y|  yw)zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        r]   r   N)lenr-   ranger"   ilocr.   )r/   n_chunksr6   stepstarts        r2   
get_chunkszPandasColumn.get_chunks   s     
 1tyy>D8#Dh!#	q$/48 "IINN554<8$:J:J 
 Js   A<A>c                    | j                         ddd}	 | j                         |d<   	 | j                         |d<   |S # t        $ r Y !w xY w# t        $ r Y |S w xY w)a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsr   r   )_get_data_buffer_get_validity_bufferr	   _get_offsets_buffer)r/   rb   s     r2   get_bufferszPandasColumn.get_buffers  s    ( ))+"
	"&";";"=GJ	!%!9!9!;GI   		
  		s    ? A 	A
A	AAc                   | j                   d   t        j                  k(  rt        | j                   d         dkD  r4| j                  j
                  j                  d      j                         }n| j                  j                         }t        || j                        }t        j                  dt        j                  t        j                  f}||fS | j                   d   t        j                  t        j                  t        j                   t        j"                  fv r| j                   }| j                  j$                  }t'        | j                  j                   t(              rD|j*                  j,                  d   }t/        |j1                         d   t        |            }||fS t'        | j                  j                   t2              r|j4                  }n|j6                  }t        || j                        }||fS | j                   d   t        j8                  k(  rV| j                  j:                  j<                  }t        || j                        }| j?                  |j                         }||fS | j                   d   t        j@                  k(  r| j                  j                         }tC               }|D ]4  }t'        |tD              s|jG                  |jI                  d	
             6 t        tK        jL                  |d            }t        j                  dt        jN                  t        j                  f}||fS tQ        d| j                  j                    d      )zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         N)r1   @   r]   lengthutf-8encodinguint8)r?   r>   rK   r$   ))r?   r   DATETIMEry   r-   dt
tz_convertto_numpyr   r.   INTr   INT64r   rE   UINTFLOATrT   r_   r%   r   r`   ra   r   rb   r   _data_ndarrayrD   rA   _codesrC   rF   	bytearraystrextendencodenp
frombufferUINT8r+   )	r/   np_arrbufferr?   arrrB   bufr   objs	            r2   r   zPandasColumn._get_data_buffer5  s    ::a=I... 4::a=!A%006??A++-!&T5E5EFF!!!!	Et u}i ZZ]MMNNOONN	
 
 JJE))//C$))//:6 mm**1-,KKM!$s8 u}$$))//?;!&T5E5EFF> u}= ZZ]i333II$$++E!%D4D4DEF00=E6 u}5 ZZ]i...))$$&CA  ;c3'HHSZZZ9:; ""--"ABF !!!!	E u} &
499??2CCS&TUUr4   c                   | j                   \  }}t        | j                  j                  t              r| j                  j
                  j                  j                  d   }t        j                  dt        j                  t        j                  f}|j                         d   yt        |j                         d   t        |            }||fS t        | j                  j                  t               r_| j                  j
                  j"                  }t%        |      }t        j                  dt        j                  t        j                  f}||fS | j                  d   t        j&                  k(  r| j                  j)                         }|dk(  }| }t+        j,                  t        |      ft*        j.                        }t1        |      D ]  \  }	}
t        |
t2              r|n|||	<    t%        |      }t        j                  dt        j                  t        j                  f}||fS 	 t4        |    d}t;        |      # t6        $ r}t9        d      |d}~ww xY w)	z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   r]   Nr   r>   shaper?   z! so does not have a separate maskzSee self.describe_null)rl   r%   r-   r?   r   r_   r`   ra   r   rT   r   r   rE   rb   r   ry   r   _maskr   rF   r   r   zerosbool_	enumerater   _NO_VALIDITY_BUFFERrf   r+   r	   )r/   ri   invalidr   r?   r   maskr   validr   r   msgrk   s                r2   r   z!PandasColumn._get_validity_buffer  s    **gdiiooz2 ))//++2215C^^Q(8(8*:K:KLE{{}Q'(a 3xF 5= diioo799??((D!$'F^^Q(8(8*:K:KLE5= ::a=I,,, ))$$&C qLEiG883s8+RXX>D#C. E3#-c3#7%WQE
 "$'F ^^Q(8(8*:K:KLE5= 	I(.//PQC
 c""	  	I%&>?SH	Is   .I 	IIIc                   | j                   d   t        j                  k(  r| j                  j	                         }d}t        j                  t        |      dz   ft
        j                        }t        |      D ]=  \  }}t        |t              r |j                  d      }|t        |      z  }|||dz   <   ? t        |      }t        j                  dt        j                   t"        j$                  f}||fS t'        d      )a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   r]   r   r   r   r   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r?   r   rF   r-   r   r   r   ry   int64r   r%   r   r   r   r   r   r   r   rE   r	   )	r/   rA   ptrr   r   vr   r   r?   s	            r2   r   z PandasColumn._get_offsets_buffer  s     ::a=I,,,YY'')FChhc&kAo%7rxxHG!&) %1 a%'2A3q6MC!$A% "'*F !!!!	E u} "5 r4   )T)r0   z	pd.Seriesr1   boolreturnNone)r   int)r   ztuple[DtypeKind, int, str, str])r   zdict[str, pd.Index])N)r|   z
int | None)r   r   )r   z.tuple[Buffer, tuple[DtypeKind, int, str, str]])r   ztuple[Buffer, Any] | None)r   ztuple[PandasBuffer, Any])__name__
__module____qualname____doc__r3   r6   propertyr:   r
   r?   rC   r[   rl   rr   ru   rw   r   r   r   r   r   r9   r4   r2   r"   r"   J   s    	&(   7 7:PB 
 
8  &   1 1"#JI	7IV7#r&r4   r"   )5
__future__r   typingr   r   numpyr   pandas._configr   pandas._libs.libr   pandas._libs.tslibsr   pandas.errorsr	   pandas.util._decoratorsr
   pandas.core.dtypes.dtypesr   pandasr&   r   r   pandas.api.typesr   pandas.core.interchange.bufferr   r   *pandas.core.interchange.dataframe_protocolr   r   r   r   pandas.core.interchange.utilsr   r   r   r   r   r   r   rT   rF   r   rL   USE_NANUSE_SENTINELrc   rD   r^   re   r   r"   r9   r4   r2   <module>r      sl   "
  / ( $ ) 2 5  -   A 
													 OOn,,d344d;MMN//6NN^00$7NN^00$7 N77<~22A6  !>:!D U6 Ur4   