
    i<V                    P   d dl mZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmc mZ d dlmZ d d	lmZ d d
lmZ erd dlmZ d dlmZ d dlmZ ddZ  ed      	 	 	 	 	 	 d	 	 	 	 	 	 	 dd       Z! ed      ddd       Z" ed      	 d	 	 	 	 	 	 	 dd       Z#y)    )annotationsN)TYPE_CHECKING)
set_module)is_iteratoris_list_like)concat_compat)notna)
MultiIndex)concat)
to_numeric)Hashable)AnyArrayLike)	DataFramec                    | Gt        |       s| gS t        |t              rt        | t              st	        | d      t        |       S g S )Nz7 must be a list of tuples when columns are a MultiIndex)r   
isinstancer
   list
ValueError)arg_varsvariablecolumnss      U/app/cer_product_mecsu/.venv/lib/python3.12/site-packages/pandas/core/reshape/melt.pyensure_list_varsr      sT    H%:,Z$5O*ST  >!	    pandasc           
     V
   || j                   v rt        d| d      t        |d| j                         }|du}t        |d| j                         }t        | j                   j	                  |            t        |      kD  rt        d      |s|r|| j                   j                  |      }n| j                   }||z   }	|j	                  |	      }
|
dk(  }|j                         r1t        |	|d	      D cg c]
  \  }}|s	| }}}t        d
|       |r'| j                  ddt        j                  |
      f   } n%| j                  d      } n| j                  d      } | | j                   j                  |      | _         |t        | j                   t              rt        | j                   j                        t        t!        | j                   j                              k(  r| j                   j                  }nt#        t        | j                   j                              D cg c]  }d| 	 }}n| j                   j$                  | j                   j$                  ndg}nt'        |      rt        | j                   t              rht)        |      rt+        |      }t        |      t        | j                         kD  rCt        d|dt        |       dt        | j                          d      t        d|d      |g}| j,                  \  }}|t        |      z
  }i }|D ]  }| j/                  |      }t        |j0                  t2        j0                        sF|dkD  rt5        |g|z  d      ||<   R t7        |      g |j$                  |j0                        ||<   ~t3        j8                  |j:                  |      ||<    ||z   |gz   }| j,                  d   dkD  rlt        d | j<                  D              sPt5        t#        | j,                  d         D cg c]  }| j                  dd|f    c}d      j>                  ||<   n| j:                  jA                  d      ||<   tC        |      D ]2  \  }}| j                   jE                  |      jG                  |      ||<   4 | jI                  ||      }|sRt3        j8                  t3        jJ                  t        |             |      }| jL                  jO                  |      |_&        |S c c}}w c c}w c c}w )a  
    Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.

    This function is useful to reshape a DataFrame into a format where one
    or more columns are identifier variables (`id_vars`), while all other
    columns are considered measured variables (`value_vars`), and are "unpivoted" to
    the row axis, leaving just two non-identifier columns, 'variable' and
    'value'.

    Parameters
    ----------
    frame : DataFrame
        The DataFrame to unpivot.
    id_vars : scalar, tuple, list, or ndarray, optional
        Column(s) to use as identifier variables.
    value_vars : scalar, tuple, list, or ndarray, optional
        Column(s) to unpivot. If not specified, uses all columns that
        are not set as `id_vars`.
    var_name : scalar, tuple, list, or ndarray, optional
        Name to use for the 'variable' column. If None it uses
        ``frame.columns.name`` or 'variable'. Must be a scalar if columns are a
        MultiIndex.
    value_name : scalar, default 'value'
        Name to use for the 'value' column, can't be an existing column label.
    col_level : scalar, optional
        If columns are a MultiIndex then use this level to melt.
    ignore_index : bool, default True
        If True, original index is ignored. If False, the original index is retained.
        Index labels will be repeated as necessary.

    Returns
    -------
    DataFrame
        Unpivoted DataFrame.

    See Also
    --------
    DataFrame.melt : Identical method.
    pivot_table : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Return reshaped DataFrame organized
        by given index / column values.
    DataFrame.explode : Explode a DataFrame from list-like
            columns to long format.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.melt>` for more examples.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "A": {0: "a", 1: "b", 2: "c"},
    ...         "B": {0: 1, 1: 3, 2: 5},
    ...         "C": {0: 2, 1: 4, 2: 6},
    ...     }
    ... )
    >>> df
    A  B  C
    0  a  1  2
    1  b  3  4
    2  c  5  6

    >>> pd.melt(df, id_vars=["A"], value_vars=["B"])
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5

    >>> pd.melt(df, id_vars=["A"], value_vars=["B", "C"])
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5
    3  a        C      2
    4  b        C      4
    5  c        C      6

    The names of 'variable' and 'value' columns can be customized:

    >>> pd.melt(
    ...     df,
    ...     id_vars=["A"],
    ...     value_vars=["B"],
    ...     var_name="myVarname",
    ...     value_name="myValname",
    ... )
    A myVarname  myValname
    0  a         B          1
    1  b         B          3
    2  c         B          5

    Original index values can be kept around:

    >>> pd.melt(df, id_vars=["A"], value_vars=["B", "C"], ignore_index=False)
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5
    0  a        C      2
    1  b        C      4
    2  c        C      6

    If you have multi-index columns:

    >>> df.columns = [list("ABC"), list("DEF")]
    >>> df
    A  B  C
    D  E  F
    0  a  1  2
    1  b  3  4
    2  c  5  6

    >>> pd.melt(df, col_level=0, id_vars=["A"], value_vars=["B"])
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5

    >>> pd.melt(df, id_vars=[("A", "D")], value_vars=[("B", "E")])
    (A, D) variable_0 variable_1  value
    0      a          B          E      1
    1      b          B          E      3
    2      c          B          E      5
    zvalue_name (z3) cannot match an element in the DataFrame columns.id_varsN
value_varsz)id_vars cannot contain duplicate columns.T)strictzFThe following id_vars or value_vars are not present in the DataFrame: F)deep	variable_r   z	var_name=z has z, items, but the dataframe columns only have z levels.z must be a scalar.r   )ignore_index)namedtype   c              3  n   K   | ]-  }t        |t        j                         xr |j                   / y w)N)r   npr$   _supports_2d).0dts     r   	<genexpr>zmelt.<locals>.<genexpr>  s/      &=?Jr288$$88&s   35Fr   )(r   r   r   lenget_indexer_forget_level_valuesanyzipKeyErrorilocalgosuniquecopyr   r
   namessetranger#   r   r   r   shapepopr$   r'   r   typetile_valuesdtypesvaluesravel	enumerate_get_level_valuesrepeat_constructorarangeindextake)framer   r   var_name
value_name	col_levelr"   value_vars_was_not_nonelevellabelsidxmissinglab	not_foundmissing_labelsinum_rowsKnum_cols_adjustedmdatacolid_datamcolumnsresulttakers                            r   meltr`   ,   s   N U]]":, '% %
 	
 w	5==AG(4!*lEMMJJ 5==((12S\ADEE* MM229=EMME:%##F+);;=*-fgd*K&YyN  ""0!13  #JJq%,,s"334EJJEJ*E


&66yAemmZ05==&&'3s5==3F3F/G+HH ==..5:3u}}?R?R;S5TUisOUU ',mm&8&8&D""*H 
h	emmZ08$>8}s5==11  xks8}o 6;;>u}};M:NhX 
 	{*<=>>:++KHaCL(*,E 
E))C.'--2 1$#WI0A$APTUc
 +T']2GLLVc
2CDE#J
E !ZL0H{{1~# &CH<<& # #',U[[^'<=!UZZ1=D

& 	j "MM//4jH% I3]]44Q7>>xHc
I x8F		#e*-/@A{{''.M[. VR >s   5
T T+T!#T&c                F   i }g }t               }t        t        t        |j	                                           }|j                         D ]j  \  }}t        |      |k7  rt        d      |D 	cg c]  }	| |	   j                   }
}	t        |
      ||<   |j                  |       |j                  |      }l t        | j                  j                  |            }|D ](  }	t        j                  | |	   j                  |      ||	<   * |rxt        j                   t        ||d            t"              }|D ]  }|t%        ||         z  } |j'                         s&|j                         D ci c]  \  }}|||    }}}| j)                  |||z         S c c}	w c c}}w )a  
    Reshape wide-format data to long. Generalized inverse of DataFrame.pivot.

    Accepts a dictionary, ``groups``, in which each key is a new column name
    and each value is a list of old column names that will be "melted" under
    the new column name as part of the reshape.

    Parameters
    ----------
    data : DataFrame
        The wide-format DataFrame.
    groups : dict
        {new_name : list_of_columns}.
    dropna : bool, default True
        Do not include columns whose entries are all NaN.

    Returns
    -------
    DataFrame
        Reshaped DataFrame.

    See Also
    --------
    melt : Unpivot a DataFrame from wide to long format, optionally leaving
        identifiers set.
    pivot : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Pivot without aggregation that can handle
        non-numeric data.
    DataFrame.pivot_table : Generalization of pivot that can handle
        duplicate values for one index/column pair.
    DataFrame.unstack : Pivot based on the index values instead of a
        column.
    wide_to_long : Wide panel to long format. Less flexible but more
        user-friendly than melt.

    Examples
    --------
    >>> data = pd.DataFrame(
    ...     {
    ...         "hr1": [514, 573],
    ...         "hr2": [545, 526],
    ...         "team": ["Red Sox", "Yankees"],
    ...         "year1": [2007, 2007],
    ...         "year2": [2008, 2008],
    ...     }
    ... )
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {"year": ["year1", "year2"], "hr": ["hr1", "hr2"]})
          team  year   hr
    0  Red Sox  2007  514
    1  Yankees  2007  573
    2  Red Sox  2008  545
    3  Yankees  2008  526
    z$All column lists must be same lengthr   )r$   r-   )r9   r.   nextiterrA   itemsr   r?   r   appendunionr   r   
differencer'   r>   onesboolr	   allrF   )datagroupsdropnarZ   
pivot_colsall_colsrX   targetr8   r[   	to_concatid_colsmaskckvs                   r   lreshaperw     s   x EJ!eHDfmmo&'(A )u:?CDD2783T#Y&&8	8%i0f&!>>%() 4<<**845G 3WWT#Y..2c
3 wws5A/0= 	$AE%(O#D	$xxz,1KKM:DAqQ$Z:E:UGj,@AA# 9 ;s   -F/Fc                   dd}d	d}t        |      s|g}nt        |      }| j                  j                  |      j	                         rt        d      t        |      s|g}nt        |      }| |   j                         j	                         rt        d      g }g }	|D ]:  }
 || |
||      }|	j                  |       |j                   || |
||||             < t        |d      }| j                  j                  |	      }| |   }t        |      dk(  r |j                  |      j                  |      S |j                  |j                         |      j                  g ||      S )
aV!  
    Unpivot a DataFrame from wide to long format.

    Less flexible but more user-friendly than melt.

    With stubnames ['A', 'B'], this function expects to find one or more
    group of columns with format
    A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,...
    You specify what you want to call this suffix in the resulting long format
    with `j` (for example `j='year'`)

    Each row of these wide variables are assumed to be uniquely identified by
    `i` (can be a single column name or a list of column names)

    All remaining variables in the data frame are left intact.

    Parameters
    ----------
    df : DataFrame
        The wide-format DataFrame.
    stubnames : str or list-like
        The stub name(s). The wide format variables are assumed to
        start with the stub names.
    i : str or list-like
        Column(s) to use as id variable(s).
    j : str
        The name of the sub-observation variable. What you wish to name your
        suffix in the long format.
    sep : str, default ""
        A character indicating the separation of the variable names
        in the wide format, to be stripped from the names in the long format.
        For example, if your column names are A-suffix1, A-suffix2, you
        can strip the hyphen by specifying `sep='-'`.
    suffix : str, default '\\d+'
        A regular expression capturing the wanted suffixes. '\\d+' captures
        numeric suffixes. Suffixes with no numbers could be specified with the
        negated character class '\\D+'. You can also further disambiguate
        suffixes, for example, if your wide variables are of the form A-one,
        B-two,.., and you have an unrelated column A-rating, you can ignore the
        last one by specifying `suffix='(!?one|two)'`. When all suffixes are
        numeric, they are cast to int64/float64.

    Returns
    -------
    DataFrame
        A DataFrame that contains each stub name as a variable, with new index
        (i, j).

    See Also
    --------
    melt : Unpivot a DataFrame from wide to long format, optionally leaving
        identifiers set.
    pivot : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Pivot without aggregation that can handle
        non-numeric data.
    DataFrame.pivot_table : Generalization of pivot that can handle
        duplicate values for one index/column pair.
    DataFrame.unstack : Pivot based on the index values instead of a
        column.

    Notes
    -----
    All extra variables are left untouched. This simply uses
    `pandas.melt` under the hood, but is hard-coded to "do the right thing"
    in a typical case.

    Examples
    --------
    >>> np.random.seed(123)
    >>> df = pd.DataFrame(
    ...     {
    ...         "A1970": {0: "a", 1: "b", 2: "c"},
    ...         "A1980": {0: "d", 1: "e", 2: "f"},
    ...         "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
    ...         "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
    ...         "X": dict(zip(range(3), np.random.randn(3), strict=True)),
    ...     }
    ... )
    >>> df["id"] = df.index
    >>> df
      A1970 A1980  B1970  B1980         X  id
    0     a     d    2.5    3.2 -1.085631   0
    1     b     e    1.2    1.3  0.997345   1
    2     c     f    0.7    0.1  0.282978   2
    >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
    ... # doctest: +NORMALIZE_WHITESPACE
                    X  A    B
    id year
    0  1970 -1.085631  a  2.5
    1  1970  0.997345  b  1.2
    2  1970  0.282978  c  0.7
    0  1980 -1.085631  d  3.2
    1  1980  0.997345  e  1.3
    2  1980  0.282978  f  0.1

    With multiple id columns

    >>> df = pd.DataFrame(
    ...     {
    ...         "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3],
    ...         "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3],
    ...         "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    ...         "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9],
    ...     }
    ... )
    >>> df
       famid  birth  ht1  ht2
    0      1      1  2.8  3.4
    1      1      2  2.9  3.8
    2      1      3  2.2  2.9
    3      2      1  2.0  3.2
    4      2      2  1.8  2.8
    5      2      3  1.9  2.4
    6      3      1  2.2  3.3
    7      3      2  2.3  3.4
    8      3      3  2.1  2.9
    >>> long_format = pd.wide_to_long(df, stubnames="ht", i=["famid", "birth"], j="age")
    >>> long_format
    ... # doctest: +NORMALIZE_WHITESPACE
                      ht
    famid birth age
    1     1     1    2.8
                2    3.4
          2     1    2.9
                2    3.8
          3     1    2.2
                2    2.9
    2     1     1    2.0
                2    3.2
          2     1    1.8
                2    2.8
          3     1    1.9
                2    2.4
    3     1     1    2.2
                2    3.3
          2     1    2.3
                2    3.4
          3     1    2.1
                2    2.9

    Going from long back to wide just takes some creative use of `unstack`

    >>> wide_format = long_format.unstack()
    >>> wide_format.columns = wide_format.columns.map("{0[0]}{0[1]}".format)
    >>> wide_format.reset_index()
       famid  birth  ht1  ht2
    0      1      1  2.8  3.4
    1      1      2  2.9  3.8
    2      1      3  2.2  2.9
    3      2      1  2.0  3.2
    4      2      2  1.8  2.8
    5      2      3  1.9  2.4
    6      3      1  2.2  3.3
    7      3      2  2.3  3.4
    8      3      3  2.1  2.9

    Less wieldy column names are also handled

    >>> np.random.seed(0)
    >>> df = pd.DataFrame(
    ...     {
    ...         "A(weekly)-2010": np.random.rand(3),
    ...         "A(weekly)-2011": np.random.rand(3),
    ...         "B(weekly)-2010": np.random.rand(3),
    ...         "B(weekly)-2011": np.random.rand(3),
    ...         "X": np.random.randint(3, size=3),
    ...     }
    ... )
    >>> df["id"] = df.index
    >>> df  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
       A(weekly)-2010  A(weekly)-2011  B(weekly)-2010  B(weekly)-2011  X  id
    0        0.548814        0.544883        0.437587        0.383442  0   0
    1        0.715189        0.423655        0.891773        0.791725  1   1
    2        0.602763        0.645894        0.963663        0.528895  1   2

    >>> pd.wide_to_long(df, ["A(weekly)", "B(weekly)"], i="id", j="year", sep="-")
    ... # doctest: +NORMALIZE_WHITESPACE
             X  A(weekly)  B(weekly)
    id year
    0  2010  0   0.548814   0.437587
    1  2010  1   0.715189   0.891773
    2  2010  1   0.602763   0.963663
    0  2011  0   0.544883   0.383442
    1  2011  1   0.423655   0.791725
    2  2011  1   0.645894   0.528895

    If we have many columns, we could also use a regex to find our
    stubnames and pass that list on to wide_to_long

    >>> stubnames = sorted(
    ...     set(
    ...         [
    ...             match[0]
    ...             for match in df.columns.str.findall(r"[A-B]\(.*\)").values
    ...             if match != []
    ...         ]
    ...     )
    ... )
    >>> list(stubnames)
    ['A(weekly)', 'B(weekly)']

    All of the above examples have integers as suffixes. It is possible to
    have non-integers as suffixes.

    >>> df = pd.DataFrame(
    ...     {
    ...         "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3],
    ...         "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3],
    ...         "ht_one": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    ...         "ht_two": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9],
    ...     }
    ... )
    >>> df
       famid  birth  ht_one  ht_two
    0      1      1     2.8     3.4
    1      1      2     2.9     3.8
    2      1      3     2.2     2.9
    3      2      1     2.0     3.2
    4      2      2     1.8     2.8
    5      2      3     1.9     2.4
    6      3      1     2.2     3.3
    7      3      2     2.3     3.4
    8      3      3     2.1     2.9

    >>> long_format = pd.wide_to_long(
    ...     df, stubnames="ht", i=["famid", "birth"], j="age", sep="_", suffix=r"\w+"
    ... )
    >>> long_format
    ... # doctest: +NORMALIZE_WHITESPACE
                      ht
    famid birth age
    1     1     one  2.8
                two  3.4
          2     one  2.9
                two  3.8
          3     one  2.2
                two  2.9
    2     1     one  2.0
                two  3.2
          2     one  1.8
                two  2.8
          3     one  1.9
                two  2.4
    3     1     one  2.2
                two  3.3
          2     one  2.3
                two  3.4
          3     one  2.1
                two  2.9
    c                    dt        j                  |       t        j                  |       | d}| j                  | j                  j                  j	                  |         S )N^$)reescaper   strmatch)dfstubsepsuffixregexs        r   get_var_namesz#wide_to_long.<locals>.get_var_nameso  sL    RYYt_%biin%5fXQ?zz"**....u566r   c                6   t        | |||j                  |      |      }||   j                  j                  t	        j
                  ||z         dd      ||<   	 t        ||         ||<   |j                  g ||      S # t        t        t        f$ r Y +w xY w)N)r   r   rL   rK    T)r   )r`   rstripr~   replacer|   r}   r   	TypeErrorr   OverflowError	set_index)r   r   rV   jr   r   newdfs          r   	melt_stubzwide_to_long.<locals>.melt_stubs  s    !{{3'
 8<<''		$*(=r'Na	!%(+E!H
 wwAw''	 :}5 		s   B BBz,stubname can't be identical to a column namez3the id variables need to uniquely identify each rowr%   )axis)on)r   r~   r   r~   r   r~   )r   r~   r   r~   )r   r   r   isinr1   r   
duplicatedextendre   r   rg   r.   r   joinmergereset_index)r   	stubnamesrV   r   r   r   r   r   _meltedvalue_vars_flattenedr   	value_varmeltedr   news                  r   wide_to_longr   p  s]   ~7(& 	"K	O		zzy!%%'GHH?CG	!uNOOG B!"dC8	##I.yT1aC@AB
 G!$Fjj##$89G
W+C
1v{}}Q$$V,,yy++-!y4>>wwAwGGr   )r   r~   returnr   )NNNvalueNT)rJ   r   rL   r   r"   ri   r   r   )T)rk   r   rl   dictrm   ri   r   r   )r   z\d+)r   r   r   r~   r   r~   r   r   )$
__future__r   r|   typingr   numpyr'   pandas.util._decoratorsr   pandas.core.dtypes.commonr   r   pandas.core.dtypes.concatr   pandas.core.dtypes.missingr	   pandas.core.algorithmscore
algorithmsr5   pandas.core.indexes.apir
   pandas.core.reshape.concatr   pandas.core.tools.numericr   collections.abcr   pandas._typingr   r   r   r   r`   rw   r    r   r   <module>r      s   " 	    . 4 , & & . - 0(+  H "ii
 i i i iX HSB SBl HAGsHsH),sH;>sHsH sHr   