import sys, io, json
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

import openpyxl
from collections import defaultdict
from datetime import datetime, timedelta, date

# ─────────────────────────────────────────────────────
# WORKING HOURS CALCULATOR
# ─────────────────────────────────────────────────────
WORK_START_M = 8 * 60          # 480
WORK_END_M   = 17 * 60 + 30    # 1050
BREAKS_M = [(600, 615), (720, 810), (840, 855)]  # 10:00-10:15, 12:00-13:30, 14:00-14:15

def working_minutes_in_day(s_min, e_min):
    s = max(s_min, WORK_START_M)
    e = min(e_min, WORK_END_M)
    if s >= e: return 0
    total = e - s
    for bs, be in BREAKS_M:
        total -= max(0, min(e, be) - max(s, bs))
    return max(0, total)

def working_hours(dt1, dt2):
    if not dt1 or not dt2 or not isinstance(dt1, datetime) or not isinstance(dt2, datetime):
        return None
    if dt2 <= dt1: return 0
    total = 0
    d = dt1.date()
    end_d = dt2.date()
    while d <= end_d:
        if d.weekday() < 5:  # skip weekends (5=Sat, 6=Sun)
            ds = dt1.hour * 60 + dt1.minute if d == dt1.date() else WORK_START_M
            de = dt2.hour * 60 + dt2.minute if d == end_d else WORK_END_M
            total += working_minutes_in_day(ds, de)
        d += timedelta(days=1)
    return round(total / 60, 2)

def safe_dt(v):
    return v if isinstance(v, datetime) else None

def fmt_date(v):
    if isinstance(v, datetime): return v.strftime('%Y-%m-%d')
    if isinstance(v, date): return v.strftime('%Y-%m-%d')
    return None

def avg(lst):
    lst = [x for x in lst if x is not None and x >= 0]
    return round(sum(lst) / len(lst), 2) if lst else 0

# ─────────────────────────────────────────────────────
# LOAD WORKBOOK
# ─────────────────────────────────────────────────────
wb = openpyxl.load_workbook('Book2.xlsx', data_only=True)

PART_COL = 5   # F
CTRL_COL = 4   # E

STAGES_CFG = {
    'Receive':      {'sheet': 'Receive',      'date_col': 7,  'user_col': 8},
    'QC In':        {'sheet': 'QC in',        'date_col': 9,  'user_col': 10},
    'Packaging In': {'sheet': 'Packaging in', 'date_col': 11, 'user_col': 12},
    'Stock In':     {'sheet': 'Stock in',     'date_col': 13, 'user_col': 14},
}

# ─────────────────────────────────────────────────────
# EXTRACT STAGE WORKLOAD
# ─────────────────────────────────────────────────────
stage_workload = {}  # stage -> date -> {parts, controls, users: {uid: count}}

for stage_name, cfg in STAGES_CFG.items():
    ws = wb[cfg['sheet']]
    dc, uc = cfg['date_col'], cfg['user_col']
    track_ctrl = stage_name in ('Packaging In', 'Stock In')
    by_date = defaultdict(lambda: {
        'parts': set(), 'controls': set(),
        'users': defaultdict(set),
        'user_controls': defaultdict(set),   # only for Packaging/Stock
    })

    for i, row in enumerate(ws.iter_rows(values_only=True)):
        if i == 0: continue
        if len(row) <= max(dc, uc): continue
        dt    = row[dc]
        user  = row[uc]
        part  = row[PART_COL]
        ctrl  = row[CTRL_COL]
        d = fmt_date(dt)
        if not d or not user or not part: continue
        if isinstance(user, str): continue   # skip formula leftovers
        by_date[d]['parts'].add(part)
        by_date[d]['controls'].add(ctrl)
        by_date[d]['users'][int(user)].add(part)
        if track_ctrl:
            by_date[d]['user_controls'][int(user)].add(ctrl)

    stage_workload[stage_name] = {
        d: {
            'parts':         len(v['parts']),
            'controls':      len(v['controls']),
            'users':         {str(u): len(s) for u, s in v['users'].items()},
            'user_controls': {str(u): len(s) for u, s in v['user_controls'].items()} if track_ctrl else {},
        }
        for d, v in by_date.items()
        if d >= '2026-02-01'
    }

# ─────────────────────────────────────────────────────
# ALL DATES (sorted, recent only)
# ─────────────────────────────────────────────────────
all_dates = sorted(set(
    d for s in stage_workload.values() for d in s
))

# ─────────────────────────────────────────────────────
# ALL USERS per stage
# ─────────────────────────────────────────────────────
stage_users = {
    stage: sorted(set(
        int(u) for dv in dates.values() for u in dv['users']
    ))
    for stage, dates in stage_workload.items()
}

# ─────────────────────────────────────────────────────
# WAIT TIMES  (from Tổng sheet)
# ─────────────────────────────────────────────────────
ws_tong = wb['Tổng']
item_timelines = []

for i, row in enumerate(ws_tong.iter_rows(values_only=True)):
    if i == 0: continue
    if len(row) < 15: continue

    packing_id = row[3]
    ctrl       = row[4]
    part       = row[5]
    receive_dt = safe_dt(row[7])
    qc_dt      = safe_dt(row[9])
    pack_dt    = safe_dt(row[11])
    stock_dt   = safe_dt(row[13])

    if not receive_dt: continue
    rd = fmt_date(receive_dt)
    if not rd or rd < '2026-02-01': continue

    wh_rv_qc    = working_hours(receive_dt, qc_dt)
    wh_qc_pk    = working_hours(qc_dt, pack_dt)
    wh_pk_st    = working_hours(pack_dt, stock_dt)
    wh_total    = working_hours(receive_dt, stock_dt)

    item_timelines.append({
        'packing_id':   packing_id,
        'ctrl':         ctrl,
        'part':         part,
        'receive_date': rd,
        'qc_date':      fmt_date(qc_dt),
        'pack_date':    fmt_date(pack_dt),
        'stk_date':     fmt_date(stock_dt),
        'receive':      receive_dt.isoformat(),
        'qc':           qc_dt.isoformat() if qc_dt else None,
        'pack':         pack_dt.isoformat() if pack_dt else None,
        'stock':        stock_dt.isoformat() if stock_dt else None,
        'wh_rv_qc':     wh_rv_qc,
        'wh_qc_pk':     wh_qc_pk,
        'wh_pk_st':     wh_pk_st,
        'wh_total':     wh_total,
    })

# Average wait per transition
avg_wait = {
    'Recv → QC':    avg([x['wh_rv_qc']  for x in item_timelines]),
    'QC → Pack':    avg([x['wh_qc_pk']  for x in item_timelines]),
    'Pack → Stock': avg([x['wh_pk_st']  for x in item_timelines]),
    'Total':        avg([x['wh_total']  for x in item_timelines]),
}

# Wait time distribution buckets (0-2h, 2-8h, 8-24h, >24h)
def bucket(h):
    if h is None: return None
    if h <= 2:  return '0-2h'
    if h <= 8:  return '2-8h'
    if h <= 24: return '8-24h'
    return '>24h'

# Wait by date (avg total hours per receive date)
wait_by_date = defaultdict(list)
for item in item_timelines:
    if item['wh_total'] is not None:
        wait_by_date[item['receive_date']].append(item['wh_total'])
avg_wait_by_date = {d: avg(vs) for d, vs in wait_by_date.items() if d in all_dates}

# ─────────────────────────────────────────────────────
# THROUGHPUT RATIOS — cohort by receive date
# For each receive date: of the distinct parts received that day,
# what % has completed each subsequent stage (ever, not just same day)
# ─────────────────────────────────────────────────────

# Build lookup: part -> {stage: has_completed}
# Use Tổng sheet data (item_timelines already parsed)
part_stages = defaultdict(lambda: {'qc': False, 'pack': False, 'stock': False})
for item in item_timelines:
    p = item['part']
    if item['qc']:    part_stages[p]['qc']    = True
    if item['pack']:  part_stages[p]['pack']  = True
    if item['stock']: part_stages[p]['stock'] = True

# Group distinct parts by receive date
parts_by_receive_date = defaultdict(set)
for item in item_timelines:
    if item['receive_date'] in all_dates:
        parts_by_receive_date[item['receive_date']].add(item['part'])

throughput = {}
for d in all_dates:
    cohort = parts_by_receive_date.get(d, set())
    total = len(cohort)
    qc_done   = sum(1 for p in cohort if part_stages[p]['qc'])
    pack_done = sum(1 for p in cohort if part_stages[p]['pack'])
    stk_done  = sum(1 for p in cohort if part_stages[p]['stock'])
    # daily workload (for table)
    r = stage_workload['Receive'].get(d, {}).get('parts', 0)
    q = stage_workload['QC In'].get(d, {}).get('parts', 0)
    p = stage_workload['Packaging In'].get(d, {}).get('parts', 0)
    s = stage_workload['Stock In'].get(d, {}).get('parts', 0)
    throughput[d] = {
        'receive': r, 'qc': q, 'packaging': p, 'stock': s,
        'cohort_total': total,
        'cohort_qc':    qc_done,
        'cohort_pack':  pack_done,
        'cohort_stock': stk_done,
        # % of cohort that completed each stage
        'r_q':   round(qc_done   / total * 100, 1) if total else None,
        'q_p':   round(pack_done / total * 100, 1) if total else None,
        'p_s':   round(stk_done  / total * 100, 1) if total else None,
        'total': round(stk_done  / total * 100, 1) if total else None,
    }

# ─────────────────────────────────────────────────────
# RECEIVE TIME DISTRIBUTION (by Receiving ID)
# ─────────────────────────────────────────────────────
ws_recv = wb['Receive']
receiving_times = {}   # recv_id -> datetime (first scan)
for i, row in enumerate(ws_recv.iter_rows(values_only=True)):
    if i == 0: continue
    recv_id = row[2]   # col C = Receiving ID
    recv_dt = row[7]   # col H = Receive Date
    if not recv_id or not isinstance(recv_dt, datetime): continue
    if recv_id not in receiving_times:
        receiving_times[recv_id] = recv_dt

from collections import Counter

def hour_slots(counter):
    return {str(h): counter.get(h, 0) for h in range(8, 20)}

def zone_summary(counter):
    return {
        'morning':   sum(counter.get(h, 0) for h in range(8, 11)),
        'midday':    sum(counter.get(h, 0) for h in range(11, 14)),
        'afternoon': sum(counter.get(h, 0) for h in range(14, 17)),
        'evening':   sum(counter.get(h, 0) for h in range(17, 20)),
    }

# Group by day / month / year
recv_by_day   = defaultdict(Counter)   # "2026-02-23" -> {hour: count}
recv_by_month = defaultdict(Counter)   # "2026-02"    -> {hour: count}
recv_by_year  = defaultdict(Counter)   # "2026"       -> {hour: count}

for recv_id, dt in receiving_times.items():
    h = dt.hour
    recv_by_day[dt.strftime('%Y-%m-%d')][h]   += 1
    recv_by_month[dt.strftime('%Y-%m')][h]     += 1
    recv_by_year[dt.strftime('%Y')][h]         += 1

receive_time_dist = {
    'by_day':   {k: {'hours': hour_slots(v), 'zones': zone_summary(v), 'total': sum(v.values())}
                 for k, v in recv_by_day.items()},
    'by_month': {k: {'hours': hour_slots(v), 'zones': zone_summary(v), 'total': sum(v.values())}
                 for k, v in recv_by_month.items()},
    'by_year':  {k: {'hours': hour_slots(v), 'zones': zone_summary(v), 'total': sum(v.values())}
                 for k, v in recv_by_year.items()},
}

# Overall (all data) — kept for backward compat
hour_dist = Counter(dt.hour for dt in receiving_times.values())
receive_hour_dist = hour_slots(hour_dist)
zone_totals = zone_summary(hour_dist)

# ─────────────────────────────────────────────────────
# PIPELINE BACKLOG — tính theo MÃ (Part Number)
# pending = mã đã vào stage trước nhưng chưa xử lý stage này
# ─────────────────────────────────────────────────────
part_completion = {}
for x in item_timelines:
    p = x['part']
    if p not in part_completion:
        part_completion[p] = {'qc': False, 'pack': False, 'stock': False}
    if x['qc']:    part_completion[p]['qc']    = True
    if x['pack']:  part_completion[p]['pack']  = True
    if x['stock']: part_completion[p]['stock'] = True

total_parts    = len(part_completion)
qc_done_p      = sum(1 for v in part_completion.values() if v['qc'])
pack_done_p    = sum(1 for v in part_completion.values() if v['pack'])
stock_done_p   = sum(1 for v in part_completion.values() if v['stock'])

pipeline_backlog = {
    'received':      total_parts,
    'qc_done':       qc_done_p,
    'pack_done':     pack_done_p,
    'stock_done':    stock_done_p,
    'pending_qc':    total_parts  - qc_done_p,
    'pending_pack':  qc_done_p   - pack_done_p,
    'pending_stock': pack_done_p - stock_done_p,
    'pct_qc':        round(qc_done_p   / total_parts * 100, 1) if total_parts else 0,
    'pct_pack':      round(pack_done_p / total_parts * 100, 1) if total_parts else 0,
    'pct_stock':     round(stock_done_p/ total_parts * 100, 1) if total_parts else 0,
}

# ─────────────────────────────────────────────────────
# COMPLETION SNAPSHOTS — lũy kế đến từng ngày
# ─────────────────────────────────────────────────────
from collections import defaultdict as _dd

# Build packing_id -> [items] map (all items)
_packing_items = _dd(list)
for x in item_timelines:
    if x['packing_id']:
        _packing_items[x['packing_id']].append(x)

def snapshot_at(cutoff_date):
    """Cumulative stats: everything received up to cutoff_date."""
    items = [x for x in item_timelines if x['receive_date'] <= cutoff_date]
    if not items:
        return None

    packing_ids = set(x['packing_id'] for x in items if x['packing_id'])
    # Packing done = all items in that packing have been stocked by cutoff
    packing_done = sum(
        1 for pid in packing_ids
        if all((x['stk_date'] or '') <= cutoff_date for x in _packing_items[pid])
           and all(x['stk_date'] for x in _packing_items[pid])
    )

    parts_recv  = set(x['part'] for x in items)
    parts_qc    = set(x['part'] for x in items if x['qc_date']   and x['qc_date']   <= cutoff_date)
    parts_pack  = set(x['part'] for x in items if x['pack_date'] and x['pack_date'] <= cutoff_date)
    parts_stk   = set(x['part'] for x in items if x['stk_date']  and x['stk_date']  <= cutoff_date)

    nr, nq, np_, ns = len(parts_recv), len(parts_qc), len(parts_pack), len(parts_stk)
    return {
        'packing_total': len(packing_ids),
        'packing_done':  packing_done,
        'packing_pct':   round(packing_done / len(packing_ids) * 100, 1) if packing_ids else 0,
        'recv_total':    nr,
        'qc_total':      nr,   'qc_done':   nq, 'qc_pct':   round(nq/nr*100,1) if nr else 0,
        'pack_total':    nq,   'pack_done': np_,'pack_pct': round(np_/nq*100,1) if nq else 0,
        'stock_total':   np_,  'stock_done':ns, 'stock_pct':round(ns/np_*100,1) if np_ else 0,
    }

# Pre-compute for every date in all_dates + by month + by year
completion_by_day   = {d: snapshot_at(d) for d in all_dates}

_months = sorted(set(d[:7] for d in all_dates))
completion_by_month = {}
for m in _months:
    last_day = max(d for d in all_dates if d[:7] == m)
    completion_by_month[m] = snapshot_at(last_day)

_years = sorted(set(d[:4] for d in all_dates))
completion_by_year = {}
for y in _years:
    last_day = max(d for d in all_dates if d[:4] == y)
    completion_by_year[y] = snapshot_at(last_day)

completion_snapshots = {
    'by_day':   completion_by_day,
    'by_month': completion_by_month,
    'by_year':  completion_by_year,
}

# ─────────────────────────────────────────────────────
# AVERAGE PRODUCTIVITY per stage
# ─────────────────────────────────────────────────────
avg_productivity = {}
for stage, dates in stage_workload.items():
    vals = [v['parts'] for v in dates.values()]
    avg_productivity[stage] = avg(vals)

# ─────────────────────────────────────────────────────
# PACK DATA FOR JS
# ─────────────────────────────────────────────────────
payload = {
    'dates':                all_dates,
    'stage_workload':       stage_workload,
    'stage_users':          {k: [str(u) for u in v] for k, v in stage_users.items()},
    'avg_wait':             avg_wait,
    'avg_wait_by_date':     avg_wait_by_date,
    'throughput':           throughput,
    'avg_productivity':     avg_productivity,
    'item_timelines':       item_timelines,
    'pipeline_backlog':     pipeline_backlog,
    'completion_snapshots': completion_snapshots,
    'receive_hour_dist':    receive_hour_dist,
    'receive_time_dist':    receive_time_dist,
    'zone_totals':          zone_totals,
}

with open('dashboard_data.json', 'w', encoding='utf-8') as f:
    json.dump(payload, f, ensure_ascii=False, default=str)

print("dashboard_data.json generated OK")
print(f"  dates: {all_dates}")
print(f"  avg_wait: {avg_wait}")
print(f"  avg_productivity: {avg_productivity}")
print(f"  items in timeline: {len(item_timelines)}")
