def recreate_reg_data(history_path="cache/reg_history_fa25.csv",
                      output_path="cache/reg_data_fa25.csv"):
    """Rebuild the per-day enrollment table from the registration history log.

    The history CSV is read with explicit column names, so every line
    (including the first) is treated as a data record.  For each CRN the
    latest reading of each calendar day is kept, and the result is written
    as a pivot table: one row per CRN, one column per day.

    Each day's column is labelled ``YYYY-MM-DDTHH``, taken from the latest
    reading seen on that day across all CRNs.  A cell is filled only when
    the CRN's own latest reading of that day falls in that same hour;
    otherwise the cell is left empty.

    Args:
        history_path: CSV file to read.  Defaults to the fa25 history cache.
        output_path: CSV file to write.  Defaults to the fa25 data cache.

    Raises:
        ValueError: if a ``datetime`` field does not match
            ``%Y-%m-%dT%H-%M`` or an ``enrolled`` field is not an integer.
        OSError: if either file cannot be opened.
    """
    # Function-scope imports: this function has always carried its own
    # dependencies, and ``csv`` is needed by both the reader and the writer.
    import csv
    from collections import defaultdict
    from datetime import datetime

    fieldnames = ['datetime', 'crn', 'course', 'teacher', 'max',
                  'enrolled', 'waitlistmax', 'waitlisted']

    def parse_row(row):
        """Return ``(timestamp, crn, enrolled)`` for one history record."""
        dt = datetime.strptime(row['datetime'], "%Y-%m-%dT%H-%M")
        return dt, row['crn'], int(row['enrolled'])

    def reduce_latest_per_day(rows):
        """Collapse readings to the latest one per CRN per day.

        Returns ``(latest, headers)`` where
        ``latest[crn][date] = (dt, hour_label, enrolled)`` and ``headers``
        is the chronologically sorted list of per-day hour labels.
        """
        latest = defaultdict(dict)
        latest_ts_by_date = {}  # date -> (dt, hour_label), for header naming

        for row in rows:
            dt, crn, enrolled = parse_row(row)
            date_str = dt.date().isoformat()
            ts_header = dt.strftime("%Y-%m-%dT%H")

            # For each CRN, per day, keep only the latest reading.
            per_day = latest[crn]
            if date_str not in per_day or dt > per_day[date_str][0]:
                per_day[date_str] = (dt, ts_header, enrolled)

            # Track the latest reading of the day over all CRNs so that
            # every row of the table shares the same column labels.
            if (date_str not in latest_ts_by_date
                    or dt > latest_ts_by_date[date_str][0]):
                latest_ts_by_date[date_str] = (dt, ts_header)

        return latest, [ts for _, ts in sorted(latest_ts_by_date.values())]

    def pivot_table(latest, headers):
        """Turn the reduced readings into ``(header_row, data_rows)``."""
        table = []
        for crn in sorted(latest):
            row = [crn]
            for ts in headers:
                # The first ten characters of a label are its YYYY-MM-DD date.
                val = latest[crn].get(ts[:10])
                # Only report a value whose hour matches the column label.
                if val and val[1] == ts:
                    row.append(str(val[2]))
                else:
                    row.append("")
            table.append(row)
        return ['crn'] + headers, table

    with open(history_path, newline='') as f:
        reader = csv.DictReader(f, fieldnames=fieldnames)
        # Stream the reader; there is no need to hold every record in memory.
        latest, headers = reduce_latest_per_day(reader)

    header_row, table = pivot_table(latest, headers)

    with open(output_path, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header_row)
        writer.writerows(table)