recreate reg_data. what broke it?

This commit is contained in:
Peter Howell 2025-08-16 14:33:15 -07:00
parent a0a2845198
commit f15d103fb8
1 changed file with 101 additions and 20 deletions

View File

@ -1145,6 +1145,17 @@ def process_reg_history():
grouped[ts] = {r['crn']: r for r in group} grouped[ts] = {r['crn']: r for r in group}
return grouped return grouped
def crossed_threshold(old_val, new_val, max_val, thresholds=(0.25, 0.5, 0.75, 1.0)):
    """Report whether an enrollment count crossed a capacity threshold upward.

    Args:
        old_val: previous count (int or numeric string).
        new_val: new count (int or numeric string).
        max_val: section capacity (int or numeric string); 0 means no
            meaningful ratio can be computed.
        thresholds: ascending fractions of capacity to report on
            (default quarters of capacity; kept backward-compatible).

    Returns:
        (True, percent) for the lowest threshold t with old/max < t <= new/max,
        where percent is t expressed as an integer percentage;
        (False, None) if no threshold was crossed or capacity is zero.
        Only upward crossings are reported; a drop never triggers.
    """
    capacity = int(max_val)
    if capacity == 0:
        # Avoid division by zero for sections with unknown/unlimited capacity.
        return False, None
    old_ratio = int(old_val) / capacity
    new_ratio = int(new_val) / capacity
    for t in thresholds:
        if old_ratio < t <= new_ratio:
            # round() guards against float artifacts for non-default
            # thresholds (e.g. 0.1 * 100 == 10.000000000000002).
            return True, round(t * 100)
    return False, None
# NOTE(review): this span is a side-by-side diff render (old code | new code
# fused on each line), not runnable Python. Behavior visible on the new side:
#   - every change message becomes a (datetime, message) tuple;
#   - removals now record the last-seen teacher/enrollment/waitlist counts;
#   - enrollment changes are reported only when crossed_threshold() says a
#     capacity boundary (25/50/75/100%) was crossed;
#   - the waitlist>10 check reads n['waitlisted'] directly (old side used
#     n.get('waitlisted', 0)).
# The statement defining `all_crns` is elided by the hunk header below, so
# the function cannot be reconstructed verbatim from this view — TODO confirm
# against the full file before editing this region.
def detect_changes(prev, curr): def detect_changes(prev, curr):
changes = defaultdict(list) changes = defaultdict(list)
@ -1152,42 +1163,111 @@ def process_reg_history():
for crn in all_crns: for crn in all_crns:
o, n = prev.get(crn), curr.get(crn) o, n = prev.get(crn), curr.get(crn)
if not o: if not o:
changes[crn].append("Section was added.") changes[crn].append((n['datetime'], "Section was added."))
elif not n: elif not n:
changes[crn].append("Section was removed.") changes[crn].append((
o['datetime'],
f"Section was removed (last seen: teacher {o['teacher']}, "
f"{o['enrolled']}/{o['max']} enrolled, {o['waitlisted']}/{o['waitlistmax']} waitlisted)."
))
else: else:
dt = n['datetime']
if o['teacher'] != n['teacher']: if o['teacher'] != n['teacher']:
changes[crn].append(f"Changed teacher to {n['teacher']}.") changes[crn].append((dt, f"Teacher changed from {o['teacher']} to {n['teacher']}."))
if o['enrolled'] != n['enrolled']: if o['enrolled'] != n['enrolled']:
if int(n['enrolled']) >= int(n.get('max', 9999)): crossed, percent = crossed_threshold(o['enrolled'], n['enrolled'], n['max'])
changes[crn].append("Filled up.") if crossed:
else: changes[crn].append((dt, f"Enrollment crossed {percent}% ({n['enrolled']}/{n['max']})."))
changes[crn].append(f"Enrollment changed to {n['enrolled']}.") if int(n['waitlisted']) > 10 and o['waitlisted'] != n['waitlisted']:
if int(n.get('waitlisted', 0)) > 10 and o['waitlisted'] != n['waitlisted']: changes[crn].append((dt, f"Waitlist exceeds 10: {n['waitlisted']}."))
changes[crn].append(f"Waitlist exceeds 10: {n['waitlisted']}.")
return changes return changes
def process_diff_timeline(path):
    """Diff successive registration snapshots and aggregate changes per CRN.

    Returns a pair:
      - dict mapping crn -> chronologically sorted list of
        (datetime, message) change records,
      - dict mapping crn -> most recently seen course name.
    """
    snapshots = read_grouped_csv(path)
    ordered_ts = sorted(snapshots)
    per_snapshot_deltas = []
    course_names = {}  # crn -> latest known course name
    # Walk consecutive snapshot pairs (no-op when fewer than two snapshots).
    for earlier, later in zip(ordered_ts, ordered_ts[1:]):
        prev_group = snapshots[earlier]
        curr_group = snapshots[later]
        # Keep the newest course title observed for each CRN.
        for crn, row in curr_group.items():
            course_names[crn] = row['course']
        per_snapshot_deltas.append(detect_changes(prev_group, curr_group))
    # Merge the per-snapshot deltas into one combined list per CRN...
    crn_changes = defaultdict(list)
    for delta in per_snapshot_deltas:
        for crn, entries in delta.items():
            crn_changes[crn].extend(entries)
    # ...then order each CRN's records chronologically; records are
    # (datetime, message) tuples, so sorting on the first element suffices.
    for crn in crn_changes:
        crn_changes[crn].sort(key=lambda entry: entry[0])
    return crn_changes, course_names
# Write the per-CRN change narrative to stdout and to a UTF-8 report file.
# NOTE(review): the original never closed `output1`; `with` guarantees the
# report is flushed and closed even if formatting raises.
changes, course_names = process_diff_timeline("cache/reg_history_fa25.csv")
with codecs.open('cache/reg_timeline_fa25.txt', 'w', 'utf-8') as output1:
    # Order sections by course name; CRNs with no known name sort first ("").
    for crn in sorted(changes, key=lambda c: course_names.get(c, "")):
        course = course_names.get(crn, "")
        print(f"\n{course} (CRN {crn}):")
        output1.write(f"\n{course} (CRN {crn}):\n")
        for dt, msg in changes[crn]:
            print(f" [{dt}] {msg}")
            output1.write(f" [{dt}] {msg}\n")
def recreate_reg_data():
    """Rebuild cache/reg_data_fa25.csv (CRN x date enrollment table) from the
    full registration history in cache/reg_history_fa25.csv.

    For each CRN only the latest snapshot per calendar day is kept, then the
    data is pivoted to one row per CRN with one column per observed date;
    days with no snapshot for a CRN are emitted as empty cells.
    """
    import csv
    from collections import defaultdict
    from datetime import datetime

    def parse_row(row):
        # Raises ValueError/TypeError on a malformed row (e.g. a header line).
        dt = datetime.strptime(row['datetime'], "%Y-%m-%dT%H-%M")
        return dt, row['crn'], int(row['enrolled'])

    def reduce_latest_per_day(rows):
        # latest[crn][date_str] = (dt, enrolled), keeping the newest dt per day.
        latest = defaultdict(dict)
        for row in rows:
            try:
                dt, crn, enrolled = parse_row(row)
            except (ValueError, TypeError):
                # BUGFIX: explicit fieldnames below mean DictReader hands a
                # header row back as data; strptime/int then crashed the whole
                # rebuild. Skip rows that do not parse instead.
                continue
            day = dt.date().isoformat()
            if day not in latest[crn] or dt > latest[crn][day][0]:
                latest[crn][day] = (dt, enrolled)
        return latest

    def pivot_to_table(latest_data):
        # Union of all observed dates; ISO date strings sort chronologically.
        all_dates = sorted({day for crn in latest_data for day in latest_data[crn]})
        table = []
        for crn in sorted(latest_data):
            row = [crn]
            for day in all_dates:
                val = latest_data[crn].get(day, (None, None))[1]
                row.append(str(val) if val is not None else "")
            table.append(row)
        return ["crn"] + all_dates, table

    with open("cache/reg_history_fa25.csv", newline='') as f:
        # NOTE(review): supplying fieldnames assumes the file has no header
        # row; if one is present it is skipped by the parse guard above.
        fieldnames = ['datetime', 'crn', 'course', 'teacher', 'max', 'enrolled', 'waitlistmax', 'waitlisted']
        reader = csv.DictReader(f, fieldnames=fieldnames)
        rows = list(reader)
    latest = reduce_latest_per_day(rows)
    header, table = pivot_to_table(latest)
    with open("cache/reg_data_fa25.csv", "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(table)
# NOTE(review): side-by-side diff render of the script's menu dispatch dict;
# the dict's opening lines (entries 0/1 and the dict literal start) are
# elided by the hunk header below, so this span is incomplete here. The new
# side only adds menu entry 5 wiring recreate_reg_data into the dispatcher.
if __name__ == "__main__": if __name__ == "__main__":
@ -1197,6 +1277,7 @@ if __name__ == "__main__":
2: ['Get canvas data 2024 style', canvas_data_2024_run ], 2: ['Get canvas data 2024 style', canvas_data_2024_run ],
3: ['Set up canvas data 2024 style', setup_canvas_data_2024_run], 3: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],
4: ['Narrative timeline of section updates', process_reg_history], 4: ['Narrative timeline of section updates', process_reg_history],
5: ['Recreate reg_data from full reg history', recreate_reg_data],
} }
'''1: ['Re-create schedule csv and json files from raw html',recent_schedules] , '''1: ['Re-create schedule csv and json files from raw html',recent_schedules] ,