recreate reg_data. what broke it?
This commit is contained in:
parent
a0a2845198
commit
f15d103fb8
121
pipelines.py
121
pipelines.py
|
|
@ -1145,6 +1145,17 @@ def process_reg_history():
|
|||
grouped[ts] = {r['crn']: r for r in group}
|
||||
return grouped
|
||||
|
||||
def crossed_threshold(old_val, new_val, max_val):
    """Detect whether enrollment rose past a capacity milestone.

    The milestones are 25%, 50%, 75% and 100% of ``max_val``. A milestone
    counts as crossed when the old ratio was strictly below it and the new
    ratio is at or above it, so only upward movement is reported.

    Args:
        old_val: Previous enrolled count (int or int-like string).
        new_val: Current enrolled count (int or int-like string).
        max_val: Section capacity (int or int-like string).

    Returns:
        ``(True, percent)`` where ``percent`` is the highest milestone
        crossed (25, 50, 75 or 100), or ``(False, None)`` when nothing was
        crossed or the capacity is zero.

    Raises:
        ValueError: If any argument cannot be converted with ``int()``.
    """
    thresholds = (0.25, 0.5, 0.75, 1.0)

    capacity = int(max_val)
    if capacity == 0:
        # No meaningful ratio exists for a zero-capacity section.
        return False, None

    old_ratio = int(old_val) / capacity
    new_ratio = int(new_val) / capacity

    # Scan from the top: when one snapshot jumps over several milestones
    # (e.g. 0 -> 30 of 30) the caller should hear about 100%, not 25%.
    for t in reversed(thresholds):
        if old_ratio < t <= new_ratio:
            return True, int(t * 100)
    return False, None
|
||||
|
||||
def detect_changes(prev, curr):
|
||||
changes = defaultdict(list)
|
||||
|
||||
|
|
@ -1152,42 +1163,111 @@ def process_reg_history():
|
|||
for crn in all_crns:
|
||||
o, n = prev.get(crn), curr.get(crn)
|
||||
if not o:
|
||||
changes[crn].append("Section was added.")
|
||||
changes[crn].append((n['datetime'], "Section was added."))
|
||||
elif not n:
|
||||
changes[crn].append("Section was removed.")
|
||||
changes[crn].append((
|
||||
o['datetime'],
|
||||
f"Section was removed (last seen: teacher {o['teacher']}, "
|
||||
f"{o['enrolled']}/{o['max']} enrolled, {o['waitlisted']}/{o['waitlistmax']} waitlisted)."
|
||||
))
|
||||
else:
|
||||
dt = n['datetime']
|
||||
if o['teacher'] != n['teacher']:
|
||||
changes[crn].append(f"Changed teacher to {n['teacher']}.")
|
||||
changes[crn].append((dt, f"Teacher changed from {o['teacher']} to {n['teacher']}."))
|
||||
if o['enrolled'] != n['enrolled']:
|
||||
if int(n['enrolled']) >= int(n.get('max', 9999)):
|
||||
changes[crn].append("Filled up.")
|
||||
else:
|
||||
changes[crn].append(f"Enrollment changed to {n['enrolled']}.")
|
||||
if int(n.get('waitlisted', 0)) > 10 and o['waitlisted'] != n['waitlisted']:
|
||||
changes[crn].append(f"Waitlist exceeds 10: {n['waitlisted']}.")
|
||||
crossed, percent = crossed_threshold(o['enrolled'], n['enrolled'], n['max'])
|
||||
if crossed:
|
||||
changes[crn].append((dt, f"Enrollment crossed {percent}% ({n['enrolled']}/{n['max']})."))
|
||||
if int(n['waitlisted']) > 10 and o['waitlisted'] != n['waitlisted']:
|
||||
changes[crn].append((dt, f"Waitlist exceeds 10: {n['waitlisted']}."))
|
||||
return changes
|
||||
|
||||
def process_diff_timeline(path):
|
||||
snapshots = read_grouped_csv(path)
|
||||
timeline = sorted(snapshots.keys())
|
||||
reports = []
|
||||
timeline_diffs = []
|
||||
course_names = {} # crn -> latest known course name
|
||||
|
||||
for i in range(1, len(timeline)):
|
||||
prev_ts, curr_ts = timeline[i-1], timeline[i]
|
||||
prev, curr = snapshots[prev_ts], snapshots[curr_ts]
|
||||
|
||||
# update course name map
|
||||
for crn, row in curr.items():
|
||||
course_names[crn] = row['course']
|
||||
|
||||
delta = detect_changes(prev, curr)
|
||||
if delta:
|
||||
reports.append((curr_ts, delta))
|
||||
return reports
|
||||
timeline_diffs.append(delta)
|
||||
|
||||
result = process_diff_timeline("cache/reg_history_fa25.csv")
|
||||
for timestamp, changes in result:
|
||||
print(f"\n[{timestamp}]")
|
||||
for crn, msgs in sorted(changes.items()):
|
||||
print(f" CRN {crn}:")
|
||||
for msg in msgs:
|
||||
print(f" - {msg}")
|
||||
# Flatten and group by crn
|
||||
crn_changes = defaultdict(list)
|
||||
for delta in timeline_diffs:
|
||||
for crn, changes in delta.items():
|
||||
crn_changes[crn].extend(changes)
|
||||
|
||||
# Sort changes for each CRN by datetime
|
||||
for crn in crn_changes:
|
||||
crn_changes[crn].sort(key=lambda x: x[0])
|
||||
|
||||
return crn_changes, course_names
|
||||
|
||||
# Build the per-CRN timeline before touching the report file, so a failure
# while reading the history does not leave a truncated report behind.
changes, course_names = process_diff_timeline("cache/reg_history_fa25.csv")

# The context manager guarantees the report is flushed and closed even if
# writing fails part-way; the original handle was never closed.
with codecs.open('cache/reg_timeline_fa25.txt', 'w', 'utf-8') as output1:
    # Sections are listed alphabetically by their latest known course name;
    # CRNs without a known name sort first under the empty string.
    for crn in sorted(changes, key=lambda c: course_names.get(c, "")):
        course = course_names.get(crn, "")
        heading = f"\n{course} (CRN {crn}):"
        print(heading)
        output1.write(heading + "\n")
        # Each entry is a (datetime, message) pair, already sorted by datetime.
        for dt, msg in changes[crn]:
            line = f" [{dt}] {msg}"
            print(line)
            output1.write(line + "\n")
|
||||
|
||||
def recreate_reg_data(history_path="cache/reg_history_fa25.csv",
                      output_path="cache/reg_data_fa25.csv"):
    """Rebuild the per-day enrollment table from the full registration history.

    The history file is read as a headerless CSV with the columns
    datetime, crn, course, teacher, max, enrolled, waitlistmax, waitlisted.
    For every CRN and calendar day, only the enrollment of the latest
    snapshot taken that day is kept. The result is written as a CSV with one
    row per CRN and one column per date; days on which a CRN has no snapshot
    are left blank.

    Args:
        history_path: Path of the registration history CSV to read.
            Defaults to the fall 2025 history file.
        output_path: Path of the pivoted CSV to write.
            Defaults to the fall 2025 reg_data file.

    Raises:
        ValueError: If a row's datetime does not match ``%Y-%m-%dT%H-%M`` or
            its enrolled value is not an integer.
    """
    import csv
    from collections import defaultdict
    from datetime import datetime

    fieldnames = ['datetime', 'crn', 'course', 'teacher', 'max', 'enrolled', 'waitlistmax', 'waitlisted']

    def parse_row(row):
        """Extract (snapshot datetime, crn, enrolled count) from one history row."""
        dt = datetime.strptime(row['datetime'], "%Y-%m-%dT%H-%M")
        return dt, row['crn'], int(row['enrolled'])

    def reduce_latest_per_day(rows):
        """Keep the latest snapshot per CRN per day: latest[crn][date_str] = (dt, enrolled)."""
        latest = defaultdict(dict)
        for row in rows:
            dt, crn, enrolled = parse_row(row)
            day = dt.date().isoformat()
            # A later snapshot on the same day replaces the earlier one.
            if day not in latest[crn] or dt > latest[crn][day][0]:
                latest[crn][day] = (dt, enrolled)
        return latest

    def pivot_to_table(latest_data):
        """Pivot the per-CRN/day mapping into (header, rows) with one column per date."""
        all_dates = sorted({day for per_day in latest_data.values() for day in per_day})
        table = []
        for crn in sorted(latest_data):
            per_day = latest_data[crn]
            # Blank cell when this CRN has no snapshot on a given day.
            cells = [str(per_day[day][1]) if day in per_day else "" for day in all_dates]
            table.append([crn] + cells)
        return ["crn"] + all_dates, table

    with open(history_path, newline='') as f:
        # Supplying fieldnames makes DictReader treat the first line as data.
        rows = list(csv.DictReader(f, fieldnames=fieldnames))

    header, table = pivot_to_table(reduce_latest_per_day(rows))

    with open(output_path, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(table)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -1197,6 +1277,7 @@ if __name__ == "__main__":
|
|||
2: ['Get canvas data 2024 style', canvas_data_2024_run ],
|
||||
3: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],
|
||||
4: ['Narrative timeline of section updates', process_reg_history],
|
||||
5: ['Recreate reg_data from full reg history', recreate_reg_data],
|
||||
}
|
||||
|
||||
'''1: ['Re-create schedule csv and json files from raw html',recent_schedules] ,
|
||||
|
|
|
|||
Loading…
Reference in New Issue