# Reconstructed from a whitespace-collapsed unified diff of pipelines.py
# (index eacae19..6d633c6). Patch layout, kept for reference only:
#   @@ -1189,6 +1189,120 @@ def recreate_reg_data(term="fa25"):
#       unchanged context (tail of recreate_reg_data):
#           writer.writerow(header_row)
#           writer.writerows(table)
#       added: cvc_report() and test_starfish(), defined below
#   @@ -1200,6 +1314,8 @@ if __name__ == "__main__":
#       added menu entries:
#           8: ['cvc report parse', cvc_report],
#           9: ['test starfish account', test_starfish],


def cvc_report(input_csv="cache/cvc_fa25.csv", out_dir="cache"):
    """Tally a CVC export CSV by home college, outcome, department and status.

    The input is read as text. The columns "Status", "Home College name",
    "Course", "CCCID" and "Created On" are whitespace-trimmed; any of them
    that is absent from the file is treated as an all-blank column so the
    report still runs (a file without usable CCCIDs counts every row as its
    own student).

    Derived per-row fields:
      * gc_student -- 1 when "Home College name" is exactly "Gavilan College"
      * success    -- 1 when the lower-cased status is "validated"
      * dept       -- leading letters of "Course", upper-cased, else "UNKNOWN"

    Files written into ``out_dir`` (created if needed):
      * students_by_gc.csv     -- unique students per gc_student
      * outcomes_by_gc.csv     -- rows per (gc_student, success)
      * dept_by_gc_success.csv -- rows per (gc_student, success, dept)
      * reasons_by_gc.csv      -- unsuccessful rows per (gc_student, status)
      * csv_fa25.xlsx          -- the same four tables, one sheet each

    Args:
        input_csv: Path of the CSV export to read.
        out_dir: Directory that receives every output file, including the
            Excel workbook.

    Returns:
        None. Results are written to disk and the paths are printed.

    Raises:
        ImportError: If ``openpyxl`` is not installed. The CSV files have
            already been written when this happens.
    """
    import os
    import re

    import pandas as pd

    os.makedirs(out_dir, exist_ok=True)

    # --- Load & normalize ---
    df = pd.read_csv(input_csv, dtype=str).fillna("")
    df.columns = [c.strip() for c in df.columns]
    for col in ("Status", "Home College name", "Course", "CCCID", "Created On"):
        # Every one of these columns is read unconditionally further down;
        # back-fill a blank column rather than failing with KeyError.
        if col not in df.columns:
            df[col] = ""
        df[col] = df[col].astype(str).str.strip()

    def try_parse(s):
        # Tolerates 'YYYY-MM-DD hh:mm:ss PDT' style stamps and blanks;
        # anything unparseable becomes NaT.
        try:
            return pd.to_datetime(
                s.replace(" PDT", "").replace(" PST", ""), errors="coerce"
            )
        except (ValueError, TypeError, OverflowError):
            return pd.NaT

    # Parsed copy is used only for ordering; the original text is kept.
    df["_created"] = df["Created On"].apply(try_parse)
    df = df.sort_values(by=["_created", "CCCID"], kind="mergesort")

    # Row-level flags.
    df["gc_student"] = (df["Home College name"] == "Gavilan College").astype(int)
    df["_status"] = df["Status"].str.lower().str.strip()
    df["success"] = (df["_status"] == "validated").astype(int)

    dept_pattern = re.compile(r"\s*([A-Za-z]+)")

    def dept_from_course(s):
        # e.g. "CD32 - Intro..." -> "CD"
        m = dept_pattern.match(s or "")
        return m.group(1).upper() if m else "UNKNOWN"

    df["dept"] = df["Course"].apply(dept_from_course)

    # --- Student-level counts (unique CCCID) ---
    # A student counts as gc_student if any of their rows says so (max).
    if df["CCCID"].eq("").all():
        # No usable IDs at all: treat each row as a distinct student.
        stu = (
            df.assign(_stu_id=df.index.astype(str))
            .groupby("_stu_id", as_index=False)["gc_student"]
            .max()
        )
    else:
        stu = (
            df[df["CCCID"] != ""]
            .groupby("CCCID", as_index=False)["gc_student"]
            .max()
        )
    students_by_gc = (
        stu.groupby("gc_student", as_index=False)
        .size()
        .rename(columns={"size": "count_students"})
    )

    # --- Outcome tallies by gc_student (row-level: sections/attempts) ---
    outcomes_by_gc = (
        df.groupby(["gc_student", "success"], as_index=False)
        .size()
        .rename(columns={"size": "count_rows"})
    )

    # --- Department tallies, split by success and gc_student (row-level) ---
    dept_by_gc_success = (
        df.groupby(["gc_student", "success", "dept"], as_index=False)
        .size()
        .rename(columns={"size": "count_rows"})
        .sort_values(["gc_student", "success", "dept"])
    )

    # --- Unsuccessful reasons (status) by gc_student ---
    unsuccessful = df[df["success"] == 0]
    reasons_by_gc = (
        unsuccessful.assign(reason=unsuccessful["_status"].replace("", "unknown"))
        .groupby(["gc_student", "reason"], as_index=False)
        .size()
        .rename(columns={"size": "count_rows"})
        .sort_values(["gc_student", "reason"])
    )

    # --- Write outputs ---
    # Insertion order here is both the CSV order and the sheet order.
    tables = {
        "students_by_gc": students_by_gc,
        "outcomes_by_gc": outcomes_by_gc,
        "dept_by_gc_success": dept_by_gc_success,
        "reasons_by_gc": reasons_by_gc,
    }
    csv_paths = []
    for name, table in tables.items():
        path = os.path.join(out_dir, f"{name}.csv")
        table.to_csv(path, index=False)
        csv_paths.append(path)

    # Quick prints for sanity.
    print("Wrote:")
    for path in csv_paths:
        print(" -", path)

    # One Excel workbook with a sheet per table. It now follows out_dir
    # instead of a fixed "cache/" path. File name kept as in the original
    # ("csv", possibly meant "cvc") so existing consumers still find it.
    output_xlsx = os.path.join(out_dir, "csv_fa25.xlsx")
    with pd.ExcelWriter(output_xlsx, engine="openpyxl") as writer:
        for name, table in tables.items():
            table.to_excel(writer, sheet_name=name, index=False)

    print("Wrote Excel workbook:", output_xlsx)


def test_starfish(course_id=29, endpoints=("gradebook_history/feed",)):
    """Manually probe course API endpoints and print the raw responses.

    For each endpoint the URL ``{url}/api/v1/courses/{course_id}/{endpoint}``
    is printed and then fetched, and the result is printed. ``url`` and
    ``fetch`` are module-level names defined elsewhere in this file.

    With the defaults only the gradebook-history feed of course 29 is
    probed, which is what the original did before its early ``return``.
    The probes that used to sit unreachable after that ``return`` can be
    run by passing them explicitly, e.g.
    ``test_starfish(endpoints=("enrollments", "assignments"))``.

    Args:
        course_id: Course id placed in the URL path.
        endpoints: Iterable of path suffixes under the course. Pass a
            tuple or list, not a bare string.

    Returns:
        None.
    """
    for endpoint in endpoints:
        api = f'{url}/api/v1/courses/{course_id}/{endpoint}'
        print(f"\n\ntesting: {api}\n\n")
        print(fetch(api))


# This is a live-API probe run from the __main__ menu, not a unit test;
# the flag keeps pytest from collecting it because of its "test_" prefix.
test_starfish.__test__ = False