cvc report parsing
This commit is contained in:
parent
742bd77f15
commit
4851edccd4
116
pipelines.py
116
pipelines.py
|
|
@ -1189,6 +1189,120 @@ def recreate_reg_data(term="fa25"):
|
||||||
writer.writerow(header_row)
|
writer.writerow(header_row)
|
||||||
writer.writerows(table)
|
writer.writerows(table)
|
||||||
|
|
||||||
|
def cvc_report(input_csv="cache/cvc_fa25.csv", out_dir="cache"):
    """Tally a CVC export CSV into four summary tables.

    Reads ``input_csv`` (every column as text), derives per-row flags and
    writes the tables below into ``out_dir`` both as individual CSV files and
    as sheets of a single Excel workbook (``csv_fa25.xlsx``):

    * ``students_by_gc``     - unique students (by ``CCCID``) per ``gc_student``
    * ``outcomes_by_gc``     - row counts per ``gc_student`` / ``success``
    * ``dept_by_gc_success`` - row counts per ``gc_student`` / ``success`` / ``dept``
    * ``reasons_by_gc``      - row counts of unsuccessful rows per status

    Derived columns:

    * ``gc_student`` - 1 when "Home College name" is exactly "Gavilan College"
    * ``success``    - 1 when the lower-cased "Status" is "validated"
    * ``dept``       - leading letters of "Course", upper-cased ("UNKNOWN" if none)

    Args:
        input_csv: Path of the CSV export to read.
        out_dir: Directory that receives every output file; created if needed.

    Returns:
        None. Results are written to disk and the written paths are printed.

    Raises:
        ImportError: If ``openpyxl`` is unavailable when the workbook is
            written (the CSV files have already been written at that point).
    """
    import os
    import re

    import pandas as pd

    os.makedirs(out_dir, exist_ok=True)

    # --- Load & normalize ---
    df = pd.read_csv(input_csv, dtype=str).fillna("")
    df.columns = [c.strip() for c in df.columns]
    # Guarantee every column used below exists. A missing column behaves like
    # an all-blank one, so the blank-value fallbacks further down are actually
    # reachable instead of the run dying with a KeyError.
    for col in ("Status", "Home College name", "Course", "CCCID", "Created On"):
        if col not in df.columns:
            df[col] = ""
        df[col] = df[col].astype(str).str.strip()

    # --- Parse dates for sorting (original text is kept untouched) ---
    def try_parse(text):
        # Tolerates 'YYYY-MM-DD hh:mm:ss PDT' and blanks. Values are parsed
        # one at a time so a single odd format cannot affect the other rows.
        try:
            return pd.to_datetime(
                text.replace(" PDT", "").replace(" PST", ""), errors="coerce"
            )
        except (ValueError, TypeError, OverflowError):
            return pd.NaT

    df["_created"] = df["Created On"].apply(try_parse)

    # Stable sort by "Created On" then "CCCID".
    df = df.sort_values(by=["_created", "CCCID"], kind="mergesort")

    # --- Row-level flags ---
    df["gc_student"] = (df["Home College name"] == "Gavilan College").astype(int)
    df["_status"] = df["Status"].str.lower().str.strip()
    df["success"] = (df["_status"] == "validated").astype(int)

    # Department code = leading letters of "Course", e.g. "CD32 - Intro..." -> "CD".
    dept_pattern = re.compile(r"\s*([A-Za-z]+)")

    def dept_from_course(course):
        found = dept_pattern.match(course or "")
        return found.group(1).upper() if found else "UNKNOWN"

    df["dept"] = df["Course"].apply(dept_from_course)

    # --- Student-level counts (unique CCCID) ---
    # A student counts as gc_student=1 if any of their rows has Gavilan as home.
    if df["CCCID"].eq("").all():
        # No usable CCCID at all: treat each row as its own student.
        stu = (
            df.assign(_stu_id=df.index.astype(str))
            .groupby("_stu_id", as_index=False)["gc_student"]
            .max()
        )
    else:
        # Rows with a blank CCCID are left out of the student-level view.
        stu = df[df["CCCID"] != ""].groupby("CCCID", as_index=False)["gc_student"].max()

    students_by_gc = (
        stu.groupby("gc_student", as_index=False)
        .size()
        .rename(columns={"size": "count_students"})
    )

    # --- Outcome tallies by gc_student (row-level: sections/attempts) ---
    outcomes_by_gc = (
        df.groupby(["gc_student", "success"], as_index=False)
        .size()
        .rename(columns={"size": "count_rows"})
    )

    # --- Department tallies, split by success and gc_student (row-level) ---
    dept_by_gc_success = (
        df.groupby(["gc_student", "success", "dept"], as_index=False)
        .size()
        .rename(columns={"size": "count_rows"})
        .sort_values(["gc_student", "success", "dept"])
    )

    # --- Unsuccessful reasons (status) by gc_student ---
    failed = df[df["success"] == 0]
    reasons_by_gc = (
        failed.assign(reason=failed["_status"].replace("", "unknown"))
        .groupby(["gc_student", "reason"], as_index=False)
        .size()
        .rename(columns={"size": "count_rows"})
        .sort_values(["gc_student", "reason"])
    )

    # One mapping drives the CSV files, the printed listing and the sheets,
    # so the three can never drift apart.
    tables = {
        "students_by_gc": students_by_gc,
        "outcomes_by_gc": outcomes_by_gc,
        "dept_by_gc_success": dept_by_gc_success,
        "reasons_by_gc": reasons_by_gc,
    }

    for name, frame in tables.items():
        frame.to_csv(os.path.join(out_dir, name + ".csv"), index=False)

    # Quick prints for sanity
    print("Wrote:")
    for name in tables:
        print("  -", os.path.join(out_dir, name + ".csv"))

    # Write everything to one Excel file with one sheet per table. The workbook
    # goes to out_dir like the CSVs (it used to be pinned to "cache/").
    # NOTE(review): "csv_fa25" may be a typo for "cvc_fa25"; the name is kept
    # so existing consumers of the file keep working.
    output_xlsx = os.path.join(out_dir, "csv_fa25.xlsx")
    with pd.ExcelWriter(output_xlsx, engine="openpyxl") as writer:
        for name, frame in tables.items():
            frame.to_excel(writer, sheet_name=name, index=False)

    print("Wrote Excel workbook:", output_xlsx)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_starfish():
    """Fetch the gradebook-history feed of course 29 and print the result.

    Relies on the module-level ``url`` and ``fetch`` names, which are defined
    outside this function. Returns None.
    """

    def probe(endpoint):
        # Announce the endpoint, then print whatever fetch() hands back.
        print(f"\n\ntesting: {endpoint}\n\n")
        print(fetch(endpoint))

    probe(f'{url}/api/v1/courses/29/gradebook_history/feed')

    # Stops here: the probes below are switched off by this early return.
    return

    probe(f'{url}/api/v1/courses/29/enrollments')
    probe(f'{url}/api/v1/courses/29/assignments')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
|
@ -1200,6 +1314,8 @@ if __name__ == "__main__":
|
||||||
5: ['Create narrative format all semesters', recreate_all],
|
5: ['Create narrative format all semesters', recreate_all],
|
||||||
6: ['Recreate reg_data from full reg history', recreate_reg_data],
|
6: ['Recreate reg_data from full reg history', recreate_reg_data],
|
||||||
7: ['Compute enrollment changes', compute_enrollment_changes],
|
7: ['Compute enrollment changes', compute_enrollment_changes],
|
||||||
|
8: ['cvc report parse', cvc_report],
|
||||||
|
9: ['test starfish account', test_starfish],
|
||||||
}
|
}
|
||||||
|
|
||||||
'''1: ['Re-create schedule csv and json files from raw html',recent_schedules] ,
|
'''1: ['Re-create schedule csv and json files from raw html',recent_schedules] ,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue