# Reconstructed from a whitespace-collapsed git patch against users.py
# (commit 2577d105, subject "k"). The same patch registers this function in
# the ``__main__`` menu table as:
#     50: ['summarize users logs 2', readable_user_summary],


def readable_user_summary(user_id=None, gap_minutes=10, tz_name="America/Los_Angeles"):
    """Write and return a per-day, per-course activity report for one user.

    Reads ``cache/users/logs/{user_id}.csv``, keeps the rows that can be tied
    to a course, splits each (course, session_id) stream into sessions
    wherever two consecutive hits are more than ``gap_minutes`` apart, and
    writes one line per session to ``cache/users/logs/{user_id}_report.txt``.

    Args:
        user_id: Id used to build the input/output file names. When ``None``
            (the default, which is how the menu calls it) the id is read
            interactively with ``input()``.
        gap_minutes: Idle time, in minutes, after which a new session starts.
        tz_name: IANA time zone used for display and for day boundaries.
            ``America/Los_Angeles`` follows DST automatically.

    Returns:
        The report text, or ``"No course activity found."`` when no row
        yields both a course id and a parseable timestamp. The report file
        is written in both cases (empty in the latter).

    Raises:
        FileNotFoundError: if the CSV for ``user_id`` does not exist.
        KeyError: if the CSV lacks a ``created_at`` or ``session_id`` column.
    """
    import ast
    import math
    import re
    from zoneinfo import ZoneInfo

    import pandas as pd

    course_re = re.compile(r"/courses/(\d+)(?:/|$)")

    def extract_course_id(url, links_str):
        """Return the course id from the URL (/courses/<id>) or links.context."""
        # read_csv yields NaN (a truthy float) for empty cells, so test the
        # type rather than truthiness before handing the value to re/ast.
        if isinstance(url, str):
            match = course_re.search(url)
            if match:
                return int(match.group(1))
        if isinstance(links_str, str):
            try:
                links = ast.literal_eval(links_str)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                # Best effort: an unparseable links cell just means "no course".
                return None
            if isinstance(links, dict):
                ctx = links.get("context")
                if isinstance(ctx, int):
                    return ctx
        return None

    def clock(ts):
        """Format a timestamp like "5:05pm" without platform-specific flags."""
        # strftime's "%-I" (glibc) and "%#I" (Windows) are mutually exclusive,
        # so build the unpadded 12-hour time from the attributes instead.
        hour12 = ts.hour % 12 or 12
        suffix = "am" if ts.hour < 12 else "pm"
        return f"{hour12}:{ts.minute:02d}{suffix}"

    def build_lines(df):
        """Turn the raw log frame into the report's text lines."""
        n_rows = len(df)
        urls = df["url"] if "url" in df.columns else [None] * n_rows
        links = df["links"] if "links" in df.columns else [None] * n_rows
        df = df.assign(
            course_id=[extract_course_id(u, l) for u, l in zip(urls, links)]
        )

        # Keep only rows that belong to a course.
        df = df.dropna(subset=["course_id"]).copy()
        if df.empty:
            return []
        df["course_id"] = df["course_id"].astype(int)

        # Parse as UTC, drop unparseable timestamps, then convert for display.
        df["ts_utc"] = pd.to_datetime(df["created_at"], utc=True, errors="coerce")
        df = df.dropna(subset=["ts_utc"]).copy()
        if df.empty:
            return []
        df["ts_local"] = df["ts_utc"].dt.tz_convert(ZoneInfo(tz_name))

        # groupby drops NaN keys by default; stringifying keeps rows with a
        # missing session_id together (per course) instead of losing them.
        df["stream"] = df["session_id"].astype(str)
        df = df.sort_values(["course_id", "stream", "ts_local"])

        # A session starts on the first hit of a stream (diff is NaT there)
        # or after an idle gap longer than the threshold.
        gap = pd.Timedelta(minutes=gap_minutes)
        delta = df.groupby(["course_id", "stream"])["ts_local"].diff()
        df["gap_new"] = delta.isna() | delta.gt(gap)
        df["session_idx"] = df.groupby(["course_id", "stream"])["gap_new"].cumsum()

        sessions = (
            df.groupby(["course_id", "stream", "session_idx"], sort=False)
            .agg(
                start=("ts_local", "min"),
                end=("ts_local", "max"),
                hits=("ts_local", "size"),
            )
            .reset_index()
        )

        # Duration in whole minutes, rounded up, never less than one.
        seconds = (sessions["end"] - sessions["start"]).dt.total_seconds().clip(lower=0)
        sessions["mins"] = [max(1, math.ceil(s / 60)) for s in seconds]

        # Chronological order; course_id breaks ties deterministically.
        sessions = sessions.sort_values(["start", "course_id"])
        sessions["day_key"] = sessions["start"].dt.date  # local calendar day

        lines = []
        for _, day_df in sessions.groupby("day_key", sort=False):
            # Header carries the day plus the time of its first session,
            # e.g. "Sep 26 8:02pm".
            first = day_df["start"].min()
            month = first.strftime("%b")
            lines.append(f"{month} {first.day} {clock(first)}:")
            for row in day_df.itertuples(index=False):
                lines.append(
                    f" - course {row.course_id}, {clock(row.start)}: "
                    f"{row.hits} hits over {row.mins} minutes"
                )
        return lines

    if user_id is None:
        user_id = input("user id? ")
    user_id = str(user_id).strip()

    csv_path = f"cache/users/logs/{user_id}.csv"
    report_path = f"cache/users/logs/{user_id}_report.txt"

    lines = build_lines(pd.read_csv(csv_path))
    report = "\n".join(lines)

    # newline="\n" keeps the bytes identical on every platform; the context
    # manager guarantees the file is flushed and closed.
    with open(report_path, "w", encoding="utf-8", newline="\n") as report_out:
        report_out.write(report)

    return report if lines else "No course activity found."