This commit is contained in:
Peter Howell 2025-11-06 19:14:00 +00:00
parent 8db592fb83
commit 2577d105f5
1 changed files with 121 additions and 0 deletions

121
users.py
View File

@ -2813,6 +2813,126 @@ def summarize_student_logs(id=0):
    print(f"Done. Output written to cache/users/logs/{id}_summary.json")
def readable_user_summary():
    """Interactively build a readable per-day, per-course session report
    from a user's Canvas page-view log.

    Prompts for a user id, reads cache/users/logs/<id>.csv, resolves each
    hit to a course id (from the URL path or the links.context field),
    splits hits into sessions (a new session starts when more than
    10 minutes pass between hits in the same course + session_id stream),
    and writes a text report to cache/users/logs/<id>_report.txt.

    Returns the report text, or "No course activity found." when no rows
    have a resolvable course id.
    """
    import re
    import ast
    import math
    from zoneinfo import ZoneInfo

    import pandas as pd

    COURSE_RE = re.compile(r"/courses/(\d+)(?:/|$)")

    def extract_course_id(url, links_str):
        """Return Canvas course id from URL (/courses/<id>) or from links.context."""
        if url:
            m = COURSE_RE.search(url)
            if m:
                return int(m.group(1))
        if links_str:
            try:
                # The links column holds a Python-literal dict, e.g. "{'context': 123}".
                d = ast.literal_eval(links_str)
                ctx = d.get("context")
                if isinstance(ctx, int):
                    return ctx
            except Exception:
                # Malformed links cell: fall through to "no course id".
                pass
        return None

    def fmt_time(ts):
        """Format a timestamp like '5:05pm'.

        Portable replacement for the platform-specific strftime flags the
        original used (%-I is Linux-only, %#I is Windows-only — mixing
        them raises ValueError on one platform or the other).
        """
        hour12 = ts.hour % 12 or 12
        return f"{hour12}:{ts.minute:02d}{'am' if ts.hour < 12 else 'pm'}"

    user_id = input("user id? ")  # renamed so we don't shadow builtin id()
    csv_path = f"cache/users/logs/{user_id}.csv"
    gap_minutes = 10
    tz_name = "America/Los_Angeles"

    # Load
    df = pd.read_csv(csv_path)

    # Extract course_id; tolerate a missing url/links column instead of
    # crashing on zip(None, None).
    urls = df["url"] if "url" in df.columns else [None] * len(df)
    links = df["links"] if "links" in df.columns else [None] * len(df)
    df["course_id"] = [extract_course_id(u, l) for u, l in zip(urls, links)]

    # Keep only rows with a course_id
    df = df.dropna(subset=["course_id"]).copy()
    df["course_id"] = df["course_id"].astype(int)

    # Parse times (UTC) and convert to local Pacific time; America/Los_Angeles
    # accounts for DST automatically.
    df["ts_utc"] = pd.to_datetime(df["created_at"], utc=True, errors="coerce")
    df = df.dropna(subset=["ts_utc"]).copy()
    df["ts_local"] = df["ts_utc"].dt.tz_convert(ZoneInfo(tz_name))

    # Sort for gap detection
    df = df.sort_values(["course_id", "session_id", "ts_local"])

    # Session splitting: new session if gap > gap_minutes within the same
    # course + session_id stream.
    gap = pd.Timedelta(minutes=gap_minutes)
    df["gap_new"] = (
        df.groupby(["course_id", "session_id"])["ts_local"]
        .diff()
        .gt(gap)
        .fillna(True)  # first row of each stream starts a session
    )
    # Session index within each course+session_id, then a global session key.
    df["session_idx"] = df.groupby(["course_id", "session_id"])["gap_new"].cumsum()
    df["session_key"] = list(zip(df["course_id"], df["session_id"], df["session_idx"]))

    # Aggregate hits to sessions
    agg = (
        df.groupby("session_key")
        .agg(course_id=("course_id", "first"),
             start=("ts_local", "min"),
             end=("ts_local", "max"),
             hits=("ts_local", "size"))
        .reset_index(drop=True)
    )

    # Duration in minutes (ceil; minimum 1 minute)
    dur_secs = (agg["end"] - agg["start"]).dt.total_seconds().clip(lower=0)
    agg["mins"] = [max(1, math.ceil(s / 60)) for s in dur_secs]

    # Format start time like "5:05pm"
    agg["start_str"] = [fmt_time(ts) for ts in agg["start"]]

    # Chronological order, then group by local calendar date
    agg = agg.sort_values("start")
    agg["day_key"] = agg["start"].dt.date

    # Build text report
    lines: list[str] = []
    for _day_key, day_df in agg.groupby("day_key", sort=False):
        # Section header uses the first session time of that day,
        # e.g. "sep 26 8:02pm:" (lowercased, like the original output).
        first_time = day_df["start"].min()
        pretty_day = (
            f"{first_time.strftime('%b').lower()} {first_time.day} {fmt_time(first_time)}"
        )
        lines.append(f"{pretty_day}:")
        for _, row in day_df.iterrows():
            lines.append(
                f" - course {row.course_id}, {row.start_str}: "
                f"{row.hits} hits over {row.mins} minutes"
            )

    # Context manager so the handle is always closed (the original used
    # codecs.open — never imported here — and never closed the file).
    with open(f"cache/users/logs/{user_id}_report.txt", "w", encoding="utf-8") as report_out:
        report_out.write("\n".join(lines))

    return "\n".join(lines) if lines else "No course activity found."
# Example usage:
#   print(readable_user_summary())   # prompts for a user id, writes cache/users/logs/<id>_report.txt
if __name__ == "__main__":
@ -2851,6 +2971,7 @@ if __name__ == "__main__":
    40: ['reset user email without confirmation', set_email_skip_confirm],
    41: ['summarize users logs', summarize_student_logs],
50: ['summarize users logs 2', readable_user_summary],
    #3: ['Main index, 1 year, teachers and their classes', getAllTeachersInTerm],