parent 8db592fb83
commit 2577d105f5
users.py: +121 lines
@@ -2813,6 +2813,126 @@ def summarize_student_logs(id=0):

    print(f"Done. Output written to cache/users/logs/{id}_summary.json")

def readable_user_summary():
    import re
    import ast
    import math
    import codecs  # needed for the report file written at the end
    from zoneinfo import ZoneInfo

    import pandas as pd

    COURSE_RE = re.compile(r"/courses/(\d+)(?:/|$)")
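    # Matches the numeric id in paths like "/courses/1234/assignments" or a
    # bare "/courses/1234" (hypothetical URLs, for illustration only).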

    def extract_course_id(url, links_str):
        """Return Canvas course id from URL (/courses/<id>) or from links.context."""
        if url:
            m = COURSE_RE.search(url)
            if m:
                return int(m.group(1))
        if links_str:
            try:
                d = ast.literal_eval(links_str)
                ctx = d.get("context")
                if isinstance(ctx, int):
                    return ctx
            except Exception:
                pass
        return None
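    # For example (values hypothetical):
    #   extract_course_id("/courses/1234/files", None)  -> 1234
    #   extract_course_id(None, "{'context': 987}")     -> 987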

    id = input("user id? ")
    csv_path = f"cache/users/logs/{id}.csv"
    gap_minutes = 10
    tz_name = "America/Los_Angeles"

    # Load
    df = pd.read_csv(csv_path)

    # Extract course_id
    df["course_id"] = [
        extract_course_id(u, l) for u, l in zip(df.get("url"), df.get("links"))
    ]

    # Keep only rows with a course_id
    df = df.dropna(subset=["course_id"]).copy()
    df["course_id"] = df["course_id"].astype(int)

    # Parse times (UTC) and convert to local Pacific time (user asked for "Pacific
    # standard time"; we'll convert to America/Los_Angeles, which accounts for DST
    # automatically).
    df["ts_utc"] = pd.to_datetime(df["created_at"], utc=True, errors="coerce")
    df = df.dropna(subset=["ts_utc"]).copy()
    local_tz = ZoneInfo(tz_name)
    df["ts_local"] = df["ts_utc"].dt.tz_convert(local_tz)

    # Sort for gap detection
    df = df.sort_values(["course_id", "session_id", "ts_local"])

    # Session splitting: new session if gap > gap_minutes within same course+session_id
    gap = pd.Timedelta(minutes=gap_minutes)
    # Identify gap starts within each course+session_id stream. The first row of
    # each stream has no predecessor (diff() yields NaT, which compares False
    # against any Timedelta), so flag it explicitly as a session start.
    diffs = df.groupby(["course_id", "session_id"])["ts_local"].diff()
    df["gap_new"] = diffs.isna() | diffs.gt(gap)
    # Session index within each course+session_id
    df["session_idx"] = df.groupby(["course_id", "session_id"])["gap_new"].cumsum()

    # Session key across all data
    df["session_key"] = list(zip(df["course_id"], df["session_id"], df["session_idx"]))

    # Aggregate to sessions
    agg = (
        df.groupby("session_key")
        .agg(course_id=("course_id", "first"),
             start=("ts_local", "min"),
             end=("ts_local", "max"),
             hits=("ts_local", "size"))
        .reset_index(drop=True)
    )
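    # agg now holds one row per session: course_id, start, end, and hit count.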

    # Duration in minutes (ceil; minimum 1 minute)
    dur_secs = (agg["end"] - agg["start"]).dt.total_seconds().clip(lower=0)
    agg["mins"] = [max(1, math.ceil(s / 60)) for s in dur_secs]

    # Group by local calendar day for reporting. "%#d" / "%#I" are the Windows
    # strftime no-padding flags; use "%-d" / "%-I" on Linux/macOS instead.
    agg["day"] = agg["start"].dt.strftime("%b %#d")  # e.g., "Oct 27"
    # Format start time like "5:05pm"
    agg["start_str"] = agg["start"].dt.strftime("%#I:%M%p").str.lower()

    # Order: day, then chronological within day
    agg = agg.sort_values(["start"])

    # Group by calendar date only
    agg["day_key"] = agg["start"].dt.date

    # Build text report with nice formatting
    lines: list[str] = []
    for day_key, day_df in agg.groupby("day_key", sort=False):
        # Pretty date for the section header, using the first session time
        # of that day so the header includes a time of day.
        first_time = day_df["start"].min()
        # linux
        # pretty_day = first_time.strftime("%b %-d %-I:%M%p").lower()  # e.g., "sep 26 8:02pm"
        # windows
        pretty_day = first_time.strftime("%b %#d %#I:%M%p").lower()
        lines.append(f"{pretty_day}:")
        for _, row in day_df.iterrows():
            lines.append(
                f"  - course {row.course_id}, {row.start_str}: "
                f"{row.hits} hits over {row.mins} minutes"
            )

    with codecs.open(f"cache/users/logs/{id}_report.txt", "w", "utf-8") as report_out:
        report_out.write("\n".join(lines))

    return "\n".join(lines) if lines else "No course activity found."


# Example usage:
# print(readable_user_summary())


if __name__ == "__main__":
@@ -2851,6 +2971,7 @@ if __name__ == "__main__":

    40: ['reset user email without confirmation', set_email_skip_confirm],
    41: ['summarize users logs', summarize_student_logs],
    50: ['summarize users logs 2', readable_user_summary],

    #3: ['Main index, 1 year, teachers and their classes', getAllTeachersInTerm],
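
For reference, a minimal standalone sketch (not part of the commit; toy timestamps, with the same 10-minute gap as the code above) of how the diff/cumsum session split behaves:

import pandas as pd

ts = pd.Series(pd.to_datetime([
    "2024-09-26 20:02", "2024-09-26 20:05",  # one burst of activity
    "2024-09-26 20:30", "2024-09-26 20:31",  # 25 min later: a new session
]))
gaps = ts.diff()
new_session = gaps.isna() | gaps.gt(pd.Timedelta(minutes=10))
print(new_session.cumsum().tolist())  # -> [1, 1, 2, 2]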