diff --git a/pipelines.py b/pipelines.py index 4586b09..17b8119 100644 --- a/pipelines.py +++ b/pipelines.py @@ -219,7 +219,8 @@ async def canvas_data_2024(): base_url: str = os.environ["DAP_API_URL"] client_id: str = os.environ["DAP_CLIENT_ID"] client_secret: str = os.environ["DAP_CLIENT_SECRET"] - connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db" + #connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db" + connection_string: str = "postgresql://postgres:rolley34@192.168.1.192/db" desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,pseudonyms,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',') credentials = Credentials.create(client_id=client_id, client_secret=client_secret) @@ -249,7 +250,8 @@ async def setup_canvas_data_2024(): base_url: str = os.environ["DAP_API_URL"] client_id: str = os.environ["DAP_CLIENT_ID"] client_secret: str = os.environ["DAP_CLIENT_SECRET"] - connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db" + #connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db" + connection_string: str = "postgresql://postgres:rolley34@192.168.1.192/db" desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,pseudonyms,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',') credentials = Credentials.create(client_id=client_id, client_secret=client_secret) diff --git a/users.py b/users.py index 
def summarize_student_logs(id=0):
    """Summarize a student's Canvas page-view logs into a JSON report.

    Reads ``cache/users/logs/<id>.csv`` (a previously fetched activity log with
    at least the columns ``updated_at``, ``url``, ``participated`` and
    ``app_name``), groups hits by course, and writes
    ``cache/users/logs/<id>_summary.json`` containing, per course:

    - ``course_code`` (when resolvable via the local course cache)
    - ``first_seen`` / ``last_seen`` dates
    - ``daily_hits``: a per-day hit histogram
    - ``participated_count``: number of rows flagged as participation

    plus an overall ``app_name_counts`` tally across all courses.

    NOTE: the parameter name ``id`` shadows the builtin, but is kept for
    backward compatibility with the other menu callbacks in this file
    (e.g. ``track_user(id=0, ...)``).
    """
    # Heavy / project-local imports stay function-scoped, matching the file's
    # convention, so the menu loads even when pandas is unavailable.
    import pandas as pd
    import json
    from localcache2 import course_from_id

    if id == 0:
        id = input("student id> ")

    # Build both paths once; the output path was previously duplicated in the
    # open() call and the final print.
    csv_path = f"cache/users/logs/{id}.csv"
    out_path = f"cache/users/logs/{id}_summary.json"

    # Load the raw log; parse timestamps up front so .dt accessors work below.
    df = pd.read_csv(csv_path, parse_dates=["updated_at"])

    # Keep only rows whose URL names a course, and pull out the numeric id.
    df["course_id"] = df["url"].str.extract(r"/courses/(\d+)")
    df = df.dropna(subset=["course_id"])
    df["course_id"] = df["course_id"].astype(int)

    # 'participated' arrives as mixed-case text ("True"/"false"/NaN);
    # normalize it to a real boolean so .sum() counts hits.
    df["participated"] = df["participated"].astype(str).str.lower() == "true"

    # Truncate timestamps to calendar dates for the daily histogram.
    df["date"] = df["updated_at"].dt.date

    course_summaries = {}
    for course_id, group in df.groupby("course_id"):
        course_data = {}
        # Best-effort course-code lookup: a missing cache entry should not
        # abort the whole summary, so report it and carry on.
        try:
            this_course = course_from_id(course_id)
            course_data["course_code"] = this_course["course_code"]
        except Exception as e:
            print(f"didn't find course {course_id}, {e}")

        course_data["first_seen"] = str(group["date"].min())
        course_data["last_seen"] = str(group["date"].max())

        # Histogram of daily hits, keyed by ISO date string for JSON.
        daily_counts = group["date"].value_counts().sort_index()
        course_data["daily_hits"] = {str(k): int(v) for k, v in daily_counts.items()}

        # Participation count (True values only).
        course_data["participated_count"] = int(group["participated"].sum())

        course_summaries[str(course_id)] = course_data

    # Tally of app names across the whole log; NaN becomes the "None" bucket.
    app_name_counts = df["app_name"].fillna("None").value_counts().to_dict()

    output = {
        "courses": course_summaries,
        "app_name_counts": app_name_counts,
    }

    with open(out_path, "w") as f:
        json.dump(output, f, indent=2)

    print(f"Done. Output written to {out_path}")