From 78be1c4186d8a5446f7dfffe82b0212ec6ba3cf5 Mon Sep 17 00:00:00 2001 From: Coding with Peter Date: Wed, 22 Mar 2023 14:14:44 -0700 Subject: [PATCH] fixing vscode warnings --- courses.py | 30 +----- depricated.py | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++ localcache.py | 241 -------------------------------------------- 3 files changed, 279 insertions(+), 266 deletions(-) diff --git a/courses.py b/courses.py index 691757b..99ddaa0 100644 --- a/courses.py +++ b/courses.py @@ -966,8 +966,12 @@ def enroll_bulk_students_bydept(course_id, depts, the_term="172", cautious=1): if s: s = s[0] print("Removing: %s" % s[0]) - r1 = unenroll_student(str(course_id), stem_enrollments[j]['id']) + + ## TODO not done here + # r1 = unenroll_student(str(course_id), stem_enrollments[j]['id']) #print(r1) + + time.sleep(0.600) except Exception as e: print("Something went wrong with id %s, %s, %s" % (j, str(s), str(e))) @@ -1061,30 +1065,6 @@ def enroll_o_s_students(): print("Enrolled %i and unenrolled %i students in STEM shell" % (es,us)) print("Enrolled %i students in Orientation shell" % eo) - -########## -########## CALCULATING SEMESTER STUFF -########## - - -def summarize_proportion_online_classes(u): - # u is a "group" from the groupby fxn - #print u - if NUM_ONLY: - if ((1.0 * u.sum()) / u.size) > 0.85: return '2' - if ((1.0 * u.sum()) / u.size) < 0.15: return '0' - return '1' - else: - if ((1.0 * u.sum()) / u.size) > 0.85: return 'online-only' - if ((1.0 * u.sum()) / u.size) < 0.15: return 'f2f-only' - return 'mixed' - -def summarize_num_term_classes(u): - # u is a "group" from the groupby fxn - # term is sp18 now - #print u - return u.size - diff --git a/depricated.py b/depricated.py index c8f19c0..91a2a21 100644 --- a/depricated.py +++ b/depricated.py @@ -254,6 +254,34 @@ def serve(): """ +### courses.py + + +########## +########## CALCULATING SEMESTER STUFF +########## + + +def summarize_proportion_online_classes(u): + # u is a "group" from the groupby fxn + #print u + if NUM_ONLY: + if ((1.0 * u.sum()) / u.size) > 0.85: return '2' + if ((1.0 * u.sum()) / u.size) < 0.15: return '0' + return '1' + else: + if ((1.0 * u.sum()) / u.size) > 0.85: return 'online-only' + if ((1.0 * u.sum()) / u.size) < 0.15: return 'f2f-only' + return 'mixed' + +def summarize_num_term_classes(u): + # u is a "group" from the groupby fxn + # term is sp18 now + #print u + return u.size + + + @@ -379,6 +407,252 @@ def matchstyle(): ##### from localcache + +def user_role_and_online(): + # cross list users, classes enrolled, and their roles + global role_table, term_courses + + role_table = enrollment_file() + user_table = users_file() + user_table = user_table[ user_table['name']!="Test Student" ] + term_table = term_file() + current = term_table[lambda d: d.course_section=='2020 Spring'] # current semester from canvas + term_id = current['id'].values[0] + course_table = courses_file() # from canvas + schedule = current_schedule() # from banner... + + term_courses = course_table[lambda d: d.termid==term_id] # courses this semester ... now add a crn column + term_courses['crn'] = term_courses['code'].map( lambda x: get_crn_from_name(x) ) + # add is_online flag (for courses listed in schedule as online-only) + term_courses['is_online'] = term_courses['crn'].map( lambda x: course_is_online( x ) ) # kinda redundant + ban_can = term_courses.merge(schedule,on='crn',how='left') #join the schedule from banner to the courses from canvas + + role_table = role_table.where(lambda x: x.workflow=='active') + + # this join limits to current semester if 'inner', or all semesters if 'left' + courses_and_enrol = role_table.merge(ban_can,left_on='course_id',right_on='id', how='left') + + user_table = user_table.drop(columns="rootactid tz created vis school position gender locale public bd cc state".split(" ")) + c_e_user = courses_and_enrol.merge(user_table,left_on='user_id',right_on='id',how='left') + + + prop_online = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online')) + num_trm_crs = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs')) + stu_tch_rol = pd.DataFrame(c_e_user.groupby(['user_id'])['type'].aggregate(summarize_student_teacher_role).rename('main_role')) + user_table = user_table.merge(prop_online,left_on='id',right_index=True) + user_table = user_table.merge(num_trm_crs,left_on='id',right_index=True) + user_table = user_table.merge(stu_tch_rol,left_on='id',right_index=True) + + # remove name-less entries + user_table = user_table.where(lambda x: (x.canvasid!='') ) # math.isnan(x.canvasid)) + + return user_table + +#print user_table.query('proportion_online=="online-only"') + #print user_table.query('main_role=="teacher"') + #user_table.to_csv('canvas_data/users_online.csv') + + + """e_qry = "CREATE TABLE IF NOT EXISTS enrollments ( + id integer PRIMARY KEY, + name text NOT NULL, + begin_date text, + end_date text + );""" + +""" + +['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );', + 'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid", );', + 'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );', + 'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );', + 'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ] + + +took 6 seconds + + +select * from users where name = "Peter Howell" + +select * from users join requests on users.id = requests.userid where name = "Peter Howell" +20k rows in 1.014 seconds!! with index above + +without: killed it after 120 seconds + +select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp + + + +select courses.name, courses.code, terms.name, requests.url from courses +join terms on courses.termid = terms.id +join requests on courses.id = requests.courseid +where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039' +order by courses.code + + + + + + + + + + + +""" + + +def more_unused_xreferencing(): + """continue + + for line in lines: + r = requests_line(line.decode('utf-8'),filei) + if filei < 5: + print(r) + else: + break + filei += 1 + + + by_date_course = defaultdict( lambda: defaultdict(int) ) + by_date_user = defaultdict( lambda: defaultdict(int) ) + df_list = [] + df_list_crs = [] + users = defaultdict( lambda: defaultdict(int) ) + #by_user = {} + #by_course = {} + i = 0 + + limit = 300 + + #print(r) + date = dt.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" ) + if r['userid'] in users: + users[r['userid']]['freq'] += 1 + if users[r['userid']]['lastseen'] < date: + users[r['userid']]['lastseen'] = date + else: + users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1} + by_date_course[ r['day'] ][ r['courseid'] ] += 1 + by_date_user[ r['day'] ][ r['userid'] ] += 1 + #if r['userid'] in by_user: by_user[r['userid']] += 1 + #else: by_user[r['userid']] = 1 + #if r['courseid'] in by_course: by_course[r['courseid']] += 1 + #else: by_course[r['courseid']] = 1 + #mylog.write("by_user = " + str(by_user)) + df_list.append(pd.DataFrame(data=by_date_user)) + df_list_crs.append(pd.DataFrame(data=by_date_course)) + i += 1 + if i > limit: break + #mylog.write("by_date_course = ") + result = pd.concat(df_list, axis=1,join='outer') + result_crs = pd.concat(df_list_crs, axis=1,join='outer') + #print result_crs + mylog.write(result.to_csv()) + # get users + usersf = user_role_and_online() + merged = pd.merge(result,usersf,left_index=True,right_on='id', how='left') + #dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ") + #merged.drop(dropkeys, inplace=True, axis=1) + mglog = open(local_data_folder+'userlogs.csv','w') + mglog.write(merged.to_csv()) + + # get courses + courses = courses_file() + merged2 = pd.merge(result_crs,courses,left_index=True,right_on='id', how='left') + dropkeys = "rootactid wikiid".split(" ") + merged2.drop(dropkeys, inplace=True, axis=1) + mglogc = open(local_data_folder + 'courselogs.csv','w') + mglogc.write(merged2.to_csv()) + + # a users / freq / lastseen file + ufl = open(local_data_folder + "user_freq.json","w") + today = datetime.datetime.today() + for U in list(users.keys()): + date = users[U]['lastseen'] + users[U]['lastseen'] = date.strftime("%Y-%m-%d") + diff = today - date + users[U]['daysago'] = str(diff.days) + users[U]['hoursago'] = str(int(diff.total_seconds()/3600)) + us_frame = pd.DataFrame.from_dict(users,orient='index') + us_with_names = pd.merge(us_frame,usersf,left_index=True,right_on='id', how='left') + #dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ") + #us_with_names.drop(dropkeys, inplace=True, axis=1) + print(us_with_names) + ufl.write( json.dumps(users, indent=4) ) + ufl.close() + mglogd = open('canvas_data/user_freq.csv','w') + mglogd.write(us_with_names.to_csv()) + """ + + """ -- projects table + CREATE TABLE IF NOT EXISTS projects ( + id integer PRIMARY KEY, + name text NOT NULL, + begin_date text, + end_date text + ); + """ + pass + + +def users_p_file(): + uf = users_file() + pf = pseudonym_file() + #print pf + upf = uf.merge(pf,left_on='id',right_on='user_id',how='left') + return upf + + """ + def com_channel_dim(): + all = os.listdir(local_data_folder) + all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime) + all.reverse() + #print "sorted file list:" + #print all + for F in all: + if re.search('communication_channel_dim',F): + cc_file = F + break + print("most recent communication channel file is " + cc_file) + cc_users = [] + for line in gzip.open(local_data_folder + cc_file,'r'): + line_dict = dict(list(zip(cc_format, line.split("\t")))) + #line_dict['globalid'] = line_dict['globalid'].rstrip() + cc_users.append(line_dict) + df = pd.DataFrame(cc_users) + return df + """ + + + """grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course + FROM requests_sum1 AS r + JOIN courses AS c ON e.course_id=c.id + JOIN enrollment as e ON r.courseid=c.id + JOIN users AS u ON u.id=e.user_id + WHERE c.canvasid=%s AND e."type"="StudentEnrollment" + GROUP BY u.id,c.id,r.timeblock + ORDER BY u.sortablename DESC, r.timeblock"" % course_id + + q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course + FROM requests_sum1 AS r + JOIN courses AS c ON e.course_id=c.id + JOIN enrollment as e ON r.courseid=c.id + JOIN users AS u ON u.id=e.user_id + WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810 + ORDER BY u.sortablename DESC, r.timeblock"" % course_id + + + q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id +JOIN requests_sum1 AS r ON r.courseid=c.id +JOIN users AS u ON u.id=e.user_id +WHERE c.canvasid=%s AND e."type"="StudentEnrollment" +ORDER BY u.sortablename, r.timeblock"" % course_id""" + + + + + stem_course_id = '11015' # TODO # NO LONGER USED - SEE COURSES diff --git a/localcache.py b/localcache.py index 3da9fb0..978a9a2 100644 --- a/localcache.py +++ b/localcache.py @@ -1564,195 +1564,6 @@ def semester_enrollments(verbose=0): # Overview of student hits in a course. Return a (pandas??) table student/timeblock/hits 6 * 7 * 7 items per student. - """e_qry = "CREATE TABLE IF NOT EXISTS enrollments ( - id integer PRIMARY KEY, - name text NOT NULL, - begin_date text, - end_date text - );""" - -""" - -['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );', - 'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid", );', - 'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );', - 'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );', - 'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ] - - -took 6 seconds - - -select * from users where name = "Peter Howell" - -select * from users join requests on users.id = requests.userid where name = "Peter Howell" -20k rows in 1.014 seconds!! with index above - -without: killed it after 120 seconds - -select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp - - - -select courses.name, courses.code, terms.name, requests.url from courses -join terms on courses.termid = terms.id -join requests on courses.id = requests.courseid -where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039' -order by courses.code - - - - - - - - - - - -""" - - -def more_unused_xreferencing(): - """continue - - for line in lines: - r = requests_line(line.decode('utf-8'),filei) - if filei < 5: - print(r) - else: - break - filei += 1 - - - by_date_course = defaultdict( lambda: defaultdict(int) ) - by_date_user = defaultdict( lambda: defaultdict(int) ) - df_list = [] - df_list_crs = [] - users = defaultdict( lambda: defaultdict(int) ) - #by_user = {} - #by_course = {} - i = 0 - - limit = 300 - - #print(r) - date = dt.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" ) - if r['userid'] in users: - users[r['userid']]['freq'] += 1 - if users[r['userid']]['lastseen'] < date: - users[r['userid']]['lastseen'] = date - else: - users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1} - by_date_course[ r['day'] ][ r['courseid'] ] += 1 - by_date_user[ r['day'] ][ r['userid'] ] += 1 - #if r['userid'] in by_user: by_user[r['userid']] += 1 - #else: by_user[r['userid']] = 1 - #if r['courseid'] in by_course: by_course[r['courseid']] += 1 - #else: by_course[r['courseid']] = 1 - #mylog.write("by_user = " + str(by_user)) - df_list.append(pd.DataFrame(data=by_date_user)) - df_list_crs.append(pd.DataFrame(data=by_date_course)) - i += 1 - if i > limit: break - #mylog.write("by_date_course = ") - result = pd.concat(df_list, axis=1,join='outer') - result_crs = pd.concat(df_list_crs, axis=1,join='outer') - #print result_crs - mylog.write(result.to_csv()) - # get users - usersf = user_role_and_online() - merged = pd.merge(result,usersf,left_index=True,right_on='id', how='left') - #dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ") - #merged.drop(dropkeys, inplace=True, axis=1) - mglog = open(local_data_folder+'userlogs.csv','w') - mglog.write(merged.to_csv()) - - # get courses - courses = courses_file() - merged2 = pd.merge(result_crs,courses,left_index=True,right_on='id', how='left') - dropkeys = "rootactid wikiid".split(" ") - merged2.drop(dropkeys, inplace=True, axis=1) - mglogc = open(local_data_folder + 'courselogs.csv','w') - mglogc.write(merged2.to_csv()) - - # a users / freq / lastseen file - ufl = open(local_data_folder + "user_freq.json","w") - today = datetime.datetime.today() - for U in list(users.keys()): - date = users[U]['lastseen'] - users[U]['lastseen'] = date.strftime("%Y-%m-%d") - diff = today - date - users[U]['daysago'] = str(diff.days) - users[U]['hoursago'] = str(int(diff.total_seconds()/3600)) - us_frame = pd.DataFrame.from_dict(users,orient='index') - us_with_names = pd.merge(us_frame,usersf,left_index=True,right_on='id', how='left') - #dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ") - #us_with_names.drop(dropkeys, inplace=True, axis=1) - print(us_with_names) - ufl.write( json.dumps(users, indent=4) ) - ufl.close() - mglogd = open('canvas_data/user_freq.csv','w') - mglogd.write(us_with_names.to_csv()) - """ - - """ -- projects table - CREATE TABLE IF NOT EXISTS projects ( - id integer PRIMARY KEY, - name text NOT NULL, - begin_date text, - end_date text - ); - """ - pass - -def user_role_and_online(): - # cross list users, classes enrolled, and their roles - global role_table, term_courses - - role_table = enrollment_file() - user_table = users_file() - user_table = user_table[ user_table['name']!="Test Student" ] - term_table = term_file() - current = term_table[lambda d: d.course_section=='2020 Spring'] # current semester from canvas - term_id = current['id'].values[0] - course_table = courses_file() # from canvas - schedule = current_schedule() # from banner... - - term_courses = course_table[lambda d: d.termid==term_id] # courses this semester ... now add a crn column - term_courses['crn'] = term_courses['code'].map( lambda x: get_crn_from_name(x) ) - # add is_online flag (for courses listed in schedule as online-only) - term_courses['is_online'] = term_courses['crn'].map( lambda x: course_is_online( x ) ) # kinda redundant - ban_can = term_courses.merge(schedule,on='crn',how='left') #join the schedule from banner to the courses from canvas - - role_table = role_table.where(lambda x: x.workflow=='active') - - # this join limits to current semester if 'inner', or all semesters if 'left' - courses_and_enrol = role_table.merge(ban_can,left_on='course_id',right_on='id', how='left') - - user_table = user_table.drop(columns="rootactid tz created vis school position gender locale public bd cc state".split(" ")) - c_e_user = courses_and_enrol.merge(user_table,left_on='user_id',right_on='id',how='left') - - - prop_online = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online')) - num_trm_crs = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs')) - stu_tch_rol = pd.DataFrame(c_e_user.groupby(['user_id'])['type'].aggregate(summarize_student_teacher_role).rename('main_role')) - user_table = user_table.merge(prop_online,left_on='id',right_index=True) - user_table = user_table.merge(num_trm_crs,left_on='id',right_index=True) - user_table = user_table.merge(stu_tch_rol,left_on='id',right_index=True) - - # remove name-less entries - user_table = user_table.where(lambda x: (x.canvasid!='') ) # math.isnan(x.canvasid)) - - return user_table - -#print user_table.query('proportion_online=="online-only"') - #print user_table.query('main_role=="teacher"') - #user_table.to_csv('canvas_data/users_online.csv') - - - - def comm_channel_file(): @@ -1797,58 +1608,6 @@ def pseudonym_file(): df = pd.DataFrame(all_users) return df -def users_p_file(): - uf = users_file() - pf = pseudonym_file() - #print pf - upf = uf.merge(pf,left_on='id',right_on='user_id',how='left') - return upf - - """ - def com_channel_dim(): - all = os.listdir(local_data_folder) - all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime) - all.reverse() - #print "sorted file list:" - #print all - for F in all: - if re.search('communication_channel_dim',F): - cc_file = F - break - print("most recent communication channel file is " + cc_file) - cc_users = [] - for line in gzip.open(local_data_folder + cc_file,'r'): - line_dict = dict(list(zip(cc_format, line.split("\t")))) - #line_dict['globalid'] = line_dict['globalid'].rstrip() - cc_users.append(line_dict) - df = pd.DataFrame(cc_users) - return df - """ - - - """grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course - FROM requests_sum1 AS r - JOIN courses AS c ON e.course_id=c.id - JOIN enrollment as e ON r.courseid=c.id - JOIN users AS u ON u.id=e.user_id - WHERE c.canvasid=%s AND e."type"="StudentEnrollment" - GROUP BY u.id,c.id,r.timeblock - ORDER BY u.sortablename DESC, r.timeblock"" % course_id - - q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course - FROM requests_sum1 AS r - JOIN courses AS c ON e.course_id=c.id - JOIN enrollment as e ON r.courseid=c.id - JOIN users AS u ON u.id=e.user_id - WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810 - ORDER BY u.sortablename DESC, r.timeblock"" % course_id - - - q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id -JOIN requests_sum1 AS r ON r.courseid=c.id -JOIN users AS u ON u.id=e.user_id -WHERE c.canvasid=%s AND e."type"="StudentEnrollment" -ORDER BY u.sortablename, r.timeblock"" % course_id""" def abcd(): setup_table('index')