fixing vscode warnings

This commit is contained in:
Coding with Peter 2023-03-22 14:14:44 -07:00
parent 3c9a9ef665
commit 78be1c4186
3 changed files with 279 additions and 266 deletions

View File

@ -966,8 +966,12 @@ def enroll_bulk_students_bydept(course_id, depts, the_term="172", cautious=1):
if s:
s = s[0]
print("Removing: %s" % s[0])
r1 = unenroll_student(str(course_id), stem_enrollments[j]['id'])
## TODO not done here
# r1 = unenroll_student(str(course_id), stem_enrollments[j]['id'])
#print(r1)
time.sleep(0.600)
except Exception as e:
print("Something went wrong with id %s, %s, %s" % (j, str(s), str(e)))
@ -1061,30 +1065,6 @@ def enroll_o_s_students():
print("Enrolled %i and unenrolled %i students in STEM shell" % (es,us))
print("Enrolled %i students in Orientation shell" % eo)
##########
########## CALCULATING SEMESTER STUFF
##########
def summarize_proportion_online_classes(u):
    """Label one group of ``is_online`` flags by its share of online classes.

    Intended as a ``groupby(...).aggregate`` callback, where ``u`` is one
    group from the groupby.

    Args:
        u: a group exposing ``sum()`` and ``size`` (e.g. a pandas Series).

    Returns:
        str: when the module-level ``NUM_ONLY`` flag is truthy, '2' if more
        than 85% of the entries are online, '0' if fewer than 15% are, and
        '1' otherwise.  When ``NUM_ONLY`` is falsy the same three cases are
        reported as 'online-only', 'f2f-only' and 'mixed'.  An empty group
        gets the middle label.
    """
    if NUM_ONLY:
        online_label, f2f_label, mixed_label = '2', '0', '1'
    else:
        online_label, f2f_label, mixed_label = 'online-only', 'f2f-only', 'mixed'

    # An empty group has no ratio: depending on dtype the division either
    # raised ZeroDivisionError or produced NaN (which fell through to the
    # middle label).  Make both cases return the middle label.
    if u.size == 0:
        return mixed_label

    # Compute the share once instead of once per comparison.
    share_online = (1.0 * u.sum()) / u.size
    if share_online > 0.85:
        return online_label
    if share_online < 0.15:
        return f2f_label
    return mixed_label
def summarize_num_term_classes(u):
    """Return how many entries are in one groupby group.

    Intended as a ``groupby(...).aggregate`` callback; ``u`` is one group
    and the result is simply its ``size``.
    """
    entry_count = u.size
    return entry_count

View File

@ -254,6 +254,34 @@ def serve():
"""
### courses.py
##########
########## CALCULATING SEMESTER STUFF
##########
def summarize_proportion_online_classes(u):
    """Label one group of ``is_online`` flags by its share of online classes.

    Intended as a ``groupby(...).aggregate`` callback, where ``u`` is one
    group from the groupby.

    Args:
        u: a group exposing ``sum()`` and ``size`` (e.g. a pandas Series).

    Returns:
        str: when the module-level ``NUM_ONLY`` flag is truthy, '2' if more
        than 85% of the entries are online, '0' if fewer than 15% are, and
        '1' otherwise.  When ``NUM_ONLY`` is falsy the same three cases are
        reported as 'online-only', 'f2f-only' and 'mixed'.  An empty group
        gets the middle label.
    """
    if NUM_ONLY:
        online_label, f2f_label, mixed_label = '2', '0', '1'
    else:
        online_label, f2f_label, mixed_label = 'online-only', 'f2f-only', 'mixed'

    # An empty group has no ratio: depending on dtype the division either
    # raised ZeroDivisionError or produced NaN (which fell through to the
    # middle label).  Make both cases return the middle label.
    if u.size == 0:
        return mixed_label

    # Compute the share once instead of once per comparison.
    share_online = (1.0 * u.sum()) / u.size
    if share_online > 0.85:
        return online_label
    if share_online < 0.15:
        return f2f_label
    return mixed_label
def summarize_num_term_classes(u):
    """Return how many entries are in one groupby group.

    Intended as a ``groupby(...).aggregate`` callback; ``u`` is one group
    and the result is simply its ``size``.
    """
    entry_count = u.size
    return entry_count
@ -379,6 +407,252 @@ def matchstyle():
##### from localcache
def user_role_and_online(term_name='2020 Spring'):
    """Build a per-user table of main role and online/face-to-face course mix.

    Cross-lists users, the classes they are enrolled in, and their roles,
    then attaches three per-user summary columns: ``proportion_online``,
    ``num_term_crs`` and ``main_role``.

    Side effects:
        Rebinds the module globals ``role_table`` (active enrollments only)
        and ``term_courses`` (the selected term's courses, with ``crn`` and
        ``is_online`` columns added).

    Args:
        term_name: value of the term table's ``course_section`` column that
            selects the term.  Defaults to the previously hard-coded
            '2020 Spring'.

    Returns:
        The user table (without "Test Student" rows and without rows whose
        ``canvasid`` is the empty string) with the three summary columns
        merged in.

    Raises:
        IndexError: if no term row matches ``term_name``.
    """
    global role_table, term_courses

    role_table = enrollment_file()
    user_table = users_file()
    user_table = user_table[user_table['name'] != "Test Student"]

    term_table = term_file()
    current = term_table[term_table['course_section'] == term_name]  # selected semester from canvas
    if current.empty:
        # Same exception type the bare `.values[0]` raised, with a useful message.
        raise IndexError("no term with course_section == %r" % (term_name,))
    term_id = current['id'].values[0]

    course_table = courses_file()  # from canvas
    schedule = current_schedule()  # from banner

    # .copy() so the column assignments below write to an independent frame
    # instead of a filtered slice of course_table (SettingWithCopy).
    term_courses = course_table[course_table['termid'] == term_id].copy()
    term_courses['crn'] = term_courses['code'].map(get_crn_from_name)
    # is_online flag (for courses listed in the schedule as online-only)
    term_courses['is_online'] = term_courses['crn'].map(course_is_online)

    # join the schedule from banner to the courses from canvas
    ban_can = term_courses.merge(schedule, on='crn', how='left')

    # Boolean indexing actually drops inactive enrollments; DataFrame.where
    # kept them as all-NaN rows.
    role_table = role_table[role_table['workflow'] == 'active']

    # this join limits to current semester if 'inner', or all semesters if 'left'
    courses_and_enrol = role_table.merge(ban_can, left_on='course_id', right_on='id', how='left')

    user_table = user_table.drop(
        columns="rootactid tz created vis school position gender locale public bd cc state".split(" "))
    c_e_user = courses_and_enrol.merge(user_table, left_on='user_id', right_on='id', how='left')

    # Group once and reuse the grouping for all three summaries.
    by_user = c_e_user.groupby(['user_id'])
    prop_online = pd.DataFrame(
        by_user['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online'))
    num_trm_crs = pd.DataFrame(
        by_user['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs'))
    stu_tch_rol = pd.DataFrame(
        by_user['type'].aggregate(summarize_student_teacher_role).rename('main_role'))

    for summary in (prop_online, num_trm_crs, stu_tch_rol):
        user_table = user_table.merge(summary, left_on='id', right_index=True)

    # remove name-less entries (rows are dropped, not NaN-filled)
    user_table = user_table[user_table['canvasid'] != '']
    return user_table
#print user_table.query('proportion_online=="online-only"')
#print user_table.query('main_role=="teacher"')
#user_table.to_csv('canvas_data/users_online.csv')
"""e_qry = "CREATE TABLE IF NOT EXISTS enrollments (
id integer PRIMARY KEY,
name text NOT NULL,
begin_date text,
end_date text
);"""
"""
['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );',
'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid" );',
'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );',
'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );',
'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ]
took 6 seconds
select * from users where name = "Peter Howell"
select * from users join requests on users.id = requests.userid where name = "Peter Howell"
20k rows in 1.014 seconds!! with index above
without: killed it after 120 seconds
select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp
select courses.name, courses.code, terms.name, requests.url from courses
join terms on courses.termid = terms.id
join requests on courses.id = requests.courseid
where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039'
order by courses.code
"""
# Unused placeholder kept for reference only: the body consists of two string
# literals (an archived request-log cross-referencing script and a "projects"
# CREATE TABLE note) followed by `pass`, so calling it does nothing.
def more_unused_xreferencing():
"""continue
for line in lines:
r = requests_line(line.decode('utf-8'),filei)
if filei < 5:
print(r)
else:
break
filei += 1
by_date_course = defaultdict( lambda: defaultdict(int) )
by_date_user = defaultdict( lambda: defaultdict(int) )
df_list = []
df_list_crs = []
users = defaultdict( lambda: defaultdict(int) )
#by_user = {}
#by_course = {}
i = 0
limit = 300
#print(r)
date = dt.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
if r['userid'] in users:
users[r['userid']]['freq'] += 1
if users[r['userid']]['lastseen'] < date:
users[r['userid']]['lastseen'] = date
else:
users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1}
by_date_course[ r['day'] ][ r['courseid'] ] += 1
by_date_user[ r['day'] ][ r['userid'] ] += 1
#if r['userid'] in by_user: by_user[r['userid']] += 1
#else: by_user[r['userid']] = 1
#if r['courseid'] in by_course: by_course[r['courseid']] += 1
#else: by_course[r['courseid']] = 1
#mylog.write("by_user = " + str(by_user))
df_list.append(pd.DataFrame(data=by_date_user))
df_list_crs.append(pd.DataFrame(data=by_date_course))
i += 1
if i > limit: break
#mylog.write("by_date_course = ")
result = pd.concat(df_list, axis=1,join='outer')
result_crs = pd.concat(df_list_crs, axis=1,join='outer')
#print result_crs
mylog.write(result.to_csv())
# get users
usersf = user_role_and_online()
merged = pd.merge(result,usersf,left_index=True,right_on='id', how='left')
#dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ")
#merged.drop(dropkeys, inplace=True, axis=1)
mglog = open(local_data_folder+'userlogs.csv','w')
mglog.write(merged.to_csv())
# get courses
courses = courses_file()
merged2 = pd.merge(result_crs,courses,left_index=True,right_on='id', how='left')
dropkeys = "rootactid wikiid".split(" ")
merged2.drop(dropkeys, inplace=True, axis=1)
mglogc = open(local_data_folder + 'courselogs.csv','w')
mglogc.write(merged2.to_csv())
# a users / freq / lastseen file
ufl = open(local_data_folder + "user_freq.json","w")
today = datetime.datetime.today()
for U in list(users.keys()):
date = users[U]['lastseen']
users[U]['lastseen'] = date.strftime("%Y-%m-%d")
diff = today - date
users[U]['daysago'] = str(diff.days)
users[U]['hoursago'] = str(int(diff.total_seconds()/3600))
us_frame = pd.DataFrame.from_dict(users,orient='index')
us_with_names = pd.merge(us_frame,usersf,left_index=True,right_on='id', how='left')
#dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ")
#us_with_names.drop(dropkeys, inplace=True, axis=1)
print(us_with_names)
ufl.write( json.dumps(users, indent=4) )
ufl.close()
mglogd = open('canvas_data/user_freq.csv','w')
mglogd.write(us_with_names.to_csv())
"""
""" -- projects table
CREATE TABLE IF NOT EXISTS projects (
id integer PRIMARY KEY,
name text NOT NULL,
begin_date text,
end_date text
);
"""
pass
def users_p_file():
    """Return the users table left-joined to the pseudonym table.

    Rows from ``users_file()`` are matched to rows from ``pseudonym_file()``
    where the user's ``id`` equals the pseudonym's ``user_id``.
    """
    return users_file().merge(
        pseudonym_file(),
        how='left',
        left_on='id',
        right_on='user_id',
    )
"""
def com_channel_dim():
all = os.listdir(local_data_folder)
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
all.reverse()
#print "sorted file list:"
#print all
for F in all:
if re.search('communication_channel_dim',F):
cc_file = F
break
print("most recent communication channel file is " + cc_file)
cc_users = []
for line in gzip.open(local_data_folder + cc_file,'r'):
line_dict = dict(list(zip(cc_format, line.split("\t"))))
#line_dict['globalid'] = line_dict['globalid'].rstrip()
cc_users.append(line_dict)
df = pd.DataFrame(cc_users)
return df
"""
"""grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course
FROM requests_sum1 AS r
JOIN courses AS c ON e.course_id=c.id
JOIN enrollment as e ON r.courseid=c.id
JOIN users AS u ON u.id=e.user_id
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
GROUP BY u.id,c.id,r.timeblock
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course
FROM requests_sum1 AS r
JOIN courses AS c ON e.course_id=c.id
JOIN enrollment as e ON r.courseid=c.id
JOIN users AS u ON u.id=e.user_id
WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id
JOIN requests_sum1 AS r ON r.courseid=c.id
JOIN users AS u ON u.id=e.user_id
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
ORDER BY u.sortablename, r.timeblock"" % course_id"""
# Hard-coded course id, stored as a string; flagged TODO by the original author.
# NOTE(review): presumably the id of the STEM shell course -- confirm before reuse.
stem_course_id = '11015' # TODO
# NO LONGER USED - SEE COURSES

View File

@ -1564,195 +1564,6 @@ def semester_enrollments(verbose=0):
# Overview of student hits in a course. Return a (pandas??) table student/timeblock/hits 6 * 7 * 7 items per student.
"""e_qry = "CREATE TABLE IF NOT EXISTS enrollments (
id integer PRIMARY KEY,
name text NOT NULL,
begin_date text,
end_date text
);"""
"""
['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );',
'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid" );',
'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );',
'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );',
'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ]
took 6 seconds
select * from users where name = "Peter Howell"
select * from users join requests on users.id = requests.userid where name = "Peter Howell"
20k rows in 1.014 seconds!! with index above
without: killed it after 120 seconds
select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp
select courses.name, courses.code, terms.name, requests.url from courses
join terms on courses.termid = terms.id
join requests on courses.id = requests.courseid
where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039'
order by courses.code
"""
# Unused placeholder kept for reference only: the body consists of two string
# literals (an archived request-log cross-referencing script and a "projects"
# CREATE TABLE note) followed by `pass`, so calling it does nothing.
def more_unused_xreferencing():
"""continue
for line in lines:
r = requests_line(line.decode('utf-8'),filei)
if filei < 5:
print(r)
else:
break
filei += 1
by_date_course = defaultdict( lambda: defaultdict(int) )
by_date_user = defaultdict( lambda: defaultdict(int) )
df_list = []
df_list_crs = []
users = defaultdict( lambda: defaultdict(int) )
#by_user = {}
#by_course = {}
i = 0
limit = 300
#print(r)
date = dt.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
if r['userid'] in users:
users[r['userid']]['freq'] += 1
if users[r['userid']]['lastseen'] < date:
users[r['userid']]['lastseen'] = date
else:
users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1}
by_date_course[ r['day'] ][ r['courseid'] ] += 1
by_date_user[ r['day'] ][ r['userid'] ] += 1
#if r['userid'] in by_user: by_user[r['userid']] += 1
#else: by_user[r['userid']] = 1
#if r['courseid'] in by_course: by_course[r['courseid']] += 1
#else: by_course[r['courseid']] = 1
#mylog.write("by_user = " + str(by_user))
df_list.append(pd.DataFrame(data=by_date_user))
df_list_crs.append(pd.DataFrame(data=by_date_course))
i += 1
if i > limit: break
#mylog.write("by_date_course = ")
result = pd.concat(df_list, axis=1,join='outer')
result_crs = pd.concat(df_list_crs, axis=1,join='outer')
#print result_crs
mylog.write(result.to_csv())
# get users
usersf = user_role_and_online()
merged = pd.merge(result,usersf,left_index=True,right_on='id', how='left')
#dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ")
#merged.drop(dropkeys, inplace=True, axis=1)
mglog = open(local_data_folder+'userlogs.csv','w')
mglog.write(merged.to_csv())
# get courses
courses = courses_file()
merged2 = pd.merge(result_crs,courses,left_index=True,right_on='id', how='left')
dropkeys = "rootactid wikiid".split(" ")
merged2.drop(dropkeys, inplace=True, axis=1)
mglogc = open(local_data_folder + 'courselogs.csv','w')
mglogc.write(merged2.to_csv())
# a users / freq / lastseen file
ufl = open(local_data_folder + "user_freq.json","w")
today = datetime.datetime.today()
for U in list(users.keys()):
date = users[U]['lastseen']
users[U]['lastseen'] = date.strftime("%Y-%m-%d")
diff = today - date
users[U]['daysago'] = str(diff.days)
users[U]['hoursago'] = str(int(diff.total_seconds()/3600))
us_frame = pd.DataFrame.from_dict(users,orient='index')
us_with_names = pd.merge(us_frame,usersf,left_index=True,right_on='id', how='left')
#dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ")
#us_with_names.drop(dropkeys, inplace=True, axis=1)
print(us_with_names)
ufl.write( json.dumps(users, indent=4) )
ufl.close()
mglogd = open('canvas_data/user_freq.csv','w')
mglogd.write(us_with_names.to_csv())
"""
""" -- projects table
CREATE TABLE IF NOT EXISTS projects (
id integer PRIMARY KEY,
name text NOT NULL,
begin_date text,
end_date text
);
"""
pass
def user_role_and_online(term_name='2020 Spring'):
    """Build a per-user table of main role and online/face-to-face course mix.

    Cross-lists users, the classes they are enrolled in, and their roles,
    then attaches three per-user summary columns: ``proportion_online``,
    ``num_term_crs`` and ``main_role``.

    Side effects:
        Rebinds the module globals ``role_table`` (active enrollments only)
        and ``term_courses`` (the selected term's courses, with ``crn`` and
        ``is_online`` columns added).

    Args:
        term_name: value of the term table's ``course_section`` column that
            selects the term.  Defaults to the previously hard-coded
            '2020 Spring'.

    Returns:
        The user table (without "Test Student" rows and without rows whose
        ``canvasid`` is the empty string) with the three summary columns
        merged in.

    Raises:
        IndexError: if no term row matches ``term_name``.
    """
    global role_table, term_courses

    role_table = enrollment_file()
    user_table = users_file()
    user_table = user_table[user_table['name'] != "Test Student"]

    term_table = term_file()
    current = term_table[term_table['course_section'] == term_name]  # selected semester from canvas
    if current.empty:
        # Same exception type the bare `.values[0]` raised, with a useful message.
        raise IndexError("no term with course_section == %r" % (term_name,))
    term_id = current['id'].values[0]

    course_table = courses_file()  # from canvas
    schedule = current_schedule()  # from banner

    # .copy() so the column assignments below write to an independent frame
    # instead of a filtered slice of course_table (SettingWithCopy).
    term_courses = course_table[course_table['termid'] == term_id].copy()
    term_courses['crn'] = term_courses['code'].map(get_crn_from_name)
    # is_online flag (for courses listed in the schedule as online-only)
    term_courses['is_online'] = term_courses['crn'].map(course_is_online)

    # join the schedule from banner to the courses from canvas
    ban_can = term_courses.merge(schedule, on='crn', how='left')

    # Boolean indexing actually drops inactive enrollments; DataFrame.where
    # kept them as all-NaN rows.
    role_table = role_table[role_table['workflow'] == 'active']

    # this join limits to current semester if 'inner', or all semesters if 'left'
    courses_and_enrol = role_table.merge(ban_can, left_on='course_id', right_on='id', how='left')

    user_table = user_table.drop(
        columns="rootactid tz created vis school position gender locale public bd cc state".split(" "))
    c_e_user = courses_and_enrol.merge(user_table, left_on='user_id', right_on='id', how='left')

    # Group once and reuse the grouping for all three summaries.
    by_user = c_e_user.groupby(['user_id'])
    prop_online = pd.DataFrame(
        by_user['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online'))
    num_trm_crs = pd.DataFrame(
        by_user['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs'))
    stu_tch_rol = pd.DataFrame(
        by_user['type'].aggregate(summarize_student_teacher_role).rename('main_role'))

    for summary in (prop_online, num_trm_crs, stu_tch_rol):
        user_table = user_table.merge(summary, left_on='id', right_index=True)

    # remove name-less entries (rows are dropped, not NaN-filled)
    user_table = user_table[user_table['canvasid'] != '']
    return user_table
#print user_table.query('proportion_online=="online-only"')
#print user_table.query('main_role=="teacher"')
#user_table.to_csv('canvas_data/users_online.csv')
def comm_channel_file():
@ -1797,58 +1608,6 @@ def pseudonym_file():
df = pd.DataFrame(all_users)
return df
def users_p_file():
    """Return the users table left-joined to the pseudonym table.

    Rows from ``users_file()`` are matched to rows from ``pseudonym_file()``
    where the user's ``id`` equals the pseudonym's ``user_id``.
    """
    return users_file().merge(
        pseudonym_file(),
        how='left',
        left_on='id',
        right_on='user_id',
    )
"""
def com_channel_dim():
all = os.listdir(local_data_folder)
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
all.reverse()
#print "sorted file list:"
#print all
for F in all:
if re.search('communication_channel_dim',F):
cc_file = F
break
print("most recent communication channel file is " + cc_file)
cc_users = []
for line in gzip.open(local_data_folder + cc_file,'r'):
line_dict = dict(list(zip(cc_format, line.split("\t"))))
#line_dict['globalid'] = line_dict['globalid'].rstrip()
cc_users.append(line_dict)
df = pd.DataFrame(cc_users)
return df
"""
"""grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course
FROM requests_sum1 AS r
JOIN courses AS c ON e.course_id=c.id
JOIN enrollment as e ON r.courseid=c.id
JOIN users AS u ON u.id=e.user_id
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
GROUP BY u.id,c.id,r.timeblock
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course
FROM requests_sum1 AS r
JOIN courses AS c ON e.course_id=c.id
JOIN enrollment as e ON r.courseid=c.id
JOIN users AS u ON u.id=e.user_id
WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id
JOIN requests_sum1 AS r ON r.courseid=c.id
JOIN users AS u ON u.id=e.user_id
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
ORDER BY u.sortablename, r.timeblock"" % course_id"""
def abcd():
setup_table('index')