Merge branch 'master' of http://192.168.1.6/phowell/canvasapp
This commit is contained in:
commit
ad9e248e1b
|
|
@ -1,4 +1,4 @@
|
||||||
secrets.py
|
canvas_secrets.py
|
||||||
*.bak
|
*.bak
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
104ab42f11
|
104ab42f11
|
||||||
|
|
|
||||||
30
courses.py
30
courses.py
|
|
@ -964,8 +964,12 @@ def enroll_bulk_students_bydept(course_id, depts, the_term="172", cautious=1):
|
||||||
if s:
|
if s:
|
||||||
s = s[0]
|
s = s[0]
|
||||||
print("Removing: %s" % s[0])
|
print("Removing: %s" % s[0])
|
||||||
r1 = unenroll_student(str(course_id), stem_enrollments[j]['id'])
|
|
||||||
|
## TODO not done here
|
||||||
|
# r1 = unenroll_student(str(course_id), stem_enrollments[j]['id'])
|
||||||
#print(r1)
|
#print(r1)
|
||||||
|
|
||||||
|
|
||||||
time.sleep(0.600)
|
time.sleep(0.600)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Something went wrong with id %s, %s, %s" % (j, str(s), str(e)))
|
print("Something went wrong with id %s, %s, %s" % (j, str(s), str(e)))
|
||||||
|
|
@ -1060,30 +1064,6 @@ def enroll_o_s_students():
|
||||||
print("Enrolled %i students in Orientation shell" % eo)
|
print("Enrolled %i students in Orientation shell" % eo)
|
||||||
|
|
||||||
|
|
||||||
##########
|
|
||||||
########## CALCULATING SEMESTER STUFF
|
|
||||||
##########
|
|
||||||
|
|
||||||
|
|
||||||
def summarize_proportion_online_classes(u):
|
|
||||||
# u is a "group" from the groupby fxn
|
|
||||||
#print u
|
|
||||||
if NUM_ONLY:
|
|
||||||
if ((1.0 * u.sum()) / u.size) > 0.85: return '2'
|
|
||||||
if ((1.0 * u.sum()) / u.size) < 0.15: return '0'
|
|
||||||
return '1'
|
|
||||||
else:
|
|
||||||
if ((1.0 * u.sum()) / u.size) > 0.85: return 'online-only'
|
|
||||||
if ((1.0 * u.sum()) / u.size) < 0.15: return 'f2f-only'
|
|
||||||
return 'mixed'
|
|
||||||
|
|
||||||
def summarize_num_term_classes(u):
|
|
||||||
# u is a "group" from the groupby fxn
|
|
||||||
# term is sp18 now
|
|
||||||
#print u
|
|
||||||
return u.size
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def make_ztc_list(sem='sp20'):
|
def make_ztc_list(sem='sp20'):
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import codecs, json, requests
|
import codecs, json, requests
|
||||||
from secrets import cq_token, ph_token
|
from canvas_secrets import cq_token, ph_token
|
||||||
token = cq_token
|
token = cq_token
|
||||||
url = 'https://ilearn.gavilan.edu'
|
url = 'https://ilearn.gavilan.edu'
|
||||||
header = {'Authorization': 'Bearer ' + token}
|
header = {'Authorization': 'Bearer ' + token}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from bs4 import BeautifulSoup as bs
|
||||||
leafcount = 0
|
leafcount = 0
|
||||||
displaynames = []
|
displaynames = []
|
||||||
|
|
||||||
from secrets import cq_user, cq_pasw
|
from canvas_secrets import cq_user, cq_pasw
|
||||||
|
|
||||||
|
|
||||||
CQ_URL = "https://secure.curricunet.com/scripts/webservices/generic_meta/clients/versions/v4/gavilan.cfc"
|
CQ_URL = "https://secure.curricunet.com/scripts/webservices/generic_meta/clients/versions/v4/gavilan.cfc"
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ import pandas as pd
|
||||||
import sys, locale, re
|
import sys, locale, re
|
||||||
from pipelines import getSemesterSchedule
|
from pipelines import getSemesterSchedule
|
||||||
|
|
||||||
from secrets import cq_url, cq_user, cq_pasw
|
from canvas_secrets import cq_url, cq_user, cq_pasw
|
||||||
|
|
||||||
|
|
||||||
#sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
|
#sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
|
||||||
|
|
|
||||||
304
depricated.py
304
depricated.py
|
|
@ -4,6 +4,36 @@
|
||||||
|
|
||||||
# from pipelines - canvas data
|
# from pipelines - canvas data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
online_courses = {}
|
||||||
|
def prep_online_courses_df():
|
||||||
|
global online_courses
|
||||||
|
schedule = current_schedule() # from banner
|
||||||
|
online_courses = schedule[lambda x: x.type=='online']
|
||||||
|
|
||||||
|
def course_is_online(crn):
|
||||||
|
global online_courses
|
||||||
|
#print "looking up: " + str(crn)
|
||||||
|
#print online_courses
|
||||||
|
course = online_courses[lambda x: x.crn==int(crn)]
|
||||||
|
return len(course)
|
||||||
|
|
||||||
|
def get_crn_from_name(name):
|
||||||
|
#print "name is: "
|
||||||
|
#print(name)
|
||||||
|
m = re.search( r'(\d\d\d\d\d)', name)
|
||||||
|
if m: return int(m.groups(1)[0])
|
||||||
|
else: return 0
|
||||||
|
|
||||||
|
def get_enrlmts_for_user(user,enrollments):
|
||||||
|
#active enrollments
|
||||||
|
u_en = enrollments[ lambda x: (x['user_id'] == user) & (x['workflow']=='active') ]
|
||||||
|
return u_en[['type','course_id']]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
timestamp = nowAsStr()
|
timestamp = nowAsStr()
|
||||||
|
|
||||||
|
|
@ -224,6 +254,34 @@ def serve():
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
### courses.py
|
||||||
|
|
||||||
|
|
||||||
|
##########
|
||||||
|
########## CALCULATING SEMESTER STUFF
|
||||||
|
##########
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_proportion_online_classes(u):
|
||||||
|
# u is a "group" from the groupby fxn
|
||||||
|
#print u
|
||||||
|
if NUM_ONLY:
|
||||||
|
if ((1.0 * u.sum()) / u.size) > 0.85: return '2'
|
||||||
|
if ((1.0 * u.sum()) / u.size) < 0.15: return '0'
|
||||||
|
return '1'
|
||||||
|
else:
|
||||||
|
if ((1.0 * u.sum()) / u.size) > 0.85: return 'online-only'
|
||||||
|
if ((1.0 * u.sum()) / u.size) < 0.15: return 'f2f-only'
|
||||||
|
return 'mixed'
|
||||||
|
|
||||||
|
def summarize_num_term_classes(u):
|
||||||
|
# u is a "group" from the groupby fxn
|
||||||
|
# term is sp18 now
|
||||||
|
#print u
|
||||||
|
return u.size
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -349,6 +407,252 @@ def matchstyle():
|
||||||
|
|
||||||
##### from localcache
|
##### from localcache
|
||||||
|
|
||||||
|
|
||||||
|
def user_role_and_online():
|
||||||
|
# cross list users, classes enrolled, and their roles
|
||||||
|
global role_table, term_courses
|
||||||
|
|
||||||
|
role_table = enrollment_file()
|
||||||
|
user_table = users_file()
|
||||||
|
user_table = user_table[ user_table['name']!="Test Student" ]
|
||||||
|
term_table = term_file()
|
||||||
|
current = term_table[lambda d: d.course_section=='2020 Spring'] # current semester from canvas
|
||||||
|
term_id = current['id'].values[0]
|
||||||
|
course_table = courses_file() # from canvas
|
||||||
|
schedule = current_schedule() # from banner...
|
||||||
|
|
||||||
|
term_courses = course_table[lambda d: d.termid==term_id] # courses this semester ... now add a crn column
|
||||||
|
term_courses['crn'] = term_courses['code'].map( lambda x: get_crn_from_name(x) )
|
||||||
|
# add is_online flag (for courses listed in schedule as online-only)
|
||||||
|
term_courses['is_online'] = term_courses['crn'].map( lambda x: course_is_online( x ) ) # kinda redundant
|
||||||
|
ban_can = term_courses.merge(schedule,on='crn',how='left') #join the schedule from banner to the courses from canvas
|
||||||
|
|
||||||
|
role_table = role_table.where(lambda x: x.workflow=='active')
|
||||||
|
|
||||||
|
# this join limits to current semester if 'inner', or all semesters if 'left'
|
||||||
|
courses_and_enrol = role_table.merge(ban_can,left_on='course_id',right_on='id', how='left')
|
||||||
|
|
||||||
|
user_table = user_table.drop(columns="rootactid tz created vis school position gender locale public bd cc state".split(" "))
|
||||||
|
c_e_user = courses_and_enrol.merge(user_table,left_on='user_id',right_on='id',how='left')
|
||||||
|
|
||||||
|
|
||||||
|
prop_online = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online'))
|
||||||
|
num_trm_crs = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs'))
|
||||||
|
stu_tch_rol = pd.DataFrame(c_e_user.groupby(['user_id'])['type'].aggregate(summarize_student_teacher_role).rename('main_role'))
|
||||||
|
user_table = user_table.merge(prop_online,left_on='id',right_index=True)
|
||||||
|
user_table = user_table.merge(num_trm_crs,left_on='id',right_index=True)
|
||||||
|
user_table = user_table.merge(stu_tch_rol,left_on='id',right_index=True)
|
||||||
|
|
||||||
|
# remove name-less entries
|
||||||
|
user_table = user_table.where(lambda x: (x.canvasid!='') ) # math.isnan(x.canvasid))
|
||||||
|
|
||||||
|
return user_table
|
||||||
|
|
||||||
|
#print user_table.query('proportion_online=="online-only"')
|
||||||
|
#print user_table.query('main_role=="teacher"')
|
||||||
|
#user_table.to_csv('canvas_data/users_online.csv')
|
||||||
|
|
||||||
|
|
||||||
|
"""e_qry = "CREATE TABLE IF NOT EXISTS enrollments (
|
||||||
|
id integer PRIMARY KEY,
|
||||||
|
name text NOT NULL,
|
||||||
|
begin_date text,
|
||||||
|
end_date text
|
||||||
|
);"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );',
|
||||||
|
'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid", );',
|
||||||
|
'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );',
|
||||||
|
'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );',
|
||||||
|
'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ]
|
||||||
|
|
||||||
|
|
||||||
|
took 6 seconds
|
||||||
|
|
||||||
|
|
||||||
|
select * from users where name = "Peter Howell"
|
||||||
|
|
||||||
|
select * from users join requests on users.id = requests.userid where name = "Peter Howell"
|
||||||
|
20k rows in 1.014 seconds!! with index above
|
||||||
|
|
||||||
|
without: killed it after 120 seconds
|
||||||
|
|
||||||
|
select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
select courses.name, courses.code, terms.name, requests.url from courses
|
||||||
|
join terms on courses.termid = terms.id
|
||||||
|
join requests on courses.id = requests.courseid
|
||||||
|
where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039'
|
||||||
|
order by courses.code
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def more_unused_xreferencing():
|
||||||
|
"""continue
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
r = requests_line(line.decode('utf-8'),filei)
|
||||||
|
if filei < 5:
|
||||||
|
print(r)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
filei += 1
|
||||||
|
|
||||||
|
|
||||||
|
by_date_course = defaultdict( lambda: defaultdict(int) )
|
||||||
|
by_date_user = defaultdict( lambda: defaultdict(int) )
|
||||||
|
df_list = []
|
||||||
|
df_list_crs = []
|
||||||
|
users = defaultdict( lambda: defaultdict(int) )
|
||||||
|
#by_user = {}
|
||||||
|
#by_course = {}
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
limit = 300
|
||||||
|
|
||||||
|
#print(r)
|
||||||
|
date = dt.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
|
||||||
|
if r['userid'] in users:
|
||||||
|
users[r['userid']]['freq'] += 1
|
||||||
|
if users[r['userid']]['lastseen'] < date:
|
||||||
|
users[r['userid']]['lastseen'] = date
|
||||||
|
else:
|
||||||
|
users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1}
|
||||||
|
by_date_course[ r['day'] ][ r['courseid'] ] += 1
|
||||||
|
by_date_user[ r['day'] ][ r['userid'] ] += 1
|
||||||
|
#if r['userid'] in by_user: by_user[r['userid']] += 1
|
||||||
|
#else: by_user[r['userid']] = 1
|
||||||
|
#if r['courseid'] in by_course: by_course[r['courseid']] += 1
|
||||||
|
#else: by_course[r['courseid']] = 1
|
||||||
|
#mylog.write("by_user = " + str(by_user))
|
||||||
|
df_list.append(pd.DataFrame(data=by_date_user))
|
||||||
|
df_list_crs.append(pd.DataFrame(data=by_date_course))
|
||||||
|
i += 1
|
||||||
|
if i > limit: break
|
||||||
|
#mylog.write("by_date_course = ")
|
||||||
|
result = pd.concat(df_list, axis=1,join='outer')
|
||||||
|
result_crs = pd.concat(df_list_crs, axis=1,join='outer')
|
||||||
|
#print result_crs
|
||||||
|
mylog.write(result.to_csv())
|
||||||
|
# get users
|
||||||
|
usersf = user_role_and_online()
|
||||||
|
merged = pd.merge(result,usersf,left_index=True,right_on='id', how='left')
|
||||||
|
#dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ")
|
||||||
|
#merged.drop(dropkeys, inplace=True, axis=1)
|
||||||
|
mglog = open(local_data_folder+'userlogs.csv','w')
|
||||||
|
mglog.write(merged.to_csv())
|
||||||
|
|
||||||
|
# get courses
|
||||||
|
courses = courses_file()
|
||||||
|
merged2 = pd.merge(result_crs,courses,left_index=True,right_on='id', how='left')
|
||||||
|
dropkeys = "rootactid wikiid".split(" ")
|
||||||
|
merged2.drop(dropkeys, inplace=True, axis=1)
|
||||||
|
mglogc = open(local_data_folder + 'courselogs.csv','w')
|
||||||
|
mglogc.write(merged2.to_csv())
|
||||||
|
|
||||||
|
# a users / freq / lastseen file
|
||||||
|
ufl = open(local_data_folder + "user_freq.json","w")
|
||||||
|
today = datetime.datetime.today()
|
||||||
|
for U in list(users.keys()):
|
||||||
|
date = users[U]['lastseen']
|
||||||
|
users[U]['lastseen'] = date.strftime("%Y-%m-%d")
|
||||||
|
diff = today - date
|
||||||
|
users[U]['daysago'] = str(diff.days)
|
||||||
|
users[U]['hoursago'] = str(int(diff.total_seconds()/3600))
|
||||||
|
us_frame = pd.DataFrame.from_dict(users,orient='index')
|
||||||
|
us_with_names = pd.merge(us_frame,usersf,left_index=True,right_on='id', how='left')
|
||||||
|
#dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ")
|
||||||
|
#us_with_names.drop(dropkeys, inplace=True, axis=1)
|
||||||
|
print(us_with_names)
|
||||||
|
ufl.write( json.dumps(users, indent=4) )
|
||||||
|
ufl.close()
|
||||||
|
mglogd = open('canvas_data/user_freq.csv','w')
|
||||||
|
mglogd.write(us_with_names.to_csv())
|
||||||
|
"""
|
||||||
|
|
||||||
|
""" -- projects table
|
||||||
|
CREATE TABLE IF NOT EXISTS projects (
|
||||||
|
id integer PRIMARY KEY,
|
||||||
|
name text NOT NULL,
|
||||||
|
begin_date text,
|
||||||
|
end_date text
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def users_p_file():
|
||||||
|
uf = users_file()
|
||||||
|
pf = pseudonym_file()
|
||||||
|
#print pf
|
||||||
|
upf = uf.merge(pf,left_on='id',right_on='user_id',how='left')
|
||||||
|
return upf
|
||||||
|
|
||||||
|
"""
|
||||||
|
def com_channel_dim():
|
||||||
|
all = os.listdir(local_data_folder)
|
||||||
|
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
|
||||||
|
all.reverse()
|
||||||
|
#print "sorted file list:"
|
||||||
|
#print all
|
||||||
|
for F in all:
|
||||||
|
if re.search('communication_channel_dim',F):
|
||||||
|
cc_file = F
|
||||||
|
break
|
||||||
|
print("most recent communication channel file is " + cc_file)
|
||||||
|
cc_users = []
|
||||||
|
for line in gzip.open(local_data_folder + cc_file,'r'):
|
||||||
|
line_dict = dict(list(zip(cc_format, line.split("\t"))))
|
||||||
|
#line_dict['globalid'] = line_dict['globalid'].rstrip()
|
||||||
|
cc_users.append(line_dict)
|
||||||
|
df = pd.DataFrame(cc_users)
|
||||||
|
return df
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
"""grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course
|
||||||
|
FROM requests_sum1 AS r
|
||||||
|
JOIN courses AS c ON e.course_id=c.id
|
||||||
|
JOIN enrollment as e ON r.courseid=c.id
|
||||||
|
JOIN users AS u ON u.id=e.user_id
|
||||||
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
||||||
|
GROUP BY u.id,c.id,r.timeblock
|
||||||
|
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
||||||
|
|
||||||
|
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course
|
||||||
|
FROM requests_sum1 AS r
|
||||||
|
JOIN courses AS c ON e.course_id=c.id
|
||||||
|
JOIN enrollment as e ON r.courseid=c.id
|
||||||
|
JOIN users AS u ON u.id=e.user_id
|
||||||
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810
|
||||||
|
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
||||||
|
|
||||||
|
|
||||||
|
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id
|
||||||
|
JOIN requests_sum1 AS r ON r.courseid=c.id
|
||||||
|
JOIN users AS u ON u.id=e.user_id
|
||||||
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
||||||
|
ORDER BY u.sortablename, r.timeblock"" % course_id"""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
stem_course_id = '11015' # TODO
|
stem_course_id = '11015' # TODO
|
||||||
|
|
||||||
# NO LONGER USED - SEE COURSES
|
# NO LONGER USED - SEE COURSES
|
||||||
|
|
|
||||||
2
gpt.py
2
gpt.py
|
|
@ -1,7 +1,7 @@
|
||||||
import os, json, sys
|
import os, json, sys
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
from secrets import openai_org, openai_api_key
|
from canvas_secrets import openai_org, openai_api_key
|
||||||
|
|
||||||
|
|
||||||
openai.organization = "org-66WLoZQEtBrO42Z9S8rfd10M"
|
openai.organization = "org-66WLoZQEtBrO42Z9S8rfd10M"
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from importlib import reload
|
||||||
import server
|
import server
|
||||||
import localcache
|
import localcache
|
||||||
from server import *
|
from server import *
|
||||||
from secrets import flask_secretkey
|
from canvas_secrets import flask_secretkey
|
||||||
|
|
||||||
q = Queue()
|
q = Queue()
|
||||||
|
|
||||||
|
|
|
||||||
241
localcache.py
241
localcache.py
|
|
@ -1564,195 +1564,6 @@ def semester_enrollments(verbose=0):
|
||||||
|
|
||||||
# Overview of student hits in a course. Return a (pandas??) table student/timeblock/hits 6 * 7 * 7 items per student.
|
# Overview of student hits in a course. Return a (pandas??) table student/timeblock/hits 6 * 7 * 7 items per student.
|
||||||
|
|
||||||
"""e_qry = "CREATE TABLE IF NOT EXISTS enrollments (
|
|
||||||
id integer PRIMARY KEY,
|
|
||||||
name text NOT NULL,
|
|
||||||
begin_date text,
|
|
||||||
end_date text
|
|
||||||
);"""
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );',
|
|
||||||
'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid", );',
|
|
||||||
'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );',
|
|
||||||
'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );',
|
|
||||||
'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ]
|
|
||||||
|
|
||||||
|
|
||||||
took 6 seconds
|
|
||||||
|
|
||||||
|
|
||||||
select * from users where name = "Peter Howell"
|
|
||||||
|
|
||||||
select * from users join requests on users.id = requests.userid where name = "Peter Howell"
|
|
||||||
20k rows in 1.014 seconds!! with index above
|
|
||||||
|
|
||||||
without: killed it after 120 seconds
|
|
||||||
|
|
||||||
select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
select courses.name, courses.code, terms.name, requests.url from courses
|
|
||||||
join terms on courses.termid = terms.id
|
|
||||||
join requests on courses.id = requests.courseid
|
|
||||||
where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039'
|
|
||||||
order by courses.code
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def more_unused_xreferencing():
|
|
||||||
"""continue
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
r = requests_line(line.decode('utf-8'),filei)
|
|
||||||
if filei < 5:
|
|
||||||
print(r)
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
filei += 1
|
|
||||||
|
|
||||||
|
|
||||||
by_date_course = defaultdict( lambda: defaultdict(int) )
|
|
||||||
by_date_user = defaultdict( lambda: defaultdict(int) )
|
|
||||||
df_list = []
|
|
||||||
df_list_crs = []
|
|
||||||
users = defaultdict( lambda: defaultdict(int) )
|
|
||||||
#by_user = {}
|
|
||||||
#by_course = {}
|
|
||||||
i = 0
|
|
||||||
|
|
||||||
limit = 300
|
|
||||||
|
|
||||||
#print(r)
|
|
||||||
date = dt.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
|
|
||||||
if r['userid'] in users:
|
|
||||||
users[r['userid']]['freq'] += 1
|
|
||||||
if users[r['userid']]['lastseen'] < date:
|
|
||||||
users[r['userid']]['lastseen'] = date
|
|
||||||
else:
|
|
||||||
users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1}
|
|
||||||
by_date_course[ r['day'] ][ r['courseid'] ] += 1
|
|
||||||
by_date_user[ r['day'] ][ r['userid'] ] += 1
|
|
||||||
#if r['userid'] in by_user: by_user[r['userid']] += 1
|
|
||||||
#else: by_user[r['userid']] = 1
|
|
||||||
#if r['courseid'] in by_course: by_course[r['courseid']] += 1
|
|
||||||
#else: by_course[r['courseid']] = 1
|
|
||||||
#mylog.write("by_user = " + str(by_user))
|
|
||||||
df_list.append(pd.DataFrame(data=by_date_user))
|
|
||||||
df_list_crs.append(pd.DataFrame(data=by_date_course))
|
|
||||||
i += 1
|
|
||||||
if i > limit: break
|
|
||||||
#mylog.write("by_date_course = ")
|
|
||||||
result = pd.concat(df_list, axis=1,join='outer')
|
|
||||||
result_crs = pd.concat(df_list_crs, axis=1,join='outer')
|
|
||||||
#print result_crs
|
|
||||||
mylog.write(result.to_csv())
|
|
||||||
# get users
|
|
||||||
usersf = user_role_and_online()
|
|
||||||
merged = pd.merge(result,usersf,left_index=True,right_on='id', how='left')
|
|
||||||
#dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ")
|
|
||||||
#merged.drop(dropkeys, inplace=True, axis=1)
|
|
||||||
mglog = open(local_data_folder+'userlogs.csv','w')
|
|
||||||
mglog.write(merged.to_csv())
|
|
||||||
|
|
||||||
# get courses
|
|
||||||
courses = courses_file()
|
|
||||||
merged2 = pd.merge(result_crs,courses,left_index=True,right_on='id', how='left')
|
|
||||||
dropkeys = "rootactid wikiid".split(" ")
|
|
||||||
merged2.drop(dropkeys, inplace=True, axis=1)
|
|
||||||
mglogc = open(local_data_folder + 'courselogs.csv','w')
|
|
||||||
mglogc.write(merged2.to_csv())
|
|
||||||
|
|
||||||
# a users / freq / lastseen file
|
|
||||||
ufl = open(local_data_folder + "user_freq.json","w")
|
|
||||||
today = datetime.datetime.today()
|
|
||||||
for U in list(users.keys()):
|
|
||||||
date = users[U]['lastseen']
|
|
||||||
users[U]['lastseen'] = date.strftime("%Y-%m-%d")
|
|
||||||
diff = today - date
|
|
||||||
users[U]['daysago'] = str(diff.days)
|
|
||||||
users[U]['hoursago'] = str(int(diff.total_seconds()/3600))
|
|
||||||
us_frame = pd.DataFrame.from_dict(users,orient='index')
|
|
||||||
us_with_names = pd.merge(us_frame,usersf,left_index=True,right_on='id', how='left')
|
|
||||||
#dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ")
|
|
||||||
#us_with_names.drop(dropkeys, inplace=True, axis=1)
|
|
||||||
print(us_with_names)
|
|
||||||
ufl.write( json.dumps(users, indent=4) )
|
|
||||||
ufl.close()
|
|
||||||
mglogd = open('canvas_data/user_freq.csv','w')
|
|
||||||
mglogd.write(us_with_names.to_csv())
|
|
||||||
"""
|
|
||||||
|
|
||||||
""" -- projects table
|
|
||||||
CREATE TABLE IF NOT EXISTS projects (
|
|
||||||
id integer PRIMARY KEY,
|
|
||||||
name text NOT NULL,
|
|
||||||
begin_date text,
|
|
||||||
end_date text
|
|
||||||
);
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def user_role_and_online():
|
|
||||||
# cross list users, classes enrolled, and their roles
|
|
||||||
global role_table, term_courses
|
|
||||||
|
|
||||||
role_table = enrollment_file()
|
|
||||||
user_table = users_file()
|
|
||||||
user_table = user_table[ user_table['name']!="Test Student" ]
|
|
||||||
term_table = term_file()
|
|
||||||
current = term_table[lambda d: d.course_section=='2020 Spring'] # current semester from canvas
|
|
||||||
term_id = current['id'].values[0]
|
|
||||||
course_table = courses_file() # from canvas
|
|
||||||
schedule = current_schedule() # from banner...
|
|
||||||
|
|
||||||
term_courses = course_table[lambda d: d.termid==term_id] # courses this semester ... now add a crn column
|
|
||||||
term_courses['crn'] = term_courses['code'].map( lambda x: get_crn_from_name(x) )
|
|
||||||
# add is_online flag (for courses listed in schedule as online-only)
|
|
||||||
term_courses['is_online'] = term_courses['crn'].map( lambda x: course_is_online( x ) ) # kinda redundant
|
|
||||||
ban_can = term_courses.merge(schedule,on='crn',how='left') #join the schedule from banner to the courses from canvas
|
|
||||||
|
|
||||||
role_table = role_table.where(lambda x: x.workflow=='active')
|
|
||||||
|
|
||||||
# this join limits to current semester if 'inner', or all semesters if 'left'
|
|
||||||
courses_and_enrol = role_table.merge(ban_can,left_on='course_id',right_on='id', how='left')
|
|
||||||
|
|
||||||
user_table = user_table.drop(columns="rootactid tz created vis school position gender locale public bd cc state".split(" "))
|
|
||||||
c_e_user = courses_and_enrol.merge(user_table,left_on='user_id',right_on='id',how='left')
|
|
||||||
|
|
||||||
|
|
||||||
prop_online = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online'))
|
|
||||||
num_trm_crs = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs'))
|
|
||||||
stu_tch_rol = pd.DataFrame(c_e_user.groupby(['user_id'])['type'].aggregate(summarize_student_teacher_role).rename('main_role'))
|
|
||||||
user_table = user_table.merge(prop_online,left_on='id',right_index=True)
|
|
||||||
user_table = user_table.merge(num_trm_crs,left_on='id',right_index=True)
|
|
||||||
user_table = user_table.merge(stu_tch_rol,left_on='id',right_index=True)
|
|
||||||
|
|
||||||
# remove name-less entries
|
|
||||||
user_table = user_table.where(lambda x: (x.canvasid!='') ) # math.isnan(x.canvasid))
|
|
||||||
|
|
||||||
return user_table
|
|
||||||
|
|
||||||
#print user_table.query('proportion_online=="online-only"')
|
|
||||||
#print user_table.query('main_role=="teacher"')
|
|
||||||
#user_table.to_csv('canvas_data/users_online.csv')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def comm_channel_file():
|
def comm_channel_file():
|
||||||
|
|
@ -1797,58 +1608,6 @@ def pseudonym_file():
|
||||||
df = pd.DataFrame(all_users)
|
df = pd.DataFrame(all_users)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
def users_p_file():
|
|
||||||
uf = users_file()
|
|
||||||
pf = pseudonym_file()
|
|
||||||
#print pf
|
|
||||||
upf = uf.merge(pf,left_on='id',right_on='user_id',how='left')
|
|
||||||
return upf
|
|
||||||
|
|
||||||
"""
|
|
||||||
def com_channel_dim():
|
|
||||||
all = os.listdir(local_data_folder)
|
|
||||||
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
|
|
||||||
all.reverse()
|
|
||||||
#print "sorted file list:"
|
|
||||||
#print all
|
|
||||||
for F in all:
|
|
||||||
if re.search('communication_channel_dim',F):
|
|
||||||
cc_file = F
|
|
||||||
break
|
|
||||||
print("most recent communication channel file is " + cc_file)
|
|
||||||
cc_users = []
|
|
||||||
for line in gzip.open(local_data_folder + cc_file,'r'):
|
|
||||||
line_dict = dict(list(zip(cc_format, line.split("\t"))))
|
|
||||||
#line_dict['globalid'] = line_dict['globalid'].rstrip()
|
|
||||||
cc_users.append(line_dict)
|
|
||||||
df = pd.DataFrame(cc_users)
|
|
||||||
return df
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
"""grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course
|
|
||||||
FROM requests_sum1 AS r
|
|
||||||
JOIN courses AS c ON e.course_id=c.id
|
|
||||||
JOIN enrollment as e ON r.courseid=c.id
|
|
||||||
JOIN users AS u ON u.id=e.user_id
|
|
||||||
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
|
||||||
GROUP BY u.id,c.id,r.timeblock
|
|
||||||
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
|
||||||
|
|
||||||
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course
|
|
||||||
FROM requests_sum1 AS r
|
|
||||||
JOIN courses AS c ON e.course_id=c.id
|
|
||||||
JOIN enrollment as e ON r.courseid=c.id
|
|
||||||
JOIN users AS u ON u.id=e.user_id
|
|
||||||
WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810
|
|
||||||
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
|
||||||
|
|
||||||
|
|
||||||
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id
|
|
||||||
JOIN requests_sum1 AS r ON r.courseid=c.id
|
|
||||||
JOIN users AS u ON u.id=e.user_id
|
|
||||||
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
|
||||||
ORDER BY u.sortablename, r.timeblock"" % course_id"""
|
|
||||||
|
|
||||||
def abcd():
|
def abcd():
|
||||||
setup_table('index')
|
setup_table('index')
|
||||||
|
|
|
||||||
30
pipelines.py
30
pipelines.py
|
|
@ -11,8 +11,8 @@ import sys, shutil, hmac, hashlib, base64, schedule, time, pathlib, datetime
|
||||||
import pdb
|
import pdb
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from deepdiff import DeepDiff
|
from deepdiff import DeepDiff
|
||||||
from secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret
|
from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret
|
||||||
from secrets import instructure_url, instructure_username, instructure_private_key
|
from canvas_secrets import instructure_url, instructure_username, instructure_private_key
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -429,32 +429,6 @@ def getSemesterSchedule(short='sp21'): # I used to be current_sch
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
online_courses = {}
|
|
||||||
def prep_online_courses_df():
|
|
||||||
global online_courses
|
|
||||||
schedule = current_schedule() # from banner
|
|
||||||
online_courses = schedule[lambda x: x.type=='online']
|
|
||||||
|
|
||||||
def course_is_online(crn):
|
|
||||||
global online_courses
|
|
||||||
#print "looking up: " + str(crn)
|
|
||||||
#print online_courses
|
|
||||||
course = online_courses[lambda x: x.crn==int(crn)]
|
|
||||||
return len(course)
|
|
||||||
|
|
||||||
def get_crn_from_name(name):
|
|
||||||
#print "name is: "
|
|
||||||
#print(name)
|
|
||||||
m = re.search( r'(\d\d\d\d\d)', name)
|
|
||||||
if m: return int(m.groups(1)[0])
|
|
||||||
else: return 0
|
|
||||||
|
|
||||||
def get_enrlmts_for_user(user,enrollments):
|
|
||||||
#active enrollments
|
|
||||||
u_en = enrollments[ lambda x: (x['user_id'] == user) & (x['workflow']=='active') ]
|
|
||||||
return u_en[['type','course_id']]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
################
|
################
|
||||||
################ CANVAS DATA
|
################ CANVAS DATA
|
||||||
|
|
|
||||||
2
tasks.py
2
tasks.py
|
|
@ -17,7 +17,7 @@ import pysftp, os, datetime, requests, re, json, sqlite3, codecs, csv, sys
|
||||||
import funcy, os.path, shutil, urllib
|
import funcy, os.path, shutil, urllib
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from secrets import badgr_target, badgr_hd
|
from canvas_secrets import badgr_target, badgr_hd
|
||||||
|
|
||||||
|
|
||||||
if os.name != 'posix':
|
if os.name != 'posix':
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import re
|
||||||
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from secrets import banner_url1, banner_url2, GOO, GOO_PIN, otter_login, otter_pw
|
from canvas_secrets import banner_url1, banner_url2, GOO, GOO_PIN, otter_login, otter_pw
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue