#get_schedule('201770')


# from pipelines - canvas data


# Canvas data, download all new files
def sync_non_interactive():
    resp = do_request('/api/account/self/file/sync')
    mylog.write(json.dumps(resp, indent=4))
    #mylog.close()
    gotten = os.listdir(local_data_folder)
    wanted = []
    i = 0
    for x in resp['files']:
        filename = x['filename']
        exi = "No "
        if filename in gotten: exi = "Yes"
        else: wanted.append(x)

        print(str(i) + '.\tLocal: %s\tRemote: %s' % (exi, filename))
        i += 1
    print("I will attempt to download %i files." % len(wanted))

    #answer = input("Press enter to begin, or q to quit ")
    #if not answer == '': return

    good_count = 0
    bad_count = 0
    for W in wanted:
        print("Downloading: " + W['filename'])
        response = requests.request(method='GET', url=W['url'], stream=True)
        if response.status_code != 200:
            print('Request response went bad. Got back a %s code, meaning the request was %s' %
                  (response.status_code, response.reason))
            print('URL: ' + W['url'])
            bad_count += 1
        else:
            # Save the downloaded data
            with open(local_data_folder + W['filename'], 'wb') as fd:
                for chunk in response.iter_content(chunk_size=128):
                    fd.write(chunk)
            print("Success")
            good_count += 1
    print("Out of %i files, %i succeeded and %i failed." % (len(wanted), good_count, bad_count))


## OLD STYLE CANVAS DATA

# Get something from Canvas Data
def do_request(path):
    # Set up the request pieces
    method = 'GET'
    host = 'api.inshosteddata.com'
    apiTime = datetime.utcnow().strftime('%a, %d %b %y %H:%M:%S GMT')
    apiContentType = 'application/json'

    # HMAC message parts: method, host, content-type, content-MD5 (empty),
    # path, sorted query params (empty), timestamp, secret
    msgList = []
    msgList.append(method)
    msgList.append(host)
    msgList.append(apiContentType)
    msgList.append('')
    msgList.append(path)
    msgList.append('')
    msgList.append(apiTime)
    msgList.append(apiSecret)

    msgStr = bytes("".join("%s\n" % k for k in msgList).strip(), 'utf-8')

    sig = base64.b64encode(hmac.new(key=bytes(apiSecret, 'utf-8'), msg=msgStr, digestmod=hashlib.sha256).digest())
    sig = sig.decode('utf-8')

    headers = {}
    headers['Authorization'] = 'HMACAuth {}:{}'.format(apiKey, sig)
    headers['Date'] = apiTime
    headers['Content-type'] = apiContentType

    # Submit the request / get a response
    uri = "https://" + host + path
    print(uri)
    print(headers)
    response = requests.request(method='GET', url=uri, headers=headers, stream=True)

    # Check to make sure the request was ok
    if response.status_code != 200:
        print('Request response went bad. Got back a %s code, meaning the request was %s' %
              (response.status_code, response.reason))
    else:
        # Use the downloaded data
        jsonData = response.json()
        #print(json.dumps(jsonData, indent=4))
        return jsonData

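# A minimal usage sketch (assumes the module-level apiKey/apiSecret that the
# HMAC code above reads are already set):
#   resp = do_request('/api/account/self/file/sync')
#   print(len(resp['files']))
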
def file_doesnt_exist(name):
    # Get list of files in current directory
    files = os.listdir()

    # Filter out zero-size files and directories
    files = [f for f in files if os.path.isfile(f) and os.path.getsize(f) > 0]

    if name in files:
        print(f" * file: {name} already exists. not downloading.")
    else:
        print(f" * file: {name} downloading.")

    # Check if the file exists in the filtered list
    return not (name in files)

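# Example ('foo.csv' is hypothetical): returns True, and prints a note, when
# foo.csv is absent or zero-size in the current directory; False otherwise.
#   if file_doesnt_exist('foo.csv'): ...download it...
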
# read schedule files with an eye toward watching what's filling up
def schedule_filling():
    sem = 'spring2021'  # todo: hardcoded
    days = []
    for f in sorted(os.listdir('cache/rosters/' + sem + '/')):
        if f.endswith('.html'):
            match = re.search(r'sched_(\d\d\d\d)_(\d\d)_(\d+)\.html', f)
            if match:
                print(f)
                y = match.group(1)
                m = match.group(2)
                d = match.group(3)
                print("Schedule from %s %s %s." % (y, m, d))
                csv_sched = ssb_to_csv(open('cache/rosters/' + sem + '/' + f, 'r').read())
                jsn = to_section_list(csv_sched)
                #print(json.dumps(jsn, indent=2))
                days.append(jsn)
    day1 = days[-2]
    day2 = days[-1]
    df = jsondiff.diff(day1, day2)
    gains = defaultdict(list)

    for D in df.keys():
        if isinstance(D, int):
            #print(day1[D]['code'] + '\t' + day1[D]['crn'] + ' Before: ' + day1[D]['act'] + ' After: ' + day2[D]['act'])
            try:
                gain = int(day2[D]['act']) - int(day1[D]['act'])
                gains[gain].append(day1[D]['code'] + ' ' + day1[D]['crn'])
            except (KeyError, ValueError):
                print("No gain for " + str(D))
                #print("\t" + str(df[D]))
        else:
            print(D)
            print(df[D])
    for key, value in sorted(gains.items(), key=lambda x: x[0]):
        print("{} : {}".format(key, value))

    #print(json.dumps(gains, indent=2))

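# A minimal sketch of the jsondiff behavior the loop above relies on: diffs of
# lists come back keyed by integer index. Hypothetical two-day snapshot:
#   day1 = [{'code': 'ACCT 20', 'crn': '40039', 'act': '10'}]
#   day2 = [{'code': 'ACCT 20', 'crn': '40039', 'act': '12'}]
#   jsondiff.diff(day1, day2)  ->  {0: {'act': '12'}}
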
def argos_data():
    global dean, gp

    f2 = codecs.open('cache/enrollment_argos_fa23.csv', 'w', 'utf-8')
    writer = csv.writer(f2)
    headers = 'gp dean dept num code crn name act site'.split(' ')
    writer.writerow(headers)

    f = codecs.open('cache/sched_draft_fa23.csv', 'r', 'utf-8')
    reader = csv.reader(f, delimiter=',')
    headers = next(reader)
    for r in reader:
        d = dict(list(zip(headers, r)))
        print(d)
        my_dean = dean[d['Subj']]
        my_gp = gp[d['Subj']]
        dept = d['Subj']
        num = d['Crse No']
        code = dept + " " + num
        crn = d['CRN']
        name = d['Course Title']
        act = d['Open Seats']
        campus = d['Campus']
        session = d['Session']
        if campus == "Off Campus": site = session
        else: site = campus
        print(site)
        writer.writerow([my_gp, my_dean, dept, num, code, crn, name, act, site])

def scrape_for_db():

    global SEMESTER, gp, dean, short_sem, semester_begin, filename, filename_html
    fields = 'sem,crn,dept,num,gp,dean,code,name,teacher,type,cap,act,loc,site,date,days,time,cred,ztc'.split(',')

    """
    SEMESTER = 'Fall 2022'
    short_sem = 'fa22'
    semester_begin = strptime('08/22', '%m/%d')
    filename = 'fa22_sched.json'
    filename_html = 'fa22_sched.html'

    as_dict = scrape_schedule()
    fff = codecs.open('cache/%s_sched.sql' % filename, 'w', 'utf-8')
    fff.write("CREATE TABLE IF NOT EXISTS schedule ( id text, sem text, dept text, num text, gp text, dean text, code text, crn text, name text, teacher text, mode text, loc text, cap text, act text, site text, date text, cred text, ztc text, days text, time text);\n")
    for S in as_dict:
        parts = S['code'].split(' ')
        S['dept'] = parts[0]
        S['num'] = parts[1]
        S['gp'] = gp[parts[0]]
        S['dean'] = dean[parts[0]]
        S['sem'] = short_sem
        stmt = "INSERT INTO schedule (sem,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc) VALUES (%s);\n" % \
            ", ".join(["'" + re.sub(r"'", "", S[x]) + "'" for x in fields])
        print(stmt)
        fff.write(stmt)
    fff.write('UPDATE schedule SET site="OnlineLive" WHERE loc="ONLINE LIVE";\n')
    fff.close()
    """

    SEMESTER = 'Spring 2023 (View only)'
    short_sem = 'sp23'
    semester_begin = strptime('01/30', '%m/%d')
    filename = 'sp23_sched.json'
    filename_html = 'sp23_sched.html'

    as_dict = scrape_schedule()
    fff = codecs.open('cache/%s_sched.sql' % filename, 'w', 'utf-8')
    fff.write("CREATE TABLE IF NOT EXISTS schedule ( id text, sem text, dept text, num text, gp text, dean text, code text, crn text, name text, teacher text, mode text, loc text, cap text, act text, site text, date text, cred text, ztc text, days text, time text);\n")
    for S in as_dict:
        parts = S['code'].split(' ')
        S['dept'] = parts[0]
        S['num'] = parts[1]
        S['gp'] = gp[parts[0]]
        S['dean'] = dean[parts[0]]
        S['sem'] = short_sem
        # renamed from 'str' to avoid shadowing the builtin
        stmt = "INSERT INTO schedule (sem,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc) VALUES (%s);\n" % \
            ", ".join(["'" + re.sub(r"'", "", S[x]) + "'" for x in fields])
        print(stmt)
        fff.write(stmt)
    fff.write('UPDATE schedule SET site="OnlineLive" WHERE loc="ONLINE LIVE";\n')
    fff.close()

def todays_date_filename(short_sem):  # helper
    n = datetime.now()
    m = n.month
    if m < 10: m = "0" + str(m)
    d = n.day
    if d < 10: d = "0" + str(d)
    return "reg_" + short_sem + "_" + str(n.year) + str(m) + str(d)

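# Note: the zero-padding above reimplements what strftime already does; an
# equivalent (assumed, not the original author's choice) one-liner would be:
#   return "reg_" + short_sem + "_" + datetime.now().strftime('%Y%m%d')
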
def expand_old_semesters():

    terms = 'sp16,su16,fa16,sp17,su17,fa17,sp18,su18,fa18,sp19,su19,fa19,sp20,su20,fa20,sp21,su21,fa21,sp22,su22,fa22'.split(',')
    terms = 'sp16,su16,fa16,sp17,su17,fa17,sp18,su18,fa18,sp19,su19,fa19,sp20,su20'.split(',')  # overrides the fuller list above
    terms.reverse()

    for t in terms:
        list_latestarts(t)
        input('press return to continue.')

def argos_data_from_cvc():
    global dean, gp
    short_sem = 'fa23'

    f3 = codecs.open('cache/%s_sched.json' % short_sem, 'w', 'utf-8')
    all_courses = []

    f = codecs.open('cache/sched_draft_%s.csv' % short_sem, 'r', 'utf-8')
    reader = csv.reader(f, delimiter=',')
    headers = next(reader)
    for r in reader:
        d = dict(list(zip(headers, r)))
        #print(d)
        parts = re.search(r'^([A-Z]+)(\d+[A-Z]*)$', d['Course_Code'])
        if parts:
            dept = parts.group(1)
            num = parts.group(2)
            my_dean = dean[dept]
            my_gp = gp[dept]
            code = dept + " " + num
            crn = d['CRN']
            cred = d['Units_Credit_hours']
            days, time_start, time_end = days_times(d['Meeting_Days_and_Times'])
            times = ""
            if time_start: times = time_start + "-" + time_end
            date = remove_year(d['Start_Date']) + "-" + remove_year(d['End_Date'])
            start = remove_year(d['Start_Date'])
            end = remove_year(d['End_Date'])
            ztc = d['ZTC']
            name = d['Course_Name']
            cap = d['Class_Capacity']
            rem = d['Available_Seats']
            act = int(cap) - int(rem)
            teacher = d['Instructor_First_Name'] + " " + d['Instructor_Last_Name']
            delivery = d['Delivery']
            if delivery == "Online":
                if days:
                    site = "Online"
                    type = "online live"
                    loc = "Online Live"
                else:
                    site = "Online"
                    type = "online"
                    loc = "ONLINE"
            elif delivery == "Hybrid":
                site = d['Campus_College']
                type = "hybrid"
                loc = d['Meeting_Locations']
            else:
                site = d['Campus_College']
                type = "in-person"
                loc = d['Meeting_Locations']
            this_course = {"crn": crn, "dept": dept, "num": num, "code": code, "name": name, "teacher": teacher, "type": type, "loc": loc,
                           "cap": cap.strip(), "act": act, "site": site, "date": date, "cred": cred.strip(), "ztc": ztc, "days": days, "time": times,
                           "start": start, "end": end, "time_start": time_start, "time_end": time_end, "dean": my_dean, "gp": my_gp}
            all_courses.append(this_course)
            print(site)
            #writer.writerow([my_gp, my_dean, dept, num, code, crn, name, act, site])
    print(all_courses)
    #print(json.dumps(all_courses))
    f3.write(json.dumps(all_courses, indent=2))
    f3.close()
    expanded = list_latestarts(short_sem)

def days_times(s):
    parts = re.search(r'^([MTWThRF]+)\s?(.*?)$', s)
    if parts:
        day = parts.group(1)
        time = parts.group(2)
        parts2 = re.search(r'^(.*)\s?-\s?(.*)$', time)
        if parts2:
            time_start = parts2.group(1).strip()
            time_end = parts2.group(2).strip()
            return day, time_start, time_end
        return day, time, ''
    return '', '', ''

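# Examples (behavior follows from the regexes above):
#   days_times('MW 9:00AM-10:20AM')  ->  ('MW', '9:00AM', '10:20AM')
#   days_times('')                   ->  ('', '', '')
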
def remove_year(s):
    # e.g. '2023-08-28' -> '08/28': swap dashes for slashes, drop the leading year
    s = re.sub(r'\-', '/', s)
    if len(s) > 5: return s[5:]
    return s

def get_enrlmts_for_user(user, enrollments):
    # active enrollments
    u_en = enrollments[lambda x: (x['user_id'] == user) & (x['workflow'] == 'active')]
    return u_en[['type', 'course_id']]

### course is a list of 1-3 lists, each one being a line in the schedule's output. First one has the section.
def course_start(course):
    # todo: use this to make an early/late/short field and store semester dates w/ other constants

    start = datetime(2019, 1, 28)
    end = datetime(2019, 5, 24)

    # is it normal, early, late, winter?
    li = course[0]
    date = li[12]

    if date == '01/28-05/24':
        return 'Normal'
    if date == 'TBA':
        return 'TBA'
    if date == '01/02-01/25':
        return 'Winter'
    if date == '01/02-01/24':
        return 'Winter'

    ma = re.search(r'(\d+)\/(\d+)\-(\d+)\/(\d+)', date)
    if ma:
        # TODO do these years matter?
        mystart = datetime(2019, int(ma.group(1)), int(ma.group(2)))
        if int(ma.group(1)) > 10: mystart = datetime(2018, int(ma.group(1)), int(ma.group(2)))
        myend = datetime(2019, int(ma.group(3)), int(ma.group(4)))
        length = myend - mystart
        weeks = length.days / 7

        if mystart != start:
            if mystart < start:
                #print('Early Start', str(weeks), "weeks")
                return 'Early start'
            else:
                #print('Late Start', str(weeks), "weeks")
                return 'Late start'
        else:
            if myend > end:
                #print('Long class', str(weeks), "weeks")
                return 'Long term'
            else:
                #print('Short term', str(weeks), "weeks")
                return 'Short term'
        #return ma.group(1) + '/' + ma.group(2) + " end: " + ma.group(3) + "/" + ma.group(4)
    else:
        return "Didn't match: " + date

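# Example (only index 12 of the first row is consulted; the filler row is hypothetical):
#   row = [''] * 13
#   row[12] = '03/11-05/24'
#   course_start([row])  ->  'Late start'
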
# list files in canvas_data (online) and choose one or some to download.
def interactive():
    resp = do_request('/api/account/self/file/sync')
    mylog.write(json.dumps(resp, indent=4))
    #mylog.close()
    i = 0
    gotten = os.listdir(local_data_folder)
    for x in resp['files']:
        print(str(i) + '.\t' + x['filename'])
        i += 1
    which = input("Which files to get? (separate with commas, or say 'all') ")
    if which == 'all':
        which_a = list(range(i))  # was range(i-1), which skipped the last file
    else:
        which_a = which.split(",")
    for W in which_a:
        this_i = int(W)
        this_f = resp['files'][this_i]
        filename = this_f['filename']
        if filename in gotten: continue
        print("Downloading: " + filename)
        response = requests.request(method='GET', url=this_f['url'], stream=True)
        if response.status_code != 200:
            print('Request response went bad. Got back a %s code, meaning the request was %s' %
                  (response.status_code, response.reason))
        else:
            # Save the downloaded data
            with open(local_data_folder + filename, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=128):
                    fd.write(chunk)
            print("Success")
            """if filename.split('.')[-1] == 'gz':
                try:
                    plain_filename = 'canvas_data/' + ".".join(filename.split('.')[:-1])
                    pf = open(plain_filename, 'w')
                    with gzip.open('canvas_data/' + filename, 'rb') as f:
                        pf.write(f.read())
                except Exception as e:
                    print("Failed to ungzip. Probably too big: " + str(e))"""

# todo: where does the most recent schedule come from?

# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term=None):
    #term = input("Name of current semester file? (ex: sp18) ")
    if term is None:
        term = "sp23"  # sems[0] -- callers elsewhere pass a term explicitly

    term_in = "cache/" + term + "_sched.json"
    term_out = "cache/" + term + "_latestarts.txt"
    print("Writing output to " + term_out)
    infile = open(term_in, "r")
    outfile = open(term_out, "w")
    sched = json.loads(infile.read())
    #print(sched)
    by_date = {}
    for C in sched:
        parts = C['date'].split("-")
        start = parts[0]
        codes = C['code'].split(' ')
        dept = codes[0]
        if dept in ['JLE', 'JFT', 'CWE']:
            continue
        if re.search('TBA', start): continue
        try:
            startd = parser.parse(start)
        except Exception as e:
            print(e, "\nproblem parsing ", start)
            continue  # startd would be unbound below
        if not startd in by_date:
            by_date[startd] = []
        by_date[startd].append(C)
    for X in sorted(by_date.keys()):
        #print("Start: " + str(X))
        if len(by_date[X]) < 200:
            prettydate = X.strftime("%A, %B %d")
            print(prettydate + ": " + str(len(by_date[X])) + " courses")
            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
            for Y in by_date[X]:
                #print("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'])
                print(Y)
                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")

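# Usage: list_latestarts('fa23') reads cache/fa23_sched.json and writes
# cache/fa23_latestarts.txt; with no argument it falls back to sp23.
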
online_courses = {}

def prep_online_courses_df():
    global online_courses
    schedule = current_schedule()  # from banner
    online_courses = schedule[lambda x: x.type == 'online']


def course_is_online(crn):
    global online_courses
    #print("looking up: " + str(crn))
    #print(online_courses)
    course = online_courses[lambda x: x.crn == int(crn)]
    return len(course)


def get_crn_from_name(name):
    # pull the first 5-digit run (the CRN) out of a course name
    m = re.search(r'(\d\d\d\d\d)', name)
    if m: return int(m.groups(1)[0])
    else: return 0

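# Examples (course codes of the 'ACCT20 SP20 40039' shape used elsewhere in this file):
#   get_crn_from_name('ACCT20 SP20 40039')  ->  40039
#   get_crn_from_name('Sandbox')            ->  0
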
"""
|
|
timestamp = nowAsStr()
|
|
|
|
requestParts = [ method,
|
|
host,
|
|
'', #content Type Header
|
|
'', #content MD5 Header
|
|
path,
|
|
'', #alpha-sorted Query Params
|
|
timestamp,
|
|
apiSecret ]
|
|
|
|
#Build the request
|
|
requestMessage = '\n'.join( requestParts )
|
|
requestMessage = requestMessage.encode('ASCII')
|
|
print((requestMessage.__repr__()))
|
|
hmacObject = hmac.new(bytearray(apiSecret,'ASCII'), bytearray('','ASCII'), hashlib.sha256) #
|
|
hmacObject.update(requestMessage)
|
|
hmac_digest = hmacObject.digest()
|
|
sig = base64.b64encode(hmac_digest)
|
|
headerDict = {
|
|
'Authorization' : 'HMACAuth ' + apiKey + ':' + str(sig),
|
|
'Date' : timestamp
|
|
}
|
|
|
|
|
|
"""
|
|
|
|
# Don't know -- looks like a demo: download one table dump and gunzip it
def demo():
    resp = do_request('/api/account/self/file/sync')
    mylog.write(json.dumps(resp, indent=4))
    sample_table = resp['files'][10]
    filename = sample_table['filename']
    print(sample_table['table'])

    response = requests.request(method='GET', url=sample_table['url'], stream=True)
    if response.status_code != 200:
        print('Request response went bad. Got back a %s code, meaning the request was %s' %
              (response.status_code, response.reason))
    else:
        # Save the downloaded data
        with open(local_data_folder + filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=128):
                fd.write(chunk)
        print("Success")
        if filename.split('.')[-1] == 'gz':
            plain_filename = 'canvas_data/' + ".".join(filename.split('.')[:-1])
            pf = open(plain_filename, 'wb')  # was 'w': gzip yields bytes
            with gzip.open('canvas_data/' + filename, 'rb') as f:
                pf.write(f.read())

# How to drop columns
#columns = ['Col1', 'Col2', ...]
#df.drop(columns, inplace=True, axis=1)

# left join, one on column, one on index
#merged = pd.merge(result, users, left_index=True, right_on='id', how='left')

"""
|
|
You can call set_index on the result of the dataframe:
|
|
|
|
In [2]:
|
|
data=[['Australia',100],['France',200],['Germany',300],['America',400]]
|
|
pd.DataFrame(data,columns=['Country','Volume']).set_index('Country')
|
|
|
|
Out[2]:
|
|
Volume
|
|
Country
|
|
Australia 100
|
|
France 200
|
|
Germany 300
|
|
America 400
|
|
"""
|
|
|
|
|
|
|
|
def stats():
    # nothing seems to happen here?

    #reader = csv.DictReader(codecs.open(schedfile, 'r', 'utf-8'))
    reader = csv.DictReader(open(schedfile, 'r'))  # renamed from 'input' to avoid shadowing the builtin
    out2 = open('temp2.csv', 'w')
    clean = {}
    for r in reader:
        if r['crn']: clean[r['crn']] = r

    for c, r in list(clean.items()):
        try:
            if int(r['cap']) == 0: continue
            else: prct = (1.0 * int(r['act'])) / int(r['cap'])
            if prct < 0.01: continue
            o_str = ''
            if r['location'].strip() == 'ONLINE': o_str = 'online'
            #print(r['location'])
            date_parts = r['date'].split('-')
            start = strptime(date_parts[0], '%m/%d')
            if start > semester_begin: o_str += "\tlatestart " + date_parts[0]
            out2.write("".join([c, "\t", r['sub'], "\t", r['crs'], "\t", str(round(prct, 2)), "% full\t", o_str, "\n"]))
        except (ValueError, KeyError):
            pass

######### from tasks.py

def strip(x): return x.strip()

def esc_comma(x): return re.sub(',', '[CMA]', x)

def by_sem(x): return x['sem']

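# Examples:
#   strip('  Active ')                       ->  'Active'
#   esc_comma('Intro, Part 1')               ->  'Intro[CMA] Part 1'
#   by_sem({'sem': 'fa20', 'crn': '40001'})  ->  'fa20'
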
def parse_schedule():
    # CSV columns:
    #  0 "Course Code", 1 "Start Date", 2 "End Date", 3 Term, 4 Delivery, 5 CRN, 6 Status, 7 "Course Name",
    #  8 "Course Description", 9 "Units/Credit hours", 10 "Instructor Last Name",
    # 11 "Instructor First Name", 12 Campus/College, 13 "Meeting Days and Times",
    # 14 "Pass/No Pass available?", 15 "Class Capacity", 16 "Available Seats", 17 "Waitlist Capacity",
    # 18 "Current Waitlist Length", 19 "Meeting Locations", 20 "Course Notes", 21 ZTC

    oo = codecs.open('cache/fa20_section_notes.txt', 'w', 'utf-8')
    pp = codecs.open('cache/fa20_section_summary.txt', 'w', 'utf-8')

    u0 = "https://hhh.gavilan.edu/phowell/map/dir_api_tester.php?a=get/sections"
    existing_sections = json.loads(requests.get(u0).text)

    existing_sections = funcy.group_by(by_sem, existing_sections)
    by_sem_crn = {}

    for sem, sects in existing_sections.items():
        for s in sects:
            new_key = sem + '_' + s['crn']
            by_sem_crn[new_key] = s

    #print(json.dumps(by_sem_crn, indent=2))
    mt = open('cache/missed_instructors.txt', 'w')

    teacher_cache = {}
    count = 0
    stopat = 20000

    u1 = "https://www.gavilan.edu/_files/php/current_schedule.csv"
    with requests.Session() as s:
        download = s.get(u1)
        decoded_content = download.content.decode('utf-8')
        cr = csv.reader(decoded_content.splitlines(), delimiter=',')
        my_list = list(cr)
        #for row in my_list:
        #    print(row)
        for row in my_list:
            row = list(map(strip, row))
            row = list(map(esc_comma, row))
            if row[3] in sem_to_short:
                row[3] = sem_to_short[row[3]]
            if row[20]:
                oo.write("%s - %s \n" % (row[0], row[20]))
            summary = "%s %s %s %s \t %s %s\t %s" % (row[4], row[11], row[10], row[6], row[5], row[0], row[7])
            pp.write(summary + "\n")

            # cancelled?
            status = row[6]
            if status != "Active": continue

            # ignore if exists? TODO check if i need to update it
            this_sem_crn = row[3] + '_' + row[5]
            if this_sem_crn in by_sem_crn:
                print("\t...already uploaded...skipping %s" % this_sem_crn)
                continue

            if count > 0 and count < stopat:
                t_name = "%s %s" % (row[11], row[10])
                if t_name in teacher_cache:
                    r = {'sched_alias': 1, 'id': teacher_cache[t_name]}
                else:
                    u2 = "https://hhh.gavilan.edu/phowell/map/dir_api_tester.php?a=get/instructor/name"
                    p2 = {'inst': t_name}
                    print("\tSearching for teacher: %s" % t_name)
                    r2 = requests.post(u2, data=p2)
                    r = json.loads(r2.text)
                    if r and 'id' in r: teacher_cache[t_name] = r['id']

                if not r:
                    print("\tCouldn't locate teacher: %s %s" % (row[11], row[10]))
                    u2 = "https://hhh.gavilan.edu/phowell/map/dir_api_tester.php?a=get/instructor/fuzzyname"
                    p2 = {'inst': row[11] + " % " + row[10]}
                    print("\tFuzzy search for teacher: " + row[11] + " % " + row[10])
                    r2 = requests.post(u2, data=p2)
                    r = json.loads(r2.text)
                    if r and 'id' in r: teacher_cache[t_name] = r['id']

                if r and 'sched_alias' in r:
                    row[10] = r['id']
                    #print("\tfound teacher: " + str(r))
                    payload = {'cols': 'code,start_date,end_date,sem,delivery,crn,status,name,descr,units,teacher_id,days,pnp,location,note,ztc',
                               'vals': ','.join([row[j] for j in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 19, 20, 21]])}
                    #print(json.dumps(payload, indent=2))
                    r3 = requests.post("https://hhh.gavilan.edu/phowell/map/dir_api_tester.php?a=add/section", params=payload)
                    result = json.loads(r3.text)
                    if result and 'err' in result and result['err']:
                        print("\n*** Problem? --> %s" % result['err'])
                        mt.write("*** Problem? --> %s\n" % result['err'])
                else:
                    print("*** Still couldn't locate teacher: %s %s" % (row[11], row[10]))
                    mt.write("Couldn't locate teacher: %s %s\n" % (row[11], row[10]))
                print()
            count += 1

def certificates_gott_build_2020():
    #send_email("Peter Howell", "Peter", "phowell@gavilan.edu", "test", "this is a test")

    #g2e = user_goo_to_email()
    g2e = {}  # missing function?
    g2name = {}
    ix = {}   # everyone
    ix1 = {}  # only gott 1
    ix2 = {}  # only gott 2

    cc = csv.reader(open('cache/completers_gott1_su20.csv', 'r'), delimiter=',')

    '''for row in cc:
        # name, goo, section, x, count
        doc = DocxTemplate("cache/certificates/gott 1 template.docx")
        doc.render({'name': row[0]})
        fn = "cache/certificates/gott_1_%s." % re.sub(r'\s', '_', row[0].lower())
        print(fn + 'docx')
        try:
            goo = row[1]
            email = g2e[goo]
            print(email)
            g2name[goo] = row[0]
            ix1[goo] = fn + "pdf"
            ix[goo] = email
        except:
            print("can't find email")
        doc.save(fn + 'docx')
        #convert_to_pdf(fn + 'docx', fn + 'pdf')
    '''

    for row in csv.reader(open('cache/completers_gott2_wi24.csv', 'r'), delimiter=','):
        # name, goo, section, x, count
        doc = DocxTemplate("cache/certificates/gott 2 template.docx")
        doc.render({'name': row[0]})
        fn = "cache/certificates/gott_2_wi24_%s." % re.sub(r'\s', '_', row[0].lower())
        print(fn + 'docx')
        try:
            goo = row[1]
            email = g2e[goo]
            print(email)
            g2name[goo] = row[0]
            ix2[goo] = fn + "pdf"
            ix[goo] = email
        except KeyError:
            print("can't find email")
        doc.save(fn + 'docx')
        #convert_to_pdf(fn + 'docx', fn + 'pdf')

    #
    '''
    g1f = open('cache/gott_emails_1.csv', 'w')
    g2f = open('cache/gott_emails_2.csv', 'w')
    g12f = open('cache/gott_emails_12.csv', 'w')
    for k in ix.keys():
        if k in ix1 and not k in ix2:
            print(k + " only gott 1")
            file1 = ix1[k]
            email = ix[k]
            file1 = "https://www.gavilan.edu/staff/tlc/certificates/" + ix1[k].split("/")[-1]
            fname = g2name[k]
            g1f.write("%s, %s, %s\n" % (fname, email, file1))
        elif k in ix2 and not k in ix1:
            print(k + " only in gott 2")
            file2 = "https://www.gavilan.edu/staff/tlc/certificates/" + ix2[k].split("/")[-1]
            email = ix[k]
            fname = g2name[k]
            g2f.write("%s, %s, %s\n" % (fname, email, file2))
        elif k in ix1 and k in ix2:
            print(k + " in both")
            file1 = "https://www.gavilan.edu/staff/tlc/certificates/" + ix1[k].split("/")[-1]
            file2 = "https://www.gavilan.edu/staff/tlc/certificates/" + ix2[k].split("/")[-1]
            email = ix[k]
            fname = g2name[k]
            g12f.write("%s, %s, %s, %s\n" % (fname, email, file1, file2))
    '''

# Email experiment (Windows-only: drives Outlook via COM)
def mail_test():
    outlook = win32com.client.Dispatch('outlook.application')  # get a reference to Outlook
    mail = outlook.CreateItem(0)  # create a new mail item
    mail.To = 'executives@bigcompany.com'
    mail.Subject = 'Finance Status Report ' + datetime.today().strftime('%m/%d')

    mail.HTMLBody = '''
    <p>Hi Team,</p>

    <p>This email is to provide a status of our current sales numbers</p>

    <img src='C:\\Users\\sam\\Desktop\\EmailAuto\\paste1.png'>

    <img src='C:\\Users\\sam\\Desktop\\EmailAuto\\paste2.png'>

    <p>Thanks and have a great day!</p>
    '''
    mail.Display()

######### from curriculum.py


# open('cache/programs/programs_1.txt','r').read()

"""
|
|
SEE serve.py .... i mean ... interactive.py
|
|
def dict_generator(indict, pre=None):
|
|
pre = pre[:] if pre else []
|
|
if isinstance(indict, dict):
|
|
for key, value in indict.items():
|
|
if isinstance(value, dict):
|
|
for d in dict_generator(value, pre + [key]):
|
|
yield d
|
|
elif isinstance(value, list) or isinstance(value, tuple):
|
|
for v in value:
|
|
for d in dict_generator(v, pre + [key]):
|
|
yield d
|
|
else:
|
|
yield str(pre) + " " + str([key, value]) + "\n"
|
|
else:
|
|
yield pre + [indict]
|
|
yield str(pre) + " " + str([indict]) + "\n"
|
|
|
|
|
|
|
|
def print_dict(v, prefix='',indent=''):
|
|
if isinstance(v, dict):
|
|
return [ print_dict(v2, "{}['{}']".format(prefix, k) + "<br />", indent+" " ) for k, v2 in v.items() ]
|
|
elif isinstance(v, list):
|
|
return [ print_dict( v2, "{}[{}]".format(prefix , i) + "<br />", indent+" ") for i, v2 in enumerate(v) ]
|
|
else:
|
|
return '{} = {}'.format(prefix, repr(v)) + "\n"
|
|
|
|
|
|
def walk_file():
|
|
j = json.loads(open('cache/programs/programs_2.txt','r').read())
|
|
|
|
return print_dict(j)
|
|
|
|
from flask import Flask
|
|
from flask import request
|
|
|
|
def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
|
|
|
|
def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)
|
|
|
|
def a(t,h): return '<a href="%s">%s</a>' % (h,t)
|
|
|
|
def server_save(key,value):
|
|
codecs.open('cache/server_data.txt','a').write( "%s=%s\n" % (str(key),str(value)))
|
|
|
|
def flask_thread(q):
|
|
app = Flask(__name__)
|
|
|
|
@app.route("/")
|
|
def home():
|
|
return tag('h1','This is my server.') + "<br />" + a('want to shut down?','/sd')
|
|
|
|
@app.route("/save/<key>/<val>")
|
|
def s(key,val):
|
|
server_save(key,val)
|
|
return tag('h1','Saved.') + "<br />" + tag('p', 'Saved: %s = %s' % (str(key),str(val)))
|
|
|
|
@app.route("/crazy")
|
|
def hello():
|
|
r = '<link rel="stylesheet" href="static/bootstrap.min.css">'
|
|
r += tag('style', 'textarea { white-space:nowrap; }')
|
|
r += tag('body', \
|
|
tagc('div','container-fluid', \
|
|
tagc('div','row', \
|
|
tagc( 'div', 'col-md-6', tag('pre', walk_file() ) ) + \
|
|
tagc( 'div', 'col-md-6', 'Column 2' + a('Shut Down','/shutdown' ) ) ) ) )
|
|
|
|
|
|
|
|
return r
|
|
|
|
@app.route("/sd")
|
|
def sd():
|
|
print('SIGINT or CTRL-C detected. Exiting gracefully')
|
|
func = request.environ.get('werkzeug.server.shutdown')
|
|
if func is None:
|
|
raise RuntimeError('Not running with the Werkzeug Server')
|
|
func()
|
|
return "Server has shut down."
|
|
app.run()
|
|
|
|
|
|
from queue import Queue
|
|
|
|
q = Queue()
|
|
|
|
def serve():
|
|
import webbrowser
|
|
import threading
|
|
x = threading.Thread(target=flask_thread, args=(q,))
|
|
x.start()
|
|
webbrowser.open_new_tab("http://localhost:5000")
|
|
|
|
|
|
|
|
|
|
#s = open('cache/programs/index.json','w')
|
|
#s.write( json.dumps({'departments':sorted(list(dept_index)), 'programs':prog_index}, indent=2) )
|
|
#s.close()
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
### interactive.py

from textual.app import App, ComposeResult
from textual.containers import ScrollableContainer, Container, Horizontal
from textual.widgets import Button, Footer, Header, Static, Label
from textual.widgets import Welcome
from textual.widgets import RichLog
from textual import events

class CanvasApp(App):
    """A Textual app to manage canvas."""

    BINDINGS = [("d", "toggle_dark", "Toggle dark mode"), ("q", "quit_app", "Quit")]
    CSS_PATH = "interactive.tcss"

    current_label = ""

    # RHS side panel: Status & Saved Preferences
    current_term = ""
    last_db_pull = ""
    orientation_shell_id = ""
    stem_shell_id = ""

    def compose(self) -> ComposeResult:
        """Create child widgets for the app."""
        yield Header()
        #yield Welcome()
        yield Label(self.current_label, classes="box", id="feedback")
        yield RichLog()
        yield Footer()

    def action_toggle_dark(self) -> None:
        """An action to toggle dark mode."""
        self.dark = not self.dark

    def action_quit_app(self) -> None:
        self.exit()

    def on_button_pressed(self) -> None:
        self.exit()

    def on_key(self, event: events.Key) -> None:
        self.query_one(RichLog).write(event)
        if event.character:  # special keys have no printable character
            self.current_label += event.character
            fb = self.query_one("#feedback")
            fb.update(self.current_label)


def text_app():
    app = CanvasApp()
    app.run()

"""class HelloWorldExample(object):
|
|
def make_teacher_rel(self, tchr, clss):
|
|
with self._driver.session() as tx:
|
|
tx.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})", \
|
|
tchr=tchr, clss=clss)
|
|
|
|
def __init__(self, uri, user, password):
|
|
self._driver = GraphDatabase.driver(uri, auth=(user, password))
|
|
|
|
def close(self):
|
|
self._driver.close()
|
|
|
|
|
|
|
|
def print_greeting(self, message):
|
|
with self._driver.session() as session:
|
|
greeting = session.write_transaction(self._create_and_return_greeting, message)
|
|
print(greeting)
|
|
|
|
@staticmethod
|
|
def _create_and_return_greeting(tx, message):
|
|
result = tx.run("CREATE (a:Greeting) "
|
|
"SET a.message = $message "
|
|
"RETURN a.message + ', from node ' + id(a)", message=message)
|
|
return result.single()[0]
|
|
"""
|
|
|
|
|
|
def make_teacher_rel(g, tchr, clss):
    g.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})",
          tchr=tchr, clss=clss)


def testgraph():
    gg = Graph("bolt://localhost:7687", auth=("neo4j", "asdf"))

    #gg.run("DROP CONSTRAINT ON (tchr:Teacher) ASSERT tchr.name IS UNIQUE")
    #gg.run("DROP CONSTRAINT ON (clss:Class) ASSERT clss.name IS UNIQUE")

    #gg.run("CREATE INDEX ON :Teacher(name)")
    #gg.run("CREATE INDEX ON :Class(name)")

    stuff = json.loads(open('output/semesters/2020spring/sp20_sched.json', 'r').read())

    # make lists of unique course code+name, teacher, locations
    tch = {}
    crs = {}
    loc = {}
    sem = Node("Semester", name="sp20")
    for c in stuff:
        if not c['teacher'] in tch:
            tch[c['teacher']] = Node("Teacher", name=c['teacher'])
            gg.create(tch[c['teacher']])
        if not c['code'] in crs:
            crs[c['code']] = Node("Course section", name=c['name'], code=c['code'])
            gg.create(crs[c['code']])
        if not c['loc'] in loc:
            loc[c['loc']] = Node("Location", loc=c['loc'])
            gg.create(loc[c['loc']])
        sect = Node("Section", crn=int(c['crn']))
        gg.create(Relationship(tch[c['teacher']], "TEACHES", sect))
        gg.create(Relationship(sect, "CLASS OF", crs[c['code']]))
        gg.create(Relationship(sect, "LOCATED AT", loc[c['loc']]))

    """
    for c in stuff:
        print(c['crn'])
        q = 'CREATE (section:Section { Name: "%s", Code: "%s", Crn: "%s", Teacher: "%s" })' % \
            (c['name'], c['code'], c['crn'], c['teacher'])
        gg.run(q)
    """
    #gg = HelloWorldExample("bolt://localhost:7687", "neo4j", "asdf")
    #gg.print_greeting("hi there world")
    """
    make_teacher_rel(gg, "Peter Howell", "CSIS 42")
    make_teacher_rel(gg, "Alex Stoykov", "CSIS 42")
    make_teacher_rel(gg, "Sabrina Lawrence", "CSIS 85")
    make_teacher_rel(gg, "Peter Howell", "CSIS 85")
    """

screen = 0


def Memoize(func):
    """
    Memoize decorator
    """
    cache = {}

    @wraps(func)
    def wrapper(*args):
        if args not in cache:
            cache[args] = func(*args)
        return cache[args]
    return wrapper

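# A minimal sketch of the decorator in use (slow_square is illustrative only):
#   @Memoize
#   def slow_square(n):
#       return n * n
#   slow_square(12)   # computed, result cached under the args tuple (12,)
#   slow_square(12)   # returned from the cache
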
class MyRepl:
    description = {
        "switch ": "Switch stream. You can use either 'switch public' or 'switch mine'",
        "home ": "Show your timeline. 'home 7' will show 7 tweets.",
        "harry ": "a guy's name.",
        "homo ": "means the same.",
        "view ": "'view @mdo' will show @mdo's home.",
        "h ": "Show help.",
        "t ": "'t opps' will tweet 'opps' immediately.",
        "s ": "'s #AKB48' will search for '#AKB48' and return 5 newest tweets."
    }

    def startup(self, outfile):
        global screen  # make it self
        self.g = {}
        self.buf = {}
        screen = None
        self.enter_ary = [curses.KEY_ENTER, 10]
        self.delete_ary = [curses.KEY_BACKSPACE, curses.KEY_DC, 8, 127, 263]
        self.tab_ary = [9]
        self.up_ary = [curses.KEY_UP]
        self.down_ary = [curses.KEY_DOWN]

        # Init curses screen
        screen = curses.initscr()
        screen.keypad(1)
        curses.noecho()
        try:
            curses.start_color()
            curses.use_default_colors()
            for i in range(0, curses.COLORS):
                curses.init_pair(i + 1, i, -1)
        except curses.error:
            pass
        curses.cbreak()
        self.g['height'], self.g['width'] = screen.getmaxyx()
        #print("Width: %i" % self.g['width'])

        # Init color functions (all route through the curses_print_word method;
        # the bare-function lambdas here used to raise NameError)
        s = self
        self.white = lambda x: s.curses_print_word(x, 7)    #0)
        self.grey = lambda x: s.curses_print_word(x, 3)     #1)
        self.red = lambda x: s.curses_print_word(x, 7)      #2)
        self.green = lambda x: s.curses_print_word(x, 3)    #3)
        self.yellow = lambda x: s.curses_print_word(x, 7)   #4)
        self.blue = lambda x: s.curses_print_word(x, 3)     #5)
        self.magenta = lambda x: s.curses_print_word(x, 7)  #6)
        self.cyan = lambda x: s.curses_print_word(x, 7)     #7)
        self.colors_shuffle = [s.grey, s.red, s.green, s.yellow, s.blue, s.magenta, s.cyan]
        self.cyc = itertools.cycle(s.colors_shuffle[1:])
        self.index_cyc = itertools.cycle(range(1, 8))
        self.setup_command(outfile)

    def set_my_dict(self, d):
        self.description = d

    @Memoize
    def cycle_color(self, s):
        """
        Cycle the colors_shuffle
        """
        return next(self.cyc)

    def ascii_art(self, text):
        """
        Draw the ASCII art
        """
        fi = figlet_format(text, font='doom')
        for i in fi.split('\n'):
            self.curses_print_line(i, next(self.index_cyc))

    def close_window(self):
        """
        Close screen
        """
        global screen
        screen.keypad(0)
        curses.nocbreak()
        curses.echo()
        curses.endwin()

    def suggest(self, word):
        """
        Find suggestions
        """
        rel = []
        if not word: return rel
        word = word.lower()

        for candidate in self.description:
            ca = candidate.lower()
            #if ca.startswith(word): rel.append(candidate)
            for eachword in ca.split(" "):
                if eachword.startswith(word):
                    rel.append(candidate)
        return rel

    def curses_print_word(self, word, color_pair_code):
        """
        Print a word
        """
        global screen
        word = word.encode('utf8')
        screen.addstr(word, curses.color_pair(color_pair_code))

    def curses_print_line(self, line, color_pair_code):
        """
        Print a line, scrolling down if needed
        """
        global screen
        line = line.encode('utf8')
        y, x = screen.getyx()
        if y - self.g['height'] == -3:
            self.scroll_down(2, y, x)
            screen.addstr(y, 0, line, curses.color_pair(color_pair_code))
            self.buf[y] = line, color_pair_code
        elif y - self.g['height'] == -2:
            self.scroll_down(3, y, x)
            screen.addstr(y - 1, 0, line, curses.color_pair(color_pair_code))
            self.buf[y - 1] = line, color_pair_code
        else:
            screen.addstr(y + 1, 0, line, curses.color_pair(color_pair_code))
            self.buf[y + 1] = line, color_pair_code

    def redraw(self, start_y, end_y, fallback_y, fallback_x):
        """
        Redraw lines from buf
        """
        global screen
        for cursor in range(start_y, end_y):
            screen.move(cursor, 0)
            screen.clrtoeol()
            try:
                line, color_pair_code = self.buf[cursor]
                screen.addstr(cursor, 0, line, curses.color_pair(color_pair_code))
            except KeyError:
                pass
        screen.move(fallback_y, fallback_x)

    def scroll_down(self, noredraw, fallback_y, fallback_x):
        """
        Scroll down 1 line
        """
        global screen
        # Recreate buf
        # noredraw = n means that the screen will scroll down n-1 lines
        trip_list = heapq.nlargest(noredraw - 1, self.buf)  # was bare 'buf': NameError
        for i in self.buf:
            if i not in trip_list:
                self.buf[i] = self.buf[i + noredraw - 1]
        for j in trip_list:
            self.buf.pop(j)

        # Clear and redraw
        screen.clear()
        self.redraw(1, self.g['height'] - noredraw, fallback_y, fallback_x)

    def clear_upside(self, n, y, x):
        """
        Clear n lines upside
        """
        global screen
        for i in range(1, n + 1):
            screen.move(y - i, 0)
            screen.clrtoeol()
        screen.refresh()
        screen.move(y, x)

    def display_suggest(self, y, x, word):
        """
        Display box of suggestions
        """
        global screen
        g = self.g
        side = 2

        # Check if we need to print upside
        upside = y + 6 > int(g['height'])

        # Redraw if suggestion is not the same as the previous display
        sug = self.suggest(word)
        if sug != self.g['prev']:
            # 0-line means there are no suggestions (height = 0)
            # 3-line means there are many suggestions (height = 3)
            # 5-line means there is only one suggestion (height = 5)
            # Clear upside section
            if upside:
                # Clearing upside is a bit difficult. Here it's separated into cases.
                # now: 3 lines / previous: 0 lines
                if len(sug) > 1 and not self.g['prev']:
                    self.clear_upside(3, y, x)
                # now: 0 lines / previous: 3 lines
                elif not sug and len(g['prev']) > 1:
                    self.redraw(y - 3, y, y, x)
                # now: 3 lines / previous: 5 lines
                elif len(sug) > 1 == len(g['prev']):
                    self.redraw(y - 5, y - 3, y, x)
                    self.clear_upside(3, y, x)
                # now: 5 lines / previous: 3 lines
                elif len(sug) == 1 < len(g['prev']):
                    self.clear_upside(3, y, x)
                # now: 0 lines / previous: 5 lines
                elif not sug and len(g['prev']) == 1:
                    self.redraw(y - 5, y, y, x)
                # now: 3 lines / previous: 3 lines
                elif len(sug) == len(g['prev']) > 1:
                    self.clear_upside(3, y, x)
                # now: 5 lines / previous: 5 lines
                elif len(sug) == len(g['prev']) == 1:
                    self.clear_upside(5, y, x)
                screen.refresh()
            else:
                # Clear downside
                screen.clrtobot()
                screen.refresh()
        self.g['prev'] = sug

        if sug:
            # More than 1 suggestion
            if len(sug) > 1:
                if len(sug) > 5: sug = sug[:5]

                #needed_length = sum([len(i) + side for i in sug]) + side
                needed_length = max(self.g['width'] - 5, sum([len(i) + side for i in sug]) + side)
                print(self.g['width'])
                print(word)
                print(sug)
                print(needed_length)
                if upside:
                    win = curses.newwin(3, needed_length, y - 3, 0)
                    win.erase()
                    win.box()
                    win.refresh()
                    cur_width = side
                    for i in range(len(sug)):
                        if cur_width + len(sug[i]) > self.g['width']: break
                        screen.addstr(y - 2, cur_width, sug[i], curses.color_pair(4))
                        cur_width += len(sug[i]) + side
                        if cur_width > self.g['width']:
                            break
                else:
                    win = curses.newwin(3, needed_length, y + 1, 0)
                    win.erase()
                    win.box()
                    win.refresh()
                    cur_width = side
                    for i in range(len(sug)):
                        screen.addstr(y + 2, cur_width, sug[i], curses.color_pair(4))
                        cur_width += len(sug[i]) + side
                        if cur_width > self.g['width']:
                            break
            # Only 1 suggestion
            else:
                can = sug[0]
                if upside:
                    win = curses.newwin(5, len(self.description[can]) + 2 * side, y - 5, 0)
                    win.box()
                    win.refresh()
                    screen.addstr(y - 4, side, can, curses.color_pair(4))
                    screen.addstr(y - 2, side, self.description[can], curses.color_pair(3))
                else:
                    win = curses.newwin(5, len(self.description[can]) + 2 * side, y + 1, 0)
                    win.box()
                    win.refresh()
                    screen.addstr(y + 2, side, can, curses.color_pair(4))
                    screen.addstr(y + 4, side, self.description[can], curses.color_pair(3))

    def inputloop(self):
        """
        Main input loop
        """
        global screen
        word = ''
        screen.addstr("\n" + self.g['prefix'], curses.color_pair(7))

        while True:
            # Current position
            y, x = screen.getyx()
            # Get char
            event = screen.getch()
            try:
                char = chr(event)
            except ValueError:
                char = ''

            # Test curses_print_line
            if char == '?':
                self.buf[y] = self.g['prefix'] + '?', 0
                self.ascii_art('dtvd88')

            # TAB to complete
            elif event in self.tab_ary:
                # First tab
                try:
                    if not self.g['tab_cycle']:
                        self.g['tab_cycle'] = itertools.cycle(self.suggest(word))

                    suggestion = next(self.g['tab_cycle'])
                    # Clear current line
                    screen.move(y, len(self.g['prefix']))
                    screen.clrtoeol()
                    # Print out suggestion
                    word = suggestion
                    screen.addstr(y, len(self.g['prefix']), word)
                    self.display_suggest(y, x, word)
                    screen.move(y, len(word) + len(self.g['prefix']))
                except (StopIteration, curses.error):
                    pass

            # UP key
            elif event in self.up_ary:
                if self.g['hist']:
                    # Clear current line
                    screen.move(y, len(self.g['prefix']))
                    screen.clrtoeol()
                    # Print out previous history
                    if self.g['hist_index'] > 0 - len(self.g['hist']):
                        self.g['hist_index'] -= 1
                    word = self.g['hist'][self.g['hist_index']]
                    screen.addstr(y, len(self.g['prefix']), word)
                    self.display_suggest(y, x, word)
                    screen.move(y, len(word) + len(self.g['prefix']))

            # DOWN key
            elif event in self.down_ary:
                if self.g['hist']:
                    # Clear current line
                    screen.move(y, len(self.g['prefix']))
                    screen.clrtoeol()
                    # Print out next history entry
                    if not self.g['hist_index']:
                        self.g['hist_index'] = -1
                    if self.g['hist_index'] < -1:
                        self.g['hist_index'] += 1
                    word = self.g['hist'][self.g['hist_index']]
                    screen.addstr(y, len(self.g['prefix']), word)
                    self.display_suggest(y, x, word)
                    screen.move(y, len(word) + len(self.g['prefix']))

            # Enter key #### I should get the command out of there?
            # #### Can I register a callback function?
            elif event in self.enter_ary:
                self.g['tab_cycle'] = None
                self.g['hist_index'] = 0
                self.g['hist'].append(word)
                if word == 'q':
                    self.cleanup_command()
                    break
                self.display_suggest(y, x, '')
                screen.clrtobot()
                self.handle_command(word)

                self.buf[y] = self.g['prefix'] + word, 0
                # Touch the screen's end
                if y - self.g['height'] > -3:
                    self.scroll_down(2, y, x)
                    screen.addstr(y, 0, self.g['prefix'], curses.color_pair(7))  # show new prompt
                else:
                    screen.addstr(y + 1, 0, self.g['prefix'], curses.color_pair(7))
                word = ''

            # Delete / Backspace
            elif event in self.delete_ary:
                self.g['tab_cycle'] = None
                # Touching line start
                if x < len(self.g['prefix']) + 1:
                    screen.move(y, x)
                    word = ''
                # Middle of line
                else:
                    word = word[:-1]
                    screen.move(y, x - 1)
                    screen.clrtoeol()
                    self.display_suggest(y, x, word)
                    screen.move(y, x - 1)

            # Other keys
            else:
                self.g['tab_cycle'] = None
                # Explicitly print char
                try:
                    screen.addstr(char)
                    word += char
                    self.display_suggest(y, x, word)
                    screen.move(y, x + 1)
                except ValueError:  # got errors here when adjusting the volume...
                    pass

        # Reset
        self.close_window()

    def setup_command(self, outfile):
        self.data = open(outfile, 'a')

        self.g['prev'] = None
        self.g['tab_cycle'] = None
        self.g['prefix'] = '[gav]: '
        self.g['hist_index'] = 0
        # Load history from previous session
        try:
            o = open('completer.hist')
            self.g['hist'] = [i.strip() for i in o.readlines()]
        except IOError:
            self.g['hist'] = []

    def cleanup_command(self):
        o = open('completer.hist', 'a')
        o.write("\n".join(self.g['hist']))
        o.close()
        self.data.close()

    def handle_command(self, cmd):
        r1 = re.search(r'^n\s(.*)$', cmd)
        if r1:
            # new data collection mode
            mode = r1.group(1)
            self.g['prefix'] = "[" + mode + "]"
            self.data.write("\n\n# %s\n" % mode)
        else:
            #winsound.Beep(440, 300)
            self.data.write(cmd + "\n")
        self.data.flush()

def repl_staff():

    tch = json.loads(open('cache/teacherdata/teachers.json', 'r').read())
    newdict = {}
    for T in tch:
        newdict[T['name']] = 'teacher with id ' + T['login_id']
    c = MyRepl()

    c.set_my_dict(newdict)
    c.startup('cache/people_logs.txt')
    c.inputloop()

def repl_degs():

    tch = csv.reader(open('cache/attainment_masterlist.csv', 'r'), delimiter=",")

    newdict = {}
    num = 0
    for row in tch:
        if num == 0:
            pass  # skip the header row
        else:
            d = ' '
            if row[0]: d = row[0]
            newdict[row[4]] = d
        num += 1

    #print(newdict)
    #input('ready')
    c = MyRepl()

    c.set_my_dict(newdict)

    #c.startup('cache/g_path_cluster2020_.txt')
    #c.inputloop()


def repl():
    repl_degs()

### courses.py


##########
########## CALCULATING SEMESTER STUFF
##########

def summarize_proportion_online_classes(u):
    # u is a "group" from the groupby fxn
    if NUM_ONLY:
        if ((1.0 * u.sum()) / u.size) > 0.85: return '2'
        if ((1.0 * u.sum()) / u.size) < 0.15: return '0'
        return '1'
    else:
        if ((1.0 * u.sum()) / u.size) > 0.85: return 'online-only'
        if ((1.0 * u.sum()) / u.size) < 0.15: return 'f2f-only'
        return 'mixed'


def summarize_num_term_classes(u):
    # u is a "group" from the groupby fxn
    # term is sp18 now
    return u.size

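# Sketch of how these aggregates plug into groupby (mirrors the calls in
# user_role_and_online() below; 'is_online' holds 0/1 flags per enrollment row):
#   prop = df.groupby(['user_id'])['is_online'].aggregate(summarize_proportion_online_classes)
# A user with >85% online rows maps to 'online-only', <15% to 'f2f-only', else 'mixed'.
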
# Prompt for course id, return list of user dicts. TODO this duplicates courses.py ??
def getUsersInCourse(id=0):  # returns a list
    if not id:
        id = str(input("The Course ID? "))
    id = str(id)
    return fetch('/api/v1/courses/%s/users' % id, 0)

#### curriculum.py


def recur_look_for_leafs(item, indent=0, show=1):
    global leafcount, displaynames
    ii = indent * " "
    is_leaf = am_i_a_leaf(item)
    if type(item) == type({}):
        status = ""
        if show:
            status = "Dict"
        if is_leaf:
            leafcount += 1
            status = "Leaf Dict"
        if status:
            print("\n%s%s" % (ii, status))
        indent += 1
        ii = indent * " "
        for K, V in list(item.items()):
            if show or is_leaf:
                print("%s%s:" % (ii, K), end="")
            if K == 'displayName': displaynames.append(V)
            recur_look_for_leafs(V, indent + 1, show or is_leaf)

    elif type(item) == type([]):
        status = ""
        if show: status = "List (" + str(len(item)) + ")"
        if is_leaf: status = "Leaf List (" + str(len(item)) + ")"
        if status:
            print("\n%s%s" % (ii, status))
        indent += 1
        ii = indent * " "
        for V in item:
            recur_look_for_leafs(V, indent + 1, show or is_leaf)

    elif type(item) == type("abc"):
        if show: print("%s%s" % (' ', item))
    elif type(item) == type(55):
        if show: print("%s%i" % (' ', item))
    elif type(item) == type(5.5):
        if show: print("%s%f" % (' ', item))
    elif type(item) == type(False):
        if show: print("%s%s" % (' ', str(item)))

def am_i_a_leaf(item):
    # A container is a leaf if none of its values are dicts or lists;
    # scalars are always leaves.
    if type(item) == type({}):
        for K, V in list(item.items()):
            if type(V) == type({}) or type(V) == type([]):
                return False
    elif type(item) == type([]):
        for V in item:
            if type(V) == type({}) or type(V) == type([]):
                return False
    elif type(item) == type("abc"): return True
    elif type(item) == type(55): return True
    elif type(item) == type(5.5): return True
    elif type(item) == type(False): return True
    return True

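# Examples:
#   am_i_a_leaf({'a': 1, 'b': 'x'})  ->  True   (no nested containers)
#   am_i_a_leaf({'a': {'b': 1}})     ->  False
#   am_i_a_leaf('abc')               ->  True
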
def sampleclass():
    theclass = json.loads(codecs.open('cache/courses/samplecourse.json', 'r', 'utf-8').read())
    #print(json.dumps(theclass, indent=2))
    recur_look_for_leafs(theclass)
    print(leafcount)
    print(sorted(displaynames))

def matchstyle():
    theclass = json.loads(codecs.open('cache/courses/samplecourse.json', 'r', 'utf-8').read())
    print("\n".join(recur_matcher(theclass)))


# 7: ['pattern matcher style', matchstyle],
# 8: ['pattern matcher - test on all classes', match_style_test],

##### from localcache


def user_role_and_online():
    # cross-list users, classes enrolled, and their roles
    global role_table, term_courses

    role_table = enrollment_file()
    user_table = users_file()
    user_table = user_table[user_table['name'] != "Test Student"]
    term_table = term_file()
    current = term_table[lambda d: d.course_section == '2020 Spring']  # current semester from canvas
    term_id = current['id'].values[0]
    course_table = courses_file()  # from canvas
    schedule = current_schedule()  # from banner...

    term_courses = course_table[lambda d: d.termid == term_id]  # courses this semester ... now add a crn column
    term_courses['crn'] = term_courses['code'].map(lambda x: get_crn_from_name(x))
    # add is_online flag (for courses listed in schedule as online-only)
    term_courses['is_online'] = term_courses['crn'].map(lambda x: course_is_online(x))  # kinda redundant
    ban_can = term_courses.merge(schedule, on='crn', how='left')  # join the schedule from banner to the courses from canvas

    role_table = role_table.where(lambda x: x.workflow == 'active')

    # this join limits to current semester if 'inner', or all semesters if 'left'
    courses_and_enrol = role_table.merge(ban_can, left_on='course_id', right_on='id', how='left')

    user_table = user_table.drop(columns="rootactid tz created vis school position gender locale public bd cc state".split(" "))
    c_e_user = courses_and_enrol.merge(user_table, left_on='user_id', right_on='id', how='left')

    prop_online = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online'))
    num_trm_crs = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs'))
    stu_tch_rol = pd.DataFrame(c_e_user.groupby(['user_id'])['type'].aggregate(summarize_student_teacher_role).rename('main_role'))
    user_table = user_table.merge(prop_online, left_on='id', right_index=True)
    user_table = user_table.merge(num_trm_crs, left_on='id', right_index=True)
    user_table = user_table.merge(stu_tch_rol, left_on='id', right_index=True)

    # remove name-less entries
    user_table = user_table.where(lambda x: (x.canvasid != ''))  # math.isnan(x.canvasid)

    return user_table

    #print(user_table.query('proportion_online=="online-only"'))
    #print(user_table.query('main_role=="teacher"'))
    #user_table.to_csv('canvas_data/users_online.csv')

"""e_qry = "CREATE TABLE IF NOT EXISTS enrollments (
|
|
id integer PRIMARY KEY,
|
|
name text NOT NULL,
|
|
begin_date text,
|
|
end_date text
|
|
);"""
|
|
|
|
"""
|
|
|
|
['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );',
|
|
'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid", );',
|
|
'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );',
|
|
'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );',
|
|
'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ]
|
|
|
|
|
|
took 6 seconds
|
|
|
|
|
|
select * from users where name = "Peter Howell"
|
|
|
|
select * from users join requests on users.id = requests.userid where name = "Peter Howell"
|
|
20k rows in 1.014 seconds!! with index above
|
|
|
|
without: killed it after 120 seconds
|
|
|
|
select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp
|
|
|
|
|
|
|
|
select courses.name, courses.code, terms.name, requests.url from courses
|
|
join terms on courses.termid = terms.id
|
|
join requests on courses.id = requests.courseid
|
|
where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039'
|
|
order by courses.code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
def more_unused_xreferencing():
    """continue

    for line in lines:
        r = requests_line(line.decode('utf-8'), filei)
        if filei < 5:
            print(r)
        else:
            break
        filei += 1


    by_date_course = defaultdict(lambda: defaultdict(int))
    by_date_user = defaultdict(lambda: defaultdict(int))
    df_list = []
    df_list_crs = []
    users = defaultdict(lambda: defaultdict(int))
    #by_user = {}
    #by_course = {}
    i = 0

    limit = 300

    #print(r)
    date = dt.strptime(r['timestamp'], "%Y-%m-%d %H:%M:%S.%f")
    if r['userid'] in users:
        users[r['userid']]['freq'] += 1
        if users[r['userid']]['lastseen'] < date:
            users[r['userid']]['lastseen'] = date
    else:
        users[r['userid']] = {"id": r['userid'], "lastseen": date, "freq": 1}
    by_date_course[r['day']][r['courseid']] += 1
    by_date_user[r['day']][r['userid']] += 1
    #if r['userid'] in by_user: by_user[r['userid']] += 1
    #else: by_user[r['userid']] = 1
    #if r['courseid'] in by_course: by_course[r['courseid']] += 1
    #else: by_course[r['courseid']] = 1
    #mylog.write("by_user = " + str(by_user))
    df_list.append(pd.DataFrame(data=by_date_user))
    df_list_crs.append(pd.DataFrame(data=by_date_course))
    i += 1
    if i > limit: break
    #mylog.write("by_date_course = ")
    result = pd.concat(df_list, axis=1, join='outer')
    result_crs = pd.concat(df_list_crs, axis=1, join='outer')
    #print(result_crs)
    mylog.write(result.to_csv())

    # get users
    usersf = user_role_and_online()
    merged = pd.merge(result, usersf, left_index=True, right_on='id', how='left')
    #dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ")
    #merged.drop(dropkeys, inplace=True, axis=1)
    mglog = open(local_data_folder + 'userlogs.csv', 'w')
    mglog.write(merged.to_csv())

    # get courses
    courses = courses_file()
    merged2 = pd.merge(result_crs, courses, left_index=True, right_on='id', how='left')
    dropkeys = "rootactid wikiid".split(" ")
    merged2.drop(dropkeys, inplace=True, axis=1)
    mglogc = open(local_data_folder + 'courselogs.csv', 'w')
    mglogc.write(merged2.to_csv())

    # a users / freq / lastseen file
    ufl = open(local_data_folder + "user_freq.json", "w")
    today = datetime.datetime.today()
    for U in list(users.keys()):
        date = users[U]['lastseen']
        users[U]['lastseen'] = date.strftime("%Y-%m-%d")
        diff = today - date
        users[U]['daysago'] = str(diff.days)
        users[U]['hoursago'] = str(int(diff.total_seconds() / 3600))
    us_frame = pd.DataFrame.from_dict(users, orient='index')
    us_with_names = pd.merge(us_frame, usersf, left_index=True, right_on='id', how='left')
    #dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ")
    #us_with_names.drop(dropkeys, inplace=True, axis=1)
    print(us_with_names)
    ufl.write(json.dumps(users, indent=4))
    ufl.close()
    mglogd = open('canvas_data/user_freq.csv', 'w')
    mglogd.write(us_with_names.to_csv())
    """

    """ -- projects table
    CREATE TABLE IF NOT EXISTS projects (
        id integer PRIMARY KEY,
        name text NOT NULL,
        begin_date text,
        end_date text
    );
    """
    pass

def users_p_file():
|
|
uf = users_file()
|
|
pf = pseudonym_file()
|
|
#print pf
|
|
upf = uf.merge(pf,left_on='id',right_on='user_id',how='left')
|
|
return upf
|
|
|
|
"""
|
|
def com_channel_dim():
|
|
all = os.listdir(local_data_folder)
|
|
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
|
|
all.reverse()
|
|
#print "sorted file list:"
|
|
#print all
|
|
for F in all:
|
|
if re.search('communication_channel_dim',F):
|
|
cc_file = F
|
|
break
|
|
print("most recent communication channel file is " + cc_file)
|
|
cc_users = []
|
|
for line in gzip.open(local_data_folder + cc_file,'r'):
|
|
line_dict = dict(list(zip(cc_format, line.split("\t"))))
|
|
#line_dict['globalid'] = line_dict['globalid'].rstrip()
|
|
cc_users.append(line_dict)
|
|
df = pd.DataFrame(cc_users)
|
|
return df
|
|
"""
|
|
|
|
|
|
"""grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course
|
|
FROM requests_sum1 AS r
|
|
JOIN courses AS c ON r.courseid=c.id
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
|
GROUP BY u.id,c.id,r.timeblock
|
|
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
|
|
|
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course
|
|
FROM requests_sum1 AS r
|
|
JOIN courses AS c ON r.courseid=c.id
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810
|
|
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
|
|
|
|
|
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id
|
|
JOIN requests_sum1 AS r ON r.courseid=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
|
ORDER BY u.sortablename, r.timeblock"" % course_id"""
|
|
|
|
|
|
|
|
|
|
|
|
stem_course_id = '11015' # TODO
|
|
|
|
# NO LONGER USED - SEE COURSES
|
|
def enroll_stem_students():
|
|
depts = "MATH BIO CHEM PHYS ASTR GEOG".split(" ")
|
|
students = set()
|
|
for d in depts:
|
|
students.update(dept_classes(d))
|
|
print(students)
|
|
|
|
to_enroll = [ x for x in students if x not in already_enrolled ]  # already_enrolled: assumed module-level collection of (name, id) tuples
|
|
|
|
print(to_enroll)
|
|
print("prev line is people to enroll\nnext line is students already enrolled in stem")
|
|
print(already_enrolled)
|
|
|
|
for s in to_enroll:
|
|
t = url + '/api/v1/courses/%s/enrollments' % stem_course_id
|
|
data = { 'enrollment[user_id]': s[1], 'enrollment[type]':'StudentEnrollment',
|
|
'enrollment[enrollment_state]': 'active' }
|
|
print(data)
|
|
print(t)
|
|
if input('enter to enroll %s or q to quit: ' % s[0]) == 'q':
|
|
break
|
|
r3 = requests.post(t, headers=header, params=data)
|
|
print(r3.text)
|
|
|
|
|
|
#####
|
|
##### from users.py pretty much just use sql now
|
|
|
|
|
|
# unused?
|
|
def getAllTeachersInTerm(): # a list
|
|
# classes taught in last 3 semesters
|
|
# How many of them were published and used
|
|
# hits in last week/month/year
|
|
# most common department
|
|
# email addr
|
|
all_courses = {}
|
|
teachers = {} # keyed by goo
|
|
# { 'name':'', 'id':'', 'email':'', 'goo':'', 'classes':[ (#name,#id,#pubd,#hitsbyteacher) ... ] }
|
|
|
|
# This is a bit different from the 1 year schedule above, because it looks at
|
|
# people who were active in their shells in iLearn.
|
|
|
|
outfile = codecs.open('teacherdata/historical_shells_used.json','w', encoding='utf-8')
|
|
for term in last_4_semesters_ids: # [60,]:
|
|
print(("Fetching term: " + str(term)))
|
|
all_courses[term] = \
|
|
fetch('/api/v1/accounts/1/courses?enrollment_term_id=' + str(term) + '&per_page=100')  # Canvas paginates with per_page
|
|
i = 0
|
|
j = 0
|
|
for k,v in list(all_courses.items()): ##### term k, list v
|
|
for a_class in v:
|
|
print((a_class['name']))
|
|
published = 0
|
|
if a_class['workflow_state'] in ['available','completed']:
|
|
j += 1
|
|
published = 1
|
|
i += 1
|
|
#if i > 20: break
|
|
tch = fetch('/api/v1/courses/' + str(a_class['id']) + '/search_users?enrollment_type=teacher')
|
|
for r in tch: ##### TEACHER r of COURSE a_class
|
|
name = str(r['sortable_name'])
|
|
if 'sis_import_id' not in r:
|
|
print("This user wasn't available: " + name)
|
|
continue
|
|
goo = str(r['sis_import_id'])
|
|
print((r['sortable_name']))
|
|
if name not in teachers:
|
|
email = getEmail(r['id'])
|
|
teachers[name] = { 'name':r['sortable_name'], 'id':r['id'], 'email':email, 'goo':goo, 'classes':[] }
|
|
info = (a_class['name'],a_class['id'],published)
|
|
teachers[name]['classes'].append( info )
|
|
|
|
## TODO: hits in courses by teachers https://gavilan.instructure.com:443/api/v1/users/2/page_views?end_time=Dec%2010%2C%202018
|
|
|
|
for t,v in list(teachers.items()):
|
|
teachers[t]['num_courses'] = len(v['classes'])
|
|
teachers[t]['num_active_courses'] = sum( [x[2] for x in v['classes']] )
|
|
depts = [ dept_from_name(x[0]) for x in v['classes'] ]
|
|
teachers[t]['dept'] = most_common_item(depts)
|
|
|
|
#print(str(j), "/", str(i), " sections are published")
|
|
outfile.write(json.dumps(teachers))
|
|
|
|
|
|
"""
|
|
def teacherActivityLog(uid=1): ### Next: save results in a hash and return that....
|
|
global results, users, users_by_id
|
|
#get_users() # do this if you think 'teachers/users.json' is outdated.
|
|
|
|
load_users()
|
|
|
|
#for x in users_by_id.keys():
|
|
# if x < 20:
|
|
# print x
|
|
# print users_by_id[x]
|
|
|
|
|
|
teachers = csv.reader(open('teachers/current_semester.txt','r'), delimiter="\t")
|
|
for row in teachers:
|
|
print(row[0] + " is id: " + row[1])
|
|
uid = row[1]
|
|
print("Comes up as: " + str(users_by_id[int(uid)]))
|
|
info = users_by_id[int(uid)]
|
|
goo = info['login_id']
|
|
|
|
output_file = open('logs/users/byweek/'+ goo.lower() + '.csv', 'w')
|
|
|
|
|
|
# okay, actually, the first week here is the week before school IRL
|
|
start = isoweek.Week.withdate( datetime.date(2017,8,21))
|
|
end = isoweek.Week.thisweek()
|
|
byweek = []
|
|
|
|
i = 0
|
|
while(1):
|
|
results = []
|
|
start = start + 1
|
|
if start > end: break
|
|
|
|
myStart = start.day(0).isoformat() + 'T00:00-0700'
|
|
myEnd = start.day(6).isoformat() + 'T11:59:59-0700'
|
|
t = url + "/api/v1/users/" + str(uid) + "/page_views?start_time=" + myStart + '&end_time=' + myEnd + "&perpage=500"
|
|
print(t)
|
|
while(t):
|
|
print(".", end=' ')
|
|
t = fetch(t)
|
|
print("")
|
|
thisWeek = len(results)
|
|
print("Week # " + str(i) + "\t" + str(thisWeek))
|
|
byweek.append( "Week # " + str(i) + "\t" + str(thisWeek) )
|
|
output_file.write( start.isoformat() + "," + str(thisWeek) + "\n")
|
|
i += 1
|
|
for j in byweek:
|
|
print(j)
|
|
"""
|
|
|
|
"""
|
|
def summarize_student_teacher_role(u):
|
|
# u is a "group" from the groupby fxn
|
|
# term is sp18 now
|
|
t = 0
|
|
s = 0
|
|
for a in u:
|
|
if a=='TeacherEnrollment': t += 1
|
|
else: s += 1
|
|
if NUM_ONLY:
|
|
if t > s: return '1'
|
|
return '0'
|
|
else:
|
|
if t > s: return 'teacher'
|
|
return 'student'
|
|
"""
|
|
"""
|
|
def user_roles2():
|
|
# cross list users, classes enrolled, and their roles
|
|
global role_table, term_courses
|
|
|
|
role_table = enrollment_file()
|
|
user_table = users_file()
|
|
course_table = courses_file() # from canvas
|
|
term_table = term_file()
|
|
schedule = current_schedule() # from banner
|
|
|
|
# current semester
|
|
current = term_table[lambda d: d.course_section=='2018 Spring']
|
|
term_id = current['id'].values[0]
|
|
term_courses = course_table[lambda d: d.termid==term_id] # courses this semester
|
|
|
|
# add is_online flag (for courses listed in schedule as online-only)
|
|
term_courses['is_online'] = term_courses['code'].map( lambda x: course_is_online( get_crn_from_name(x) ) )
|
|
|
|
new_df = pd.DataFrame(columns=['type','oo','num'])
|
|
|
|
m = 0
|
|
data = []
|
|
for u in user_table.iterrows():
|
|
if m % 1000 == 0: print("on row " + str(m))
|
|
m += 1
|
|
data.append(categorize_user(u))
|
|
#if m > 1500: break
|
|
new_df = pd.DataFrame(data,columns=['i','type','onlineonly','numcls']).set_index('i')
|
|
print(new_df)
|
|
|
|
user_table = user_table.merge(new_df,left_index=True,right_index=True)
|
|
user_table.to_csv('canvas_data/users_online.csv')
|
|
"""
|
|
|
|
### IS THIS IN CANVAS_DATA.py?
|
|
|
|
|
|
|
|
|
|
""" Collate the raw logs into something more compact and useful. Version 1:
|
|
- # of accesses, user/day
|
|
- # of participations, user/day
|
|
-
|
|
|
|
- where day is the number of days into the semester. Classes shorter than 16 weeks should get a multiplier
|
|
-
|
|
|
|
- 2 initial goals:
|
|
a. data for statistics / clustering / regression / learning
|
|
b. data for visualization
|
|
"""
|
|
def req_to_db(fname_list):
|
|
fields = ','.join("id timestamp timestamp_year timestamp_month timestamp_day user_id course_id root_account_id course_account_id quiz_id discussion_id conversation_id assignment_id url user_agent http_method remote_ip interaction_micros web_application_controller web_applicaiton_action web_application_context_type web_application_context_id real_user_id session_id user_agent_id http_status http_version".split(" "))
|
|
sqlite_file = 'canvas_data/data.db'
|
|
conn = sqlite3.connect(sqlite_file)
|
|
c = conn.cursor()
|
|
# merge all requests into db
|
|
by_date_course = defaultdict( lambda: defaultdict(int) )
|
|
by_date_user = defaultdict( lambda: defaultdict(int) )
|
|
df_list = []
|
|
df_list_crs = []
|
|
users = defaultdict( lambda: defaultdict(int) )
|
|
i = 0
|
|
limit = 300
|
|
for fname in fname_list:
|
|
print((fname+"\n"))
|
|
for line in gzip.open('canvas_data/'+fname,'rt'):  # text mode, so split works on str under Python 3
|
|
r = line.rstrip('\n').split('\t')  # strip the newline so the last field stays clean
|
|
#tot = len(fields.split(','))
|
|
#i = 0
|
|
#for x in fields.split(','):
|
|
# print x + "\t" + r[i]
|
|
# i+= 1
|
|
|
|
qry = "insert into requests("+fields+") values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
|
|
conn.execute(qry, r)
|
|
|
|
|
|
# New method for below:
|
|
# read collated data from sqlite
|
|
# collate from more logs
|
|
# write back....?
|
|
|
|
"""
|
|
date = datetime.datetime.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
|
|
if r['userid'] in users:
|
|
users[r['userid']]['freq'] += 1
|
|
if users[r['userid']]['lastseen'] < date:
|
|
users[r['userid']]['lastseen'] = date
|
|
else:
|
|
users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1}
|
|
by_date_course[ r['day'] ][ r['courseid'] ] += 1
|
|
by_date_user[ r['day'] ][ r['userid'] ] += 1
|
|
#if r['userid'] in by_user: by_user[r['userid']] += 1
|
|
#else: by_user[r['userid']] = 1
|
|
#if r['courseid'] in by_course: by_course[r['courseid']] += 1
|
|
#else: by_course[r['courseid']] = 1
|
|
#mylog.write("by_user = " + str(by_user))
|
|
df_list.append(pd.DataFrame(data=by_date_user))
|
|
df_list_crs.append(pd.DataFrame(data=by_date_course))
|
|
"""
|
|
i += 1
|
|
if i > limit: break
|
|
conn.commit()
|
|
conn.close()
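
# req_to_db assumes the requests table already exists; a hedged sketch of
# matching DDL, storing every column as TEXT since the gzip dumps arrive as
# tab-separated strings (types can be tightened later):
def create_requests_table(db_path='canvas_data/data.db'):
    import sqlite3
    cols = "id timestamp timestamp_year timestamp_month timestamp_day user_id course_id root_account_id course_account_id quiz_id discussion_id conversation_id assignment_id url user_agent http_method remote_ip interaction_micros web_application_controller web_application_action web_application_context_type web_application_context_id real_user_id session_id user_agent_id http_status http_version".split(" ")
    ddl = "CREATE TABLE IF NOT EXISTS requests (%s)" % ", ".join("%s TEXT" % c for c in cols)
    conn = sqlite3.connect(db_path)
    conn.execute(ddl)
    conn.commit()
    conn.close()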
|
|
|
|
|
|
|
|
|
|
"""
|
|
Making columns:
|
|
table_data = [['a', 'b', 'c'], ['aaaaaaaaaa', 'b', 'c'], ['a', 'bbbbbbbbbb', 'c']]
|
|
for row in table_data:
|
|
print("{: >20} {: >20} {: >20}".format(*row))
|
|
|
|
Transpose a matrix:
|
|
rez = [[m[j][i] for j in range(len(m))] for i in range(len(m[0]))]
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
"""
|
|
ilearn_by_id = {}
|
|
ilearn_by_name = {}
|
|
for x in ilearn_list:
|
|
ilearn_by_id[x[3]] = x
|
|
ilearn_by_name[x[0]] = x
|
|
|
|
for ml in open('cache/teacher_manual_name_lookup.csv','r').readlines():
|
|
parts = ml.strip().split(',')
|
|
try:
|
|
manual_list[parts[0]] = ilearn_by_id[parts[1]]
|
|
except Exception as e:
|
|
print "Teacher missing: " + parts[0]
|
|
|
|
il_names = [ x[0] for x in ilearn_list ]
|
|
il_byname = {}
|
|
for x in ilearn_list: il_byname[x[0]] = x
|
|
sched_list_missed = [x for x in sched_list]
|
|
|
|
#
|
|
# key is long name (with middle name) from schedule, value is tuple with everything
|
|
name_lookup = manual_list
|
|
matches = []
|
|
|
|
#print ilearn_list
|
|
|
|
num_in_sched = len(sched_list)
|
|
num_in_ilearn = len(ilearn_list)
|
|
|
|
#for i in range(min(num_in_sched,num_in_ilearn)):
|
|
# print "|"+sched_list[i] + "|\t\t|" + ilearn_list[i][0] + "|"
|
|
|
|
print("Sched names: %i, iLearn names: %i" % (num_in_sched,num_in_ilearn))
|
|
|
|
for s in sched_list:
|
|
for t in il_names:
|
|
if first_last(s) == t:
|
|
#print ' MATCHED ' + s + ' to ' + t
|
|
sched_list_missed.remove(s)
|
|
try:
|
|
name_lookup[s] = ilearn_by_name[ first_last(s) ]
|
|
except Exception as e:
|
|
print "Teacher missing (2): " + s
|
|
il_names.remove(first_last(s))
|
|
matches.append(s)
|
|
|
|
|
|
print "Matched: " + str(matches)
|
|
|
|
print "\nDidn't match: " + str(len(sched_list_missed)) + " schedule names."
|
|
|
|
print "\nFinal results: "
|
|
print(name_lookup)
|
|
|
|
nlf = codecs.open('cache/sched_to_ilearn_names.json','w','utf-8')
|
|
nlf.write(json.dumps(name_lookup,indent=2))
|
|
# STRING DISTANCE
|
|
#sim = find_most_similar(s,i_names)
|
|
#print ' CLOSEST MATCHES to ' + s + ' are: ' + str(sim)
|
|
#mm.write(s+',\n')
|
|
"""
|
|
|
|
|
|
#ilearn_list = sorted(list(set(map(
|
|
# lambda x: #(tfi[x]['name'],tfi[x]['email'],tfi[x]['dept'],str(tfi[x]['id']),tfi[x]['goo']),
|
|
# tfi.keys()))))
|
|
#i_names = [ x[0] for x in ilearn_list ]
|
|
|
|
#print json.dumps(i_names,indent=2)
|
|
#return
|
|
|
|
|
|
|
|
# how to filter a dict based on values
|
|
# filtered = {k: v for k, v in course_combos.items() if v['dept'] == 'LIB' or v['dept'] == 'CSIS' }
|
|
|
|
# more pandas
|
|
# gapminder['continent'].unique()
|
|
|
|
|
|
|
|
|
|
|
|
#for name,group in bycode:
|
|
# #print name
|
|
# print name, " ", group['type']
|
|
|
|
#onl = gg.agg( lambda x: has_online(x) )
|
|
#ttl = gg.agg( lambda x: len(x) )
|
|
#ttl = ttl.rename(columns={'type':'total_sections'})
|
|
|
|
#onl.join(gg.agg( lambda x: has_hybrid(x) ),how='outer')
|
|
#onl.join(gg.agg( lambda x: has_lecture(x) ), how='outer')
|
|
|
|
#onl['num_sections'] = 0
|
|
#onl['num_lec'] = 0
|
|
#onl['num_online'] = 0
|
|
|
|
#all = pd.merge([onl,hyb,lec])
|
|
#print onl
|
|
#total=len, f2f=lambda x: ) set(x)
|
|
#{ 'num_sections': "count",
|
|
# 'num_lec': lambda x: 5,
|
|
# 'num_online': lambda x: 5 } )
|
|
#print gg
|
|
"""
|
|
|
|
|
|
def has_online(series):
|
|
# if any items of the series have the string 'online', return 1
|
|
for i in series:
|
|
if i == 'online': return 1
|
|
return 0
|
|
def has_lecture(series):
|
|
# if any items of the series have the string 'lecture', return 1
|
|
for i in series:
|
|
if i == 'lecture': return 1
|
|
return 0
|
|
def has_hybrid(series):
|
|
# if any items of the series have the string 'online', return 1
|
|
for i in series:
|
|
if i == 'hybrid': return 1
|
|
return 0
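
# The three checks above differ only in the string they look for; a single
# parameterized helper covers all of them, e.g. has_mode(s, 'online'),
# has_mode(s, 'hybrid'), has_mode(s, 'lecture'):
def has_mode(series, mode):
    # return 1 if any item of the series equals the given mode string
    return 1 if any(i == mode for i in series) else 0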
|
|
"""
|
|
#### RIGHT HERE IS WHERE I THINK... MAYBE THIS ISN'T THE RIGHT APPROACH. I DON'T SEEM
|
|
#### TO BE ABLE TO QUERY THE FACT BASE. IS THAT TRUE? SHOULD I JUST BE USING TABLES?
|
|
|
|
#### CHANGING COURSE... USE THE RULES TO UPDATE A DATABASE/TABLE/DATAFRAME
|
|
#### OR SET OF DICTS.
|
|
|
|
# ultimately i want this to be more flexible, so i can categorize degrees as 'available evening' etc
|
|
#
|
|
|
|
|
|
# Simple data structure. In this function, a degree is
|
|
""" degree = { 'name': 'History AA',
|
|
'blocks': [ { 'original_title':'xxx', 'rulecode':'u3',
|
|
'courses': [ {'code':'math1a', 'units': '3.0', 'wasonline':False },
|
|
{'code':'math2a', 'units': '3.0', 'wasonline':False },
|
|
{'code':'math3a', 'units': '3.0', 'wasonline':False } ] },
|
|
{ 'original_title':'xyz', 'rulecode':'a',
|
|
'courses': [ {'code':'math5a', 'units': '3.0', 'wasonline':False },
|
|
{'code':'math6a', 'units': '3.0', 'wasonline':False },
|
|
{'code':'math7a', 'units': '3.0', 'wasonline':False } ] } ] }
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Wrapper to get 2 schedules at once
|
|
def dl_sched():
|
|
global SEMESTER, semester_begin, filename, short_sem
|
|
SEMESTER = 'Fall 2019'
|
|
short_sem = 'fa19'
|
|
semester_begin = strptime('08/26', '%m/%d')
|
|
filename = 'fa19_sched.json'
|
|
|
|
txt = login()
|
|
codecs.open('output/'+filename,'w').write( json.dumps( to_section_list(txt) ) )
|
|
#stats()
|
|
#reg_nums()
|
|
|
|
#todo: these semesters
|
|
SEMESTER = 'Summer 2019'
|
|
short_sem = 'su19'
|
|
semester_begin = strptime('06/17', '%m/%d')
|
|
filename = 'su19_sched.json'
|
|
|
|
txt = login()
|
|
codecs.open('output/'+filename,'w').write( json.dumps( to_section_list(txt) ) )
|
|
#stats()
|
|
#reg_nums()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Send a personalized email regarding ZTC
|
|
def send_z_email(fullname, firstname, addr, courses_list):
|
|
FULLNAME = fullname #"Sabrina Lawrence"
|
|
FNAME = firstname # "Sabrina"
|
|
to_email = addr # "slawrence@gavilan.edu"
|
|
courses = courses_list # ["CSIS45", "CSIS85"]
|
|
|
|
course_template = "<a href='%s'>%s</a> "
|
|
url_template = "https://docs.google.com/forms/d/e/1FAIpQLSfZLQp6wHFEdqsmpZ7jz2Y8HtKLo8XTAhrE2fyvTDOEgquBDQ/viewform?usp=pp_url&entry.783353363=%s&entry.1130271051=%s" # % (FULLNAME, COURSE1)
|
|
|
|
bare_link = "https://forms.gle/pwZJHdWSkyvmH4L19"
|
|
|
|
COURSELINKS = ''
|
|
PLAINCOURSES = ''
|
|
for C in courses:
|
|
ut = url_template % (FULLNAME, C)
|
|
COURSELINKS += course_template % (ut, C)
|
|
PLAINCOURSES += C + " "
|
|
|
|
text_version = open('cache/ztc_mail1.txt','r').read()
|
|
html_version = open('cache/ztc_mail1_h.txt','r').read()
|
|
|
|
# replace these: $FNAME $COURSELINKS $LINK
|
|
|
|
email = re.sub( r'\$FNAME', FNAME, text_version )
|
|
email = re.sub( r'\$COURSELINKS', PLAINCOURSES, email )
|
|
email = re.sub( r'\$LINK', bare_link, email )
|
|
|
|
email_h = re.sub( r'\$FNAME', FNAME, html_version )
|
|
email_h = re.sub( r'\$COURSELINKS', COURSELINKS, email_h )
|
|
|
|
print(email_h+"\n\n"+email)
|
|
|
|
from O365 import Account
|
|
|
|
credentials = ('phowell@gavilan.edu', 'xxx')
|
|
client_secret = 'xxx' # expires 10/28/2020
|
|
tenant_id = "4ad609c3-9156-4b89-9496-0c0600aeb0bb"
|
|
# application client id: 29859402-fa55-4646-b717-752d90c61cde
|
|
|
|
account = Account(credentials, auth_flow_type='credentials', tenant_id=tenant_id)
|
|
if account.authenticate():
|
|
print('Authenticated!')
|
|
|
|
#account = Account(credentials)
|
|
#if account.authenticate(scopes=['message_all']):
|
|
# print('Authenticated!')
|
|
m = account.new_message()
|
|
m.to.add(addr)
|
|
m.subject = 'Quick question about your course textbook'
|
|
m.body = "email_h"
|
|
m.send()
|
|
|
|
"""
|
|
import smtplib
|
|
from email.mime.multipart import MIMEMultipart
|
|
from email.mime.text import MIMEText
|
|
|
|
msg = MIMEMultipart('alternative')
|
|
msg['Subject'] = "Quick question about your course textbook"
|
|
msg['From'] = "gavdisted@gmail.com"
|
|
msg['To'] = to_email
|
|
|
|
msg.attach(MIMEText(email, 'plain'))
|
|
msg.attach(MIMEText(email_h, 'html'))
|
|
|
|
|
|
#s = smtplib.SMTP('smtp.gmail.com', 587)
|
|
#s.starttls()
|
|
#s.login("gavdisted", "xxx")
|
|
|
|
|
|
s = smtplib.SMTP('smtp.office365.com',587)  # port 587 uses STARTTLS; SMTP_SSL is for port 465
|
|
s.ehlo()
|
|
s.starttls()
|
|
s.login('phowell@gavilan.edu', 'xxx')
|
|
|
|
#s.sendmail(msg['From'], msg['To'], msg.as_string())
|
|
s.sendmail(msg['From'], msg['To'], "Testing")
|
|
s.quit()"""
|
|
|
|
|
|
|
|
def getInactiveTeachersInTerm(t=23): # a list
|
|
global results
|
|
teachers = {}
|
|
emails = {}
|
|
outfile = codecs.open('canvas/inactive_teachers.txt','w', encoding='utf-8')
|
|
efile = codecs.open('canvas/inactive_teachers_emails.txt','w', encoding='utf-8')
|
|
|
|
#yn = raw_input('All courses? y=all n=only active ')
|
|
#all = 0
|
|
#if yn=='y': all = 1
|
|
|
|
if not t:
|
|
t = askForTerms()
|
|
else: t = [ t, ]
|
|
for term in t:
|
|
r = url + '/api/v1/accounts/1/courses?enrollment_term_id=' + str(term) + '&per_page=100'
|
|
while(r): r = fetch(r)
|
|
all_courses = results #json.loads(results)
|
|
#print "All unpublished courses: "
|
|
i = 0
|
|
j = 0
|
|
for k in all_courses:
|
|
j += 1
|
|
if k['workflow_state'] != 'available':
|
|
i += 1
|
|
print(str(i), "\t", k['name'], "\t", k['workflow_state'])
|
|
results = []
|
|
t2 = url + '/api/v1/courses/' + str(k['id']) + '/search_users?enrollment_type=teacher'
|
|
|
|
|
|
while(t2): t2 = fetch(t2)
|
|
#print results
|
|
for r in results:
|
|
key = r['sortable_name'] + "\t" + str(r['id'])
|
|
#if not 'email' in r: pdb.set_trace()
|
|
emails[key] = str(r['sis_user_id'])
|
|
#print r
|
|
if key in teachers:
|
|
teachers[key].append(k['name'])
|
|
else:
|
|
teachers[key] = [ k['name'], ]
|
|
#print json.dumps(results, indent=4, sort_keys=True)
|
|
#a = raw_input()
|
|
|
|
print(str(i), "/", str(j), " sections are unpublished")
|
|
for t in list(emails.keys()):
|
|
efile.write(emails[t] + ", ")
|
|
for t in list(teachers.keys()):
|
|
outfile.write(t + "\t")
|
|
for c in teachers[t]:
|
|
outfile.write(c + ",")
|
|
outfile.write("\n")
|
|
#f.write(json.dumps(teachers, indent=4, sort_keys=True))
|
|
print("Output file is in ./teachers/current_semester.txt")
|
|
#print json.dumps(all_courses, indent=4, sort_keys=True)
|
|
"""for x in all_courses:
|
|
qry = '/api/v1/courses/' + str(course_id) + '/search_users?enrollment_type=teacher'
|
|
t = url + qry
|
|
while(t): t = fetch(t)
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
#for t,v in teachers.items():
|
|
# outfile.write( "|".join( [ v['goo'], v['name'], v['email'], v['dept'], str(v['num_courses']), str(v['num_active_courses']) ] ) + "\n" )
|
|
|
|
#{"goo": "G00275722", "name": "Agaliotis, Paul", "num_courses": 1, "num_active_courses": 1, "id": 5092, "dept": "AMT", "classes": [["AMT120 POWERPLANT TECH FA18 10958", 5322, 1]], "email": "PAgaliotis@gavilan.edu"},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def course_location(course):
    # gather the distinct locations across this section's meeting rows
    # (up to 6, as before), skipping rows too short to have a location column
    period = set()
    for meeting in course[:6]:
        if len(meeting) > 13:
            period.add(course_location_raw(meeting[13]))

    if 'TBA' in period:
        period.remove('TBA')

    period = list(period)
|
|
|
|
if len(period)==0:
|
|
return ''
|
|
|
|
if len(period)==1:
|
|
return period[0]
|
|
|
|
if len(period)==2 and 'Online' in period:
|
|
period.remove('Online')
|
|
return 'Hybrid at ' + period[0]
|
|
return '/'.join(period)
|
|
|
|
|
|
def course_time(course):
    # is it morning, mid, or evening? gather the distinct periods across
    # this section's meeting rows (up to 6), skipping rows without a time column
    period = set()
    for meeting in course[:6]:
        if len(meeting) > 7:
            period.add(raw_course_time(meeting[7]))

    if 'TBA' in period:
        period.remove('TBA')

    period = list(period)
|
|
|
|
if len(period)==0:
|
|
return ''
|
|
|
|
if len(period)==1:
|
|
return period[0]
|
|
|
|
return '/'.join(period)
|
|
|
|
|
|
|
|
def course_teacher(course):
    # distinct instructor names across a section's meeting rows
    t = set()
    for c in course:
        t.add(c[11])
    return " / ".join(list(t))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def reg_nums():
|
|
courses = []
|
|
dates = []
|
|
sections = categorize()
|
|
|
|
today = todays_date_filename()
|
|
|
|
out = open(today+'.csv','w')
|
|
dates = {'loc':{}, 'time':{}, 'start':{}, 'teacher':{}}
|
|
i = 1
|
|
for f in os.listdir('.'):
|
|
m = re.search(r'reg_' + short_sem + r'_(\d+)\.csv', f)
|
|
if m:
|
|
filein = open(f,'r').readlines()[1:]
|
|
d = m.group(1)
|
|
dates[d] = {}
|
|
for L in filein:
|
|
parts = L.split(',') # crn,code,sec,cmp,cred,name,days,time,cap,act,rem,teacher,date,loc
|
|
if not re.search('(\d+)',parts[0]): continue
|
|
if len(parts)<8: continue
|
|
if not parts[8]: continue
|
|
if float(parts[8])==0: continue
|
|
|
|
dates[d][parts[0] + " " + parts[1]] = (1.0* float(parts[9])) / float(parts[8])
|
|
|
|
if i == 1 and parts[0] in sections:
|
|
dates['loc'][parts[0] + " " + parts[1]] = course_location( sections[parts[0]] )
|
|
dates['time'][parts[0] + " " + parts[1]] = course_time(sections[parts[0]] )
|
|
dates['start'][parts[0] + " " + parts[1]] = course_start( sections[parts[0]] )
|
|
dates['teacher'][parts[0] + " " + parts[1]] = course_teacher( sections[parts[0]] )
|
|
|
|
#dates[d]['act'] = parts[9]
|
|
#dates[d]['nam'] = parts[5]
|
|
#dates[d]['onl'] = ''
|
|
#print parts
|
|
#if len(parts)>13 and parts[13]=='ONLINE': dates[d]['onl'] = 'online'
|
|
i += 1
|
|
"""for d in sorted(dates.keys()):
|
|
for c in d:
|
|
print d
|
|
print dates[d]['crs']"""
|
|
|
|
df = pd.DataFrame(dates)
|
|
df.to_csv(out)
|
|
|
|
# In the schedule, is this a class or a continuation of the class above?
|
|
def categorize():
|
|
# todo: must we open all these files?
|
|
dates = {}
|
|
|
|
files = sorted(os.listdir('.'))
|
|
files = list( filter( lambda x: re.search(r'reg(\d+)\.csv', x), files) )
|
|
files.reverse()
|
|
|
|
f = files[0]
|
|
filein = codecs.open(f,'r','utf-8').readlines()[1:]
|
|
sections = {}
|
|
this_section = []
|
|
|
|
for L in filein:
|
|
parts = L.strip().split(',') # crn,code,sec,cmp,cred,name,days,time,cap,act,rem,teacher,date,loc
|
|
parts = list( map( lambda x: clean_funny3(x), parts ) )
|
|
|
|
if not re.search('(\d+)',parts[0]): # This is a continuation
|
|
this_section.append(parts)
|
|
else: # this is a new section or the first line
|
|
if this_section:
|
|
sections[ this_section[0][0] ] = this_section
|
|
#print "Section: " + this_section[0][0] + " is: " + str(this_section) + "\n"
|
|
#print this_section[0][0] + "\t", course_start(this_section)
|
|
#print this_section[0][0] + "\t", course_time(this_section)
|
|
#print this_section[0][0] + "\t", course_location(this_section)
|
|
this_section = [ parts, ]
|
|
    # save the final section: the loop above only stores a section when the next one begins
    if this_section:
        sections[ this_section[0][0] ] = this_section
    return sections
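
# Example of consuming categorize(): each CRN maps to that section's schedule
# rows, so the per-section helpers above apply uniformly. A sketch, assuming
# the reg*.csv files are present in the working directory:
def print_section_summaries():
    sections = categorize()
    for crn, rows in sections.items():
        print(crn, "\t", course_time(rows), "\t", course_location(rows), "\t", course_teacher(rows))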
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
|
# Deprecated. call perl.
|
|
def constructSchedule():
|
|
term = raw_input("Name of html file? (ex: sp18.html) ")
|
|
os.chdir('make-web-sched')
|
|
cmd = 'perl make.pl ' + term
|
|
print "command: " + cmd
|
|
os.system(cmd)
|
|
'''
|
|
|
|
"""
|
|
def fetch_dict(target,params={}):
|
|
# if there are more results, return the url for more fetching.
|
|
# else return false
|
|
#print target
|
|
global results_dict
|
|
r2 = requests.get(target, headers = header, params=params)
|
|
output = r2.text
|
|
if output.startswith('while('):
|
|
output = output[9:]
|
|
#print output
|
|
mycopy = results_dict.copy()
|
|
results_dict = {}
|
|
results_dict.update(json.loads(output))
|
|
results_dict.update(mycopy)
|
|
f.write(json.dumps(results_dict, indent=2))
|
|
#print "\n"
|
|
if ('link' in r2.headers):
|
|
links = r2.headers['link'].split(',')
|
|
for L in links:
|
|
ll = L.split(';')
|
|
link = ll[0].replace("<","")
|
|
link = link.replace(">","")
|
|
if re.search(r'next', ll[1]):
|
|
#print ll[1] + ":\t" + link
|
|
return link
|
|
return ""
|
|
"""
|
|
|
|
def get_schedule(term='201870', sem='fall'):
|
|
"""
|
|
sched_data = { 'term_in':term, 'sel_subj':'dummy', 'sel_day':'dummy',
|
|
'sel_schd':'dummy', 'sel_insm':'dummy', 'sel_camp':'dummy', 'sel_levl':'dummy', 'sel_sess':'dummy',
|
|
'sel_instr':'dummy', 'sel_ptrm':'dummy', 'sel_attr':'dummy', 'sel_subj':'%', 'sel_crse':'', 'sel_title':'',
|
|
'sel_schd':'%', 'sel_from_cred':'', 'sel_to_cred':'', 'sel_camp':'%', 'sel_ptrm':'%', 'sel_sess':'%',
|
|
'sel_attr':'%', 'begin_hh':'0', 'begin_mi':'0', 'begin_ap':'a', 'end_hh':'0', 'end_mi':'0', 'end_ap':'a' }
|
|
initial_headers = {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
'Accept-Encoding':'gzip, deflate, sdch, br',
|
|
'Accept-Language':'en-US,en;q=0.8',
|
|
'Connection':'keep-alive',
|
|
'Host':'ssb.gavilan.edu',
|
|
'Upgrade-Insecure-Requests':'1',
|
|
} #'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36' }
|
|
headers = { 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
'Accept-Encoding':'gzip, deflate, br',
|
|
'Accept-Language':'en-US,en;q=0.8',
|
|
'Cache-Control':'max-age=0',
|
|
'Connection':'keep-alive',
|
|
'Content-Type':'application/x-www-form-urlencoded',
|
|
'Host':'ssb.gavilan.edu',
|
|
'Origin':'https://ssb.gavilan.edu',
|
|
'Referer':'https://ssb.gavilan.edu/prod/bwckgens.p_proc_term_date?p_calling_proc=bwckschd.p_disp_dyn_sched&p_term='+term,
|
|
'Upgrade-Insecure-Requests':'1',
|
|
} #'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36' }
|
|
initial_url = 'https://ssb.gavilan.edu/prod/bwckgens.p_proc_term_date?p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=' + term
|
|
sesh = requests.Session()
|
|
#r1 = sesh.get(initial_url,headers=initial_headers)
|
|
#sesh.headers.update(headers)
|
|
url = 'https://ssb.gavilan.edu/prod/bwckschd.p_get_crse_unsec'
|
|
r1 = sesh.get(initial_url)
|
|
r = sesh.post(url, data=sched_data)
|
|
print(r.headers)
|
|
data = r.text
|
|
out = open('data/temp/'+term+'.html','w')
|
|
out.write(data)
|
|
out.close()"""
|
|
os.system('perl parse_schedule.pl data/temp/' + term + '.html' + ' ' + sem)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####
|
|
#####
|
|
##### conf.py ?
|
|
|
|
|
|
str="""355 985 1296
|
|
354 730 1295
|
|
353 319 1290
|
|
352 985 1289
|
|
351 813 1285
|
|
350 281 1285
|
|
349 267 1279
|
|
348 981 1252
|
|
347 994 1252
|
|
346 26 1250
|
|
345 757 1288
|
|
344 368 1288
|
|
343 1 1286
|
|
259 703 1295
|
|
256 693 1293
|
|
255 660 1292
|
|
254 1 1291
|
|
250 482 1287
|
|
246 2 1284
|
|
245 333 1283
|
|
244 27 1282
|
|
243 703 1281
|
|
242 730 1281
|
|
241 482 1280
|
|
239 211 1278
|
|
238 794 1278
|
|
237 2 1277
|
|
236 297 1276
|
|
235 831 1276
|
|
233 482 1251"""
|
|
|
|
for L in str.split("\n"):
|
|
(id,host,session) = L.split("\t")
|
|
qry = "INSERT INTO conf_signups (user,session,timestamp) VALUES (%s,%s,'2022-08-08 17:20:00');" % (host,session)
|
|
print(qry)
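
# The loop above only prints the INSERT statements; a hedged sketch of
# executing them with parameter binding instead of % formatting (the db path
# is an assumption -- the real conf_signups table may live elsewhere):
def insert_conf_signups(lines, db_path='canvas_data/data.db'):
    import sqlite3
    parsed = [tuple(L.split()) for L in lines]  # (id, host, session)
    conn = sqlite3.connect(db_path)
    conn.executemany(
        "INSERT INTO conf_signups (user, session, timestamp) VALUES (?, ?, '2022-08-08 17:20:00')",
        [(host, session) for (_rid, host, session) in parsed])
    conn.commit()
    conn.close()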
|
|
|
|
|
|
|
|
## sched.py
|
|
|
|
import codecs
|
|
import requests, re, csv, json, funcy, sys
|
|
|
|
from content import upload_page
|
|
|
|
|
|
def dates(s):
|
|
#print(s)
|
|
m = re.match(r'(\d\d\d\d)\-(\d\d)\-(\d\d)',s)
|
|
if m:
|
|
s = m.group(2) + "/" + m.group(3)
|
|
#print(s)
|
|
return s
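# e.g. dates('2021-06-14') -> '06/14'; strings that don't match YYYY-MM-DD
# come back unchanged.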
|
|
# "Course Code","Start Date","End Date",Term,Delivery,CRN,Status,"Course Name","Course Description","Units/Credit hours","Instructor Last Name","Instructor First Name",Campus/College,"Meeting Days and Times","Pass/No Pass available?","Class Capacity","Available Seats","Waitlist Capacity","Current Waitlist Length","Meeting Locations","Course Notes",ZTC
|
|
# ACCT103,2021-06-14,2021-07-23,"Summer 2021",Online,80386,Active,"General Office Accounting","This course is designed to prepare students for entry-level office accounting positions. Emphasis is on practical accounting applications. This course has the option of a letter grade or pass/no pass. ADVISORY: Eligible for Mathematics 430."," 3.00","Valenzuela Roque",Karla,"Gavilan College"," ",T," 30"," 18"," 20"," 0",,,
|
|
|
|
|
|
def parse_www_csv_sched():
|
|
old_keys = [ "CRN","Course Code","Units/Credit hours","Course Name","Meeting Days and Times","Class Capacity","Available Seats","Waitlist Capacity","Current Waitlist Length","Instructor Last Name","Start Date","Meeting Locations","ZTC","Delivery","Campus/College","Status","Course Description","Pass/No Pass available?","Course Notes" ]
|
|
|
|
# "Instructor First Name","End Date","Term",
|
|
|
|
new_keys = [ "crn", "code","cred", "name", "days", "cap", "rem", "wl_cap", "wl_act", "teacher", "date", "loc", "ztc", "type", "site","status","desc","pnp","note" ]
|
|
|
|
# "time","act","wl_rem", "partofday",
|
|
|
|
|
|
url = "https://gavilan.edu/_files/php/current_schedule.csv"
|
|
|
|
sched_txt = requests.get(url).text.splitlines()
|
|
sched = {"Fall 2021":[], "Spring 2022":[], "Winter 2022":[], "Summer 2021":[]}
|
|
shortsems = {"Fall 2021":"fa21", "Spring 2022":"sp22", "Winter 2022":"wi22", "Summer 2021":"su21","Summer 2022":"su22","Fall 2022":"fa22"}
|
|
for row in csv.DictReader(sched_txt):
|
|
d = dict(row)
|
|
for (old_key,new_key) in zip(old_keys,new_keys):
|
|
d[new_key] = d.pop(old_key).strip()
|
|
d['teacher'] = d.pop('Instructor First Name').strip() + " " + d['teacher']
|
|
d['date'] = dates(d['date']) + '-' + dates(d.pop('End Date').strip())
|
|
d['term'] = shortsems[d.pop('Term')]
|
|
if d['cred'] == ".00":
|
|
d['cred'] = "0"
|
|
if d['type'] == "Online":
|
|
d["loc"] = "ONLINE"
|
|
d["site"] = "Online"
|
|
d["type"] = "online"
|
|
#d.pop('Instructor First Name').strip() + " " + d['teacher']
|
|
#d["code"] = d.pop("Course Code")
|
|
#d["crn"] = d.pop("CRN")
|
|
sched.setdefault(row['Term'], []).append(d)  # guards against terms not pre-listed in sched above
|
|
|
|
print( json.dumps(sched,indent=2))
|
|
for k,v in sched.items():
|
|
print("%s: %i" % (k,len(v)))
|
|
|
|
for v in sched["Fall 2021"]:
|
|
print("%s\t %s\t %s\t %s" % ( v['code'], v['days'], v['type'], v['loc'] ))
|
|
#print("%s\t %s\t %s\t %s" % ( v['Course Code'], v['Meeting Days and Times'], v['Delivery'], v['Meeting Locations'] ))
|
|
|
|
def parse_json_test_sched():
|
|
j2 = open('cache/classes_json.json','r').readlines()
|
|
|
|
for L in j2:
|
|
o3 = json.loads(L)
|
|
print(json.dumps(o3,indent=2))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
print ('')
|
|
options = {
|
|
1: ['fetch and parse the csv on www.', parse_www_csv_sched],
|
|
2: ['parse the test json file.', parse_json_test_sched ],
|
|
}
|
|
|
|
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
|
|
resp = int(sys.argv[1])
|
|
print("\n\nPerforming: %s\n\n" % options[resp][0])
|
|
|
|
else:
|
|
print ('')
|
|
for key in options:
|
|
print(str(key) + '.\t' + options[key][0])
|
|
|
|
print('')
|
|
resp = input('Choose: ')
|
|
|
|
# Call the function in the options dict
|
|
options[ int(resp)][1]()
|
|
|
|
|
|
def put_revised_pages():
|
|
course_num = '6862'
|
|
course_folder = '../course_temps/course_6862'
|
|
filein = codecs.open(course_folder+'/fullcourse.v2.html','r','utf-8')
|
|
my_titles = []
|
|
my_urls = []
|
|
my_bodys = []
|
|
started = 0
|
|
current_body = ""
|
|
for L in filein.readlines():
|
|
ma = re.search(r'^<h1>(.*)</h1>\s*(.*)$', L)  # the trailing group is assumed to hold the page url, mirroring the "### title ### url" format below
|
|
if ma:
|
|
my_titles.append(ma.group(1))
|
|
my_urls.append(ma.group(2))
|
|
if started:
|
|
my_bodys.append(current_body)
|
|
current_body = ""
|
|
started = 1
|
|
else:
|
|
current_body += "\n" + L
|
|
my_bodys.append(current_body)
|
|
|
|
i = 0
|
|
for U in my_urls:
|
|
# and now upload it....lol
|
|
upload_page(course_num,U,my_bodys[i])
|
|
i += 1
|
|
|
|
# Upload pages. Local copy has a particular format.
|
|
# Appears to not be used
|
|
|
|
def put_course_pages():
|
|
course_num = '6862'
|
|
filein = codecs.open('cache/pages/course_'+str(course_num)+'.html','r','utf-8')
|
|
my_titles = []
|
|
my_urls = []
|
|
my_bodys = []
|
|
started = 0
|
|
current_body = ""
|
|
for L in filein.readlines():
|
|
ma = re.search(r'^###\s(.*)###\s(.*)$', L)
|
|
if ma:
|
|
my_titles.append(ma.group(1))
|
|
my_urls.append(ma.group(2))
|
|
if started:
|
|
my_bodys.append(current_body)
|
|
current_body = ""
|
|
started = 1
|
|
else:
|
|
current_body += "\n" + L
|
|
my_bodys.append(current_body)
|
|
|
|
i = 0
|
|
for U in my_urls:
|
|
# and now upload it....lol
|
|
upload_page(course_num,U,my_bodys[i])
|
|
i += 1
|
|
|
|
|
|
|
|
|
|
|
|
def freshdesk():
|
|
path = "C:\\Users\\peter\\Downloads\\freshdesk\\Solutions.xml"
|
|
soup = bs( codecs.open(path,'r','utf-8').read() ,features="lxml")
|
|
|
|
outpt = codecs.open('cache/faqs.txt','w')
|
|
out = ""
|
|
for a in soup.find_all('solution-article'):
|
|
|
|
print("TITLE\n"+a.find('title').get_text())
|
|
out += a.find('title').get_text()
|
|
|
|
"""for d in a.find_all('description'):
|
|
#print(d)
|
|
if d:
|
|
d = h.unescape(d.get_text())
|
|
e = stripper(d)
|
|
m = tomd.convert( e )
|
|
m = mycleaner(m)
|
|
print("\nDESCRIPTION\n"+m)"""
|
|
|
|
#print("\nWHAT IS THIS?\n" +
|
|
hh = a.find('desc-un-html').get_text()
|
|
d = h.unescape(hh)
|
|
e = stripper(d)
|
|
m = tomd.convert( e )
|
|
m = mycleaner(m)
|
|
print("\nDESCRIPTION\n"+m)
|
|
out += "\n\n" + m + "\n\n"
|
|
|
|
print("-----------\n\n")
|
|
outpt.write(out)
|
|
|
|
|
|
|
|
|
|
|
|
#### content.py
|
|
|
|
|
|
from pattern.web import plaintext, extension
|
|
from pattern.web import download
|
|
#from pattern import URL, MIMETYPE_IMAGE
|
|
from pattern.web import Crawler, DEPTH, FIFO, MIMETYPE_IMAGE, MIMETYPE_PDF
|
|
|
|
class GavCrawl(Crawler):
|
|
def visit(self, link, source=None):
|
|
print('visited:', repr(link.url), 'from:', link.referrer)
|
|
print(' ', link.url.mimetype)
|
|
#txt = plaintext(source, keep={'h1':[], 'h2':[], 'h3':[], 'h4':[], 'td':[], 'strong':[], 'b':[], 'a':['href'], 'img':['src'], 'ul':[], 'ol':[], 'li':[], 'dd':[], 'dt':[], 'i':[]})
|
|
#codecs.open(save_folder + '/' + mycleaner(clean_title(link.url)) + '.txt','w','utf-8').write(tomd.convert(txt))
|
|
|
|
codecs.open(save_folder + '/' + clean_fn(link.url) + '.txt','w','utf-8').write(trafilatura.extract(source,include_links=True, deduplicate=True, include_images=True, include_formatting=True))
|
|
|
|
|
|
def fail(self, link):
|
|
print('failed:', repr(link.url))
|
|
if re.search(r'\.pdf$', link.url):
|
|
m = re.search(r'\/([^\/]+\.pdf)$', link.url)
|
|
if m:
|
|
save_file = m.group(1)
|
|
print("saving to ", save_folder + '/' + save_file)
|
|
pdf_response = requests.get(link.url)
|
|
with open(save_folder + '/' + save_file, 'wb') as f:
|
|
f.write(pdf_response.content)
|
|
text = extract_text(save_folder + '/' + save_file)
|
|
#print(text)
|
|
codecs.open(save_folder + '/' + save_file + '.txt','w','utf-8').write(text)
|
|
else:
|
|
print("no match for pdf url: ", link.url)
|
|
|
|
for ext in ['jpg','jpeg','gif','webp']:
|
|
if re.search(r'\.'+ext+'$', link.url):
|
|
m = re.search(r'\/([^\/]+\.'+ext+')$', link.url)
|
|
if m:
|
|
save_file = m.group(1)
|
|
print("saving to ", save_folder + '/' + save_file)
|
|
pdf_response = requests.get(link.url)
|
|
with open(save_folder + '/' + save_file, 'wb') as f:
|
|
f.write(pdf_response.content)
|
|
else:
|
|
print('no match for '+ext+' url: ', link.url)
|
|
|
|
def crawl2():
|
|
#p = GavCrawl(links=['http://www.gavilan.edu/'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
|
|
#p = GavCrawl(links=['https://gavilan.edu/finaid/2022-23DirectLoanApplication1.pdf'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
|
|
p = GavCrawl(links=['https://gavilan.curriqunet.com/catalog/iq/1826'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
|
|
|
|
|
|
|
|
|
|
while not p.done:
|
|
try:
|
|
p.crawl(method=DEPTH, cached=False, throttle=0.76)
|
|
except Exception as e:
|
|
print("Exception: ", e)
|
|
|
|
|
|
|
|
def samples():
|
|
crawler = Crawler(links=[], domains=[], delay=20.0, sort=FIFO)
|
|
|
|
url = URL('http://www.clips.ua.ac.be/media/pattern_schema.gif')
|
|
print(url.mimetype in MIMETYPE_IMAGE)
|
|
|
|
|
|
#html = download('http://www.clips.ua.ac.be/', unicode=True)
|
|
s = URL('http://www.clips.ua.ac.be').download()
|
|
s = plaintext(s, keep={'h1':[], 'h2':[], 'strong':[], 'a':['href']})
|
|
|
|
|
|
# getting absolute urls
|
|
from pattern.web import URL, DOM, abs
|
|
|
|
url = URL('http://www.clips.ua.ac.be')
|
|
dom = DOM(url.download())
|
|
for link in dom('a'):
|
|
print(abs(link.attributes.get('href',''), base=url.redirect or url.string))
|
|
|
|
# get pdfs
|
|
from pattern.web import URL, PDF
|
|
|
|
url = URL('http://www.clips.ua.ac.be/sites/default/files/ctrs-002_0.pdf')
|
|
pdf = PDF(url.download())
|
|
print(pdf.string)
|
|
|
|
|
|
############### stats.py
|
|
|
|
|
|
def grades_rundown():
|
|
global results, users_by_id
|
|
load_users()
|
|
results = []
|
|
all_sem_courses = []
|
|
ids_out = open('all_teachers_by_goo','w')
|
|
all_ids = {}
|
|
# for the current or given semester's shells (really, only active ones)
|
|
with open('grades_out.csv','w', newline='') as f:  # text mode for csv.DictWriter under Python 3
|
|
w = csv.DictWriter(f, 'id,name,teacher,mean,median,count,count_gt70,grades,avg_activity_time'.split(','))
|
|
w.writeheader()
|
|
#for c in all_sem_courses:
|
|
courses = getCoursesInTerm(term=23,show=0,active=1)
|
|
for C in courses:
|
|
activity_time_total = 0.0
|
|
course_info = {'id':str(C['id']),'name':C['name'],'grades':[], 'teacher':[] }
|
|
#print(str(C['id']) + "\t " + C['name'])
|
|
emts = course_enrollment(C['id'])
|
|
for k,E in emts.items():
|
|
if E['type'] == 'TeacherEnrollment':
|
|
course_info['teacher'].append(users_by_id[E['user_id']]['name'])
|
|
all_ids[E['sis_user_id']] = 1
|
|
""" if 'grades' in E and E['grades']['current_score']:
|
|
#print(str(E['grades']['final_score']) + ", ",)
|
|
#print(str(E['grades']['current_score']) + ", ",)
|
|
course_info['grades'].append(E['grades']['current_score'])
|
|
activity_time_total += E['total_activity_time']
|
|
if course_info['grades']:
|
|
s = pd.Series(course_info['grades'])
|
|
course_info['mean'] = s.mean()
|
|
course_info['median'] = s.median()
|
|
course_info['count'] = len(s.values)
|
|
course_info['count_gt70'] = int((s > 70.0).sum())
|
|
course_info['avg_activity_time'] = activity_time_total / len(s.values)
|
|
else:
|
|
course_info['mean'] = 0
|
|
course_info['median'] = 0
|
|
course_info['count'] = 0
|
|
course_info['count_gt70'] = 0
|
|
course_info['avg_activity_time'] = 0"""
|
|
|
|
#print(course_info)
|
|
all_sem_courses.append(course_info)
|
|
w.writerow(course_info)
|
|
f.flush()
|
|
|
|
# get a grade (final? current?) for each student
|
|
for k,v in all_ids.items():
|
|
if k: ids_out.write(k + ', ')
|
|
|
|
# sanity check to make sure grading is actually happening in the shell
|
|
|
|
# report an average, median, and buckets
|
|
|
|
|
|
|
|
|
|
def class_logs():
|
|
global results
|
|
# 1. Search the current semester and the misc semesters for a list of courses
|
|
# that we want to check for users/activity.
|
|
#target = url + '/api/v1/accounts/1/terms' # list the terms
|
|
target = url + '/api/v1/accounts/1/courses?published=true&enrollment_term_id=14'
|
|
print("Getting term classes.")
|
|
while target:
|
|
target = fetch(target)
|
|
|
|
print("\n\n\n")
|
|
|
|
term_results = results
|
|
full_results = []
|
|
for x in term_results:
|
|
results = []
|
|
# now see who's logged in recently:
|
|
target = url + '/api/v1/courses/' + str(x['id']) + '/recent_students'
|
|
print("Getting class id: ", str(x['id']))
|
|
fetch(target)
|
|
if len(results):
|
|
#print(results)
|
|
LL = [ how_long_ago(z['last_login']) for z in results ]
|
|
avg = 9999
|
|
if len(LL): avg = sum(LL) / len(LL)
|
|
d = { 'id':x['id'], 'avg':avg, 'name':x['name'] }
|
|
full_results.append(d)
|
|
sorted_results = sorted(full_results, key=lambda k: k['avg'])
|
|
for x in sorted_results:
|
|
print(x['id'], "\t", str(x['avg']), "\t", x['name'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def user_logs():
|
|
global url, users_by_id, results
|
|
target_user = "6357"
|
|
load_users()
|
|
results = []
|
|
target = url + '/api/v1/users/' + target_user + '/page_views?per_page=200'
|
|
while target:
|
|
print(target)
|
|
target = fetch(target)
|
|
# have all student's hits. Filter to only this class
|
|
#results = filter(match59,results)
|
|
times = []
|
|
print(users_by_id[ int(target_user) ])
|
|
f.write(str(users_by_id[ int(target_user) ]) + "\n")
|
|
f.write( "link,updated_at,remote_ip,url,context_type,user_agent,action\n")
|
|
for hit in results:
|
|
L = [hit['links']['user'],hit['updated_at'],hit['remote_ip'],hit['url'],hit['context_type'],hit['user_agent'],hit['action']]
|
|
L = map(str,L)
|
|
f.write( ",".join(L) + "\n" )
|
|
|
|
|
|
|
|
|
|
def recent_logins():
|
|
global results, url, results_dict
|
|
p = { 'start_time':'2017-08-31T00:00:00Z', 'end_time':'2017-08-31T00:05:00Z'}
|
|
target = url + "/api/v1/audit/authentication/accounts/1"
|
|
results_dict = {}
|
|
resp = fetch_dict(target,p)
|
|
print(resp)
|
|
print(results_dict)
|
|
|
|
|
|
|
|
def userHitsThisSemester(uid=2):
|
|
begin = "20170820T0000"
|
|
t = url + "/api/v1/users/" + str(uid) + "/page_views?start_time=" + str(begin)
|
|
while(t): t = fetch(t)
|
|
print(json.dumps(results, indent=4, sort_keys=True))
|
|
|
|
|
|
|
|
|
|
def getCurrentActivity(): # a dict
|
|
# CURRENT ACTIVITY
|
|
#r = requests.get(url + '/api/v1/accounts/1/analytics/current/activity', headers = header )
|
|
#t = url + '/api/v1/accounts/1/users?per_page=500'
|
|
# analytics/terms/:term_id/activity
|
|
#t = url + '/api/v1/accounts/1/analytics/current/statistics'
|
|
global results_dict
|
|
    # one fetch per term id (11=sp17, 14=su17, 15=su17b, 18=fa17, 21=sp18, 7=cmte, 6=dev)
    semesters = []
    for term_id in [11, 14, 15, 18, 21, 7, 6]:
        results_dict = {}
        t = url + '/api/v1/accounts/1/analytics/terms/%i/activity' % term_id
        while(t): t = fetch_dict(t)
        semesters.append(results_dict['by_date'])

    master_list_by_date = {}
    for sem in semesters:
|
|
#print(sem)
|
|
for record in sem:
|
|
print(record)
|
|
date = record['date']
|
|
if date in master_list_by_date:
|
|
master_list_by_date[date]['participations'] += record['participations']
|
|
master_list_by_date[date]['views'] += record['views']
|
|
else:
|
|
master_list_by_date[date] = {}
|
|
master_list_by_date[date]['date'] = date
|
|
master_list_by_date[date]['participations'] = record['participations']
|
|
master_list_by_date[date]['views'] = record['views']
|
|
out = open('canvas/daily.json','w')
|
|
# want to match the old, funny format
|
|
by_date = []
|
|
my_out = {'by_date':by_date}
|
|
|
|
for day in master_list_by_date.keys():
|
|
by_date.append(master_list_by_date[day])
|
|
out.write(json.dumps(my_out,indent=2))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def externaltool(): # a list
|
|
|
|
|
|
#mydata = { "course_navigation[text]": "Video Chat",
|
|
# "course_navigation[default]": "false" }
|
|
#t = url + '/api/v1/accounts/1/external_tools/704?course_navigation[text]=Video Chat&course_navigation[default]=false'
|
|
#r = requests.put(t, headers=header)
|
|
t = url + '/api/v1/accounts/1/external_tools/'
|
|
while(t): t = fetch(t)
|
|
print(results)
|
|
|
|
|
|
|
|
|