# Grab-bag of pipeline fragments: schedule scraping, Canvas data, curriculum,
# graph experiments, and a curses REPL. The imports below are a best guess at
# what the fragments need when run together; globals like url, header, fetch(),
# schedfile, and semester_begin are defined elsewhere in the original project.
import base64, codecs, csv, curses, datetime, gzip, hashlib, heapq, hmac
import itertools, json, os, re, sqlite3
from collections import defaultdict
from functools import wraps
from time import strptime

import pandas as pd
import requests
from dateutil import parser
from pyfiglet import figlet_format             # used by MyRepl.ascii_art
from py2neo import Graph, Node, Relationship   # used by testgraph()

# referenced below but never defined in this fragment; these defaults are guesses
NUM_ONLY = 0
local_data_folder = 'canvas_data/'

#get_schedule('201770')

# from pipelines - canvas data

# todo: where does the most recent schedule come from?


# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts():
    #term = input("Name of current semester file? (ex: sp18) ")
    term = "sp23"  # sems[0]

    term_in = "cache/" + term + "_sched.json"
    term_out = "cache/" + term + "_latestarts.txt"
    print("Writing output to " + term_out)
    infile = open(term_in, "r")
    outfile = open(term_out, "w")
    sched = json.loads(infile.read())
    #print sched

    # bucket sections by their start date
    by_date = {}
    for C in sched:
        parts = C['date'].split("-")
        start = parts[0]
        codes = C['code'].split(' ')
        dept = codes[0]
        if dept in ['JLE', 'JFT', 'CWE']:
            continue
        if re.search('TBA', start): continue
        try:
            startd = parser.parse(start)
        except Exception as e:
            print(e, "\nproblem parsing ", start)
            continue  # was missing: a bad date would silently reuse the previous startd
        if startd not in by_date:
            by_date[startd] = []
        by_date[startd].append(C)

    for X in sorted(by_date.keys()):
        # a date with fewer than 200 sections is a late start, not the main semester start
        if len(by_date[X]) < 200:
            prettydate = X.strftime("%A, %B %d")
            print(prettydate + ": " + str(len(by_date[X])) + " courses")
            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
            for Y in by_date[X]:
                print(Y)
                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
    outfile.close()

online_courses = {}

def prep_online_courses_df():
    global online_courses
    schedule = current_schedule()  # from banner
    online_courses = schedule[lambda x: x.type == 'online']


def course_is_online(crn):
    global online_courses
    #print "looking up: " + str(crn)
    course = online_courses[lambda x: x.crn == int(crn)]
    return len(course)  # truthy (number of matching rows) when the CRN is online-only


def get_crn_from_name(name):
    # the CRN is the five-digit number embedded in a Canvas course name
    m = re.search(r'(\d\d\d\d\d)', name)
    if m: return int(m.group(1))
    else: return 0


def get_enrlmts_for_user(user, enrollments):
    # active enrollments only
    u_en = enrollments[lambda x: (x['user_id'] == user) & (x['workflow'] == 'active')]
    return u_en[['type', 'course_id']]

"""
|
|
timestamp = nowAsStr()
|
|
|
|
requestParts = [ method,
|
|
host,
|
|
'', #content Type Header
|
|
'', #content MD5 Header
|
|
path,
|
|
'', #alpha-sorted Query Params
|
|
timestamp,
|
|
apiSecret ]
|
|
|
|
#Build the request
|
|
requestMessage = '\n'.join( requestParts )
|
|
requestMessage = requestMessage.encode('ASCII')
|
|
print((requestMessage.__repr__()))
|
|
hmacObject = hmac.new(bytearray(apiSecret,'ASCII'), bytearray('','ASCII'), hashlib.sha256) #
|
|
hmacObject.update(requestMessage)
|
|
hmac_digest = hmacObject.digest()
|
|
sig = base64.b64encode(hmac_digest)
|
|
headerDict = {
|
|
'Authorization' : 'HMACAuth ' + apiKey + ':' + str(sig),
|
|
'Date' : timestamp
|
|
}
|
|
|
|
|
|
"""
|
|
|
|
# Download one Canvas Data table dump and unzip it if needed
def demo():
    resp = do_request('/api/account/self/file/sync')
    mylog.write(json.dumps(resp, indent=4))
    sample_table = resp['files'][10]
    filename = sample_table['filename']
    print(sample_table['table'])

    response = requests.request(method='GET', url=sample_table['url'], stream=True)
    if response.status_code != 200:
        print('Request response went bad. Got back a %s code, meaning the request was %s'
              % (response.status_code, response.reason))
    else:
        # Save the downloaded data
        with open(local_data_folder + filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=128):
                fd.write(chunk)
        print("Success")
        if filename.split('.')[-1] == 'gz':
            plain_filename = 'canvas_data/' + ".".join(filename.split('.')[:-1])
            pf = open(plain_filename, 'wb')  # gzip in 'rb' mode yields bytes, so write bytes
            with gzip.open('canvas_data/' + filename, 'rb') as f:
                pf.write(f.read())
            pf.close()

# How to drop columns
#columns = ['Col1', 'Col2', ...]
#df.drop(columns, inplace=True, axis=1)

# left join, one on column, one on index
#merged = pd.merge(result,users,left_index=True,right_on='id', how='left')

"""
|
|
You can call set_index on the result of the dataframe:
|
|
|
|
In [2]:
|
|
data=[['Australia',100],['France',200],['Germany',300],['America',400]]
|
|
pd.DataFrame(data,columns=['Country','Volume']).set_index('Country')
|
|
|
|
Out[2]:
|
|
Volume
|
|
Country
|
|
Australia 100
|
|
France 200
|
|
Germany 300
|
|
America 400
|
|
"""
|
|
|
|
|
|
|
|
def stats():
    # nothing seems to happen here? (it does write temp2.csv: fill rate per CRN)

    #input = csv.DictReader(codecs.open(schedfile,'r','utf-8'))
    reader = csv.DictReader(open(schedfile, 'r'))  # renamed from 'input' (shadowed the builtin)
    out2 = open('temp2.csv', 'w')
    clean = {}
    for r in reader:
        if r['crn']: clean[r['crn']] = r

    for c, r in list(clean.items()):
        try:
            if int(r['cap']) == 0: continue
            else: prct = (1.0 * int(r['act'])) / int(r['cap'])
            if prct < 0.01: continue
            o_str = ''
            if r['location'].strip() == 'ONLINE': o_str = 'online'
            #print r['location']
            date_parts = r['date'].split('-')
            start = strptime(date_parts[0], '%m/%d')
            if start > semester_begin: o_str += "\tlatestart " + date_parts[0]
            out2.write("".join([c, "\t", r['sub'], "\t", r['crs'], "\t", str(round(prct, 2)), "% full\t", o_str, "\n"]))
        except:
            pass  # skip malformed rows

######### from curriculum.py

# open('cache/programs/programs_1.txt','r').read()
"""
|
|
SEE serve.py .... i mean ... interactive.py
|
|
def dict_generator(indict, pre=None):
|
|
pre = pre[:] if pre else []
|
|
if isinstance(indict, dict):
|
|
for key, value in indict.items():
|
|
if isinstance(value, dict):
|
|
for d in dict_generator(value, pre + [key]):
|
|
yield d
|
|
elif isinstance(value, list) or isinstance(value, tuple):
|
|
for v in value:
|
|
for d in dict_generator(v, pre + [key]):
|
|
yield d
|
|
else:
|
|
yield str(pre) + " " + str([key, value]) + "\n"
|
|
else:
|
|
yield pre + [indict]
|
|
yield str(pre) + " " + str([indict]) + "\n"
|
|
|
|
|
|
|
|
def print_dict(v, prefix='',indent=''):
|
|
if isinstance(v, dict):
|
|
return [ print_dict(v2, "{}['{}']".format(prefix, k) + "<br />", indent+" " ) for k, v2 in v.items() ]
|
|
elif isinstance(v, list):
|
|
return [ print_dict( v2, "{}[{}]".format(prefix , i) + "<br />", indent+" ") for i, v2 in enumerate(v) ]
|
|
else:
|
|
return '{} = {}'.format(prefix, repr(v)) + "\n"
|
|
|
|
|
|
def walk_file():
|
|
j = json.loads(open('cache/programs/programs_2.txt','r').read())
|
|
|
|
return print_dict(j)
|
|
|
|
from flask import Flask
|
|
from flask import request
|
|
|
|
def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
|
|
|
|
def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)
|
|
|
|
def a(t,h): return '<a href="%s">%s</a>' % (h,t)
|
|
|
|
def server_save(key,value):
|
|
codecs.open('cache/server_data.txt','a').write( "%s=%s\n" % (str(key),str(value)))
|
|
|
|
def flask_thread(q):
|
|
app = Flask(__name__)
|
|
|
|
@app.route("/")
|
|
def home():
|
|
return tag('h1','This is my server.') + "<br />" + a('want to shut down?','/sd')
|
|
|
|
@app.route("/save/<key>/<val>")
|
|
def s(key,val):
|
|
server_save(key,val)
|
|
return tag('h1','Saved.') + "<br />" + tag('p', 'Saved: %s = %s' % (str(key),str(val)))
|
|
|
|
@app.route("/crazy")
|
|
def hello():
|
|
r = '<link rel="stylesheet" href="static/bootstrap.min.css">'
|
|
r += tag('style', 'textarea { white-space:nowrap; }')
|
|
r += tag('body', \
|
|
tagc('div','container-fluid', \
|
|
tagc('div','row', \
|
|
tagc( 'div', 'col-md-6', tag('pre', walk_file() ) ) + \
|
|
tagc( 'div', 'col-md-6', 'Column 2' + a('Shut Down','/shutdown' ) ) ) ) )
|
|
|
|
|
|
|
|
return r
|
|
|
|
@app.route("/sd")
|
|
def sd():
|
|
print('SIGINT or CTRL-C detected. Exiting gracefully')
|
|
func = request.environ.get('werkzeug.server.shutdown')
|
|
if func is None:
|
|
raise RuntimeError('Not running with the Werkzeug Server')
|
|
func()
|
|
return "Server has shut down."
|
|
app.run()
|
|
|
|
|
|
from queue import Queue
|
|
|
|
q = Queue()
|
|
|
|
def serve():
|
|
import webbrowser
|
|
import threading
|
|
x = threading.Thread(target=flask_thread, args=(q,))
|
|
x.start()
|
|
webbrowser.open_new_tab("http://localhost:5000")
|
|
|
|
|
|
|
|
|
|
#s = open('cache/programs/index.json','w')
|
|
#s.write( json.dumps({'departments':sorted(list(dept_index)), 'programs':prog_index}, indent=2) )
|
|
#s.close()
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
### interactive.py

"""class HelloWorldExample(object):
|
|
def make_teacher_rel(self, tchr, clss):
|
|
with self._driver.session() as tx:
|
|
tx.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})", \
|
|
tchr=tchr, clss=clss)
|
|
|
|
def __init__(self, uri, user, password):
|
|
self._driver = GraphDatabase.driver(uri, auth=(user, password))
|
|
|
|
def close(self):
|
|
self._driver.close()
|
|
|
|
|
|
|
|
def print_greeting(self, message):
|
|
with self._driver.session() as session:
|
|
greeting = session.write_transaction(self._create_and_return_greeting, message)
|
|
print(greeting)
|
|
|
|
@staticmethod
|
|
def _create_and_return_greeting(tx, message):
|
|
result = tx.run("CREATE (a:Greeting) "
|
|
"SET a.message = $message "
|
|
"RETURN a.message + ', from node ' + id(a)", message=message)
|
|
return result.single()[0]
|
|
"""
|
|
|
|
|
|
def make_teacher_rel(g, tchr, clss):
    g.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})",
          tchr=tchr, clss=clss)


def testgraph():
    gg = Graph("bolt://localhost:7687", auth=("neo4j", "asdf"))

    #gg.run("DROP CONSTRAINT ON (tchr:Teacher) ASSERT tchr.name IS UNIQUE")
    #gg.run("DROP CONSTRAINT ON (clss:Class) ASSERT clss.name IS UNIQUE")

    #gg.run("CREATE INDEX ON :Teacher(name)")
    #gg.run("CREATE INDEX ON :Class(name)")

    stuff = json.loads(open('output/semesters/2020spring/sp20_sched.json','r').read())

    # make lists of unique course code+name, teacher, locations
    tch = {}
    crs = {}
    loc = {}
    sem = Node("Semester", name="sp20")
    for c in stuff:
        if not c['teacher'] in tch:
            tch[c['teacher']] = Node("Teacher", name=c['teacher'])
            gg.create(tch[c['teacher']])
        if not c['code'] in crs:
            crs[c['code']] = Node("Course section", name=c['name'], code=c['code'])
            gg.create(crs[c['code']])
        if not c['loc'] in loc:
            loc[c['loc']] = Node("Location", loc=c['loc'])
            gg.create(loc[c['loc']])
        sect = Node("Section", crn=int(c['crn']))
        gg.create(Relationship(tch[c['teacher']], "TEACHES", sect))
        gg.create(Relationship(sect, "CLASS OF", crs[c['code']]))
        gg.create(Relationship(sect, "LOCATED AT", loc[c['loc']]))

    """
    for c in stuff:
        print(c['crn'])
        q = "CREATE (section:Section { Name: "+c['name']+", Code: "+c['code']+", Crn: "+c['crn']+", Teacher: "+c['teacher']+" })"
        q = 'CREATE (section:Section { Name: "%s", Code: "%s", Crn: "%s", Teacher: "%s" })' % \
            (c['name'], c['code'], c['crn'], c['teacher'])
        gg.run(q)
    """

#gg = HelloWorldExample("bolt://localhost:7687", "neo4j", "asdf")
#gg.print_greeting("hi there world")

"""
make_teacher_rel(gg, "Peter Howell","CSIS 42")
make_teacher_rel(gg, "Alex Stoykov","CSIS 42")
make_teacher_rel(gg, "Sabrina Lawrence","CSIS 85")
make_teacher_rel(gg, "Peter Howell","CSIS 85")
"""
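
# A hedged sketch (mine, not the original author's) of reading back what
# testgraph() creates, using py2neo's Cursor.data(); the Cypher pattern and
# property names match the nodes built above.
def _graph_query_demo(gg):
    rows = gg.run("MATCH (t:Teacher)-[:TEACHES]->(s:Section) "
                  "RETURN t.name AS teacher, s.crn AS crn LIMIT 5").data()
    for row in rows:
        print(row['teacher'], row['crn'])
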
screen = 0


def Memoize(func):
    """
    Memoize decorator
    """
    cache = {}

    @wraps(func)
    def wrapper(*args):
        if args not in cache:
            cache[args] = func(*args)
        return cache[args]
    return wrapper
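
# A minimal usage sketch for the Memoize decorator above. slow_square is a
# made-up example function, not part of the pipeline: the second call returns
# the cached value instead of recomputing.
def _memoize_demo():
    @Memoize
    def slow_square(n):
        print("computing", n)
        return n * n

    slow_square(4)   # prints "computing 4" and returns 16
    slow_square(4)   # returns 16 from the cache, prints nothing
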
class MyRepl:
    description = {
        "switch ": "Switch stream. You can use either 'switch public' or 'switch mine'",
        "home ":   "Show your timeline. 'home 7' will show 7 tweets.",
        "harry ":  "a guy's name.",
        "homo ":   "means the same.",
        "view ":   "'view @mdo' will show @mdo's home.",
        "h ":      "Show help.",
        "t ":      "'t opps' will tweet 'opps' immediately.",
        "s ":      "'s #AKB48' will search for '#AKB48' and return 5 newest tweets."
    }

    def startup(self, outfile):
        global screen  # TODO: make it an attribute of self
        self.g = {}
        self.buf = {}
        screen = None
        self.enter_ary = [curses.KEY_ENTER, 10]
        self.delete_ary = [curses.KEY_BACKSPACE, curses.KEY_DC, 8, 127, 263]
        self.tab_ary = [9]
        self.up_ary = [curses.KEY_UP]
        self.down_ary = [curses.KEY_DOWN]

        # Init curses screen
        screen = curses.initscr()
        screen.keypad(1)
        curses.noecho()
        try:
            curses.start_color()
            curses.use_default_colors()
            for i in range(0, curses.COLORS):
                curses.init_pair(i + 1, i, -1)
        except curses.error:
            pass
        curses.cbreak()
        self.g['height'], self.g['width'] = screen.getmaxyx()
        #print("Width: %i" % self.g['width'])

        # Init color functions (most pairs collapsed to 7 or 3 during testing);
        # these must call the bound method, not a bare curses_print_word
        s = self
        self.white = lambda x: s.curses_print_word(x, 7)    # 0
        self.grey = lambda x: s.curses_print_word(x, 3)     # 1
        self.red = lambda x: s.curses_print_word(x, 7)      # 2
        self.green = lambda x: s.curses_print_word(x, 3)    # 3
        self.yellow = lambda x: s.curses_print_word(x, 7)   # 4
        self.blue = lambda x: s.curses_print_word(x, 3)     # 5
        self.magenta = lambda x: s.curses_print_word(x, 7)  # 6
        self.cyan = lambda x: s.curses_print_word(x, 7)     # 7
        self.colors_shuffle = [s.grey, s.red, s.green, s.yellow, s.blue, s.magenta, s.cyan]
        self.cyc = itertools.cycle(s.colors_shuffle[1:])
        self.index_cyc = itertools.cycle(range(1, 8))
        self.setup_command(outfile)

    def set_my_dict(self, d):
        self.description = d

    @Memoize
    def cycle_color(self, s):
        """
        Cycle the colors_shuffle
        """
        return next(self.cyc)

    def ascii_art(self, text):
        """
        Draw the ASCII art
        """
        fi = figlet_format(text, font='doom')
        for i in fi.split('\n'):
            self.curses_print_line(i, next(self.index_cyc))

    def close_window(self):
        """
        Close screen
        """
        global screen
        screen.keypad(0)
        curses.nocbreak()
        curses.echo()
        curses.endwin()

    def suggest(self, word):
        """
        Find suggestions: any command with a word starting with the typed prefix
        """
        rel = []
        if not word: return rel
        word = word.lower()

        for candidate in self.description:
            ca = candidate.lower()
            #if ca.startswith(word): rel.append(candidate)
            for eachword in ca.split(" "):
                if eachword.startswith(word):
                    rel.append(candidate)
        return rel

    def curses_print_word(self, word, color_pair_code):
        """
        Print a word
        """
        global screen
        word = word.encode('utf8')
        screen.addstr(word, curses.color_pair(color_pair_code))

    def curses_print_line(self, line, color_pair_code):
        """
        Print a line, scrolling down if needed
        """
        global screen
        line = line.encode('utf8')
        y, x = screen.getyx()
        if y - self.g['height'] == -3:
            self.scroll_down(2, y, x)
            screen.addstr(y, 0, line, curses.color_pair(color_pair_code))
            self.buf[y] = line, color_pair_code
        elif y - self.g['height'] == -2:
            self.scroll_down(3, y, x)
            screen.addstr(y - 1, 0, line, curses.color_pair(color_pair_code))
            self.buf[y - 1] = line, color_pair_code
        else:
            screen.addstr(y + 1, 0, line, curses.color_pair(color_pair_code))
            self.buf[y + 1] = line, color_pair_code

    def redraw(self, start_y, end_y, fallback_y, fallback_x):
        """
        Redraw lines from buf
        """
        global screen
        for cursor in range(start_y, end_y):
            screen.move(cursor, 0)
            screen.clrtoeol()
            try:
                line, color_pair_code = self.buf[cursor]
                screen.addstr(cursor, 0, line, curses.color_pair(color_pair_code))
            except:
                pass  # nothing buffered for this row
        screen.move(fallback_y, fallback_x)

    def scroll_down(self, noredraw, fallback_y, fallback_x):
        """
        Scroll down 1 line
        """
        global screen
        # Recreate buf
        # noredraw = n means that the screen will scroll down n-1 lines
        trip_list = heapq.nlargest(noredraw - 1, self.buf)  # was bare `buf`: a NameError
        for i in self.buf:
            if i not in trip_list:
                self.buf[i] = self.buf[i + noredraw - 1]
        for j in trip_list:
            self.buf.pop(j)  # was bare `buf.pop(j)`

        # Clear and redraw
        screen.clear()
        self.redraw(1, self.g['height'] - noredraw, fallback_y, fallback_x)  # was bare `g`

    def clear_upside(self, n, y, x):
        """
        Clear n lines above the cursor
        """
        global screen
        for i in range(1, n + 1):
            screen.move(y - i, 0)
            screen.clrtoeol()
        screen.refresh()
        screen.move(y, x)

    def display_suggest(self, y, x, word):
        """
        Display the suggestion box
        """
        global screen
        g = self.g
        side = 2

        # Check whether we need to print above the cursor
        upside = y + 6 > int(g['height'])

        # Redraw if the suggestion is not the same as the previous display
        sug = self.suggest(word)
        if sug != self.g['prev']:
            # 0 lines means there are no suggestions (height = 0)
            # 3 lines means there are many suggestions (height = 3)
            # 5 lines means there is only one suggestion (height = 5)
            # Clear the section above
            if upside:
                # Clearing above is a bit difficult. Separated into cases:
                # now: 3 lines / previous: 0 lines
                if len(sug) > 1 and not self.g['prev']:
                    self.clear_upside(3, y, x)
                # now: 0 lines / previous: 3 lines
                elif not sug and len(g['prev']) > 1:
                    self.redraw(y - 3, y, y, x)
                # now: 3 lines / previous: 5 lines
                elif len(sug) > 1 == len(g['prev']):
                    self.redraw(y - 5, y - 3, y, x)
                    self.clear_upside(3, y, x)
                # now: 5 lines / previous: 3 lines
                elif len(sug) == 1 < len(g['prev']):
                    self.clear_upside(3, y, x)
                # now: 0 lines / previous: 5 lines
                elif not sug and len(g['prev']) == 1:
                    self.redraw(y - 5, y, y, x)
                # now: 3 lines / previous: 3 lines
                elif len(sug) == len(g['prev']) > 1:
                    self.clear_upside(3, y, x)
                # now: 5 lines / previous: 5 lines
                elif len(sug) == len(g['prev']) == 1:
                    self.clear_upside(5, y, x)
                screen.refresh()
            else:
                # Clear below
                screen.clrtobot()
                screen.refresh()
        self.g['prev'] = sug

        if sug:
            # More than 1 suggestion
            if len(sug) > 1:
                if len(sug) > 5: sug = sug[:5]

                #needed_length = sum([len(i)+side for i in sug]) + side
                needed_length = max(self.g['width'] - 5, sum([len(i) + side for i in sug]) + side)
                # debug leftovers, disabled: plain print scribbles over the curses screen
                #print(self.g['width'])
                #print(word)
                #print(sug)
                #print(needed_length)
                if upside:
                    win = curses.newwin(3, needed_length, y - 3, 0)
                    win.erase()
                    win.box()
                    win.refresh()
                    cur_width = side
                    for i in range(len(sug)):
                        if cur_width + len(sug[i]) > self.g['width']: break
                        screen.addstr(y - 2, cur_width, sug[i], curses.color_pair(4))
                        cur_width += len(sug[i]) + side
                        if cur_width > self.g['width']:
                            break
                else:
                    win = curses.newwin(3, needed_length, y + 1, 0)
                    win.erase()
                    win.box()
                    win.refresh()
                    cur_width = side
                    for i in range(len(sug)):
                        screen.addstr(y + 2, cur_width, sug[i], curses.color_pair(4))
                        cur_width += len(sug[i]) + side
                        if cur_width > self.g['width']:
                            break
            # Only 1 suggestion
            else:
                can = sug[0]
                if upside:
                    win = curses.newwin(5, len(self.description[can]) + 2 * side, y - 5, 0)
                    win.box()
                    win.refresh()
                    screen.addstr(y - 4, side, can, curses.color_pair(4))
                    screen.addstr(y - 2, side, self.description[can], curses.color_pair(3))
                else:
                    win = curses.newwin(5, len(self.description[can]) + 2 * side, y + 1, 0)
                    win.box()
                    win.refresh()
                    screen.addstr(y + 2, side, can, curses.color_pair(4))
                    screen.addstr(y + 4, side, self.description[can], curses.color_pair(3))

    def inputloop(self):
        """
        Main input loop
        """
        global screen
        word = ''
        screen.addstr("\n" + self.g['prefix'], curses.color_pair(7))

        while True:
            # Current position
            y, x = screen.getyx()
            # Get char
            event = screen.getch()
            try:
                char = chr(event)
            except:
                char = ''

            # Test curses_print_line
            if char == '?':
                self.buf[y] = self.g['prefix'] + '?', 0
                self.ascii_art('dtvd88')

            # TAB to complete
            elif event in self.tab_ary:
                # First tab
                try:
                    if not self.g['tab_cycle']:
                        self.g['tab_cycle'] = itertools.cycle(self.suggest(word))

                    suggestion = next(self.g['tab_cycle'])
                    # Clear current line
                    screen.move(y, len(self.g['prefix']))
                    screen.clrtoeol()
                    # Print out suggestion
                    word = suggestion
                    screen.addstr(y, len(self.g['prefix']), word)
                    self.display_suggest(y, x, word)
                    screen.move(y, len(word) + len(self.g['prefix']))
                except:
                    pass  # e.g. no suggestions to cycle through

            # UP key
            elif event in self.up_ary:
                if self.g['hist']:
                    # Clear current line
                    screen.move(y, len(self.g['prefix']))
                    screen.clrtoeol()
                    # Print out previous history entry
                    if self.g['hist_index'] > 0 - len(self.g['hist']):
                        self.g['hist_index'] -= 1
                    word = self.g['hist'][self.g['hist_index']]
                    screen.addstr(y, len(self.g['prefix']), word)
                    self.display_suggest(y, x, word)
                    screen.move(y, len(word) + len(self.g['prefix']))

            # DOWN key
            elif event in self.down_ary:
                if self.g['hist']:
                    # Clear current line
                    screen.move(y, len(self.g['prefix']))
                    screen.clrtoeol()
                    # Print out the next history entry
                    if not self.g['hist_index']:
                        self.g['hist_index'] = -1
                    if self.g['hist_index'] < -1:
                        self.g['hist_index'] += 1
                    word = self.g['hist'][self.g['hist_index']]
                    screen.addstr(y, len(self.g['prefix']), word)
                    self.display_suggest(y, x, word)
                    screen.move(y, len(word) + len(self.g['prefix']))

            # Enter key #### I should get the command out of there?
            # #### Can I register a callback function?
            elif event in self.enter_ary:
                self.g['tab_cycle'] = None
                self.g['hist_index'] = 0
                self.g['hist'].append(word)
                if word == 'q':
                    self.cleanup_command()
                    break
                self.display_suggest(y, x, '')
                screen.clrtobot()
                self.handle_command(word)

                self.buf[y] = self.g['prefix'] + word, 0
                # Touching the screen's end?
                if y - self.g['height'] > -3:
                    self.scroll_down(2, y, x)
                    screen.addstr(y, 0, self.g['prefix'], curses.color_pair(7))  # show a new prompt
                else:
                    screen.addstr(y + 1, 0, self.g['prefix'], curses.color_pair(7))
                word = ''

            # Delete / Backspace
            elif event in self.delete_ary:
                self.g['tab_cycle'] = None
                # At line start
                if x < len(self.g['prefix']) + 1:
                    screen.move(y, x)
                    word = ''
                # Middle of line
                else:
                    word = word[:-1]
                    screen.move(y, x - 1)
                    screen.clrtoeol()
                    self.display_suggest(y, x, word)
                    screen.move(y, x - 1)

            # Other keys
            else:
                self.g['tab_cycle'] = None
                # Explicitly print char
                try:
                    screen.addstr(char)
                    word += char
                    self.display_suggest(y, x, word)
                    screen.move(y, x + 1)
                except ValueError as e:  # got errors here when I adjusted the volume....
                    pass

        # Reset
        self.close_window()

    def setup_command(self, outfile):
        self.data = open(outfile, 'a')

        self.g['prev'] = None
        self.g['tab_cycle'] = None
        self.g['prefix'] = '[gav]: '
        self.g['hist_index'] = 0
        # Load history from previous session
        try:
            o = open('completer.hist')
            self.g['hist'] = [i.strip() for i in o.readlines()]
        except:
            self.g['hist'] = []

    def cleanup_command(self):
        o = open('completer.hist', 'a')
        o.write("\n".join(self.g['hist']))
        o.close()
        self.data.close()

    def handle_command(self, cmd):
        r1 = re.search(r'^n\s(.*)$', cmd)
        if r1:
            # 'n <mode>' starts a new data collection mode
            mode = r1.group(1)
            self.g['prefix'] = "[" + mode + "]"
            self.data.write("\n\n# %s\n" % mode)
        else:
            #winsound.Beep(440,300)
            self.data.write(cmd + "\n")
        self.data.flush()


def repl_staff():
    tch = json.loads(open('cache/teacherdata/teachers.json', 'r').read())
    newdict = {}
    for T in tch:
        newdict[T['name']] = 'teacher with id ' + T['login_id']
    c = MyRepl()

    c.set_my_dict(newdict)
    c.startup('cache/people_logs.txt')
    c.inputloop()

def repl_degs():
    tch = csv.reader(open('cache/attainment_masterlist.csv', 'r'), delimiter=",")

    newdict = {}
    num = 0
    for row in tch:
        if num == 0:
            pass  # skip the header row
        else:
            d = ' '
            if row[0]: d = row[0]
            newdict[row[4]] = d
        num += 1

    #print(newdict)
    #input('ready')
    c = MyRepl()

    c.set_my_dict(newdict)

    #c.startup('cache/g_path_cluster2020_.txt')
    # c.inputloop()


def repl():
    repl_degs()


### courses.py

##########
########## CALCULATING SEMESTER STUFF
##########

def summarize_proportion_online_classes(u):
    # u is a "group" from the groupby fxn
    #print u
    if NUM_ONLY:
        if ((1.0 * u.sum()) / u.size) > 0.85: return '2'
        if ((1.0 * u.sum()) / u.size) < 0.15: return '0'
        return '1'
    else:
        if ((1.0 * u.sum()) / u.size) > 0.85: return 'online-only'
        if ((1.0 * u.sum()) / u.size) < 0.15: return 'f2f-only'
        return 'mixed'


def summarize_num_term_classes(u):
    # u is a "group" from the groupby fxn
    # term is sp18 now
    #print u
    return u.size
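
# A toy illustration (made-up data, not part of the pipeline) of how the
# summarizers above plug into pandas groupby/aggregate; the real call sites
# are in user_role_and_online() further down.
def _summarize_demo():
    df = pd.DataFrame({'user_id': [1, 1, 2, 2, 2],
                       'is_online': [1, 1, 1, 0, 0]})
    # with NUM_ONLY falsy: user 1 -> 'online-only' (2/2), user 2 -> 'mixed' (1/3)
    return df.groupby('user_id')['is_online'].aggregate(summarize_proportion_online_classes)
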
# Prompt for course id, return list of user dicts. TODO this duplicates courses.py ??
def getUsersInCourse(id=0):  # returns list
    if not id:
        id = str(input("The Course ID? "))
    id = str(id)
    return fetch('/api/v1/courses/%s/users' % id, 0)

#### curriculum.py

def recur_look_for_leafs(item, indent=0, show=1):
    global leafcount, displaynames
    ii = indent * " "
    is_leaf = am_i_a_leaf(item)
    if isinstance(item, dict):
        status = ""
        if show:
            status = "Dict"
        if is_leaf:
            leafcount += 1
            status = "Leaf Dict"
        if status:
            print("\n%s%s" % (ii, status))
        indent += 1
        ii = indent * " "
        for K, V in list(item.items()):
            if show or is_leaf:
                print("%s%s:" % (ii, K), end="")
            if K == 'displayName': displaynames.append(V)
            recur_look_for_leafs(V, indent + 1, show or is_leaf)

    elif isinstance(item, list):
        status = ""
        if show: status = "List (" + str(len(item)) + ")"
        if is_leaf: status = "Leaf List (" + str(len(item)) + ")"
        if status:
            print("\n%s%s" % (ii, status))
        indent += 1
        ii = indent * " "
        for V in item:
            recur_look_for_leafs(V, indent + 1, show or is_leaf)

    elif isinstance(item, str):
        if show: print("%s%s" % (' ', item))
    # bool must be tested before int: isinstance(True, int) is True
    elif isinstance(item, bool):
        if show: print("%s%s" % (' ', str(item)))
    elif isinstance(item, int):
        if show: print("%s%i" % (' ', item))
    elif isinstance(item, float):
        if show: print("%s%f" % (' ', item))

def am_i_a_leaf(item):
    # a leaf is any item with no nested dict or list inside it
    if isinstance(item, dict):
        for K, V in list(item.items()):
            if isinstance(V, (dict, list)):
                return False
    elif isinstance(item, list):
        for V in item:
            if isinstance(V, (dict, list)):
                return False
    return True
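
# Tiny sanity check (made-up data) for the leaf test above: a flat container
# is a leaf, a container holding another container is not.
def _leaf_demo():
    assert am_i_a_leaf({'a': 1, 'b': 'x'}) is True
    assert am_i_a_leaf({'a': {'b': 1}}) is False
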
def sampleclass():
    theclass = json.loads(codecs.open('cache/courses/samplecourse.json', 'r', 'utf-8').read())
    #print(json.dumps(theclass,indent=2))
    recur_look_for_leafs(theclass)
    print(leafcount)
    print(sorted(displaynames))

def matchstyle():
    theclass = json.loads(codecs.open('cache/courses/samplecourse.json', 'r', 'utf-8').read())
    print("\n".join(recur_matcher(theclass)))

# menu entries these were wired to:
# 7: ['pattern matcher style', matchstyle],
# 8: ['pattern matcher - test on all classes', match_style_test],

##### from localcache

def user_role_and_online():
    # cross-list users, classes enrolled, and their roles
    # (note: summarize_student_teacher_role only exists in a commented-out block
    # further down in this file)
    global role_table, term_courses

    role_table = enrollment_file()
    user_table = users_file()
    user_table = user_table[user_table['name'] != "Test Student"]
    term_table = term_file()
    current = term_table[lambda d: d.course_section == '2020 Spring']  # current semester from canvas
    term_id = current['id'].values[0]
    course_table = courses_file()  # from canvas
    schedule = current_schedule()  # from banner...

    term_courses = course_table[lambda d: d.termid == term_id]  # courses this semester ... now add a crn column
    term_courses['crn'] = term_courses['code'].map(lambda x: get_crn_from_name(x))
    # add is_online flag (for courses listed in schedule as online-only)
    term_courses['is_online'] = term_courses['crn'].map(lambda x: course_is_online(x))  # kinda redundant
    ban_can = term_courses.merge(schedule, on='crn', how='left')  # join the banner schedule to the canvas courses

    role_table = role_table.where(lambda x: x.workflow == 'active')

    # this join limits to current semester if 'inner', or all semesters if 'left'
    courses_and_enrol = role_table.merge(ban_can, left_on='course_id', right_on='id', how='left')

    user_table = user_table.drop(columns="rootactid tz created vis school position gender locale public bd cc state".split(" "))
    c_e_user = courses_and_enrol.merge(user_table, left_on='user_id', right_on='id', how='left')

    prop_online = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_proportion_online_classes).rename('proportion_online'))
    num_trm_crs = pd.DataFrame(c_e_user.groupby(['user_id'])['is_online'].aggregate(summarize_num_term_classes).rename('num_term_crs'))
    stu_tch_rol = pd.DataFrame(c_e_user.groupby(['user_id'])['type'].aggregate(summarize_student_teacher_role).rename('main_role'))
    user_table = user_table.merge(prop_online, left_on='id', right_index=True)
    user_table = user_table.merge(num_trm_crs, left_on='id', right_index=True)
    user_table = user_table.merge(stu_tch_rol, left_on='id', right_index=True)

    # remove name-less entries
    user_table = user_table.where(lambda x: (x.canvasid != ''))  # math.isnan(x.canvasid))

    return user_table

#print user_table.query('proportion_online=="online-only"')
#print user_table.query('main_role=="teacher"')
#user_table.to_csv('canvas_data/users_online.csv')

"""e_qry = "CREATE TABLE IF NOT EXISTS enrollments (
|
|
id integer PRIMARY KEY,
|
|
name text NOT NULL,
|
|
begin_date text,
|
|
end_date text
|
|
);"""
|
|
|
|
"""
|
|
|
|
['CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );',
|
|
'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid", );',
|
|
'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );',
|
|
'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );',
|
|
'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid","code","name" );' ]
|
|
|
|
|
|
took 6 seconds
|
|
|
|
|
|
select * from users where name = "Peter Howell"
|
|
|
|
select * from users join requests on users.id = requests.userid where name = "Peter Howell"
|
|
20k rows in 1.014 seconds!! with index above
|
|
|
|
without: killed it after 120 seconds
|
|
|
|
select timestamp, url, useragent, httpmethod, remoteip, controller from users join requests on users.id = requests.userid where name = "Peter Howell" order by requests.timestamp
|
|
|
|
|
|
|
|
select courses.name, courses.code, terms.name, requests.url from courses
|
|
join terms on courses.termid = terms.id
|
|
join requests on courses.id = requests.courseid
|
|
where terms.name='2020 Spring ' and courses.code='ACCT20 SP20 40039'
|
|
order by courses.code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
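
# A small sketch (mine, not from the original) that applies the indexes recorded
# in the notes above to the sqlite file used by req_to_db() below; per those
# notes the requests/users join is unusable without them.
def _apply_request_indexes(sqlite_file='canvas_data/data.db'):
    conn = sqlite3.connect(sqlite_file)
    for qry in [
        'CREATE INDEX IF NOT EXISTS "idx_req_userid" ON "requests" ("id","courseid","userid");',
        'CREATE INDEX IF NOT EXISTS "idx_users_id" ON "users" ("id","canvasid");',
        'CREATE INDEX IF NOT EXISTS "idx_term_id" ON "terms" ("id","canvasid");',
        'CREATE INDEX IF NOT EXISTS "idx_enrollment" ON "enrollment" ("cid","course_id","user_id");',
        'CREATE INDEX IF NOT EXISTS "idx_courses" ON "courses" ("id","canvasid","termid","code","name");',
    ]:
        conn.execute(qry)
    conn.commit()
    conn.close()
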
def more_unused_xreferencing():
    """continue

    for line in lines:
        r = requests_line(line.decode('utf-8'), filei)
        if filei < 5:
            print(r)
        else:
            break
        filei += 1


    by_date_course = defaultdict( lambda: defaultdict(int) )
    by_date_user = defaultdict( lambda: defaultdict(int) )
    df_list = []
    df_list_crs = []
    users = defaultdict( lambda: defaultdict(int) )
    #by_user = {}
    #by_course = {}
    i = 0

    limit = 300

    #print(r)
    date = dt.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
    if r['userid'] in users:
        users[r['userid']]['freq'] += 1
        if users[r['userid']]['lastseen'] < date:
            users[r['userid']]['lastseen'] = date
    else:
        users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1}
    by_date_course[ r['day'] ][ r['courseid'] ] += 1
    by_date_user[ r['day'] ][ r['userid'] ] += 1
    #if r['userid'] in by_user: by_user[r['userid']] += 1
    #else: by_user[r['userid']] = 1
    #if r['courseid'] in by_course: by_course[r['courseid']] += 1
    #else: by_course[r['courseid']] = 1
    #mylog.write("by_user = " + str(by_user))
    df_list.append(pd.DataFrame(data=by_date_user))
    df_list_crs.append(pd.DataFrame(data=by_date_course))
    i += 1
    if i > limit: break
    #mylog.write("by_date_course = ")
    result = pd.concat(df_list, axis=1, join='outer')
    result_crs = pd.concat(df_list_crs, axis=1, join='outer')
    #print result_crs
    mylog.write(result.to_csv())

    # get users
    usersf = user_role_and_online()
    merged = pd.merge(result, usersf, left_index=True, right_on='id', how='left')
    #dropkeys = "rootactid tz created vis school position gender locale public bd cc state".split(" ")
    #merged.drop(dropkeys, inplace=True, axis=1)
    mglog = open(local_data_folder + 'userlogs.csv', 'w')
    mglog.write(merged.to_csv())

    # get courses
    courses = courses_file()
    merged2 = pd.merge(result_crs, courses, left_index=True, right_on='id', how='left')
    dropkeys = "rootactid wikiid".split(" ")
    merged2.drop(dropkeys, inplace=True, axis=1)
    mglogc = open(local_data_folder + 'courselogs.csv', 'w')
    mglogc.write(merged2.to_csv())

    # a users / freq / lastseen file
    ufl = open(local_data_folder + "user_freq.json", "w")
    today = datetime.datetime.today()
    for U in list(users.keys()):
        date = users[U]['lastseen']
        users[U]['lastseen'] = date.strftime("%Y-%m-%d")
        diff = today - date
        users[U]['daysago'] = str(diff.days)
        users[U]['hoursago'] = str(int(diff.total_seconds()/3600))
    us_frame = pd.DataFrame.from_dict(users, orient='index')
    us_with_names = pd.merge(us_frame, usersf, left_index=True, right_on='id', how='left')
    #dropkeys = "id id_x id_y globalid rootactid tz created vis school position gender locale public bd cc state".split(" ")
    #us_with_names.drop(dropkeys, inplace=True, axis=1)
    print(us_with_names)
    ufl.write( json.dumps(users, indent=4) )
    ufl.close()
    mglogd = open('canvas_data/user_freq.csv', 'w')
    mglogd.write(us_with_names.to_csv())
    """

    """ -- projects table
    CREATE TABLE IF NOT EXISTS projects (
        id integer PRIMARY KEY,
        name text NOT NULL,
        begin_date text,
        end_date text
    );
    """
    pass

def users_p_file():
    uf = users_file()
    pf = pseudonym_file()
    #print pf
    upf = uf.merge(pf, left_on='id', right_on='user_id', how='left')
    return upf

"""
|
|
def com_channel_dim():
|
|
all = os.listdir(local_data_folder)
|
|
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
|
|
all.reverse()
|
|
#print "sorted file list:"
|
|
#print all
|
|
for F in all:
|
|
if re.search('communication_channel_dim',F):
|
|
cc_file = F
|
|
break
|
|
print("most recent communication channel file is " + cc_file)
|
|
cc_users = []
|
|
for line in gzip.open(local_data_folder + cc_file,'r'):
|
|
line_dict = dict(list(zip(cc_format, line.split("\t"))))
|
|
#line_dict['globalid'] = line_dict['globalid'].rstrip()
|
|
cc_users.append(line_dict)
|
|
df = pd.DataFrame(cc_users)
|
|
return df
|
|
"""
|
|
|
|
|
|
"""grp_sum_qry = ""SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid AS course
|
|
FROM requests_sum1 AS r
|
|
JOIN courses AS c ON e.course_id=c.id
|
|
JOIN enrollment as e ON r.courseid=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
|
GROUP BY u.id,c.id,r.timeblock
|
|
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
|
|
|
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course
|
|
FROM requests_sum1 AS r
|
|
JOIN courses AS c ON e.course_id=c.id
|
|
JOIN enrollment as e ON r.courseid=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment" AND u.canvasid=810
|
|
ORDER BY u.sortablename DESC, r.timeblock"" % course_id
|
|
|
|
|
|
q = ""SELECT u.sortablename, r.timeblock, r.viewcount, u.canvasid AS user, c.canvasid AS course FROM enrollment as e JOIN courses AS c ON e.course_id=c.id
|
|
JOIN requests_sum1 AS r ON r.courseid=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s AND e."type"="StudentEnrollment"
|
|
ORDER BY u.sortablename, r.timeblock"" % course_id"""
|
|
|
|
|
|
|
|
|
|
|
|
stem_course_id = '11015'  # TODO

# NO LONGER USED - SEE COURSES
def enroll_stem_students():
    depts = "MATH BIO CHEM PHYS ASTR GEOG".split(" ")
    students = set()
    for d in depts:
        students.update(dept_classes(d))
    print(students)

    to_enroll = [x for x in students if x not in already_enrolled]

    print(to_enroll)
    print("prev line is people to enroll\nnext line is students already enrolled in stem")
    print(already_enrolled)

    for s in to_enroll:
        t = url + '/api/v1/courses/%s/enrollments' % stem_course_id
        data = {'enrollment[user_id]': s[1], 'enrollment[type]': 'StudentEnrollment',
                'enrollment[enrollment_state]': 'active'}
        print(data)
        print(t)
        if input('enter to enroll %s or q to quit: ' % s[0]) == 'q':
            break
        r3 = requests.post(t, headers=header, params=data)
        print(r3.text)

#####
##### from users.py; pretty much just use sql now

# unused?
def getAllTeachersInTerm():  # a list
    # classes taught in last 3 semesters
    # How many of them were published and used
    # hits in last week/month/year
    # most common department
    # email addr
    all_courses = {}
    teachers = {}  # keyed by goo
    # { 'name':'', 'id':'', 'email':'', 'goo':'', 'classes':[ (#name,#id,#pubd,#hitsbyteacher) ... ] }

    # This is a bit different from the 1 year schedule above, because it looks at
    # people who were active in their shells in iLearn.

    outfile = codecs.open('teacherdata/historical_shells_used.json', 'w', encoding='utf-8')
    for term in last_4_semesters_ids:  # [60,]:
        print(("Fetching term: " + str(term)))
        all_courses[term] = \
            fetch('/api/v1/accounts/1/courses?enrollment_term_id=' + str(term) + '&per_page=100')
        # Canvas spells the pagination param per_page; the original said 'perpage'
    i = 0
    j = 0
    for k, v in list(all_courses.items()):  ##### term k, list v
        for a_class in v:
            print((a_class['name']))
            published = 0
            if a_class['workflow_state'] in ['available', 'completed']:
                j += 1
                published = 1
            i += 1
            #if i > 20: break
            tch = fetch('/api/v1/courses/' + str(a_class['id']) + '/search_users?enrollment_type=teacher')
            for r in tch:  ##### TEACHER r of COURSE a_class
                name = str(r['sortable_name'])
                if not 'sis_import_id' in r:
                    print("This user wasn't available: " + name)
                    continue
                goo = str(r['sis_import_id'])
                print((r['sortable_name']))
                if not name in teachers:
                    email = getEmail(r['id'])
                    teachers[name] = {'name': r['sortable_name'], 'id': r['id'], 'email': email, 'goo': goo, 'classes': []}
                info = (a_class['name'], a_class['id'], published)
                teachers[name]['classes'].append(info)

    ## TODO: hits in courses by teachers https://gavilan.instructure.com:443/api/v1/users/2/page_views?end_time=Dec%2010%2C%202018

    for t, v in list(teachers.items()):
        teachers[t]['num_courses'] = len(v['classes'])
        teachers[t]['num_active_courses'] = sum([x[2] for x in v['classes']])
        depts = [dept_from_name(x[0]) for x in v['classes']]
        teachers[t]['dept'] = most_common_item(depts)

    #print(str(j), "/", str(i), " sections are published")
    outfile.write(json.dumps(teachers))

"""
|
|
def teacherActivityLog(uid=1): ### Next: save results in a hash and return that....
|
|
global results, users, users_by_id
|
|
#get_users() # do this if you think 'teachers/users.json' is outdated.
|
|
|
|
load_users()
|
|
|
|
#for x in users_by_id.keys():
|
|
# if x < 20:
|
|
# print x
|
|
# print users_by_id[x]
|
|
|
|
|
|
teachers = csv.reader(open('teachers/current_semester.txt','r'), delimiter="\t")
|
|
for row in teachers:
|
|
print(row[0] + " is id: " + row[1])
|
|
uid = row[1]
|
|
print("Comes up as: " + str(users_by_id[int(uid)]))
|
|
info = users_by_id[int(uid)]
|
|
goo = info['login_id']
|
|
|
|
output_file = open('logs/users/byweek/'+ goo.lower() + '.csv', 'w')
|
|
|
|
|
|
# okay, actually, the first week here is the week before school IRL
|
|
start = isoweek.Week.withdate( datetime.date(2017,8,21))
|
|
end = isoweek.Week.thisweek()
|
|
byweek = []
|
|
|
|
i = 0
|
|
while(1):
|
|
results = []
|
|
start = start + 1
|
|
if start > end: break
|
|
|
|
myStart = start.day(0).isoformat() + 'T00:00-0700'
|
|
myEnd = start.day(6).isoformat() + 'T11:59:59-0700'
|
|
t = url + "/api/v1/users/" + str(uid) + "/page_views?start_time=" + myStart + '&end_time=' + myEnd + "&perpage=500"
|
|
print(t)
|
|
while(t):
|
|
print(".", end=' ')
|
|
t = fetch(t)
|
|
print("")
|
|
thisWeek = len(results)
|
|
print("Week # " + str(i) + "\t" + str(thisWeek))
|
|
byweek.append( "Week # " + str(i) + "\t" + str(thisWeek) )
|
|
output_file.write( start.isoformat() + "," + str(thisWeek) + "\n")
|
|
i += 1
|
|
for j in byweek:
|
|
print(j)
|
|
"""
|
|
|
|
"""
|
|
def summarize_student_teacher_role(u):
|
|
# u is a "group" from the groupby fxn
|
|
# term is sp18 now
|
|
t = 0
|
|
s = 0
|
|
for a in u:
|
|
if a=='TeacherEnrollment': t += 1
|
|
else: s += 1
|
|
if NUM_ONLY:
|
|
if t > s: return 'teacher'
|
|
return 'student'
|
|
else:
|
|
if t > s: return '1'
|
|
return '0'
|
|
"""
|
|
"""
|
|
def user_roles2():
|
|
# cross list users, classes enrolled, and their roles
|
|
global role_table, term_courses
|
|
|
|
role_table = enrollment_file()
|
|
user_table = users_file()
|
|
course_table = courses_file() # from canvas
|
|
term_table = term_file()
|
|
schedule = current_schedule() # from banner
|
|
|
|
# current semester
|
|
current = term_table[lambda d: d.course_section=='2018 Spring']
|
|
term_id = current['id'].values[0]
|
|
term_courses = course_table[lambda d: d.termid==term_id] # courses this semester
|
|
|
|
# add is_online flag (for courses listed in schedule as online-only)
|
|
term_courses['is_online'] = term_courses['code'].map( lambda x: course_is_online( get_crn_from_name(x) ) )
|
|
|
|
new_df = pd.DataFrame(columns=['type','oo','num'])
|
|
|
|
m = 0
|
|
data = []
|
|
for u in user_table.iterrows():
|
|
if m % 1000 == 0: print("on row " + str(m))
|
|
m += 1
|
|
data.append(categorize_user(u))
|
|
#if m > 1500: break
|
|
new_df = pd.DataFrame(data,columns=['i','type','onlineonly','numcls']).set_index('i')
|
|
print(new_df)
|
|
|
|
user_table = user_table.merge(new_df,left_index=True,right_index=True)
|
|
user_table.to_csv('canvas_data/users_online.csv')
|
|
"""
|
|
|
|
### IS THIS IN CANVAS_DATA.py?

""" Collate the raw logs into something more compact and useful. Version 1:
|
|
- # of accesses, user/day
|
|
- # of participations, user/day
|
|
-
|
|
|
|
- where day is the number of days into the semester. Classes shorter than 16 weeks should get a multiplier
|
|
-
|
|
|
|
- 2 initial goals:
|
|
a. data for statistics / clustering / regression / learning
|
|
b. data for visualization
|
|
"""
|
|
def req_to_db(fname_list):
    # (sic: 'web_applicaiton_action' matches the existing table's column name)
    fields = ','.join("id timestamp timestamp_year timestamp_month timestamp_day user_id course_id root_account_id course_account_id quiz_id discussion_id conversation_id assignment_id url user_agent http_method remote_ip interaction_micros web_application_controller web_applicaiton_action web_application_context_type web_application_context_id real_user_id session_id user_agent_id http_status http_version".split(" "))
    sqlite_file = 'canvas_data/data.db'
    conn = sqlite3.connect(sqlite_file)
    c = conn.cursor()
    # merge all requests into db
    by_date_course = defaultdict(lambda: defaultdict(int))
    by_date_user = defaultdict(lambda: defaultdict(int))
    df_list = []
    df_list_crs = []
    users = defaultdict(lambda: defaultdict(int))
    i = 0
    limit = 300
    for fname in fname_list:
        print((fname + "\n"))
        for line in gzip.open('canvas_data/' + fname, 'r'):
            r = line.decode('utf-8').split('\t')  # gzip in 'r' mode yields bytes
            #tot = len(fields.split(','))
            #i = 0
            #for x in fields.split(','):
            #    print x + "\t" + r[i]
            #    i += 1

            qry = "insert into requests(" + fields + ") values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
            conn.execute(qry, r)

            # New method for below:
            #   read collated data from sqlite
            #   collate from more logs
            #   write back....?

            """
            date = datetime.datetime.strptime( r['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
            if r['userid'] in users:
                users[r['userid']]['freq'] += 1
                if users[r['userid']]['lastseen'] < date:
                    users[r['userid']]['lastseen'] = date
            else:
                users[r['userid']] = {"id":r['userid'], "lastseen":date, "freq":1}
            by_date_course[ r['day'] ][ r['courseid'] ] += 1
            by_date_user[ r['day'] ][ r['userid'] ] += 1
            #if r['userid'] in by_user: by_user[r['userid']] += 1
            #else: by_user[r['userid']] = 1
            #if r['courseid'] in by_course: by_course[r['courseid']] += 1
            #else: by_course[r['courseid']] = 1
            #mylog.write("by_user = " + str(by_user))
            df_list.append(pd.DataFrame(data=by_date_user))
            df_list_crs.append(pd.DataFrame(data=by_date_course))
            """
            i += 1
            if i > limit: break
    conn.commit()
    conn.close()

"""
|
|
Making columns:
|
|
table_data = [['a', 'b', 'c'], ['aaaaaaaaaa', 'b', 'c'], ['a', 'bbbbbbbbbb', 'c']]
|
|
for row in table_data:
|
|
print("{: >20} {: >20} {: >20}".format(*row))
|
|
|
|
Transpose a matrix:
|
|
rez = [[m[j][i] for j in range(len(m))] for i in range(len(m[0]))]
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
"""
|
|
ilearn_by_id = {}
|
|
ilearn_by_name = {}
|
|
for x in ilearn_list:
|
|
ilearn_by_id[x[3]] = x
|
|
ilearn_by_name[x[0]] = x
|
|
|
|
for ml in open('cache/teacher_manual_name_lookup.csv','r').readlines():
|
|
parts = ml.strip().split(',')
|
|
try:
|
|
manual_list[parts[0]] = ilearn_by_id[parts[1]]
|
|
except Exception as e:
|
|
print "Teacher missing: " + parts[0]
|
|
|
|
il_names = [ x[0] for x in ilearn_list ]
|
|
il_byname = {}
|
|
for x in ilearn_list: il_byname[x[0]] = x
|
|
sched_list_missed = [x for x in sched_list]
|
|
|
|
#
|
|
# key is long name (with middle name) from schedule, value is tuple with everything
|
|
name_lookup = manual_list
|
|
matches = []
|
|
|
|
#print ilearn_list
|
|
|
|
num_in_sched = len(sched_list)
|
|
num_in_ilearn = len(ilearn_list)
|
|
|
|
#for i in range(min(num_in_sched,num_in_ilearn)):
|
|
# print "|"+sched_list[i] + "|\t\t|" + ilearn_list[i][0] + "|"
|
|
|
|
print("Sched names: %i, iLearn names: %i" % (num_in_sched,num_in_ilearn))
|
|
|
|
for s in sched_list:
|
|
for t in il_names:
|
|
if first_last(s) == t:
|
|
#print ' MATCHED ' + s + ' to ' + t
|
|
sched_list_missed.remove(s)
|
|
try:
|
|
name_lookup[s] = ilearn_by_name[ first_last(s) ]
|
|
except Exception as e:
|
|
print "Teacher missing (2): " + s
|
|
il_names.remove(first_last(s))
|
|
matches.append(s)
|
|
|
|
|
|
print "Matched: " + str(matches)
|
|
|
|
print "\nDidn't match: " + str(len(sched_list_missed)) + " schedule names."
|
|
|
|
print "\nFinal results: "
|
|
print name_lookup
|
|
|
|
nlf = codecs.open('cache/sched_to_ilearn_names.json','w','utf-8')
|
|
nlf.write(json.dumps(name_lookup,indent=2))
|
|
# STRING DISTANCE
|
|
#sim = find_most_similar(s,i_names)
|
|
#print ' CLOSEST MATCHES to ' + s + ' are: ' + str(sim)
|
|
#mm.write(s+',\n')
|
|
"""
|
|
|
|
|
|
#ilearn_list = sorted(list(set(map(
#    lambda x: (tfi[x]['name'],tfi[x]['email'],tfi[x]['dept'],str(tfi[x]['id']),tfi[x]['goo']),
#    tfi.keys()))))
#i_names = [ x[0] for x in ilearn_list ]

#print json.dumps(i_names,indent=2)
#return

# how to filter a dict based on values
# filtered = {k: v for k, v in course_combos.items() if v['dept'] == 'LIB' or v['dept'] == 'CSIS' }

# more pandas
# gapminder['continent'].unique()

#for name,group in bycode:
#    #print name
#    print name, " ", group['type']

#onl = gg.agg( lambda x: has_online(x) )
#ttl = gg.agg( lambda x: len(x) )
#ttl = ttl.rename(columns={'type':'total_sections'})

#onl.join(gg.agg( lambda x: has_hybrid(x) ), how='outer')
#onl.join(gg.agg( lambda x: has_lecture(x) ), how='outer')

#onl['num_sections'] = 0
#onl['num_lec'] = 0
#onl['num_online'] = 0

#all = pd.merge([onl,hyb,lec])
#print onl
#total=len, f2f=lambda x: ) set(x)
#{ 'num_sections': "count",
#  'num_lec': lambda x: 5,
#  'num_online': lambda x: 5 } )
#print gg
"""
|
|
|
|
|
|
def has_online(series):
|
|
# if any items of the series have the string 'online', return 1
|
|
for i in series:
|
|
if i == 'online': return 1
|
|
return 0
|
|
def has_lecture(series):
|
|
# if any items of the series have the string 'online', return 1
|
|
for i in series:
|
|
if i == 'online': return 1
|
|
return 0
|
|
def has_hybrid(series):
|
|
# if any items of the series have the string 'online', return 1
|
|
for i in series:
|
|
if i == 'hybrid': return 1
|
|
return 0
|
|
"""
|
|
#### RIGHT HERE IS WHERE I THINK... MAYBE THIS ISN'T THE RIGHT APPROACH. I DON'T SEEM
#### TO BE ABLE TO QUERY THE FACT BASE. IS THAT TRUE? SHOULD I JUST BE USING TABLES?

#### CHANGING COURSE... USE THE RULES TO UPDATE A DATABASE/TABLE/DATAFRAME
#### OR SET OF DICTS.

# ultimately i want this to be more flexible, so i can categorize degrees as 'available evening' etc
#

# Simple data structure. In this function, a degree is
""" degree = { 'name': 'History AA',
               'blocks': [ { 'original_title':'xxx', 'rulecode':'u3',
                             'courses': [ {'code':'math1a', 'units': '3.0', 'wasonline':False },
                                          {'code':'math2a', 'units': '3.0', 'wasonline':False },
                                          {'code':'math3a', 'units': '3.0', 'wasonline':False } ] },
                           { 'original_title':'xyz', 'rulecode':'a',
                             'courses': [ {'code':'math5a', 'units': '3.0', 'wasonline':False },
                                          {'code':'math6a', 'units': '3.0', 'wasonline':False },
                                          {'code':'math7a', 'units': '3.0', 'wasonline':False } ] } ] }
"""
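
# A hedged sketch (mine, not the original author's) of walking the degree
# structure documented above; it assumes exactly the dict shape shown, with
# units stored as strings.
def degree_total_units(degree):
    total = 0.0
    for block in degree['blocks']:
        for course in block['courses']:
            total += float(course['units'])
    return total
# e.g. on the History AA example above: 6 courses x 3.0 units -> 18.0
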
# Wrapper to get 2 schedules at once
def dl_sched():
    global SEMESTER, semester_begin, filename, short_sem
    SEMESTER = 'Fall 2019'
    short_sem = 'fa19'
    semester_begin = strptime('08/26', '%m/%d')
    filename = 'fa19_sched.json'

    txt = login()
    codecs.open('output/' + filename, 'w').write(json.dumps(to_section_list(txt)))
    #stats()
    #reg_nums()

    #todo: these semesters
    SEMESTER = 'Summer 2019'
    short_sem = 'su19'
    semester_begin = strptime('06/17', '%m/%d')
    filename = 'su19_sched.json'

    txt = login()
    codecs.open('output/' + filename, 'w').write(json.dumps(to_section_list(txt)))
    #stats()
    #reg_nums()

# Send a personalized email regarding ZTC
def send_z_email(fullname, firstname, addr, courses_list):
    FULLNAME = fullname     # e.g. "Sabrina Lawrence"
    FNAME = firstname       # e.g. "Sabrina"
    to_email = addr         # e.g. "slawrence@gavilan.edu"
    courses = courses_list  # e.g. ["CSIS45", "CSIS85"]

    course_template = "<a href='%s'>%s</a> "
    url_template = "https://docs.google.com/forms/d/e/1FAIpQLSfZLQp6wHFEdqsmpZ7jz2Y8HtKLo8XTAhrE2fyvTDOEgquBDQ/viewform?usp=pp_url&entry.783353363=%s&entry.1130271051=%s"  # % (FULLNAME, COURSE1)

    bare_link = "https://forms.gle/pwZJHdWSkyvmH4L19"

    COURSELINKS = ''
    PLAINCOURSES = ''
    for C in courses:
        ut = url_template % (FULLNAME, C)
        COURSELINKS += course_template % (ut, C)
        PLAINCOURSES += C + " "

    text_version = open('cache/ztc_mail1.txt', 'r').read()
    html_version = open('cache/ztc_mail1_h.txt', 'r').read()

    # replace these: $FNAME $COURSELINKS $LINK
    email = re.sub(r'\$FNAME', FNAME, text_version)
    email = re.sub(r'\$COURSELINKS', PLAINCOURSES, email)
    email = re.sub(r'\$LINK', bare_link, email)

    email_h = re.sub(r'\$FNAME', FNAME, html_version)
    email_h = re.sub(r'\$COURSELINKS', COURSELINKS, email_h)

    print(email_h + "\n\n" + email)

    from O365 import Account

    credentials = ('phowell@gavilan.edu', 'xxx')
    client_secret = 'xxx'  # expires 10/28/2020
    tenant_id = "4ad609c3-9156-4b89-9496-0c0600aeb0bb"
    # application client id: 29859402-fa55-4646-b717-752d90c61cde

    account = Account(credentials, auth_flow_type='credentials', tenant_id=tenant_id)
    if account.authenticate():
        print('Authenticated!')

    #account = Account(credentials)
    #if account.authenticate(scopes=['message_all']):
    #    print('Authenticated!')
    m = account.new_message()
    m.to.add(addr)
    m.subject = 'Quick question about your course textbook'
    m.body = email_h  # was the literal string "email_h"
    m.send()

    """
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    msg = MIMEMultipart('alternative')
    msg['Subject'] = "Quick question about your course textbook"
    msg['From'] = "gavdisted@gmail.com"
    msg['To'] = to_email

    msg.attach(MIMEText(email, 'plain'))
    msg.attach(MIMEText(email_h, 'html'))

    #s = smtplib.SMTP('smtp.gmail.com', 587)
    #s.starttls()
    #s.login("gavdisted", "xxx")

    s = smtplib.SMTP_SSL('smtp.office365.com', 587)
    s.ehlo()
    s.starttls()
    s.login('phowell@gavilan.edu', 'xxx')

    #s.sendmail(msg['From'], msg['To'], msg.as_string())
    s.sendmail(msg['From'], msg['To'], "Testing")
    s.quit()
    """

def getInactiveTeachersInTerm(t=23):  # a list
    global results
    teachers = {}
    emails = {}
    outfile = codecs.open('canvas/inactive_teachers.txt', 'w', encoding='utf-8')
    efile = codecs.open('canvas/inactive_teachers_emails.txt', 'w', encoding='utf-8')

    #yn = raw_input('All courses? y=all n=only active ')
    #all = 0
    #if yn=='y': all = 1

    if not t:
        t = askForTerms()
    else:
        t = [t, ]
    for term in t:
        # Canvas spells the pagination param per_page; the original said 'perpage'
        r = url + '/api/v1/accounts/1/courses?enrollment_term_id=' + str(term) + '&per_page=100'
        while(r): r = fetch(r)
        all_courses = results  # json.loads(results)
        #print "All unpublished courses: "
        i = 0
        j = 0
        for k in all_courses:
            j += 1
            if k['workflow_state'] != 'available':
                i += 1
                print(str(i), "\t", k['name'], "\t", k['workflow_state'])
                results = []
                t2 = url + '/api/v1/courses/' + str(k['id']) + '/search_users?enrollment_type=teacher'

                while(t2): t2 = fetch(t2)
                #print results
                for r in results:
                    key = r['sortable_name'] + "\t" + str(r['id'])
                    #if not 'email' in r: pdb.set_trace()
                    emails[key] = str(r['sis_user_id'])
                    #print r
                    if key in teachers:
                        teachers[key].append(k['name'])
                    else:
                        teachers[key] = [k['name'], ]
                #print json.dumps(results, indent=4, sort_keys=True)
                #a = raw_input()

    print(str(i), "/", str(j), " sections are unpublished")
    for t in list(emails.keys()):
        efile.write(emails[t] + ", ")
    for t in list(teachers.keys()):
        outfile.write(t + "\t")
        for c in teachers[t]:
            outfile.write(c + ",")
        outfile.write("\n")
    #f.write(json.dumps(teachers, indent=4, sort_keys=True))
    print("Output file is in ./teachers/current_semester.txt")
    #print json.dumps(all_courses, indent=4, sort_keys=True)
    """for x in all_courses:
        qry = '/api/v1/courses/' + str(course_id) + '/search_users?enrollment_type=teacher'
        t = url + qry
        while(t): t = fetch(t)
    """

#for t,v in teachers.items():
#    outfile.write( "|".join( [ v['goo'], v['name'], v['email'], v['dept'], str(v['num_courses']), str(v['num_active_courses']) ] ) + "\n" )

#{"goo": "G00275722", "name": "Agaliotis, Paul", "num_courses": 1, "num_active_courses": 1, "id": 5092, "dept": "AMT", "classes": [["AMT120 POWERPLANT TECH FA18 10958", 5322, 1]], "email": "PAgaliotis@gavilan.edu"},

#for t in teachers.keys():
#    outfile.write(t + "\t")
#    for c in teachers[t]:
#        outfile.write(c + ",")
#    outfile.write("\n")
#f.write(json.dumps(teachers, indent=4, sort_keys=True))
#print "Output file is in ./teachers/current_semester.txt"
#print json.dumps(all_courses, indent=4, sort_keys=True)

"""for x in all_courses:
|
|
qry = '/api/v1/courses/' + str(course_id) + '/search_users?enrollment_type=teacher'
|
|
t = url + qry
|
|
while(t): t = fetch(t)
|
|
"""
def course_location(course):
    # Collect the distinct locations (column 13) across the section's meeting rows.
    # Python 2's sets.Set replaced with the builtin set; the row-by-row if-chain
    # collapsed into a loop, with the length guard applied to every row (the
    # original only checked the first).
    period = set()
    for row in course[:6]:
        if len(row) > 13:
            period.add(course_location_raw(row[13]))

    if 'TBA' in period:
        period.remove('TBA')

    period = list(period)

    if len(period) == 0:
        return ''
    if len(period) == 1:
        return period[0]
    if len(period) == 2 and 'Online' in period:
        period.remove('Online')
        return 'Hybrid at ' + period[0]
    return '/'.join(period)


def course_time(course):
    # Is it morning, mid, or evening? Collect the distinct periods (column 7)
    # across up to six meeting rows, as the original if-chain did.
    period = set()
    for row in course[:6]:
        period.add(raw_course_time(row[7]))

    if 'TBA' in period:
        period.remove('TBA')

    period = list(period)

    if len(period) == 0:
        return ''
    if len(period) == 1:
        return period[0]
    return '/'.join(period)


def course_teacher(course):
    t = set()  # builtin set; was Python 2's Set
    for c in course:
        t.add(c[11])  # teacher name column
    return " / ".join(list(t))
def reg_nums():
    sections = categorize()

    today = todays_date_filename()

    out = open(today + '.csv', 'w')
    dates = {'loc': {}, 'time': {}, 'start': {}, 'teacher': {}}
    i = 1
    for f in os.listdir('.'):
        # short_sem (e.g. 'sp23') is a module-level global defined elsewhere.
        m = re.search(r'reg_' + short_sem + r'_(\d+)\.csv', f)
        if m:
            filein = open(f, 'r').readlines()[1:]
            d = m.group(1)
            dates[d] = {}
            for L in filein:
                parts = L.split(',')  # crn,code,sec,cmp,cred,name,days,time,cap,act,rem,teacher,date,loc
                if not re.search(r'(\d+)', parts[0]): continue
                if len(parts) < 8: continue
                if not parts[8]: continue
                if float(parts[8]) == 0: continue

                # fill ratio: actual enrollment / capacity
                dates[d][parts[0] + " " + parts[1]] = float(parts[9]) / float(parts[8])

                if i == 1 and parts[0] in sections:
                    dates['loc'][parts[0] + " " + parts[1]] = course_location(sections[parts[0]])
                    dates['time'][parts[0] + " " + parts[1]] = course_time(sections[parts[0]])
                    dates['start'][parts[0] + " " + parts[1]] = course_start(sections[parts[0]])
                    dates['teacher'][parts[0] + " " + parts[1]] = course_teacher(sections[parts[0]])

            #dates[d]['act'] = parts[9]
            #dates[d]['nam'] = parts[5]
            #dates[d]['onl'] = ''
            #if len(parts)>13 and parts[13]=='ONLINE': dates[d]['onl'] = 'online'
            i += 1
    """for d in sorted(dates.keys()):
        for c in d:
            print d
            print dates[d]['crs']"""

    df = pd.DataFrame(dates)
    df.to_csv(out)

# In the schedule, is this a class or a continuation of the class above?
def categorize():
    # todo: must we open all these files?
    files = sorted(os.listdir('.'))
    files = list(filter(lambda x: re.search(r'reg(\d+)\.csv', x), files))
    files.reverse()

    f = files[0]  # most recent registration snapshot
    filein = codecs.open(f, 'r', 'utf-8').readlines()[1:]
    sections = {}
    this_section = []

    for L in filein:
        parts = L.strip().split(',')  # crn,code,sec,cmp,cred,name,days,time,cap,act,rem,teacher,date,loc
        parts = [clean_funny3(x) for x in parts]

        if not re.search(r'(\d+)', parts[0]):  # no CRN: this is a continuation row
            this_section.append(parts)
        else:  # this is a new section or the first line
            if this_section:
                sections[this_section[0][0]] = this_section
                #print("Section: " + this_section[0][0] + " is: " + str(this_section) + "\n")
            this_section = [parts, ]
    # Flush the final section; the original dropped the last CRN in the file.
    if this_section:
        sections[this_section[0][0]] = this_section
    return sections
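
# Shape note (illustrative): categorize() returns {crn: [row, continuation_row, ...]},
# keyed by the CRN in column 0 of each section's first row. reg_nums() above relies
# on this to attach the location/time/start/teacher columns to its fill ratios.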


# Deprecated. call perl.
def constructSchedule():
    term = input("Name of html file? (ex: sp18.html) ")  # was Python 2's raw_input
    os.chdir('make-web-sched')
    cmd = 'perl make.pl ' + term
    print("command: " + cmd)
    os.system(cmd)


"""
|
|
def fetch_dict(target,params={}):
|
|
# if there are more results, return the url for more fetching.
|
|
# else return false
|
|
#print target
|
|
global results_dict
|
|
r2 = requests.get(target, headers = header, params=params)
|
|
output = r2.text
|
|
if output.startswith('while('):
|
|
output = output[9:]
|
|
#print output
|
|
mycopy = results_dict.copy()
|
|
results_dict = {}
|
|
results_dict.update(json.loads(output))
|
|
results_dict.update(mycopy)
|
|
f.write(json.dumps(results_dict, indent=2))
|
|
#print "\n"
|
|
if ('link' in r2.headers):
|
|
links = r2.headers['link'].split(',')
|
|
for L in links:
|
|
ll = L.split(';')
|
|
link = ll[0].replace("<","")
|
|
link = link.replace(">","")
|
|
if re.search(r'next', ll[1]):
|
|
#print ll[1] + ":\t" + link
|
|
return link
|
|
return ""
|
|
"""
|
|
|
|
def get_schedule(term='201870', sem='fall'):
    """
    sched_data = { 'term_in':term, 'sel_subj':'dummy', 'sel_day':'dummy',
        'sel_schd':'dummy', 'sel_insm':'dummy', 'sel_camp':'dummy', 'sel_levl':'dummy', 'sel_sess':'dummy',
        'sel_instr':'dummy', 'sel_ptrm':'dummy', 'sel_attr':'dummy', 'sel_subj':'%', 'sel_crse':'', 'sel_title':'',
        'sel_schd':'%', 'sel_from_cred':'', 'sel_to_cred':'', 'sel_camp':'%', 'sel_ptrm':'%', 'sel_sess':'%',
        'sel_attr':'%', 'begin_hh':'0', 'begin_mi':'0', 'begin_ap':'a', 'end_hh':'0', 'end_mi':'0', 'end_ap':'a' }
    initial_headers = {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding':'gzip, deflate, sdch, br',
        'Accept-Language':'en-US,en;q=0.8',
        'Connection':'keep-alive',
        'Host':'ssb.gavilan.edu',
        'Upgrade-Insecure-Requests':'1',
        } #'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36' }
    headers = { 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'en-US,en;q=0.8',
        'Cache-Control':'max-age=0',
        'Connection':'keep-alive',
        'Content-Type':'application/x-www-form-urlencoded',
        'Host':'ssb.gavilan.edu',
        'Origin':'https://ssb.gavilan.edu',
        'Referer':'https://ssb.gavilan.edu/prod/bwckgens.p_proc_term_date?p_calling_proc=bwckschd.p_disp_dyn_sched&p_term='+term,
        'Upgrade-Insecure-Requests':'1',
        } #'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36' }
    initial_url = 'https://ssb.gavilan.edu/prod/bwckgens.p_proc_term_date?p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=' + term
    sesh = requests.Session()
    #r1 = sesh.get(initial_url,headers=initial_headers)
    #sesh.headers.update(headers)
    url = 'https://ssb.gavilan.edu/prod/bwckschd.p_get_crse_unsec'
    r1 = sesh.get(initial_url)
    r = sesh.post(url, data=sched_data)
    print(r.headers)
    data = r.text
    out = open('data/temp/'+term+'.html','w')
    out.write(data)
    out.close()"""
    os.system('perl parse_schedule.pl data/temp/' + term + '.html' + ' ' + sem)
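
# Example: get_schedule('201870', 'fall') just shells out to the perl parser and
# assumes data/temp/201870.html already exists; the requests-based fetch that
# used to produce it is preserved (disabled) in the docstring above.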


#####
#####
##### conf.py ?


# Conference signup rows: id, host (user), session. Renamed from `str`, which
# shadowed the builtin. Columns are whitespace-separated here, so the split
# below uses split() rather than the original split("\t").
signups = """355 985 1296
354 730 1295
353 319 1290
352 985 1289
351 813 1285
350 281 1285
349 267 1279
348 981 1252
347 994 1252
346 26 1250
345 757 1288
344 368 1288
343 1 1286
259 703 1295
256 693 1293
255 660 1292
254 1 1291
250 482 1287
246 2 1284
245 333 1283
244 27 1282
243 703 1281
242 730 1281
241 482 1280
239 211 1278
238 794 1278
237 2 1277
236 297 1276
235 831 1276
233 482 1251"""

# Print INSERT statements for pasting into a SQL client.
for L in signups.split("\n"):
    (rowid, host, session) = L.split()
    qry = "INSERT INTO conf_signups (user,session,timestamp) VALUES (%s,%s,'2022-08-08 17:20:00');" % (host, session)
    print(qry)
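
# Printing interpolated INSERT strings is fine for a one-off paste, but if these
# rows are ever executed directly, parameterized queries are safer. A sketch
# using the stdlib's sqlite3 (an assumption for demonstration; the real
# conf_signups table presumably lives elsewhere, where only the placeholder
# style would differ):
import sqlite3

def insert_signups_sketch(rows, db_path='conf.db'):
    con = sqlite3.connect(db_path)
    con.execute("CREATE TABLE IF NOT EXISTS conf_signups (user, session, timestamp)")
    con.executemany(
        "INSERT INTO conf_signups (user, session, timestamp) VALUES (?, ?, ?)",
        [(host, session, '2022-08-08 17:20:00') for (_rowid, host, session) in rows])
    con.commit()
    con.close()

# usage: insert_signups_sketch([L.split() for L in signups.split("\n")])
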
## sched.py

import codecs
import requests, re, csv, json, funcy, sys

from content import upload_page


def dates(s):
    # Normalize 'YYYY-MM-DD' to 'MM/DD'; anything else passes through unchanged.
    m = re.match(r'(\d\d\d\d)\-(\d\d)\-(\d\d)', s)
    if m:
        s = m.group(2) + "/" + m.group(3)
    return s
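# e.g. dates('2021-06-14') -> '06/14'; dates('TBA') -> 'TBA'.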

# "Course Code","Start Date","End Date",Term,Delivery,CRN,Status,"Course Name","Course Description","Units/Credit hours","Instructor Last Name","Instructor First Name",Campus/College,"Meeting Days and Times","Pass/No Pass available?","Class Capacity","Available Seats","Waitlist Capacity","Current Waitlist Length","Meeting Locations","Course Notes",ZTC
# ACCT103,2021-06-14,2021-07-23,"Summer 2021",Online,80386,Active,"General Office Accounting","This course is designed to prepare students for entry-level office accounting positions. Emphasis is on practical accounting applications. This course has the option of a letter grade or pass/no pass. ADVISORY: Eligible for Mathematics 430."," 3.00","Valenzuela Roque",Karla,"Gavilan College"," ",T," 30"," 18"," 20"," 0",,,

def parse_www_csv_sched():
    old_keys = [ "CRN","Course Code","Units/Credit hours","Course Name","Meeting Days and Times","Class Capacity","Available Seats","Waitlist Capacity","Current Waitlist Length","Instructor Last Name","Start Date","Meeting Locations","ZTC","Delivery","Campus/College","Status","Course Description","Pass/No Pass available?","Course Notes" ]

    # "Instructor First Name","End Date","Term",

    new_keys = [ "crn", "code","cred", "name", "days", "cap", "rem", "wl_cap", "wl_act", "teacher", "date", "loc", "ztc", "type", "site","status","desc","pnp","note" ]

    # "time","act","wl_rem", "partofday",

    url = "https://gavilan.edu/_files/php/current_schedule.csv"

    sched_txt = requests.get(url).text.splitlines()
    sched = {"Fall 2021":[], "Spring 2022":[], "Winter 2022":[], "Summer 2021":[]}
    shortsems = {"Fall 2021":"fa21", "Spring 2022":"sp22", "Winter 2022":"wi22", "Summer 2021":"su21","Summer 2022":"su22","Fall 2022":"fa22"}
    for row in csv.DictReader(sched_txt):
        d = dict(row)
        for (old_key, new_key) in zip(old_keys, new_keys):
            d[new_key] = d.pop(old_key).strip()
        d['teacher'] = d.pop('Instructor First Name').strip() + " " + d['teacher']
        d['date'] = dates(d['date']) + '-' + dates(d.pop('End Date').strip())
        d['term'] = shortsems[d.pop('Term')]
        if d['cred'] == ".00":
            d['cred'] = "0"
        if d['type'] == "Online":
            d["loc"] = "ONLINE"
            d["site"] = "Online"
            d["type"] = "online"
        #d.pop('Instructor First Name').strip() + " " + d['teacher']
        #d["code"] = d.pop("Course Code")
        #d["crn"] = d.pop("CRN")
        # setdefault guards against terms (e.g. Summer 2022) present in shortsems
        # but not pre-seeded in sched above; the original raised KeyError on them.
        sched.setdefault(row['Term'], []).append(d)  #print(row)

    print(json.dumps(sched, indent=2))
    for k, v in sched.items():
        print("%s: %i" % (k, len(v)))

    for v in sched["Fall 2021"]:
        print("%s\t %s\t %s\t %s" % ( v['code'], v['days'], v['type'], v['loc'] ))
        #print("%s\t %s\t %s\t %s" % ( v['Course Code'], v['Meeting Days and Times'], v['Delivery'], v['Meeting Locations'] ))

def parse_json_test_sched():
    j2 = open('cache/classes_json.json', 'r').readlines()

    for L in j2:
        o3 = json.loads(L)
        print(json.dumps(o3, indent=2))


if __name__ == "__main__":
|
|
|
|
print ('')
|
|
options = {
|
|
1: ['fetch and parse the csv on www.', parse_www_csv_sched],
|
|
2: ['parse the test json file.', parse_json_test_sched ],
|
|
}
|
|
|
|
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
|
|
resp = int(sys.argv[1])
|
|
print("\n\nPerforming: %s\n\n" % options[resp][0])
|
|
|
|
else:
|
|
print ('')
|
|
for key in options:
|
|
print(str(key) + '.\t' + options[key][0])
|
|
|
|
print('')
|
|
resp = input('Choose: ')
|
|
|
|
# Call the function in the options dict
|
|
options[ int(resp)][1]()
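
# Usage note (assuming this file runs as sched.py): `python sched.py 1` runs
# menu option 1 non-interactively; with no argument, the menu prints and prompts.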


def put_revised_pages():
    course_num = '6862'
    course_folder = '../course_temps/course_6862'
    filein = codecs.open(course_folder + '/fullcourse.v2.html', 'r', 'utf-8')
    my_titles = []
    my_urls = []
    my_bodys = []
    started = 0
    current_body = ""
    for L in filein.readlines():
        # Heading lines carry the page title, and (assumed, by analogy with
        # put_course_pages below) the page url after the closing tag. The
        # original pattern captured only one group, so ma.group(2) raised IndexError.
        ma = re.search(r'^<h1>(.*)</h1>\s*(.*)$', L)
        if ma:
            my_titles.append(ma.group(1))
            my_urls.append(ma.group(2))
            if started:
                my_bodys.append(current_body)
                current_body = ""
            started = 1
        else:
            current_body += "\n" + L
    my_bodys.append(current_body)

    i = 0
    for U in my_urls:
        # and now upload it....lol
        upload_page(course_num, U, my_bodys[i])
        i += 1

# Upload pages. Local copy has a particular format.
# Appears to not be used
def put_course_pages():
    course_num = '6862'
    filein = codecs.open('cache/pages/course_' + str(course_num) + '.html', 'r', 'utf-8')
    my_titles = []
    my_urls = []
    my_bodys = []
    started = 0
    current_body = ""
    for L in filein.readlines():
        ma = re.search(r'^###\s(.*)###\s(.*)$', L)
        if ma:
            my_titles.append(ma.group(1))
            my_urls.append(ma.group(2))
            if started:
                my_bodys.append(current_body)
                current_body = ""
            started = 1
        else:
            current_body += "\n" + L
    my_bodys.append(current_body)

    i = 0
    for U in my_urls:
        # and now upload it....lol
        upload_page(course_num, U, my_bodys[i])
        i += 1


def freshdesk():
    path = "C:\\Users\\peter\\Downloads\\freshdesk\\Solutions.xml"
    soup = bs(codecs.open(path, 'r', 'utf-8').read(), features="lxml")

    # encoding was missing on the output handle; articles may contain non-ASCII
    outpt = codecs.open('cache/faqs.txt', 'w', 'utf-8')
    out = ""
    for a in soup.find_all('solution-article'):

        print("TITLE\n" + a.find('title').get_text())
        out += a.find('title').get_text()

        """for d in a.find_all('description'):
            #print(d)
            if d:
                d = h.unescape(d.get_text())
                e = stripper(d)
                m = tomd.convert( e )
                m = mycleaner(m)
                print("\nDESCRIPTION\n"+m)"""

        # Unescape the article body, strip markup, and convert it to markdown.
        hh = a.find('desc-un-html').get_text()
        d = h.unescape(hh)
        e = stripper(d)
        m = tomd.convert(e)
        m = mycleaner(m)
        print("\nDESCRIPTION\n" + m)
        out += "\n\n" + m + "\n\n"

        print("-----------\n\n")
    outpt.write(out)


#### content.py


from pattern.web import plaintext, extension
from pattern.web import download
#from pattern import URL, MIMETYPE_IMAGE
from pattern.web import Crawler, DEPTH, FIFO, MIMETYPE_IMAGE, MIMETYPE_PDF


class GavCrawl(Crawler):
    def visit(self, link, source=None):
        print('visited:', repr(link.url), 'from:', link.referrer)
        print(' ', link.url.mimetype)
        #txt = plaintext(source, keep={'h1':[], 'h2':[], 'h3':[], 'h4':[], 'td':[], 'strong':[], 'b':[], 'a':['href'], 'img':['src'], 'ul':[], 'ol':[], 'li':[], 'dd':[], 'dt':[], 'i':[]})
        #codecs.open(save_folder + '/' + mycleaner(clean_title(link.url)) + '.txt','w','utf-8').write(tomd.convert(txt))

        # save_folder and clean_fn are module-level helpers defined elsewhere.
        codecs.open(save_folder + '/' + clean_fn(link.url) + '.txt', 'w', 'utf-8').write(
            trafilatura.extract(source, include_links=True, deduplicate=True,
                                include_images=True, include_formatting=True))

    def fail(self, link):
        print('failed:', repr(link.url))
        if re.search(r'\.pdf$', link.url):
            m = re.search(r'\/([^\/]+\.pdf)$', link.url)
            if m:
                save_file = m.group(1)
                print("saving to ", save_folder + '/' + save_file)
                pdf_response = requests.get(link.url)
                with open(save_folder + '/' + save_file, 'wb') as f:
                    f.write(pdf_response.content)
                text = extract_text(save_folder + '/' + save_file)
                #print(text)
                codecs.open(save_folder + '/' + save_file + '.txt', 'w', 'utf-8').write(text)
            else:
                print("no match for pdf url: ", link.url)

        for ext in ['jpg', 'jpeg', 'gif', 'webp']:
            if re.search(r'\.' + ext + '$', link.url):
                m = re.search(r'\/([^\/]+\.' + ext + ')$', link.url)
                if m:
                    save_file = m.group(1)
                    print("saving to ", save_folder + '/' + save_file)
                    img_response = requests.get(link.url)  # renamed from pdf_response: these are images
                    with open(save_folder + '/' + save_file, 'wb') as f:
                        f.write(img_response.content)
                else:
                    print('no match for ' + ext + ' url: ', link.url)


def crawl2():
    #p = GavCrawl(links=['http://www.gavilan.edu/'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
    #p = GavCrawl(links=['https://gavilan.edu/finaid/2022-23DirectLoanApplication1.pdf'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
    p = GavCrawl(links=['https://gavilan.curriqunet.com/catalog/iq/1826'],
                 domains=['gavilan.edu', 'gavilan.curriqunet.com', 'www.boarddocs.com'], delay=0.75)

    while not p.done:
        try:
            p.crawl(method=DEPTH, cached=False, throttle=0.76)
        except Exception as e:
            print("Exception: ", e)


def samples():
    # pattern.web usage notes. The URL/DOM/abs/PDF imports are pulled up here so
    # URL is defined before its first use (the original imported them mid-function).
    from pattern.web import URL, DOM, abs, PDF

    crawler = Crawler(links=[], domains=[], delay=20.0, sort=FIFO)

    # was 'http://www.clips.ua.ac.bemedia/...', missing the '/' before 'media'
    url = URL('http://www.clips.ua.ac.be/media/pattern_schema.gif')
    print(url.mimetype in MIMETYPE_IMAGE)


    #html = download('http://www.clips.ua.ac.be/', unicode=True)
    s = URL('http://www.clips.ua.ac.be').download()
    s = plaintext(s, keep={'h1': [], 'h2': [], 'strong': [], 'a': ['href']})


    # getting absolute urls
    url = URL('http://www.clips.ua.ac.be')
    dom = DOM(url.download())
    for link in dom('a'):
        print(abs(link.attributes.get('href', ''), base=url.redirect or url.string))

    # get pdfs
    url = URL('http://www.clips.ua.ac.be/sites/default/files/ctrs-002_0.pdf')
    pdf = PDF(url.download())
    print(pdf.string)