From 09fb62577258d23701710ee2dc71dd55abcc6951 Mon Sep 17 00:00:00 2001
From: Coding with Peter
Date: Wed, 22 Mar 2023 09:29:52 -0700
Subject: [PATCH] initial commit from canvasapp HDD
---
.gitignore | 15 +
__init__.py | 0
apphelp.py | 1619 ++++++++++++++++
checker.py | 225 +++
content.py | 860 +++++++++
courses.py | 1446 ++++++++++++++
cq_demo.py | 48 +
credentials.json | 1 +
curric2022.py | 848 ++++++++
curriculum.py | 2252 ++++++++++++++++++++++
curriculum2020.py | 661 +++++++
curriculum_patterns.py | 481 +++++
depricated.py | 1289 +++++++++++++
fa19_sched.json | 0
geckodriver.log | 32 +
gpt.py | 28 +
graphics.py | 206 ++
interactive.py | 919 +++++++++
interactivex.py | 759 ++++++++
localcache.py | 2065 ++++++++++++++++++++
main.py | 141 ++
myconsole.py | 57 +
new flex app.md | 68 +
notebook.ipynb | 1577 +++++++++++++++
outcomes.py | 1340 +++++++++++++
outcomes2022.py | 130 ++
patterns_8020.py | 27 +
patterns_topdown.py | 560 ++++++
pipelines.py | 1958 +++++++++++++++++++
queries.sql | 188 ++
requirements.2019.txt | 61 +
requirements.txt | 288 +++
sched.py | 94 +
server.py | 679 +++++++
stats.py | 223 +++
tasks.py | 1418 ++++++++++++++
temp.py | 52 +
tempget.py | 136 ++
templates.py | 444 +++++
templates/dir.html | 171 ++
templates/hello.html | 112 ++
templates/images.html | 134 ++
templates/personnel.html | 197 ++
templates/sample-simple-vue-starter.html | 194 ++
timer.py | 35 +
token.pickle | Bin 0 -> 730 bytes
users.py | 2203 +++++++++++++++++++++
util.py | 156 ++
48 files changed, 26397 insertions(+)
create mode 100644 .gitignore
create mode 100644 __init__.py
create mode 100644 apphelp.py
create mode 100644 checker.py
create mode 100644 content.py
create mode 100644 courses.py
create mode 100644 cq_demo.py
create mode 100644 credentials.json
create mode 100644 curric2022.py
create mode 100644 curriculum.py
create mode 100644 curriculum2020.py
create mode 100644 curriculum_patterns.py
create mode 100644 depricated.py
create mode 100644 fa19_sched.json
create mode 100644 geckodriver.log
create mode 100644 gpt.py
create mode 100644 graphics.py
create mode 100644 interactive.py
create mode 100644 interactivex.py
create mode 100644 localcache.py
create mode 100644 main.py
create mode 100644 myconsole.py
create mode 100644 new flex app.md
create mode 100644 notebook.ipynb
create mode 100644 outcomes.py
create mode 100644 outcomes2022.py
create mode 100644 patterns_8020.py
create mode 100644 patterns_topdown.py
create mode 100644 pipelines.py
create mode 100644 queries.sql
create mode 100644 requirements.2019.txt
create mode 100644 requirements.txt
create mode 100644 sched.py
create mode 100644 server.py
create mode 100644 stats.py
create mode 100644 tasks.py
create mode 100644 temp.py
create mode 100644 tempget.py
create mode 100644 templates.py
create mode 100644 templates/dir.html
create mode 100644 templates/hello.html
create mode 100644 templates/images.html
create mode 100644 templates/personnel.html
create mode 100644 templates/sample-simple-vue-starter.html
create mode 100644 timer.py
create mode 100644 token.pickle
create mode 100644 users.py
create mode 100644 util.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..55dc7c6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+secrets.py
+*.bak
+.ipynb_checkpoints
+104ab42f11
+__pycache__
+cache
+mergeme
+qanda
+qanda_student
+sftp
+static
+ipython_log.*
+completer.hist
+*.zip
+*.un~
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/apphelp.py b/apphelp.py
new file mode 100644
index 0000000..0d117af
--- /dev/null
+++ b/apphelp.py
@@ -0,0 +1,1619 @@
+
+
+import os,re
+
+output = ''
+todos = 0
+
+todos_d = {}
+
+# TODO: Make a second pass, and look for fxn calls inside of
+# each function. Draw a graphviz.
+#
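+# A rough sketch of that second pass (untested; assumes the `graphviz`
+# pip package; all names below are hypothetical):
+#
+#   import graphviz
+#   def call_graph(fname):
+#       dot = graphviz.Digraph()
+#       current = None
+#       for L in open(fname):
+#           m = re.search(r'^\s*def\s+(\w+)', L)
+#           if m:
+#               current = m.group(1)
+#               dot.node(current)
+#           elif current:
+#               for callee in re.findall(r'(\w+)\s*\(', L):
+#                   dot.edge(current, callee)
+#       dot.render(fname + '.calls.gv')
+#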
+
+def fxns(fname):
+ global output, todos
+ lines = open(fname).readlines()
+ prev_L = ""
+ for L in lines:
+
+ # is it a todo
+ a = re.search(r'(TODO|todo)\s*:?\s*(.*)$',L)
+ if a:
+ output += "\t\ttodo: " + a.group(2) + "\n"
+ todos += 1
+ if fname in todos_d:
+ todos_d[fname] += 1
+ else:
+ todos_d[fname] = 1
+
+ # is it a function def?
+ if re.search(r'^\s*def\s',L):
+ output += "\n"
+ if re.search('^#',prev_L): output += "\t"+prev_L
+ output += "\t"+L+""
+ prev_L = L
+
+files = os.listdir('.')
+files.sort()
+
+
+for F in files:
+ if F=='apphelp.py': continue
+ if re.search(r'\.py$',F):
+ output += "\n" + F + "\n"
+ fxns(F)
+
+prog_in = open('apphelp.py','r')
+prog_out = ''
+td = '# Total TODOs remaining: %i' % todos
+
+td2 = '# TODOs per file: \n#\n'
+for k in sorted(todos_d.keys()):
+ td2 += "#\t%i - %s \n" % (todos_d[k],k)
+
+
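+# Rewrite this very file in place: keep everything above the '## Functions'
+# marker, regenerate the TODO-count header, then append the fresh listing
+# below it as one big docstring.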
+for R in prog_in.readlines():
+ if re.search(r'^##\sF',R):
+ prog_out += "## Functions\n#\n%s\n#\n%s\n#\n" % (td,td2)
+ break
+ prog_out += R
+
+prog_out += '\n"""\n\n' + output + '\n\n"""\n\n'
+prog_in.close()
+prog_write = open('apphelp.py','w')
+prog_write.write(prog_out)
+prog_write.close()
+
+
+
+
+
+## Functions
+#
+# Total TODOs remaining: 57
+#
+# TODOs per file:
+#
+# 1 - checker.py
+# 1 - content.py
+# 6 - courses.py
+# 3 - curriculum.py
+# 5 - depricated.py
+# 6 - localcache.py
+# 2 - outcomes.py
+# 20 - pipelines.py
+# 2 - server.py
+# 2 - tasks.py
+# 1 - tempget.py
+# 8 - users.py
+
+#
+
+"""
+
+
+__init__.py
+
+checker.py
+ todo: make this sweet
+
+ def safe_html(html):
+
+ def _attr_name_whitelisted(attr_name):
+
+ def safe_css(attr, css):
+
+ def plaintext(input):
+
+ def _unescape(text):
+
+ def fixup(m):
+
+ def check_folder(fname,path):
+
+ def check_class(folder):
+
+ def check_all():
+
+content.py
+
+ def d(s):
+
+ def stripper(s):
+
+ def mycleaner(s):
+
+ def freshdesk():
+
+ # Build a master file with the entire class content
+ def accessible_check(id=""):
+ todo: include linked pages even if they aren't in module
+
+ def pan_testing():
+
+ # Given course, page url, and new content, upload the new revision of a page
+ def create_page(course_num,new_title,new_content):
+
+ def md_to_course():
+
+ # DL pages only
+ def grab_course_pages(course_num=-1):
+
+ # Appears to not be used
+ def put_course_pages():
+
+ # Also not used
+ def put_revised_pages():
+
+ # Download, clean html, and reupload page
+ def update_page():
+
+ # Given course, page url, and new content, upload the new revision of a page
+ def upload_page(course_num,pageurl,new_content):
+
+ # Use template to build html page with homegrown subtitles
+ def build_srt_embed_php(data):
+
+ def yt_title(code):
+
+ def swap_youtube_subtitles():
+
+ def test_swap():
+
+ def multiple_downloads():
+
+courses.py
+ todo:
+
+ def int_or_zero(x):
+
+ def float_or_zero(x):
+
+ # Gott 1 Bootcamp - report on who completed it.
+ def get_gott1_passers():
+
+ # Plagiarism Module - report on who completed it.
+ def get_plague_passers():
+
+ # Who, in a class, passed?
+ def get_course_passers(course, min_passing, passers_filename, still_active_filename):
+
+ # Change courses to show 2 announcements
+ def change_course_ann_homepage(id="10458"):
+
+ def scrape_bookstore():
+ todo: where does the most recent schedule come from?
+
+ # Input: xxxx_sched.json. Output: xxxx_latestarts.txt
+ def list_latestarts():
+
+ # All students enrolled in a class in the given semester. Simpler version of below. Return SET of course_ids.
+ def users_in_semester():
+ todo:
+
+ # All students in STEM (or any list of depts.. match the course_code). Return SET of canvas ids.
+ def users_in_depts_live(depts=[], termid='171'):
+
+ def course_enrollment(id=''):
+
+ def askForTerms():
+
+ # Return a list of term names and IDs. Also store in cache/courses/terms.txt
+ def getTerms(printme=1, ask=1):
+ todo: unsafe overwrite
+
+ def getCourses(): # a dict
+
+ # Relevant stuff trying to see if its even being used or not
+ def course_term_summary():
+
+ # Fetch all courses in a given term
+ def getCoursesInTerm(term=0,show=1,active=0): # a list
+
+ def getCoursesTermSearch(term=0,search='',v=0):
+
+ def courseLineSummary(c,sections={}):
+
+ def xlistLineSummary(c,sections={}):
+
+ def eslCrosslister():
+
+ def xlist(parasite='', host=''): # section id , new course id
+ todo: need to get the section id from each course:
+
+ def unenroll_student(courseid,enrolid):
+
+ def enroll_stem_students_live():
+
+ def enroll_orientation_students():
+
+ def summarize_proportion_online_classes(u):
+
+ def summarize_num_term_classes(u):
+
+ def make_ztc_list(sem='sp20'):
+
+ def course_search_by_sis():
+
+ def add_evals(section=0):
+ todo: wanted: group shell for each GP (guided pathway) as a basic student services gateway....
+
+curriculum.py
+ todo: These secrets
+
+ def another_request(url,startat):
+
+ def fetch_all_classes():
+
+ def fetch_all_programs():
+
+ def sortable_class(li):
+
+ def c_name(c):
+
+ def show_classes(createoutput=1):
+
+ def clean_d_name(d):
+
+ def show_programs():
+
+ def dd(): return defaultdict(dd)
+
+ def organize_courses():
+
+ def check_de():
+
+ def clean_programs():
+
+ def course_lil_format(s):
+
+ def header_lil_format(s):
+
+ def organize_programs():
+
+ def divide_courses_list(li,rwd,online):
+
+ def organize_programs2():
+
+ # sorting by order key of dict
+ def cmp_2(a):
+
+ def cmp_order(a,b):
+
+ # decipher the grouped up courses line
+ def split_course(st):
+
+ # Any number gets an X (checked). Blank or zero gets no check.
+ def units_to_x(u):
+
+ def p_block_rule(r,printme,doc,out=0):
+
+ def p_cert_header(type,doc,r='',out=0):
+
+ def p_block_header(r,doc,out=0):
+
+ def p_cert_course_missing(cd,doc,out=0):
+
+ def p_cert_course(cd,history,doc,out=0):
+
+ def p_end_block(out=0):
+
+ def p_end_cert(bigdoc, out=0):
+
+ def ask_for_rule(r):
+
+ def action_to_english(a):
+
+ # Return True if the courses satisfy the rule
+ def check_a_block(b, courses, verbose=False):
+
+ def read_block_english_to_code():
+
+ def read_section_online_history():
+ todo: this file depends on other fxns. which?
+
+ # This is the 3rd attempt.
+ def simple_find_online_programs():
+ todo: courses with a HYPHEN in NAME get parsed wrong.
+
+ def check_a_block_a(b,verbose=False):
+
+ def smart_find_online_programs():
+
+ def show_contained_class(c):
+
+ def show_block(c):
+
+ def show_block(c):
+
+ def is_online(c):
+
+ def is_online(c):
+
+ def is_online_inblock(c):
+
+ def is_online_inblock(c):
+
+ # of all the programs, what can be accomplished online?
+ def find_online_programs():
+
+ # take a string of all the types of classes offered, return a vector of [tot,lec,hyb,onl]
+ def string_to_types(st):
+
+ def my_default_counter():
+
+ # Of the recent schedules, what was actually offered online?
+ def summarize_online_sections():
+
+ def fibonacci(n):
+
+ def test_pampy():
+
+ def cq_parse_experiment(root=0, indent=''):
+
+ def cq_start():
+
+ def cq_pattern_backup1(root=0, indent=''):
+
+ def found(*x):
+
+ def lookForMatch(rules,item):
+
+ def cq_pattern(root=0, indent=''):
+
+ def myprinter(item, indent=''):
+
+ def cq_pattern_start():
+
+ def baby_int(j):
+
+ def find_deg_in_cluster( clusters, deg ):
+
+ def try_match_deg_programs():
+
+ def dict_generator(indict, pre=None):
+
+ def print_dict(v, prefix='',indent=''):
+
+ def walk_file():
+
+ def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
+
+ def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)
+
+ def a(t,h): return '<a href="%s">%s</a> ' % (h,t)
+
+ def server_save(key,value):
+
+ def flask_thread(q):
+
+ def home():
+
+ def s(key,val):
+
+ def hello():
+
+ def sd():
+
+ def serve():
+
+ def attempt_match8020(rules,item):
+
+ def clever_printer(item, indent=''):
+
+ def print_return(x):
+
+ def cq_8020(root=0, indent=''):
+
+ def cq_8020_start():
+
+curriculum2020.py
+
+ def to_md(s):
+
+ def print_return(x):
+
+ def cq_8020(root,indent=0):
+
+ def cq_8021(root,indent=0):
+
+ def cq_8021_start():
+
+ def cq_8022(root,indent=0):
+
+ def cq_8022_start():
+
+ def sortable_class(li):
+
+ def c_name(c):
+
+ def show_classes2020():
+
+ def show_classes2020_start():
+
+curriculum_patterns.py
+
+ def div1(a,b):
+
+ def d2(a,b):
+
+ def d3(a,b):
+
+ def pp1(a,b):
+
+ def pp2(a,b):
+
+ def pp3(a,b,c,d,e,f,g):
+
+ def pp4(a,b):
+
+ def pp5(a,b,c):
+
+ def pp6(a,b):
+
+ def pp7(a,b):
+
+ def pp8(a,b):
+
+ def jj1(a,b,c,d,e):
+
+ def jj2(a,b,c,d,e,f):
+
+ def jj3(a,b,c,d,e):
+
+ def jj4(a,b,c,d):
+
+ def jj5(a,b,c,d,e,f):
+
+ def jj6(a,b,c,d):
+
+ def jj2(a,b,c,d):
+
+ def jj2(a,b,c,d):
+
+depricated.py
+
+ # Don't know
+ def demo():
+
+ def stats():
+
+ def dict_generator(indict, pre=None):
+
+ def print_dict(v, prefix='',indent=''):
+
+ def walk_file():
+
+ def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
+
+ def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)
+
+ def a(t,h): return '<a href="%s">%s</a> ' % (h,t)
+
+ def server_save(key,value):
+
+ def flask_thread(q):
+
+ def home():
+
+ def s(key,val):
+
+ def hello():
+
+ def sd():
+
+ def serve():
+ todo: this duplicates courses.py ??
+
+ # Prompt for course id, return list of user dicts. TODO this duplicates courses.py ??
+ def getUsersInCourse(id=0): # returns list
+ todo:
+
+ # NO LONGER USED - SEE COURSES
+ def enroll_stem_students():
+
+ # unused?
+ def getAllTeachersInTerm(): # a list
+ todo: hits in courses by teachers https://gavilan.instructure.com:443/api/v1/users/2/page_views?end_time=Dec%2010%2C%202018
+
+ def teacherActivityLog(uid=1): ### Next: save results in a hash and return that....
+
+ def summarize_student_teacher_role(u):
+
+ def user_roles2():
+
+ def req_to_db(fname_list):
+
+ def has_online(series):
+
+ def has_lecture(series):
+
+ def has_hybrid(series):
+
+ # Wrapper to get 2 schedules at once
+ def dl_sched():
+ todo: these semesters
+
+ # Send a personalized email regarding ZTC
+ def send_z_email(fullname, firstname, addr, courses_list):
+
+ def getInactiveTeachersInTerm(t=23): # a list
+
+ def course_location(course):
+
+ def course_time(course):
+
+ def course_teacher(course):
+
+ def reg_nums():
+
+ # In the schedule, is this a class or a continuation of the class above?
+ def categorize():
+ todo: must we open all these files?
+
+ # Deprecated. call perl.
+ def constructSchedule():
+
+ def fetch_dict(target,params={}):
+
+ def get_schedule(term='201870', sem='fall'):
+
+interactive.py
+
+ def dict_generator(indict, pre=None):
+
+ def print_dict(v, prefix='',indent=''):
+
+ def walk_file():
+
+ def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
+
+ def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)
+
+ def a(t,h): return '<a href="%s">%s</a> ' % (h,t)
+
+ def server_save(key,value):
+
+ def flask_thread(q):
+
+ def before_request():
+
+ def save_post():
+
+ def restart():
+
+ def dispatch3(func,arg,arrg):
+
+ def dispatch2(func,arg):
+
+ def dispatch(func):
+
+ def dispatch3j(func,arg,arrg):
+
+ def dispatch2j(func,arg):
+
+ def dispatch1j(func):
+
+ def home():
+
+ def send_jslib(path):
+
+ def send_cachedata(path):
+
+ def send_js(path):
+
+ def s(key,val):
+
+ def do_sample():
+
+ def media(file_id):
+
+ def podcast():
+
+ def weblec():
+
+ def hello():
+
+ def sd():
+
+ def test_message(message):
+
+ def serve():
+
+ def make_teacher_rel(self, tchr, clss):
+
+ def __init__(self, uri, user, password):
+
+ def close(self):
+
+ def print_greeting(self, message):
+
+ def _create_and_return_greeting(tx, message):
+
+ def make_teacher_rel(g, tchr, clss):
+
+ def testgraph():
+
+ def Memoize( func):
+
+ def wrapper(*args):
+
+ def startup(self, outfile):
+
+ def set_my_dict(self,d):
+
+ def cycle_color(self, s):
+
+ def ascii_art(self, text):
+
+ def close_window(self, ):
+
+ def suggest(self, word):
+
+ def curses_print_word(self, word,color_pair_code):
+
+ def curses_print_line(self, line,color_pair_code):
+
+ def redraw(self, start_y,end_y,fallback_y,fallback_x):
+
+ def scroll_down(self, noredraw,fallback_y,fallback_x):
+
+ def clear_upside(self, n,y,x):
+
+ def display_suggest(self, y,x,word):
+
+ def inputloop(self, ):
+
+ def setup_command(self,outfile):
+
+ def cleanup_command(self):
+
+ def handle_command(self, cmd):
+
+ def repl_staff():
+
+ def repl_degs():
+
+ def repl():
+
+ def repl():
+
+interactivex.py
+
+ def dict_generator(indict, pre=None):
+
+ def print_dict(v, prefix='',indent=''):
+
+ def walk_file():
+
+ def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
+
+ def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)
+
+ def a(t,h): return '<a href="%s">%s</a> ' % (h,t)
+
+ def server_save(key,value):
+
+ def flask_thread(q):
+
+ def before_request():
+
+ def restart():
+
+ def dispatch3(func,arg,arrg):
+
+ def dispatch2(func,arg):
+
+ def dispatch(func):
+
+ def dispatch3j(func,arg,arrg):
+
+ def dispatch2j(func,arg):
+
+ def dispatch1j(func):
+
+ def home():
+
+ def send_jslib(path):
+
+ def send_cachedata(path):
+
+ def send_js(path):
+
+ def s(key,val):
+
+ def do_sample():
+
+ def hello():
+
+ def sd():
+
+ def serve():
+
+ def make_teacher_rel(self, tchr, clss):
+
+ def __init__(self, uri, user, password):
+
+ def close(self):
+
+ def print_greeting(self, message):
+
+ def _create_and_return_greeting(tx, message):
+
+ def make_teacher_rel(g, tchr, clss):
+
+ def Memoize( func):
+
+ def wrapper(*args):
+
+ def startup(self, outfile):
+
+ def set_my_dict(self,d):
+
+ def cycle_color(self, s):
+
+ def ascii_art(self, text):
+
+ def close_window(self, ):
+
+ def suggest(self, word):
+
+ def curses_print_word(self, word,color_pair_code):
+
+ def curses_print_line(self, line,color_pair_code):
+
+ def redraw(self, start_y,end_y,fallback_y,fallback_x):
+
+ def scroll_down(self, noredraw,fallback_y,fallback_x):
+
+ def clear_upside(self, n,y,x):
+
+ def display_suggest(self, y,x,word):
+
+ def inputloop(self, ):
+
+ def setup_command(self,outfile):
+
+ def cleanup_command(self):
+
+ def handle_command(self, cmd):
+
+ def repl_staff():
+
+ def repl_degs():
+
+ def repl():
+
+ def repl():
+
+ipython_log.py
+
+localcache.py
+
+ def db():
+
+ def setup_table(table='requests'):
+
+ # Help the next function to upload new users directly to conf database on gavilan.
+ def employees_refresh_flex(data):
+
+ # Everyone in iLearn DB with an xyz@gavilan.edu email address.
+ def all_gav_employees():
+
+ #
+ def teachers_courses_semester():
+
+ #
+ def teachers_by_term():
+
+ # Report for AEC
+ def aec_su20_report():
+
+ # Return the most up-to-date version of the given file. Useful for 'dimensions'.
+ def most_recent_file_of( target ):
+
+ def finder(st):
+
+ # Given a table schema, parse log file, return a list of dicts. Optionally remove some columns.
+ def parse_file_with( file, format, with_gid=0 ):
+
+ # I return a list of the read lines if the log dates in the file are within dates (top of this file), or FALSE
+ def is_requestfile_interesting(fname):
+ todo: more robust here
+ todo: - investigate pearson, developer key: 170000000000376 and their ridiculous amounts of hits.
+
+ # Return a 'timeblock'. An integer number of 15 minute blocks from my epoch. Expects a datetime object in PST timezone.
+ def timeblock_from_dt(dt_obj):
+
+ # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time
+ def dt_from_timeblock(tb):
+
+ # Twenty Four hour timeblocks
+ def timeblock_24hr_from_dt(dt_obj):
+
+ # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time
+ def dt_from_24hr_timeblock(tb):
+
+ # Four hour timeblocks
+ def timeblock_4hr_from_dt(dt_obj):
+
+ # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time
+ def dt_from_4hr_timeblock(tb):
+
+ # I make the line into a dict, erase keys with no data, make a DT field called date, make a time_block (int) field.
+ def requests_line(line,i=0):
+
+ # Bulk insert of requests logs. Too much data to be useful.
+ def requests_file(fname_list):
+ todo: select if timeblock exists
+
+ # Insert or update a request line.
+ def upsert_request(line, vals):
+
+ # Generic insert of a dict into a table. Keys of dict must match table columns.
+ def dict_to_insert(thisline,table): # a dict
+
+ # This now does tallying by timeblock.
+ def merge_requests():
+
+ def merge_comm_channel():
+
+ def merge_pseudonym():
+
+ def merge_users():
+
+ def merge_courses():
+
+ def merge_enrollment():
+
+ def merge_term():
+
+ def merge_roles():
+
+ def merge_convos():
+
+ # For returning sqlite results as dicts
+ def dict_factory(cursor, row):
+ todo: ... approaches to all this data... list requests in order descending time, unique users, and just
+
+ # Attempt to do tallying
+ def make_views_summarys():
+
+ # original without time_blocks info.
+ def make_views_summarys_v1():
+
+ # Setup my basic db stats base from scratch
+ def full_reload():
+
+ def guess_dept(t):
+
+ # Main view of all class / all user overview...
+ def dept_with_studentviews(dept="", sem=''):
+
+ def f(x):
+
+ # get student count and teacher name from local db
+ def course_quick_stats(canvasid):
+
+ # What a student has taken / teacher has taught
+ def user_enrolled_in(userid):
+
+ # All students in this semester ...
+ def users_this_semester_db(sem=''):
+
+ # Everyone whose first semester is .....
+ def users_new_this_semester(sem=''):
+
+ # All student users in STEM - from local db
+ def user_in_stem():
+
+ # Get all the classes in one dept
+ def dept_classes(dept,sem=''):
+ todo:
+
+ def depts_with_classcounts(sem=''):
+ todo:
+
+ def f(x):
+
+ def name_with_count(name,li):
+
+ def arrange_data_for_web(dept='', sem=''):
+
+ def f(x):
+
+ # Get enrollments. (Best to freshly run pipelines/get_rosters) and put them into DB
+ def build_tables(headers,name):
+
+ def load_tables(table,headers,row,verbose=0):
+
+ def semester_enrollments(verbose=0):
+
+ def qstrip(txt): return txt.strip('"')
+
+ def more_unused_xreferencing():
+
+ def user_role_and_online():
+
+ def comm_channel_file():
+
+ def pseudonym_file():
+
+ def users_p_file():
+
+ def com_channel_dim():
+
+ def abcd():
+
+ def crns_to_teachers():
+
+main.py
+
+outcomes.py
+
+ def outcome_overview(term=21):
+
+ def create_acct_lvl_outcomes(src,dept,makefolder='',folder=0):
+
+ def connect_acct_oc_to_course(course_id,oc_group_id):
+
+ def outcome_groups():
+
+ def outcome_groups_backup():
+
+ def x_ref_dept_names():
+
+ def create_course_group(short,parent):
+
+ def create_dept_group(short):
+
+ def outcomes_attached_to_courses(term=65,limitdept=''):
+ todo: Handle this: CSIS/DM85 WEB DESIGN 40823/24
+
+ def summarize_course_online_slo(outcome_list):
+
+ def fetch_outcome_details(id):
+
+ # Report on the actual evaluation data?
+ def outcome_report1():
+ todo:
+
+ # For the given course, get all outcome measurements, and display scores and stats.
+ def outcome_report2():
+
+ def fix_joined_class(str):
+
+ def split_slo_name(str):
+
+ def outcome_report3():
+
+ def read_slo_source():
+
+ def slo_source_by_dept():
+
+patterns_8020.py
+
+patterns_topdown.py
+
+ def pp0(a,b,c,d,e):
+
+ def pp1(a,b,c,d,e,f):
+
+ def pp2(a,b,c,d,e):
+
+ def pp3(a,b,c,d):
+
+ def pp4(a,b,c):
+
+ def pp5(a,b,c):
+
+ def pp6(a,b,c):
+
+ def div1(a,b):
+
+ def d2(a,b):
+
+ def d3(a,b):
+
+ def pp1(a,b):
+
+ def pp2(a,b):
+
+ def pp3(a,b,c,d,e,f,g):
+
+ def pp4(a,b):
+
+ def pp5(a,b,c):
+
+ def pp6(a,b):
+
+ def pp7(a,b):
+
+ def pp8(a,b):
+
+ def jj3(a,b,c,d,e):
+
+ def jj5(a,b,c,d,e,f):
+
+ def jj2(a,b,c,d):
+
+ def jj2(a,b,c,d):
+
+pipelines.py
+ todo: secrets
+ todo: all these constants for SSB -- line 1008
+ todo: secrets
+
+ def d(s):
+
+ # Main canvas querying fxn
+ def fetch(target,verbose=0):
+
+ # Main canvas querying fxn - stream version - don't die on big requests
+ def fetch_stream(target,verbose=0):
+
+ # paging makes problems... example: enrollment_terms
+ def fetch_collapse(target,collapse='',verbose=0):
+
+ # Teacher name format changed. Remove commas and switch first to last
+ def fix_t_name(str):
+
+ # Separate dept and code
+ def split_class_dept(c):
+
+ def split_class_code(c):
+
+ def split_class_code_letter(c):
+
+ # go from sp20 to 2020spring
+ def shortToLongSem(s):
+
+ # Go to the semesters folder and read the schedule. Return dataframe
+ def getSemesterSchedule(short='sp21'): # I used to be current_schedule
+ todo: Some semesters have a different format.... partofday type site xxx i just dL'd them again
+
+ def prep_online_courses_df():
+
+ def course_is_online(crn):
+
+ def get_crn_from_name(name):
+
+ def get_enrlmts_for_user(user,enrollments):
+
+ # Get something from Canvas Data
+ def do_request(path):
+
+ # Canvas data, download all new files
+ def sync_non_interactive():
+
+ # list files in canvas_data (online) and choose one or some to download.
+ def interactive():
+
+ def todays_date_filename(): # helper
+
+ def nowAsStr(): # possible duplicate
+
+ def row_has_data(r): # helper
+
+ def row_text(r): # helper
+
+ # Take banner's html and make a csv(?) file
+ def ssb_to_csv(src):
+
+ def clean_funny(str):
+
+ def clean_funny2(str):
+
+ def clean_funny3(str):
+
+ ### course is a list of 1-3 lists, each one being a line in the schedule's output. First one has section
+ def course_start(course):
+ todo: use this to make a early/late/short field and store semester dates w/ other constants
+
+ def time_to_partofday(t):
+ todo: account for multiple sites/rows
+
+ # Deduce a 'site' field, based on room name and known offsite locations
+ def room_to_site(room,verbose=0):
+ todo: account for multiple sites/rows
+ todo: better way to store these offsite labels
+
+ # take text lines and condense them to one dict per section
+ def to_section_list(input_text,verbose=0):
+ todo: no output files
+ todo: if extra line is different type?
+
+ # Log the history of enrollments per course during registration
+ def log_section_filling(current_sched_list):
+
+ # Same as above, but compressed, act only
+ def log_section_filling2(current_sched_list):
+
+ # Use Firefox and log in to ssb and get full schedule. Only works where selenium is installed
+ def scrape_schedule():
+ todo: my data here.... secret
+ todo:
+
+ # recreate schedule json files with most current online schedule format.
+ def recent_schedules():
+ todo: sems is a global in this file. Is that the right thing to do?
+ todo: the pipeline is disorganized. Organize it to have
+ todo: where does this belong in the pipeline? compare with recent_schedules()
+
+ # Take the generically named rosters uploads files and move them to a semester folder and give them a date.
+ def move_to_folder(sem,year,folder):
+
+ # This relates to enrollment files, not schedule.
+ def convert_roster_files(semester="",year="",folder=""):
+
+ # From instructure sftp site
+ def fetch_current_rosters():
+ todo: secret
+
+ def fetch_current_rosters_auto():
+
+ # read schedule file with an eye toward watching what's filling up
+ def schedule_filling():
+ todo: hardcoded
+
+ # Upload a json file to www
+ def put_file(remotepath,localpath, localfile,prompt=1):
+ todo: remove this secret
+ todo: these paths
+
+ def sec(t): return "<h2>"+t+"</h2>\n"
+
+ def para(t): return "<p>"+t+"</p>\n"
+
+ def ul(t): return "<ul>"+t+"</ul>\n"
+
+ def li(t): return "<li>"+t+"</li>\n"
+
+ def question(t,bracket=1):
+
+ def answer(t):
+
+ def read_paragraph_element(element,type="NORMAL_TEXT"):
+
+ def get_doc(docid, bracket=1, verbose=0):
+ todo: x link, x bold, list, image.
+
+ def read_paragraph_element_2(element,type="NORMAL_TEXT"):
+
+ # t is a string that begins with "Icons: " ... and contains comma(space) separated list
+ def handle_icons(t):
+
+ # t is a string that begins with "Tags: " ... and contains comma(space) separated list
+ def handle_tags(t):
+
+ def handle_question(t,bracket=1):
+
+ def handle_answer(t):
+
+ def handle_sec(t): return ('section',t)
+
+ def handle_para(t): return ('paragraph',t)
+
+ def handle_ul(t): return ('unorderdedlist',t)
+
+ def handle_li(t): return ('listitem',t)
+
+ def fetch_doc_image(k,value):
+
+ def get_doc_generic(docid, bracket=1, verbose=0):
+
+ def scrape_schedule_py():
+
+server.py
+
+ def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
+
+ def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)
+
+ def a(t,h): return '<a href="%s">%s</a> ' % (h,t)
+
+ def homepage():
+
+ def orgline(L):
+ todo: \s\[\#A\](.*)$', L)
+
+ def editor(src):
+
+ def in_form(txt,path):
+
+ def mytime(fname):
+
+ def index():
+
+ def writing(fname):
+
+ def dashboard():
+
+ def dash():
+
+ def mycalendar():
+
+ def most_recent_file_of( target, folder ):
+
+ def finder(st):
+
+ def news():
+
+ def randPic():
+
+ def sample():
+
+ def sample2(a=""):
+
+ # Filter a stream of loglines for those that match a course's url / id
+ def has_course(stream,courseid):
+
+ def js(s):
+
+ def sem_from_array_crn(crn):
+
+ def user_courses(uid):
+
+ def user_course_history_summary(usr_id):
+
+ def roster(crn):
+
+ def user_course_hits(usr,courseid):
+
+ def profiles(id=1,b=2,c=3):
+
+ # Departments, classes in each, and students (with hits) in each of those.
+ def enrollment(a):
+
+ # All the classes in this dept, w/ all the students in each, with count of their views.
+ def dept(d=''):
+
+ def user(canvas_id=None):
+
+ def lectures():
+
+ def web_lectures():
+ todo: update: dept, title, any of the other fields.
+
+ # update a value: dept id of a personnel id
+ def update_pers_title(pid, tid):
+
+ # update a value: dept id of a personnel id
+ def update_pers_dept(pid, did):
+
+ def user_edit(canvas_id='2'):
+
+ def staff_dir(search=''):
+
+ def server_save(key,value):
+
+ def server_dispatch_json(function_name,arg='', arg2=''):
+
+ def server_dispatch(function_name,arg='', arg2=''):
+
+stats.py
+
+ def grades_rundown():
+
+ def class_logs():
+
+ def user_logs():
+
+ def recent_logins():
+
+ def userHitsThisSemester(uid=2):
+
+ def getCurrentActivity(): # a dict
+
+ def externaltool(): # a list
+
+tasks.py
+
+ def survey_answer(q=0):
+
+ def survey_organize():
+
+ def build_quiz(filename=""):
+
+ # Send an email
+ def send_email(fullname, firstname, addr, subj, content):
+
+ def convert_to_pdf(name1, name2):
+
+ # Build (docx/pdf) certificates for gott graduates
+ def certificates_gott_build():
+
+ # Email experiment
+ def mail_test():
+
+ # Change LTI Settings. Experimental
+ def modify_x_tool():
+
+ # Upload with sftp to www website folder: student/online/srt/classfoldername
+ def put_file(classfoldername):
+ todo: ',cnopts=cnopts) as sftp:
+
+ # Switch everyone in a class to a teacher
+ def switch_enrol():
+
+ # Change dates & term of a class to unrestrict enrollment
+ def unrestrict_course():
+
+ # Bulk enroll users into a course
+ def enroll_accred():
+
+ # Calculate attendance stats based on enrollment/participation at 20% of term progressed, then 60% of term progressed.
+ def twenty_sixty_stats(li):
+
+ # Older positive attendance hours calculation.
+ def hours_calc():
+
+ def course_2060_dates(crn=""):
+ todo:
+
+ def course_update_all_users_locallogs(course_id=''):
+
+ def hours_calc_pulldata(course_id=''):
+
+ def xlist_cwe():
+
+ def pos_atten():
+
+temp.py
+
+tempget.py
+
+ # Use Firefox and log in to ssb and get full schedule
+ def login():
+ todo: my data here.... secret
+
+ def filename_friendly(str):
+
+ def otter():
+
+templates.py
+
+ def item_to_masonry(item):
+
+ def try_untemplate():
+
+ def php_remove(m):
+
+ def php_add(m):
+
+ def do_template(temp,source,side):
+
+ def remove_filetype(f):
+
+ def make():
+
+ def txt_2_table():
+
+ def studenttech_faq():
+
+ # https://docs.google.com/document/d/1tI_b-q75Lzu25HcA0GCx9bGfUt9ccM8m2YrrioDFZcA/edit?usp=sharing
+ def de_faq():
+
+ def degwork_faq():
+
+ def vrc_faq():
+
+ def counseling_faq():
+
+ def finaid_faq():
+
+ def coun_loc():
+
+ def tutor_faq():
+
+ def test_repl():
+
+timer.py
+
+ def func(a, b):
+
+users.py
+ todo: these constants
+
+ # All users to a cache file cache/allusers.json
+ def fetchAllUsers():
+
+ # Fetch teacher users objects from local cache
+ def teacherRolesCache(): # I used to be load_users
+
+ # Canvas: Fetch all people with gavilan.edu email address
+ def teacherRolesUpdateCache(): # I used to be get_users
+
+ # Fetch preferred email address for a given user id. ( Canvas )
+ def getEmail(user_id):
+
+ # All teachers in a particular course
+ def getAllTeachers(course_id=59): # a list
+
+ #
+ def classType(t):
+ todo: fix bug in schedule parser so non-online classes have a type field
+
+ def my_blank_string(): return "no data"
+
+ def my_blank_dict(): return {'name':'NoName','email':'noemail@gavilan.edu'}
+
+ def my_empty_dict(): return defaultdict(my_blank_string)
+
+ def get_email_from_rec(name,name_to_record):
+
+ # Pull the staff directory on the webpage. Convert to pandas dataframe
+ def staff_dir(get_fresh=False):
+ todo: lol get fresh again...
+
+ def schedForTeacherOverview(long,short):
+
+ # Return a dataframe of the last 4 semester schedules put together
+ def oneYearSchedule():
+
+ def num_sections_last_year(line):
+
+ def sec_type_stats(line):
+
+ def prct_online(line):
+
+ def prct_lecture(line):
+
+ def prct_hybrid(line):
+
+ # Given the names of teachers in last year's schedules, fill in email, etc. from ilearn files
+ def teacher_basic_info(sched, from_ilearn, names):
+
+ def find_that_name(x):
+
+ # Outputs: cache/teacher_by_semester.csv,
+ def teacherModalityHistory(sched=[],names=[]):
+
+ def teacherCourseHistory(a,names):
+ todo: sort by dept also
+
+ # Outputs: cache/course_teacher_combos.csv,
+ def teacherSharedCourses(a=[]):
+
+ # How many courses in each department were taught in the last year?
+ def departmentCountCourses(a=[]):
+
+ def clean_nonprint(s):
+
+ def read_cmte(names):
+
+ def read_training_records():
+
+ # open a file and mark the people with their ids given. Return a dataframe
+ def read_bootcamp1(filename):
+
+ # open a file and mark the people with their ids given. Return a dataframe
+ def read_bootcamp2(filename):
+
+ def not_blank_or_pound(L):
+
+ def temp1(x):
+
+ def add_realnames(df,names): # the surveys. raw name is in 2nd column
+
+ def compareToughNames(a,b):
+
+ def compareNames(a,b,verbose=0):
+
+ def find_ilearn_record(ilearn_records,manual_records, othername,verbose=0):
+
+ def manualNamesAndDept():
+
+ def manualNames():
+
+ # given a list of class codes, return the most common (academic) department
+ def guessDept(d_list):
+
+ # Make one big csv file of everything I know about a teacher
+ def getTeachersInfoMain():
+
+ def enroll_staff_shell():
+
+ # take a list of raw hits.
+ def activity_summary(hits):
+ todo: month is hardcoded here
+
+ # Get views counts on current teachers. todo: month is hardcoded here
+ def get_recent_views(id=1):
+
+ # Have they taught online or hybrid classes?
+ def categorize_user(u):
+ todo: threaded
+
+ # Does the account have a photo loaded?
+ def checkForAvatar(id=2):
+
+ # Grab em. Change the first if when continuing after problems....
+ def downloadPhoto():
+
+ def mergePhotoFolders():
+
+ def mergePhotoFolders2():
+
+ # Go through my local profile pics, upload any that are missing.
+ def uploadPhoto():
+
+ def test_email():
+
+ def create_ztc_list():
+
+ def get_user_info(id):
+
+ # these are any messages that get pushed out to their email
+ def comm_mssgs_for_user(uid=0):
+
+ #
+ def convos_for_user(uid=0):
+
+ # single q sub
+ def quiz_get_sub(courseid, quizid, subid=0):
+
+ # quiz submissions for quiz id x, in course id y
+ def quiz_submissions(courseid=9768, quizid=32580):
+
+ # return (timeblock, course, read=0,write=1)
+ def requests_line(line,i=0):
+
+ #
+ def report_logs(id=0):
+
+ def track_users_in_sem():
+
+ def track_users_in_class(L=[]):
+
+ def track_user_q(id, q):
+
+ # Maintain local logs. Look to see if we have some, download logs since then for a user.
+ def track_user(id=0,qid=0):
+ todo: set up this info file if it isn't there. check any changes too. it
+ todo:
+
+ #
+ def track_users_by_teacherclass():
+
+ def nlp_sample():
+
+ def nlp_sample2():
+
+ def one_course_enrol():
+
+util.py
+
+ def print_table(table):
+
+ def remove_nl(str):
+
+ def UnicodeDictReader(utf8_data, **kwargs):
+
+ def minimal_string(s):
+
+ def to_file_friendly(st):
+
+ def clean_title(st):
+
+ def match59(x):
+
+ def item_2(x): return x[2]
+
+ def unix_time_millis(dt):
+
+ # ENGL250 returns ENGL
+ def dept_from_name(n):
+
+ def most_common_item(li):
+
+ def srt_times(a,b):
+
+ def how_long_ago(a): # number of hours ago 'a' was...
+
+ def partition(times_list):
+
+
+"""
+
diff --git a/checker.py b/checker.py
new file mode 100644
index 0000000..53a6d2d
--- /dev/null
+++ b/checker.py
@@ -0,0 +1,225 @@
+# Common functions for checking web and canvas for accessibility
+
+import os, sys, glob, codecs
+import subprocess, re, pdb, html
+from bs4 import BeautifulSoup, Comment
+import html.entities
+from datetime import datetime
+#from html.parser import HTMLParseError
+
+
+# the following from: https://chase-seibert.github.io/blog/2011/01/28/sanitize-html-with-beautiful-soup.html#
+# hasn't been tested yet
+
+
+def safe_html(html):
+
+ if not html:
+ return None
+
+ # remove these tags, complete with contents.
+ blacklist = ["script", "style" ]
+
+ whitelist = [
+ "div", "span", "p", "br", "pre","a",
+ "blockquote",
+ "ul", "li", "ol",
+ "b", "em", "i", "strong", "u", "iframe","img",
+ "h1","h2","h3","h4","h5","h6"
+ ]
+
+ try:
+ # BeautifulSoup is catching out-of-order and unclosed tags, so markup
+ # can't leak out of comments and break the rest of the page.
+ soup = BeautifulSoup(html,'lxml')
+ except Exception as e:
+ # special handling?
+ raise e
+
+ removelist = ['table','tbody','thead','th','tr','td']
+
+ # now strip HTML we don't like.
+ for tag in soup.findAll():
+ if tag.name.lower()=='iframe': continue
+ if tag.name.lower()=='img': continue
+ if tag.name.lower() in blacklist:
+ # blacklisted tags are removed in their entirety
+ tag.extract()
+ elif tag.name.lower() in whitelist:
+ # tag is allowed. Make sure all the attributes are allowed.
+ #print tag
+ #print tag.attrs
+ #pdb.set_trace()
+ #tag.attrs = [(a[0], safe_css(a[0], a[1])) for a in tag.attrs if _attr_name_whitelisted(a[0])]
+ for k,v in list(tag.attrs.items()):
+ #print 'attr: ' + str(k) + ' = ' + str(v) + '.... ',
+ if not _attr_name_whitelisted(k):
+ tag.attrs.pop(k)
+ #print ' removed'
+ else:
+ tag.attrs[k] = v
+ #print ' kept'
+ elif tag.name.lower() in removelist:
+ tag.unwrap()
+ else:
+ # not a whitelisted tag. I'd like to remove it from the tree
+ # and replace it with its children. But that's hard. It's much
+ # easier to just replace it with an empty span tag.
+
+ #tag.name = "span"
+ #tag.attrs = []
+ tag.unwrap()
+
+ # scripts can be executed from comments in some cases
+ comments = soup.findAll(text=lambda text:isinstance(text, Comment))
+ for comment in comments:
+ comment.extract()
+
+ safe_html = str(soup)
+
+ if safe_html == ", -":
+ return None
+
+ return safe_html
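+
+# Illustrative call (hypothetical input): blacklisted tags are dropped with
+# their contents, and non-whitelisted attributes are stripped, so
+#   safe_html('<p onclick="x()">hi <script>evil()</script></p>')
+# should come back as roughly '<p>hi </p>'.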
+
+def _attr_name_whitelisted(attr_name):
+ return attr_name.lower() in ["href", "src","width","height","alt","target","title","class","id"]
+
+def safe_css(attr, css):
+ if attr == "style":
+ return re.sub("(width|height):[^;]+;", "", css)
+ return css
+
+def plaintext(input):
+ """Converts HTML to plaintext, preserving whitespace."""
+
+ # from http://effbot.org/zone/re-sub.htm#unescape-html
+ def _unescape(text):
+ def fixup(m):
+ text = m.group(0)
+ if text[:2] == "&#":
+ # character reference
+ try:
+ if text[:3] == "&#x":
+ return chr(int(text[3:-1], 16))
+ else:
+ return chr(int(text[2:-1]))
+ except ValueError:
+ pass
+ else:
+ # named entity
+ try:
+ text = chr(html.entities.name2codepoint[text[1:-1]])
+ except KeyError:
+ pass
+ return text # leave as is
+ return re.sub(r"&#?\w+;", fixup, text)
+
+ input = safe_html(input) # basic sanitation first
+ text = "".join(BeautifulSoup("%s" % input).body(text=True))
+ text = text.replace("xml version='1.0' encoding='%SOUP-ENCODING%'", "") # strip BS meta-data
+ return _unescape(text)
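+
+# Illustrative call (hypothetical input): tags are sanitized away and
+# entities decoded, so
+#   plaintext('<p>a &amp; b</p>')
+# should come back as roughly 'a & b'.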
+
+
+#os.system("node node_modules/pa11y/bin/pa11y.js --standard Section508 http://www.gavilan.edu/student/online")
+
+
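+# Run pa11y (Section508 ruleset) on every file under path+fname, scrape the
+# "N Errors" summary line from its stdout, and return
+# (total error count, HTML report fragment).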
+def check_folder(fname,path):
+ report = '<h2>' + fname + '</h2>\n'
+ number = -1
+ count = 0
+ try:
+ for F in os.listdir(path+fname): # e.g. 'assignments'
+ cmd = "/usr/bin/node " + \
+ "/home/phowell/Documents/access/node_modules/pa11y/bin/pa11y.js --standard Section508 " + \
+ path + fname + "/" + F
+ print(("" + path + fname + "/" + F))
+ output = subprocess.run(cmd, stdout=subprocess.PIPE,
+ universal_newlines=True, shell=True, check=False)
+
+ report += "" + F + " \n"
+ line = output.stdout.split('\n')[-3]
+ if re.search('No\sissues',line):
+ pass
+ #print("Got zero")
+ else:
+ m = re.search('(\d+)\sErr',line)
+ if m:
+ count += int(m.group(1))
+ lines = output.stdout.split("\n")
+ #pdb.set_trace()
+ lines = lines[4:]
+ report += "" + html.escape("\n".join(lines)) + " \n\n\n"
+ except Exception as e:
+ print('finished with error or folder missing')
+ print(e)
+ return int(count), report
+
+def check_class(folder):
+ path = "/home/phowell/hdd/SCRIPTS/everything-json/course_temps/" + folder + "/"
+ class_report = "Report on course: " + folder + " \n\n"
+ (cnt_a,rep_a) = check_folder('assignments',path)
+ (cnt_p,rep_p) = check_folder('pages',path)
+ class_report += rep_a
+ class_report += rep_p
+
+ #oo = open(path+'report.html','w')
+ #oo.write(class_report)
+ #oo.close()
+ #print(class_report)
+ return cnt_a+cnt_p, class_report
+
+def check_all():
+ hd_path = '/home/phowell/hdd/SCRIPTS/everything-json/'
+
+ rep_f = open(hd_path+'report.html','w')
+ rep_s = open(hd_path+'summary.html','w')
+
+ rep_f.write(' \n')
+
+ listt = os.listdir('/home/phowell/hdd/SCRIPTS/everything-json/course_temps')
+ #listt = ['course_4341',] # for testing
+ for L in listt:
+ print('Directory is: ' + L)
+ m = glob.glob('/home/phowell/hdd/SCRIPTS/everything-json/course_temps/' +L+'/*.txt')
+ if m: name = m[0]
+ else: name = 'unknown.txt'
+ name = name.split('.')[0]
+ name = name.split('/')[-1]
+
+ print('name is: ' + name)
+ (cnt,rep) = check_class(L)
+ rep_f.write(""+name+" \n"+rep+"\n\n \n\n")
+ rep_f.flush()
+ rep_s.write("("+str(cnt)+") Class: "+name+" \n")
+ rep_s.flush()
+
+if __name__ == "__main__":
+ check_all()
+
+ #print(('arguments: '+str(sys.argv)))
+
+ # test
+ """
+ file = 'course_temps/course_6862/pages/choose-the-right-browser.html'
+ dir = 'course_temps/course_6862/pages/'
+ #ff = open(file,'r').read()
+ #print safe_html(ff)
+
+ for file in os.listdir(dir):
+ if re.search('_cleaned\.html',file):
+ os.remove(dir+file)
+
+ for file in os.listdir(dir):
+ if file.endswith(".html"):
+ newfname = re.sub('\.html','_cleaned.html',file)
+ ff = codecs.open(dir+file,'r','utf-8').read()
+ print(file)
+ print(newfname)
+ newf = codecs.open(dir+newfname,'w','utf-8')
+ newf.write(safe_html(ff))
+ newf.close()
+ """
+
+
diff --git a/content.py b/content.py
new file mode 100644
index 0000000..be5553d
--- /dev/null
+++ b/content.py
@@ -0,0 +1,860 @@
+
+
+#saved_titles = json.loads( codecs.open('cache/saved_youtube_titles.json','r','utf-8').read() )
+import requests, codecs, os, re, json, html
+from pipelines import header, fetch, url
+from util import clean_title, to_file_friendly
+from bs4 import BeautifulSoup as bs
+from html.parser import HTMLParser
+import tomd, checker
+import html2markdown as h2m
+import pypandoc
+h = HTMLParser()
+
+
+DBG = 1
+
+def d(s):
+ global DBG
+ if DBG: print(s)
+
+def stripper(s):
+ REMOVE_ATTRIBUTES = [
+ 'lang','language','onmouseover','onmouseout','script','style','font',
+ 'dir','face','size','color','style','class','width','height','hspace',
+ 'border','valign','align','background','bgcolor','text','link','vlink',
+ 'alink','cellpadding','cellspacing']
+
+ #doc = '''<html><head><title>Page title</title></head><body><p>This is paragraph <b>one</b>.</p><p>This is paragraph <b>two</b>.</p></body></html>'''
+ soup = bs(s, features='lxml')
+ for tag in soup.recursiveChildGenerator():
+ try:
+ tag.attrs = {key:value for key,value in tag.attrs.items()
+ if key not in REMOVE_ATTRIBUTES}
+ except AttributeError:
+ # 'NavigableString' object has no attribute 'attrs'
+ pass
+ return soup.prettify()
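+
+# Illustrative call (hypothetical input): attributes listed in
+# REMOVE_ATTRIBUTES are dropped, the markup itself is kept, so
+#   stripper('<p class="big" align="center">hi</p>')
+# should come back as a prettified '<p>\n hi\n</p>'.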
+
+def mycleaner(s):
+ s = re.sub(r' ','\n',s)
+ s = re.sub(r'<\/?b>','',s)
+ s = re.sub(r' +',' ',s)
+ s = re.sub(r'^[\s\t\r\n]+$','',s,flags=re.MULTILINE)
+ s = re.sub('^ ','',s)
+ return s
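+
+# Illustrative call (hypothetical input): with the substitutions above,
+#   mycleaner('a<br> <b>b</b>   c')
+# should come back as roughly 'a\n b c'.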
+
+def freshdesk():
+ path = "C:\\Users\\peter\\Downloads\\freshdesk\\Solutions.xml"
+ soup = bs( codecs.open(path,'r','utf-8').read() ,features="lxml")
+
+ outpt = codecs.open('cache/faqs.txt','w')
+ out = ""
+ for a in soup.find_all('solution-article'):
+
+ print("TITLE\n"+a.find('title').get_text())
+ out += a.find('title').get_text()
+
+ """for d in a.find_all('description'):
+ #print(d)
+ if d:
+ d = h.unescape(d.get_text())
+ e = stripper(d)
+ m = tomd.convert( e )
+ m = mycleaner(m)
+ print("\nDESCRIPTION\n"+m)"""
+
+ #print("\nWHAT IS THIS?\n" +
+ hh = a.find('desc-un-html').get_text()
+ d = html.unescape(hh)
+ e = stripper(d)
+ m = tomd.convert( e )
+ m = mycleaner(m)
+ print("\nDESCRIPTION\n"+m)
+ out += "\n\n" + m + "\n\n"
+
+ print("-----------\n\n")
+ outpt.write(out)
+
+# Download everything interesting in a course to a local folder
+# Build a master file with the entire class content
+def accessible_check(id=""):
+ if not id:
+ id = input("ID of course to check? ")
+ pagebreak = '\n\n\n\n'
+ verbose = 1
+
+ save_file_types = ['application/pdf','application/docx','image/jpg','image/png','image/gif','image/webp','application/vnd.openxmlformats-officedocument.wordprocessingml.document']
+
+ courseinfo = fetch('/api/v1/courses/' + str(id), verbose )
+
+ item_id_to_index = {}
+ items_inorder = ["" + courseinfo['name'] + " \n\n" + pagebreak,]
+ running_index = 1
+
+ modules = fetch('/api/v1/courses/' + str(id) + '/modules',verbose)
+
+ items = [0] * 9000  # one slot per module item, indexed by running_index
+
+ video_link_list = []
+
+ for m in modules:
+ items[running_index] = '<h1>%s</h1> %s\n' % ( m['name'], pagebreak )
+ running_index += 1
+
+ mod_items = fetch('/api/v1/courses/' + str(id) + '/modules/'+str(m['id'])+'/items', verbose)
+
+ for I in mod_items:
+
+ if I['type'] in ['SubHeader', 'Page', 'Quiz', 'Discussion', 'ExternalUrl' ] or 'content_id' in I:
+ running_index += 1
+
+ if I['type'] == 'SubHeader':
+ #print('subheader: ' + str(I))
+ items[running_index] = '<pre>%s</pre>\n' % json.dumps(I,indent=2)
+
+ if I['type'] == 'Page':
+ item_id_to_index[ I['page_url'] ] = running_index
+
+ if I['type'] == 'Quiz':
+ item_id_to_index[ I['content_id'] ] = running_index
+
+ if I['type'] == 'Discussion':
+ item_id_to_index[ I['content_id'] ] = running_index
+
+ if I['type'] == 'ExternalUrl':
+ items[running_index] = "%s \n\n" % (I['external_url'], I['title'])
+
+ # ?
+ #if 'content_id' in I:
+ # item_id_to_index[ I['content_id'] ] = running_index
+ else:
+ print("What is this item? " + str(I))
+
+
+ #items_inorder.append('Not included: '+ I['title'] + '(a ' + I['type'] + ') \n\n\n' )
+
+ # I['title']
+ # I['content_id']
+ # I['page_url']
+ # I['type']
+ # I['published']
+ # assignments and files have content_id, pages have page_url
+
+ course_folder = '../course_temps/course_'+id
+ index = []
+ try:
+ os.mkdir(course_folder)
+ except:
+ print("Course folder exists.")
+ ###
+ ### FILES
+ ###
+ files_f = course_folder + '/files'
+ headered = 0
+ print("\nFILES")
+ try:
+ os.mkdir(files_f)
+ except:
+ print(" * Files folder already exists.")
+
+ files = fetch('/api/v1/courses/' + str(id) + '/files', verbose)
+ print("LISTING COURSE FILES")
+ for f in files:
+ for arg in 'filename,content-type,size,url'.split(','):
+ if arg=='size':
+ f['size'] = str(int(f['size']) // 1000) + 'k'
+
+ if f['content-type'] in save_file_types:
+ d(' - %s' % f['filename'])
+
+ if not os.path.exists(files_f + '/' + f['filename']):
+ r = requests.get(f['url'],headers=header, stream=True)
+ with open(files_f + '/' + f['filename'], 'wb') as fd:
+ for chunk in r.iter_content(chunk_size=128):
+ fd.write(chunk)
+ else:
+ d(" - already downloaded %s" % files_f + '/' + f['filename'])
+
+ if not headered:
+ index.append( ('<h2>Files</h2>') )
+ headered = 1
+ index.append( ('files/' + f['filename'], f['filename']) )
+
+ ###
+ ### PAGES
+ ###
+ pages_f = course_folder + '/pages'
+ headered = 0
+ image_count = 0
+ print("\nPAGES")
+ try:
+ os.mkdir(pages_f)
+ except:
+ print(" * Pages folder already exists.")
+
+
+ pages = fetch('/api/v1/courses/' + str(id) + '/pages', verbose)
+ for p in pages:
+ d(' - %s' % p['title'])
+
+ p['title'] = clean_title(p['title'])
+ easier_filename = clean_title(p['url'])
+ this_page_filename = "%s/%s.html" % (pages_f, easier_filename)
+ #for a in 'title,updated_at,published'.split(','):
+ # print(str(p[a]), "\t", end=' ')
+
+ if not headered:
+ index.append( ('<h2>Pages</h2>') )
+ headered = 1
+ index.append( ( 'pages/' + easier_filename + '.html', p['title'] ) )
+
+
+ if os.path.exists(this_page_filename):
+ d(" - already downloaded %s" % this_page_filename)
+ this_page_content = open(this_page_filename,'r').read()
+ elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
+ d(' * skipping file behind passwords')
+ else:
+ t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
+ if t2 and 'body' in t2 and t2['body']:
+ bb = bs(t2['body'],features="lxml")
+ a_links = bb.find_all('a')
+ for A in a_links:
+ if re.search( r'youtu', A['href']):
+ video_link_list.append( (A['href'], A.text, 'pages/'+easier_filename + ".html") )
+
+
+ page_images = bb.find_all('img')
+ for I in page_images:
+ d(' - %s' % I['src'])
+ if re.search(r'eis-prod',I['src']) or re.search(r'gavilan\.ins',I['src']):
+ d(' * skipping file behind passwords')
+ else:
+ try:
+ r = requests.get(I['src'],headers=header, stream=True)
+ mytype = r.headers['content-type']
+ #print("Response is type: " + str(mytype))
+ r_parts = mytype.split("/")
+ ending = r_parts[-1]
+
+ with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
+ for chunk in r.iter_content(chunk_size=128):
+ fd.write(chunk)
+ image_count += 1
+ except Exception as e:
+ d( ' * Error downloading page image, %s' % str(e) )
+
+ try:
+ with codecs.open(this_page_filename, 'w','utf-8') as fd:
+ this_page_content = "%s \n%s" % ( t2['title'], t2['body'] )
+ fd.write(this_page_content)
+ except:
+ d(' * problem writing page content')
+ ## TODO include linked pages even if they aren't in module
+ else:
+ d(' * nothing returned or bad fetch')
+ # write to running log of content in order of module
+ if p and p['url'] in item_id_to_index:
+ items[ item_id_to_index[ p['url'] ] ] = this_page_content +'\n\n'+pagebreak
+ else:
+ d(' -- This page didnt seem to be in the modules list.')
+
+
+ ###
+ ### ASSIGNMENTS
+ ###
+ headered = 0
+ asm_f = course_folder + '/assignments'
+ print("\nASSIGNMENTS")
+ try:
+ os.mkdir(asm_f)
+ except:
+ d(" - Assignments dir exists")
+
+ asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose)
+ for p in asm:
+ d(' - %s' % p['name'])
+
+
+ try:
+ friendlyfile = to_file_friendly(p['name'])
+ this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html'
+ if os.path.exists(this_assmt_filename):
+ d(" - already downloaded %s" % this_assmt_filename)
+ this_assmt_content = open(this_assmt_filename,'r').read()
+ else:
+ t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose)
+ with codecs.open(this_assmt_filename, 'w','utf-8') as fd:
+ this_assmt_content = "%s \n%s\n\n" % (t2['name'], t2['description'])
+ fd.write(this_assmt_content)
+ if not headered:
+ index.append( ('<h2>Assignments</h2>') )
+ headered = 1
+ index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) )
+
+ # write to running log of content in order of module
+ if p['id'] in item_id_to_index:
+ items[ item_id_to_index[ p['id'] ] ] = this_assmt_content+'\n\n'+pagebreak
+ except Exception as e:
+ d(' * Problem %s' % str(e))
+
+ ###
+ ### FORUMS
+ ###
+ """forum_f = course_folder + '/forums'
+ headered = 0
+ image_count = 0
+ print("\nFORUMS")
+ try:
+ os.mkdir(forum_f)
+ forums = fetch('/api/v1/courses/' + str(id) + '/discussion_topics', verbose)
+ for p in forums:
+ p['title'] = clean_title(p['title'])
+ forum_id = p['id']
+ easier_filename = p['title']
+ for a in 'title,posted_at,published'.split(','):
+ print(str(p[a]), "\t", end=' ')
+ print("")
+ t2 = fetch('/api/v1/courses/' + str(id) + '/discussion_topics/'+str(forum_id), verbose)
+
+
+ #### REMOVED
+ bb = bs(t2['body'],features="lxml")
+ print("IMAGES IN THIS PAGE")
+ page_images = bb.find_all('img')
+ for I in page_images:
+ r = requests.get(I['src'],headers=header, stream=True)
+ mytype = r.headers['content-type']
+ print("Response is type: " + str(mytype))
+ r_parts = mytype.split("/")
+ ending = r_parts[-1]
+
+ with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
+ for chunk in r.iter_content(chunk_size=128):
+ fd.write(chunk)
+ image_count += 1
+ #### END REMOVED
+
+ try:
+ with codecs.open(forum_f + '/' + easier_filename + '.html', 'w','utf-8') as fd:
+ fd.write(""+t2['title']+" \n")
+ fd.write(t2['message'])
+ if not headered: index.append( ('Discussion Forums ') )
+ headered = 1
+ index.append( ( 'forums/' + easier_filename + '.html', p['title'] ) )
+
+ # write to running log of content in order of module
+ if p['id'] in item_id_to_index:
+ items_inorder[ item_id_to_index[ p['id'] ] ] = ''+t2['title']+' \n\n'+t2['message']+'\n\n'+pagebreak
+ else:
+ print(' This forum didnt seem to be in the modules list.')
+ except Exception as e:
+ print("Error here:", e)
+ #print p
+ #print results_dict
+ except Exception as e:
+ print("** Forum folder seems to exist. Skipping those.")
+ print(e)
+
+
+
+
+
+
+ ###
+ ### QUIZZES
+ ###
+
+
+ # get a list external urls
+ headered = 0
+ t = url + '/api/v1/courses/' + str(id) + '/modules'
+ while t: t = fetch(t)
+ mods = results
+ results = []
+ for m in mods:
+ results = []
+ t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items'
+ while t2: t2 = fetch(t2)
+ items = results
+ for i in items:
+ #print i
+ if i['type'] == "ExternalUrl":
+ #print i
+ for j in 'id,title,external_url'.split(','):
+ print unicode(i[j]), "\t",
+ print ""
+ if not headered: index.append( ('External Links ') )
+ headered = 1
+ index.append( (i['external_url'], i['title']) )
+ """
+
+
+
+ # Create index page of all gathered items
+ myindex = codecs.open(course_folder+'/index.html','w','utf-8')
+ for i in index:
+ if len(i)==2: myindex.write('<a href="'+i[0]+'">'+i[1]+'</a><br>\n')
+ else: myindex.write(i)
+
+
+
+ # Full course content in single file
+ print("Writing main course files...")
+ mycourse = codecs.open(course_folder+'/fullcourse.raw.html','w','utf-8')
+
+ for I in items:
+ if I:
+ mycourse.write( I )
+
+
+
+ temp = open('cache/coursedump.txt','w')
+ temp.write( "items: " + json.dumps(items,indent=2) )
+ temp.write("\n\n\n")
+ temp.write( "index: " + json.dumps(index,indent=2) )
+ temp.write("\n\n\n")
+ temp.write( "items_inorder: " + json.dumps(items_inorder,indent=2) )
+ temp.write("\n\n\n")
+ temp.write( "item_id_to_index: " + json.dumps(item_id_to_index,indent=2) )
+
+
+
+
+
+
+
+ if video_link_list:
+ mycourse.write('\n<h1>Videos Linked in Pages</h1>\n')
+ for V in video_link_list:
+ (url, txt, pg) = V
+ mycourse.write(""+txt+" on " + pg + " \n")
+ mycourse.write("
\n")
+
+ mycourse.close()
+ output = pypandoc.convert_file(course_folder+'/fullcourse.raw.html', 'html', outputfile=course_folder+"/fullcourse.html")
+ output1 = pypandoc.convert_file(course_folder+'/fullcourse.html', 'md', outputfile=course_folder+"/fullcourse.md")
+ output2 = pypandoc.convert_file(course_folder+'/fullcourse.html', 'docx', outputfile=course_folder+"/fullcourse.docx")
+
+
+def pan_testing():
+ course_folder = '../course_temps/course_6862'
+ output3 = pypandoc.convert_file(course_folder+'/fullcourse.md', 'html', outputfile=course_folder+"/fullcourse.v2.html")
+
+# Given course, page url, and new content, upload the new revision of a page
+def create_page(course_num,new_title,new_content):
+ t3 = url + '/api/v1/courses/' + str(course_num) + '/pages'
+ #xyz = raw_input('Enter 1 to continue and send back to: ' + t3 + ': ')
+ #print("Creating page: %s\nwith content:%s\n\n\n" % (new_title,new_content))
+ print("Creating page: %s" % new_title)
+ xyz = input('type 1 to confirm: ') #'1'
+ if xyz=='1':
+ data = {'wiki_page[title]':new_title, 'wiki_page[body]':new_content}
+ r3 = requests.post(t3, headers=header, params=data)
+ print(r3)
+ print('ok')
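+
+# Typical call (hypothetical course id and content) -- note it prompts
+# for confirmation before POSTing:
+#   create_page(1234, 'Welcome', '<p>Hello class</p>')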
+
+
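+# md_to_course: pandoc-convert one big markdown file to HTML, then split the
+# result on <h1> headings and upload each chunk as its own Canvas page.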
+def md_to_course():
+ #input = 'C:/Users/peter/Nextcloud/Documents/gavilan/student_orientation.txt'
+ #output = 'C:/Users/peter/Nextcloud/Documents/gavilan/stu_orientation/student_orientation.html'
+ id = "11214"
+ infile = 'cache/pages/course_%s.md' % id
+ output = 'cache/pages/course_%s_fixed.html' % id
+ output3 = pypandoc.convert_file(infile, 'html', format='md', outputfile=output)
+
+ xx = codecs.open(output,'r','utf-8').read()
+ soup = bs( xx, features="lxml" )
+ soup.encode("utf-8")
+
+ current_page = ""
+ current_title = ""
+
+ for child in soup.body.children:
+ if child.name == "h1" and not current_title:
+ current_title = child.get_text()
+ elif child.name == "h1":
+ upload_page(id,current_title,current_page)
+ current_title = child.get_text()
+ current_page = ""
+ print( "Next page: %s" % current_title )
+ else:
+ #print(dir(child))
+ if 'prettify' in dir(child):
+ current_page += child.prettify(formatter="html")
+ else:
+ current_page += child.string
+
+ upload_page(id,current_title,current_page)
+ print("Done")
+
+
+# DL pages only
+def grab_course_pages(course_num=-1):
+ global results, results_dict, url, header
+ # course_num = raw_input("What is the course id? ")
+    course_num = str(course_num)
+    if course_num == '-1':
+        course_num = input("Id of course? ")
+ modpagelist = []
+ modurllist = []
+ # We want things in the order of the modules
+ t4 = url + '/api/v1/courses/'+str(course_num)+'/modules?include[]=items'
+ results = fetch(t4)
+ i = 1
+ pageout = codecs.open('cache/pages/course_'+str(course_num)+'.html','w','utf-8')
+ pageoutm = codecs.open('cache/pages/course_'+str(course_num)+'.md','w','utf-8')
+ divider = "\n### "
+ for M in results:
+ print("Module Name: " + M['name'])
+ for I in M['items']:
+ if I['type']=='Page':
+ modpagelist.append(I['title'])
+ modurllist.append(I['page_url'])
+ pageout.write(divider+I['title']+'### '+I['page_url']+'\n')
+ easier_filename = clean_title(I['page_url'])
+ print(" " + str(i) + ". " + I['title'])
+ t2 = url + '/api/v1/courses/' + str(course_num) + '/pages/'+I['page_url']
+ print('Getting: ' + t2)
+ mypage = fetch(t2)
+ fixed = checker.safe_html(mypage['body'])
+ if fixed:
+ #markdown = h2m.convert(fixed)
+ #p_data = pandoc.read(mypage['body'])
+                # assumption: heading tags around the title were stripped from this string in transit
+                markdown = pypandoc.convert_text('<h1>' + I['title'] + '</h1>\n' + mypage['body'], 'md', format='html')
+ pageout.write(fixed+'\n')
+ pageoutm.write(markdown+'\n')
+ pageout.flush()
+ i += 1
+ pageout.close()
+ pageoutm.close()
+
+# Upload pages. Local copy has a particular format.
+# Appears to not be used
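+# The local dump written by grab_course_pages marks each page with a header line:
+#   ### <page title>### <page_url>
+# and everything until the next header line is that page's (cleaned) html body.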
+def put_course_pages():
+ course_num = '6862'
+ filein = codecs.open('cache/pages/course_'+str(course_num)+'.html','r','utf-8')
+ my_titles = []
+ my_urls = []
+ my_bodys = []
+ started = 0
+ current_body = ""
+ for L in filein.readlines():
+        ma = re.search(r'^###\s(.*)###\s(.*)$',L)
+ if ma:
+ my_titles.append(ma.group(1))
+ my_urls.append(ma.group(2))
+ if started:
+ my_bodys.append(current_body)
+ current_body = ""
+ started = 1
+ else:
+ current_body += "\n" + L
+ my_bodys.append(current_body)
+
+ i = 0
+ for U in my_urls:
+ # and now upload it....lol
+ upload_page(course_num,U,my_bodys[i])
+ i += 1
+
+# Also not used
+def put_revised_pages():
+ course_num = '6862'
+ course_folder = '../course_temps/course_6862'
+ filein = codecs.open(course_folder+'/fullcourse.v2.html','r','utf-8')
+ my_titles = []
+ my_urls = []
+ my_bodys = []
+ started = 0
+ current_body = ""
+ for L in filein.readlines():
+        # assumption: this pattern lost its markup in transit; guessing an <h1> title followed by the page url
+        ma = re.search(r'^<h1>(.*?)</h1>\s*(\S+)\s*$',L)
+ if ma:
+ my_titles.append(ma.group(1))
+ my_urls.append(ma.group(2))
+ if started:
+ my_bodys.append(current_body)
+ current_body = ""
+ started = 1
+ else:
+ current_body += "\n" + L
+ my_bodys.append(current_body)
+
+ i = 0
+ for U in my_urls:
+ # and now upload it....lol
+ upload_page(course_num,U,my_bodys[i])
+ i += 1
+
+# Download, clean html, and reupload page
+def update_page():
+ global results, results_dict, url, header
+ # course_num = raw_input("What is the course id? ")
+ course_num = '6862'
+ t = url + '/api/v1/courses/' + str(course_num) + '/pages'
+ while t: t = fetch(t)
+ pages = results
+ results = []
+ mypagelist = []
+ myurllist = []
+ modpagelist = []
+ modurllist = []
+ for p in pages:
+ p['title'] = clean_title(p['title'])
+ mypagelist.append(p['title'])
+ myurllist.append(p['url'])
+ easier_filename = clean_title(p['url'])
+ #for a in 'title,updated_at,published'.split(','):
+ # print unicode(p[a]), "\t",
+ #print ""
+
+ # We want things in the order of the modules
+ t4 = url + '/api/v1/courses/'+str(course_num)+'/modules?include[]=items'
+ while t4: t4 = fetch(t4)
+ mods = results
+ results = []
+ i = 1
+ print("\nWhat page do you want to repair?")
+ for M in mods:
+ print("Module Name: " + M['name'])
+ for I in M['items']:
+ if I['type']=='Page':
+ modpagelist.append(I['title'])
+ modurllist.append(I['page_url'])
+ print(" " + str(i) + ". " + I['title'])
+ i += 1
+
+ choice = input("\n> ")
+ choice = int(choice) - 1
+ chosen_url = modurllist[choice]
+ print('Fetching: ' + modpagelist[choice])
+ t2 = url + '/api/v1/courses/' + str(course_num) + '/pages/'+chosen_url
+ print('From: ' + t2)
+
+ results_dict = {}
+ while(t2): t2 = fetch_dict(t2)
+ mypage = results_dict
+ fixed_page = checker.safe_html(mypage['body'])
+ upload_page(course_num,chosen_url,fixed_page)
+
+# Given course, page url, and new content, upload the new revision of a page
+def upload_page(course_num,pageurl,new_content):
+ print("Repaired page:\n\n")
+ #print new_content
+ print(pageurl)
+ t3 = url + '/api/v1/courses/' + str(course_num) + '/pages/' + pageurl
+ xyz = input('Enter 1 to continue and send back to: ' + t3 + ': ')
+ #xyz = '1'
+ if xyz=='1':
+ data = {'wiki_page[body]':new_content}
+ r3 = requests.put(t3, headers=header, params=data)
+ print(r3)
+ print('ok')
+
+# Use template to build html page with homegrown subtitles
+def build_srt_embed_php(data):
+ template = codecs.open('template_srt_and_video.txt','r','utf-8').readlines()
+ result = ''
+ for L in template:
+ L = re.sub('FRAMEID',data['frameid'],L)
+ L = re.sub('TITLE',data['title'],L)
+ L = re.sub('EMBEDLINK',data['embedlink'],L)
+ L = re.sub('SRTFOLDERFILE',data['srtfolderfile'],L)
+ result += L
+ return result
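+
+# The template is expected to contain the literal placeholders FRAMEID, TITLE,
+# EMBEDLINK and SRTFOLDERFILE; an illustrative (made-up) fragment:
+#   <iframe id="FRAMEID" src="EMBEDLINK" title="TITLE"></iframe>
+#   <script> loadSubtitles('SRTFOLDERFILE'); </script>
+# The real template lives in template_srt_and_video.txt.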
+
+
+
+
+def yt_title(code):
+ global saved_titles
+ if code in saved_titles:
+ return saved_titles[code]
+ a = requests.get('https://www.youtube.com/watch?v=%s' % code)
+ bbb = bs(a.content,"lxml")
+ ccc = bbb.find('title').text
+ ccc = re.sub(r'\s\-\sYouTube','',ccc)
+ saved_titles[code] = ccc
+ codecs.open('saved_youtube_titles.json','w','utf-8').write(json.dumps(saved_titles))
+ return ccc
+
+def swap_youtube_subtitles():
+ # example here: http://siloor.github.io/youtube.external.subtitle/examples/srt/
+
+ # srt folder, look at all filenames
+ srtlist = os.listdir('video_srt')
+ i = 0
+ for V in srtlist:
+ print(str(i) + '. ' + V)
+ i += 1
+ choice = input("Which SRT folder? ")
+ choice = srtlist[int(choice)]
+ srt_folder = 'video_srt/'+choice
+ class_srt_folder = choice
+ srt_files = os.listdir(srt_folder)
+ srt_shorts = {}
+ print("\nThese are the subtitle files: " + str(srt_files))
+ for V in srt_files:
+ if V.endswith('srt'):
+ V1 = re.sub(r'(\.\w+$)','',V)
+ srt_shorts[V] = minimal_string(V1)
+
+ crs_id = input("What is the id of the course? ")
+ grab_course_pages(crs_id)
+    v1_pages = codecs.open('cache/pages/course_'+str(crs_id)+'.html','r','utf-8')  # grab_course_pages writes here
+ v1_content = v1_pages.read()
+
+ # a temporary page of all youtube links
+ tp = codecs.open('page_revisions/links_' + str(crs_id) + '.html', 'w','utf-8')
+
+ # course pages, get them all and look for youtube embeds
+ title_shorts = {}
+ title_embedlink = {}
+ title_list = []
+ print("I'm looking for iframes and youtube links.")
+    # NOTE: reconstructed from a garbled block; assumes youtube embeds of the
+    # form <iframe src="https://www.youtube.com/embed/VIDEOID">
+    for L in v1_content.split('\n'):
+        m = re.search(r'<iframe[^>]*src="(https://www\.youtube\.com/embed/([\w-]+))', L)
+        if m:
+            this_src = m.group(1)
+            this_title = yt_title(m.group(2))
+            title_list.append(this_title)
+            title_shorts[this_title] = minimal_string(this_title)
+            title_embedlink[this_title] = this_src
+            tp.write('%s: <a href="%s">%s</a><br>\n' % (this_title, this_src, this_src))
+ # match them
+ # lowercase, non alpha or num chars become a single space, try to match
+ # if any srts remain unmatched, ask.
+ tp.close()
+    webbrowser.open_new_tab('file:///C:/SCRIPTS/everything-json/page_revisions/links_'+str(crs_id)+'.html')
+
+ matches = {} # key is Title, value is srt file
+ for S,v in list(srt_shorts.items()):
+ found_match = 0
+ print(v, end=' ')
+ for T, Tv in list(title_shorts.items()):
+ if v == Tv:
+ print(' \tMatches: ' + T, end=' ')
+ found_match = 1
+ matches[T] = S
+ break
+ #print "\n"
+
+ print("\nThese are the srt files: ")
+ print(json.dumps(srt_shorts,indent=2))
+ print("\nThese are the titles: ")
+ print(json.dumps(title_shorts,indent=2))
+ print("\nThese are the matches: ")
+ print(json.dumps(matches,indent=2))
+
+ print(("There are %d SRT files and %d VIDEOS found. " % ( len(list(srt_shorts.keys())), len(list(title_shorts.keys())) ) ))
+
+ for S,v in list(srt_shorts.items()):
+ if not S in list(matches.values()):
+ print("\nDidn't find a match for: " + S)
+ i = 0
+ for T in title_list:
+            if not T in list(matches.keys()): print(str(i+1) + ". " + T.encode('ascii', 'ignore').decode('ascii'))
+ i += 1
+ print("Here's the first few lines of the SRT:")
+ print(( re.sub(r'\s+',' ', '\n'.join(open(srt_folder+"/"+S,'r').readlines()[0:10]))+"\n\n"))
+ choice = input("Which one should I match it to? (zero for no match) ")
+ if int(choice)>0:
+ matches[ title_list[ int(choice)-1 ] ] = S
+ print("SRT clean name was: %s, and TITLE clean name was: %s" % (v,title_shorts[title_list[ int(choice)-1 ]] ))
+ print("ok, here are the matches:")
+ print(json.dumps(matches,indent=2))
+
+ # construct subsidiary pages, upload them
+ i = 0
+ for m,v in list(matches.items()):
+ # open template
+ # do replacement
+ i += 1
+ data = {'frameid':'videoframe'+str(i), 'title':m, 'embedlink':title_embedlink[m], 'srtfolderfile':v }
+ print(json.dumps(data,indent=2))
+ file_part = v.split('.')[0]
+ new_php = codecs.open(srt_folder + '/' + file_part + '.php','w','utf-8')
+ new_php.write(build_srt_embed_php(data))
+ new_php.close()
+ #srt_files = os.listdir(srt_folder)
+ put_file(class_srt_folder)
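+
+# Rough sketch of the normalization minimal_string (defined elsewhere in this repo)
+# is assumed to perform, per the matching notes above: lowercase, with runs of
+# non-alphanumeric characters collapsed to single spaces.
+def minimal_string_sketch(s):
+    return re.sub(r'[^a-z0-9]+', ' ', s.lower()).strip()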
+
+
+def test_swap():
+ crs_id = '6923'
+ # swap in embed code and re-upload canvas pages
+ v2_pages = codecs.open('page_revisions/course_'+str(crs_id)+'.html','r','utf-8')
+ v2_content = v2_pages.read()
+    ma = re.compile(r'(\w+)=(".*?")')
+
+    # NOTE: reconstructed from a garbled block; assumes we collect iframe tags,
+    # print their attribute pairs, and dump the tags to a review file
+    output = ""
+    for L in v2_content.split('\n'):
+        for F in re.findall(r'<iframe[^>]*>', L):
+            print(dict(ma.findall(F)))
+            output += F + "\n"
+
+    open("C:/Users/peter/Documents/gavilan/www mirror/counseling_2019/who_call_out.txt.html",'w').write(output)
+
+
+# https://docs.google.com/document/d/1Jw3rSGxuCkujMLrm-5p_zxSzCQavfwo_7Esthjzg0rQ/edit?usp=sharing
+
+def studenttech_faq():
+ """f = "../www mirror/student/online/template/tech_faq.html"
+ input = open(f,'r')
+ lines = input.readlines()
+ input.close()
+
+ output = open(f,'w')
+ for L in lines:
+ output.write(L )
+ if re.search('',L):
+ break
+
+ output.write( get_doc('1Jw3rSGxuCkujMLrm-5p_zxSzCQavfwo_7Esthjzg0rQ', 1) )"""
+ codecs.open('qanda_student/public/questions.json','w','utf-8').write( \
+ get_doc_generic('1Jw3rSGxuCkujMLrm-5p_zxSzCQavfwo_7Esthjzg0rQ', bracket=0,verbose=0))
+ put_file('/gavilan.edu/student/', 'qanda_student/public/', 'questions.json')
+ print("I uploaded the questions, but remember to do the images too if they changed.")
+
+# https://docs.google.com/document/d/1tI_b-q75Lzu25HcA0GCx9bGfUt9ccM8m2YrrioDFZcA/edit?usp=sharing
+def de_faq():
+ """f = "cache/faq_template.html"
+ input = codecs.open(f,'r','utf-8')
+ lines = input.readlines()
+ input.close()
+
+ output = codecs.open('cache/de_teach_faq.html','w','utf-8')
+ for L in lines:
+ output.write(L )
+ if re.search('',L):
+ output.write( get_doc_generic('1tI_b-q75Lzu25HcA0GCx9bGfUt9ccM8m2YrrioDFZcA', bracket=0,verbose=1))
+ """
+ codecs.open('qanda/public/questions.json','w','utf-8').write( \
+ get_doc_generic('1tI_b-q75Lzu25HcA0GCx9bGfUt9ccM8m2YrrioDFZcA', bracket=0,verbose=0))
+ put_file('/gavilan.edu/staff/tlc/canvas_help/', 'qanda/public/', 'questions.json')
+ print("I uploaded the questions, but remember to do the images too if they changed.")
+
+def degwork_faq():
+ f = "../www mirror/counseling_2019/template/degreeworks.html"
+ input = open(f,'r')
+ lines = input.readlines()
+ input.close()
+
+ output = open(f,'w')
+ for L in lines:
+ output.write(L )
+        if re.search('<!-- content below -->',L):  # assumption: the original marker pattern was stripped; this is a placeholder
+ break
+
+ output.write( ' \n' + get_doc('1ctmPkWwrIJ1oxlj8Z8UXYjijUzMW2VxnsVDSE1KfKME') )
+
+def vrc_faq():
+ # https://docs.google.com/document/d/1anAmnSusL-lTSAz-E4lcjlzq1CA8YJyUfUHxnKgmJEo/edit?usp=sharing
+ f = "../www mirror/student/veterans/template/faq.html"
+ input = open(f,'r')
+ lines = input.readlines()
+ input.close()
+
+ output = open(f,'w')
+ for L in lines:
+ output.write(L )
+        if re.search('<!-- content below -->',L):  # assumption: the original marker pattern was stripped; this is a placeholder
+ break
+
+ output.write( ' \n' + get_doc('1anAmnSusL-lTSAz-E4lcjlzq1CA8YJyUfUHxnKgmJEo',verbose=1) )
+
+def counseling_faq():
+ f = "../www mirror/counseling_2019/template/faq.html"
+ input = open(f,'r')
+ lines = input.readlines()
+ input.close()
+
+ output = open(f,'w')
+ for L in lines[0:3]:
+ output.write(L)
+
+ output.write( get_doc('101iOplZearjv955FX2FX9AM6bUnkcryo7BShKuzE9tI') )
+
+def finaid_faq():
+ f = "../www mirror/finaid_2019/template/faq.html"
+ input = open(f,'r')
+ lines = input.readlines()
+ input.close()
+
+ output = open(f,'w')
+ i = 0
+ for L in lines[0:3]:
+ #print("%i, %s" % (i,L))
+ output.write(L)
+ i+=1
+
+ output.write( get_doc('1-FarjfyzZceezdSBXDHpP2cF_vaa9Qx6HvnIqwipmA4') )
+
+def coun_loc():
+ f = "../www mirror/counseling_2019/template/location.html"
+ input = open(f,'r')
+ lines = input.readlines()
+ input.close()
+
+ output = open(f,'w')
+ i = 0
+ for L in lines[0:3]:
+ #print("%i, %s" % (i,L))
+ output.write(L)
+ i+=1
+
+ output.write( get_doc('1hxQZ9iXMWvQQtaoVlRgor9v4pdqdshksjeHD2Z4E6tg') )
+
+def tutor_faq():
+ f = "../www mirror/student/learningcommons/template/faq.html"
+ input = open(f,'r')
+ lines = input.readlines()
+ input.close()
+
+ output = open(f,'w')
+ i = 0
+ for L in lines[0:3]:
+ #print("%i, %s" % (i,L))
+ output.write(L)
+ i+=1
+
+ output.write( get_doc('1gCYmGOanQ2rnd-Az2HWFjYErBm_4tp_RuJs6a7MkYrE',1) )
+
+def test_repl():
+ from interactive import MyRepl
+
+ c = MyRepl()
+ c.set_my_dict( { "Peter": "thats me", "Mike": "a VP", "Pablo": "isn't here", "Mary": "Far away" })
+ c.inputloop()
+
+
+
+if __name__ == "__main__":
+
+    print('')
+ options = { 1: ['Build www pages', make] ,
+ 2: ['De-template an existing page', try_untemplate],
+ 3: ['Text to table', txt_2_table],
+ 4: ['Pull the Counseling FAQ from gdocs', counseling_faq] ,
+ 5: ['Pull the DegreeWorks FAQ from gdocs', degwork_faq] ,
+ 6: ['Pull the Finaid FAQ from gdocs', finaid_faq] ,
+ 7: ['Pull the Tutoring FAQ from gdocs', tutor_faq] ,
+ 8: ['Pull the Counseling Location page from gdocs', coun_loc] ,
+ 9: ['Pull the student tech faq page from gdocs', studenttech_faq] ,
+ 10: ['Pull the DE faq page from gdocs', de_faq] ,
+ 11: ['Pull the VRC faq page from gdocs', vrc_faq] ,
+ 12: ['Test a REPL', test_repl ],
+ }
+
+ for key in options:
+ print(str(key) + '.\t' + options[key][0])
+
+ print('')
+ resp = input('Choose: ')
+
+ # Call the function in the options dict
+ options[ int(resp)][1]()
+
+
\ No newline at end of file
diff --git a/templates/dir.html b/templates/dir.html
new file mode 100644
index 0000000..b46d603
--- /dev/null
+++ b/templates/dir.html
@@ -0,0 +1,171 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/templates/hello.html b/templates/hello.html
new file mode 100644
index 0000000..f540101
--- /dev/null
+++ b/templates/hello.html
@@ -0,0 +1,112 @@
+
+Welcome To Gavilan College
+{% if name %}
+iLearn Hits for: {{ name }}
+ canvas id: {{ id }}
+{% else %}
+<h1>Hello, World!</h1>
+{% endif %}
+
+Shutdown
+
+
+
diff --git a/templates/images.html b/templates/images.html
new file mode 100644
index 0000000..d74d505
--- /dev/null
+++ b/templates/images.html
@@ -0,0 +1,134 @@
+
+Welcome To Gavilan College
+
+
+
+
+
+
+
+
+
+
+
+
+Photo Cropper
+
+ x
+ [[ff.conf_name]]
+
+x [[ff]]
+
+
+
+Make Crop
+
+
+
+
+
+
+
+
diff --git a/templates/personnel.html b/templates/personnel.html
new file mode 100644
index 0000000..7c3728e
--- /dev/null
+++ b/templates/personnel.html
@@ -0,0 +1,197 @@
+
+Welcome To Gavilan College
+{% if name %}
+Editor for: {{ name }}
+ canvas id: {{ id }}
+{% else %}
+<h1>Hello, World!</h1>
+{% endif %}
+
+
+This is a page. Vue is: 1. set up your data; fetch JSON for either one item or the whole list.
+
+
+
+
+
+
+
+
+
+
+
+I'm making a vue app. Again. And I like it.
+
+1.1 Make your main div with id, and custom tags in it.
+
+
+
+
+
+
+
+ Name
+ Title
+ Department
+ Old Department
+ Email
+ Phone
+
+
+
+
+2. Make some components
+
+
+
+
+3. Including the one that corresponds to the html / main div above.
+
+
+
+
+
diff --git a/templates/sample-simple-vue-starter.html b/templates/sample-simple-vue-starter.html
new file mode 100644
index 0000000..90fddb0
--- /dev/null
+++ b/templates/sample-simple-vue-starter.html
@@ -0,0 +1,194 @@
+
+Welcome To Gavilan College
+{% if name %}
+Editor for: {{ name }}
+ canvas id: {{ id }}
+{% else %}
+<h1>Hello, World!</h1>
+{% endif %}
+
+
+This is a page. Vue is: 1. set up your data; fetch JSON for either one item or the whole list.
+
+
+
+
+
+
+1.1 Make your main div with id, and custom tags in it.
+
+
+
+
+2. Make some components
+
+
+
+
+3. Including the one that corresponds to the html / main div above.
+
+
+
+
+
+
+
+
+
diff --git a/timer.py b/timer.py
new file mode 100644
index 0000000..a084727
--- /dev/null
+++ b/timer.py
@@ -0,0 +1,35 @@
+from threading import Timer
+import time, datetime
+
+mm = 18
+
+t = datetime.datetime.today()
+future = datetime.datetime(t.year,t.month,t.day,23,mm)
+diff = future - t
+delta = diff.total_seconds()
+
+print("waiting until 11:%i PM, which is %i seconds from now." % (mm,delta))
+
+
+
+
+
+def func(a, b):
+ print("Called function")
+ return a * b
+
+# Schedule a timer for 5 seconds
+# We pass arguments 3 and 4
+t = Timer(delta, func, [3, 4])
+
+start_time = time.time()
+
+# Start the timer
+t.start()
+
+end_time = time.time()
+
+if end_time - start_time < 5.0:
+    print("Timer will wait for some time before calling the function")
+else:
+    print("%i seconds already passed. Timer finished calling func()" % delta)
\ No newline at end of file
diff --git a/token.pickle b/token.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..f58564b9b6ab6d5ae69fd9fb9a413ff60c4c761c
GIT binary patch
literal 730
zcmZ9K%Wl(95QYP78XE2tB=+6FqK@svj?FH0?IvkssM9!3B^3E`e5tQ%ZX&{hi(DbK
zcu}UAReyYwC}*O2<)4j~fO0(25K`|Zl2RnYkWb?t{k}Wr^@{GYITx3(ZyeN_QI!RF
z4NcoIbCd@$YX2?ZKx(&sG@D1qt^J?hq4gCWXmDt>>aHcR(+m-4e}snyTMK@kXXIU{
zQ4AhpuIVG>(?H)vE8#rOYG~cw!uGA_UHE`cu_tZ-n1jmh~;*nQ>eTf7aXm
z#(x`blRw|K+EGyNSzE#jV_3I}9M|m&Y>(xb-bpNSJ=g2Gw8tb&V)3CJX1st4nr8eg
zNomT=GO`0$5QObC$!oU_M>yKz+W8YyKWGjwjo0;za9ZWLT9iRvI+AW*RiOH`6S(lI
Gu&7_^JL+8k
literal 0
HcmV?d00001
diff --git a/users.py b/users.py
new file mode 100644
index 0000000..bf946aa
--- /dev/null
+++ b/users.py
@@ -0,0 +1,2203 @@
+
+import json, codecs, requests, re, pdb, csv, textdistance
+import sys, string, funcy, math, shutil, imghdr, os, collections
+import pytz, time
+import pandas as pd
+import matplotlib.pyplot as plt
+
+#from pandas import TimeGrouper
+from collections import defaultdict
+from pipelines import fetch, fetch_stream, getSemesterSchedule, header, url, FetchError, put_file
+from courses import course_enrollment, users_in_semester
+from localcache import users_this_semester_db, unwanted_req_paths, timeblock_24hr_from_dt, dt_from_24hr_timeblock
+from localcache import teachers_courses_semester
+from util import dept_from_name, most_common_item
+from os.path import exists, getmtime
+
+#from localcache import users_file, com_channel_dim
+
+from dateutil import parser
+from datetime import datetime as dt
+from datetime import timedelta
+import datetime
+
+import queue
+from threading import Thread
+from os import path
+
+# for NLP
+import spacy
+from gensim import corpora, models, similarities, downloader, utils
+from nltk import stem
+
+
+# todo: these constants
+
+#last_4_semesters = 'fall2020 summer2020 spring2020 fall2019'.split(' ')
+#last_4_semesters_ids = [62, 60, 61, 25]
+last_4_semesters = 'spring2021 fall2020 summer2020 spring2020'.split(' ')
+last_4_semesters_ids = [168, 65, 64, 62]
+
+log_default_startdate = "2021-08-23T00:00:00-07:00"
+lds_stamp = parser.parse(log_default_startdate)
+
+recvd_date = '2021-08-23T00:00:00Z'
+num_threads = 25
+max_log_count = 250000
+
+
+##########
+##########
+########## GETTING USER DATA
+##########
+##########
+
+# All users to a cache file cache/allusers.json
+def fetchAllUsers():
+
+ if exists('cache/allusers.json'):
+        mtime = dt.fromtimestamp( getmtime('cache/allusers.json') )
+        newname = 'cache/allusers_'+ mtime.strftime('%Y%m%d') + ".json"
+ print("renaming old data file to %s" % newname)
+ os.rename('cache/allusers.json', newname)
+
+
+
+ out1 = codecs.open('cache/allusers.json','w','utf-8')
+ out2 = codecs.open('cache/allusers_ids.json','w','utf-8')
+ all_u = fetch_stream(url + '/api/v1/accounts/1/users?per_page=100', 1)
+
+ ids = []
+ main_list = []
+ for this_fetch in all_u:
+ for U in this_fetch:
+ ids.append(U['id'])
+ main_list.append(U)
+
+ ids.sort()
+ out2.write( json.dumps(ids, indent=2))
+ out1.write( json.dumps(main_list, indent=2))
+ out2.close()
+ out1.close()
+ return ids
+
+
+
+##########
+##########
+########## TEACHERS LIST AND LOCAL USERS FILE
+##########
+##########
+
+# Fetch teacher users objects from local cache
+def teacherRolesCache(): # I used to be load_users
+ users_raw = json.load(open('cache/ilearn_staff.json','r'))
+ users = {}
+ users_by_id = {}
+ for U in users_raw:
+ users[ U['login_id'] ] = U
+ users_by_id[ U['id'] ] = U
+ return users, users_by_id
+
+
+
+
+
+
+# Outputs: cache/ilearn_staff.json
+# Canvas: Fetch all people with gavilan.edu email address
+def teacherRolesUpdateCache(): # I used to be get_users
+ t = fetch('/api/v1/accounts/1/users?per_page=500&search_term=%40gavilan.edu&include[]=email')
+ g = open('cache/ilearn_staff.json','w')
+ g.write( json.dumps(t) )
+ g.close()
+ #put_file('/gavilan.edu/staff/flex/2020/','cache/','ilearn_staff.json')
+ print("Wrote to 'cache/ilearn_staff.json'")
+ return teacherRolesCache()
+
+
+# Fetch preferred email address for a given user id. ( Canvas )
+def getEmail(user_id):
+ results = fetch("/api/v1/users/" + str(user_id) + "/communication_channels")
+ for r in results:
+ if r['type']=='email':
+ return r['address']
+ return ''
+
+
+##########
+##########
+########## TEACHERS AND OTHER STAFF
+##########
+##########
+#
+# Gather all my info, CRM style, in the folder teacherdata
+#
+#
+# Typical actions: For everyone with a teacher role:
+# - What are the courses they taught for the last X semesters?
+# - What's their activity level each semester?
+# - Which of those courses are Online, Hybrid or Face2face?
+# + column for each semester: OHLOHL
+# - How many online classes have they taught in the past?
+#    - Are they brand new, or brand new online?
+# further...
+# - what's their department?
+# - what's their badges and 'tech level?'
+# -
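+#
+# The assembled record per teacher ends up roughly like the columns of
+# staff_main_table.csv: teacher, prct_online, prct_lecture, prct_hybrid,
+# num_sections_last_year, canvasid, name, goo, email, courses, dept,
+# bootcamp grade/progress, committees, and recent activity.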
+
+
+# All teachers in a particular course
+def getAllTeachers(course_id=59): # a list
+    qry = '/api/v1/courses/' + str(course_id) + '/search_users?enrollment_type=teacher'
+    return fetch(qry)
+#
+def classType(t):
+ if t == 'lecture': return 'L'
+ if t == 'online': return 'O'
+ if t == 'hours': return 'R'
+ if t == 'lab': return 'A'
+ if t == 'hybrid': return 'H'
+ else: return 'L' # todo: fix bug in schedule parser so non-online classes have a type field
+
+def my_blank_string(): return "no data"
+def my_blank_dict(): return {'name':'NoName','email':'noemail@gavilan.edu'}
+def my_empty_dict(): return defaultdict(my_blank_string)
+
+def get_email_from_rec(name,name_to_record):
+ #print "Looking up: " + name
+ try:
+ return name_to_record[name]['email']
+ except Exception as e:
+ print("Missing Teacher %s" % name)
+ return 'noemail@gavilan.edu'
+
+
+
+
+# Pull the staff directory on the webpage. Convert to pandas dataframe
+def staff_dir(get_fresh=False):
+ """
+ if get_fresh:
+ url = "http://www.gavilan.edu/staff/dir.php"
+ regex = "var\slist=(\[.*\]);"
+ response = requests.get(url).text
+ m = re.search(regex,response)
+ if m:
+ output = '{"staff":' + m.group(1) + '}'
+ of = open('cache/teacherdata/staff_dir.json','w')
+ of.write(output)
+ js = json.loads(output)
+ df = pd.DataFrame(js['staff'])
+ return df
+ print("Wrote cache/teacherdata/staff_dir.json")
+ else:
+ print("Failed on staff directory scrape")
+ return ''
+ else:
+ input = json.loads(open('cache/teacherdata/staff_dir.json','r').read())
+ df = pd.DataFrame(input['staff'])
+ return df
+ """
+
+ # TODO lol get fresh again...
+
+ old_dir = csv.reader(open('cache/personnel2020_04_12.csv'), delimiter=',')
+ dept1_crxn = {r[0]:r[1] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
+ dept2_crxn = {r[0]:r[2] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
+ title_crxn = {r[0]:r[3] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
+ revised_dir = [ ]
+ columns = next(old_dir)
+
+ for r in old_dir:
+ old_dept = r[2]
+ if old_dept in dept1_crxn:
+ new_one = dept1_crxn[old_dept]
+ if dept2_crxn[old_dept]: new_one += '/' + dept2_crxn[old_dept]
+ if title_crxn[old_dept]: new_one += '/' + title_crxn[old_dept]
+ r[2] = new_one
+ revised_dir.append(r)
+ print(revised_dir)
+ return pd.DataFrame(revised_dir,columns=columns)
+
+
+#
+#
+#
+# ###
+# ### TEACHER CRM FUNCTIONS
+# ###
+#
+
+def schedForTeacherOverview(long,short): # 'long' (the full term name, e.g. '2019spring') is currently unused
+ sem = getSemesterSchedule(short)
+ sem['type'] = sem['type'].apply(classType)
+ #sem['code'] = sem[['code','type']].apply(' '.join,axis=1)
+ sem['sem'] = short
+ sem = sem.drop(['time','loc','name','date','days'],axis=1) # ,'crn'
+ return sem
+
+
+
+
+# Return a dataframe of the last 4 semester schedules put together
+def oneYearSchedule():
+ sp19 = schedForTeacherOverview('2019spring','sp19')
+ su19 = schedForTeacherOverview('2019summer','su19')
+ fa19 = schedForTeacherOverview('2019fall','fa19')
+ sp20 = schedForTeacherOverview('2020spring','sp20')
+
+ # The four-semester schedule
+ a = pd.concat([sp19,su19,fa19,sp20], sort=True, ignore_index=True)
+ a = a.drop(['cap','cmp','extra','rem','sec','cred','act'], axis=1)
+ a.to_csv('cache/one_year_schedule.csv')
+ return a
+
+def num_sections_last_year(line):
+ #if not type(line)=='str': return 0
+ parts = line.split(' ')
+ return len(parts)
+
+def sec_type_stats(line):
+ #print(type(line))
+ #if not type(line)=='str': return {'fail':1}
+ #print("in sts: " + str(line))
+ parts = line.split(' ')
+ output = defaultdict(int)
+ for p in parts: output[p] += 1
+ return output
+
+def prct_online(line):
+ d = sec_type_stats(line)
+ #print(d)
+ total = 0
+ my_total = 0
+ for k,v in d.items():
+ total += v
+ if k == 'O': my_total += v
+ return int(100 * ((1.0)*my_total / total))
+
+def prct_lecture(line):
+ #print(line)
+ d = sec_type_stats(line)
+ #if 'fail' in d: return 0
+ total = 0
+ my_total = 0
+ for k,v in d.items():
+ total += v
+ if k == 'L': my_total += v
+ return int(100 * ((1.0)*my_total / total))
+
+
+def prct_hybrid(line):
+ d = sec_type_stats(line)
+ #if 'fail' in d: return 0
+ total = 0
+ my_total = 0
+ for k,v in d.items():
+ total += v
+ if k == 'H': my_total += v
+ return int(100 * ((1.0)*my_total / total))
+
+# Given the names of teachers in last year's schedules, fill in email, etc. from ilearn files
+def teacher_basic_info(sched, from_ilearn, names):
+ bi = from_ilearn # pd.DataFrame(from_ilearn)
+ bi.rename(columns={'id':'canvasid','login_id':'goo'}, inplace=True)
+ # bi.drop(['name',],axis=1,inplace=True)
+
+ #print(bi)
+ #input('xx')
+
+ sp20 = schedForTeacherOverview('2020spring','sp20')
+
+
+ codes_sp20 = sp20.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) )
+ crns_sp20 = sp20.groupby('teacher')['crn'].apply( lambda x: ' '.join( map( str, funcy.distinct(x))) )
+ codes_sp20.rename(columns={'code':'sp20code'}, inplace=True)
+ codes_sp20.to_csv('cache/trash/codes_sp20.csv',header=True)
+ crns_sp20.rename(columns={'crn':'sp20crn'}, inplace=True)
+ crns_sp20.to_csv('cache/trash/crns_sp20.csv',header=True)
+
+
+ a = sched.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) )
+ a = pd.DataFrame(a)
+ a.reset_index(inplace=True)
+ a['dept'] = a.apply(guessDept,axis=1)
+ print(a)
+
+ def find_that_name(x):
+ #print(x)
+ if 'teacher' in x: return names(x['teacher'])
+ #print('name not found?')
+ return ''
+
+ a['ilearn_name'] = a.apply( find_that_name, axis=1)
+
+ a.rename(columns={'code':'courses'}, inplace=True)
+ #print(type(a))
+ a.reset_index(inplace=True)
+
+ a = pd.merge(a,codes_sp20.rename('sp20courses'), on='teacher')
+ a = pd.merge(a,crns_sp20.rename('sp20crns'), on='teacher')
+ a.to_csv('cache/trash/sched_w_sp20.csv',header=True)
+ print(a)
+
+ a['canvasid'] = a['teacher'].map(names)
+ #print(a)
+ c = pd.merge(bi, a, left_on='name', right_on='ilearn_name', how='outer')
+ c.to_csv('cache/trash/basic.csv',header=True)
+ #print(c)
+ return c
+
+
+# what percentage of their sections were online / hybrid /lecture ?
+# Consumes: output/semesters/fa19_sched.json and etc for 1 year
+# Outputs: cache/teacher_by_semester.csv,
+def teacherModalityHistory(sched=[],names=[]):
+ if not len(sched):
+ sched = oneYearSchedule()
+ names = match_username()
+
+ # How many classes a teacher taught lect/online/hybrid/hours
+ sec_type = sched.groupby(['teacher','sem'])['type'].apply(' '.join)
+ sec_type.to_csv('cache/teacherdata/teacher_by_semester.csv',header=True)
+ ## THIS IS THE LIST of how many
+ ## lecture, hybrid, online they've taught
+
+ #sec_type = pd.read_csv('cache/teacherdata/teacher_by_semester.csv')
+
+ sec_grp = sec_type.groupby('teacher').aggregate( ' '.join )
+ #sec_grp.to_csv('cache/trash/sec_grp_3.csv',header=True)
+
+ #sec_grp = sec_grp.iloc[1:] ## I'm seeing bad items on the first 2
+ #sec_grp.drop(index='teacher')
+ #sec_grp.to_csv('cache/trash/sec_grp_0.csv',header=True)
+
+ #
+ sec_grp = pd.DataFrame(sec_grp)
+ #print(type(sec_grp))
+ sec_grp['prct_online'] = sec_grp['type'].map(prct_online)
+
+ sec_grp['prct_lecture'] = sec_grp['type'].map(prct_lecture)
+ sec_grp['prct_hybrid'] = sec_grp['type'].map(prct_hybrid)
+ sec_grp['num_sections_last_year'] = sec_grp['type'].map(num_sections_last_year)
+ sec_grp.drop('type',axis=1,inplace=True)
+ sec_grp.reset_index(inplace=True)
+ sec_grp.to_csv('cache/teacherdata/modality_history.csv')
+ return sec_grp
+
+
+
+def teacherCourseHistory(a,names):
+ pass
+ # actually not using this. moved to _basic_info
+
+ # YEEEAH
+ sched = a.groupby(['teacher','code'])
+ #for name,group in sched:
+ # print(name)
+ #print(sched.count())
+ return
+ a['name'] = a.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1)
+ a['email'] = a.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1)
+ a.sort_values(by=['dept','teacher','codenum'],inplace=True)
+ a = a.drop(['teacher'],axis=1)
+ a.to_csv('cache/teacherdata/courses_taught.csv')
+
+ return a
+ """
+ d = a.groupby(['teacher']) # ,'dept','codenum','codeletter'
+
+ out1 = open('teacherdata/courses_taught.csv','w')
+ by_dept = {} # x todo: sort by dept also
+ for name, group in d:
+ #print name
+ if re.search(r'^\d+',name) or name=='TBA':
+ print("Skipping weird name: ", name)
+ continue
+ rec = {'email':'xx'}
+ try:
+ rec = records_by_sname[name]
+ #print rec
+ except Exception as e:
+ print("Missing Teacher %s" % name)
+ continue
+ out1.write(name+"\t"+rec['email'])
+ s = set()
+ #print group
+ for idx,r in group.iterrows():
+ s.add( str(r[1]) + str(r[2]) + str(r[3]))
+ for clas in sorted(s):
+ d = dept_from_name(clas)
+ if d in by_dept:
+ if name in by_dept[d]:
+ by_dept[d][name].append(clas)
+ else:
+ by_dept[d][name] = [ clas, ]
+ else:
+ by_dept[d] = { name: [ clas, ] }
+
+ out1.write("\n\t"+str(clas))
+ out1.write("\n")
+ out1.write( json.dumps(by_dept,indent=2))"""
+
+
+
+# Consumes: output/semesters/fa19_sched.json and etc for 1 year
+# Outputs: cache/course_teacher_combos.csv,
+def teacherSharedCourses(a=[]):
+ if not len(a): a = oneYearSchedule()
+
+ # List of classes. Group by teacher/format. Shows who has historically
+ # taught a class and who teaches it most often.
+ c = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter'
+ c = c.groupby(['dept','codenum','codeletter']) #,'teacher'
+ c = c.aggregate(lambda x: set(x))
+ c.to_csv('teacherdata/course_teacher_combos.csv') ## THIS is the list of teachers who
+ ## share courses
+ return c
+
+
+
+# Consumes: output/semesters/fa19_sched.json and etc for 1 year
+# Outputs: cache/num_courses_per_dept.csv (not teacher_course_oer_deptcount)
+# How many courses in each department were taught in the last year?
+def departmentCountCourses(a=[]):
+ if not len(a): a = oneYearSchedule()
+
+ tt = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter'
+
+ records_by_sname = defaultdict(my_empty_dict, match_usernames())
+ tt.drop_duplicates(keep='first',inplace=True)
+ tt['name'] = tt.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1)
+ tt['email'] = tt.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1)
+ tt = tt.drop(['teacher'],axis=1)
+ tt.sort_values(by=['dept','name','codenum'],inplace=True)
+ count = tt['dept'].value_counts()
+ count.to_csv('cache/num_courses_per_dept.csv', header=True)
+
+
+def clean_nonprint(s):
+ return re.sub(f'[^{re.escape(string.printable)}]', '', s)
+
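+# cache/teacherdata/committees_2018_2019.csv is assumed to hold rows shaped like:
+#   First,Last,"Committee A, Committee B"
+# (three columns; the committees cell may itself be comma-separated)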
+def read_cmte(names):
+ output = []
+ out2 = defaultdict(list)
+ input = codecs.open('cache/teacherdata/committees_2018_2019.csv','r','utf-8')
+ with input as csvfile:
+ cmtereader = csv.reader(csvfile, delimiter=',', quotechar='"')
+ for row in cmtereader:
+            # strip whitespace and non-printable characters from every cell
+            row = [ clean_nonprint(R.strip()) for R in row ]
+            (fname,lname,cmtes) = row
+            cmtes = re.split(r",\s*",cmtes)
+
+ name1 = lname + ", " + fname
+ name2 = fname + " " + lname
+ name = name1
+ realname = names(name1)
+ if not realname:
+ realname = names(name2)
+ name = name2
+ if realname:
+ for cmm in cmtes:
+ output.append( [realname, cmm] )
+ out2[realname].append(cmm)
+ else:
+ print("committee participant name failed: %s / %s:\t%s" % (name1,name2,str(a)))
+ print(type(name1))
+ #print(out2)
+ return output,out2
+
+def read_training_records():
+ myinput = open('cache/teacherdata/more_2018_2019_training_attendance.txt','r').readlines()
+ current_sesh = ""
+ ppl_in_sesh = {}
+ all_ppl = set()
+
+ for L in myinput:
+ L = L.strip()
+ if L:
+ if L.startswith('#'):
+ ma = re.search(r'^\#\s(.*)$',L)
+ if ma:
+ current_sesh = ma.group(1)
+ else:
+ print("-- read_training_records: Couldn't find training set? " + L)
+ else:
+ if current_sesh in ppl_in_sesh:
+ ppl_in_sesh[current_sesh].append(L)
+ else:
+ ppl_in_sesh[current_sesh] = [ L, ]
+ all_ppl.add(L)
+ if 0:
+ print(ppl_in_sesh)
+ print(all_ppl)
+
+ # Want to pivot the dict, so key is a name, value is another dict, where k2 is session name, v2 is Y/N
+ d_of_d = defaultdict(dict)
+
+ for k,v in ppl_in_sesh.items():
+ for user in v:
+ d_of_d[user][k] = 'Y'
+
+ return d_of_d
+
+# open a file and mark the people with their ids given. Return a dataframe
+def read_bootcamp1(filename):
+ a = pd.read_csv(filename)
+ #print(a)
+ b = a.loc[:, ['canvas_id','grade','last_activity']]
+ b.rename(columns={'canvas_id':'bc1canvasid','grade':'bootcamp_grade','last_activity':'bootcamp_date'}, inplace=True)
+ #print(b)
+ return b
+
+# open a file and mark the people with their ids given. Return a dataframe
+def read_bootcamp2(filename):
+ a = pd.read_csv(filename)
+ #print(a)
+ b = a.loc[:, ['canvas_id','grade','last_activity']]
+ b.rename(columns={'canvas_id':'bc2canvasid','grade':'bootcamp_progress','last_activity':'bootcamp_date'}, inplace=True)
+ #print(b)
+ return b
+
+
+def not_blank_or_pound(L):
+ if L.startswith("#"): return False
+ L = L.strip()
+ if L == "": return False
+ return True
+
+def temp1(x):
+ #print(x[1])
+ return x[1]
+
+def add_realnames(df,names): # the surveys. raw name is in 2nd column
+ df['ilearn_name'] = df.apply( lambda x: names(temp1(x),1), axis=1)
+ return df
+
+def compareToughNames(a,b):
+ # search for a in b
+ m = re.search(a, b)
+ if m: return True
+ return False
+
+
+def compareNames(a,b,verbose=0):
+ if a == b: return True
+
+ cnDBG = 0
+ try:
+        parts_a = [ W.lower().strip() for W in re.split(r"[\s,]", a) ]
+        parts_b = [ W.lower().strip() for W in re.split(r"[\s,]", b) ]
+
+ pa2 = sorted([ parts_a[0], parts_a[-1] ])
+ pb2 = sorted([ parts_b[0], parts_b[-1] ])
+
+ if pa2 == pb2:
+ if cnDBG: print("->Match: %s, %s" % (a,b))
+ return True
+ if pa2[0] == pb2[0] or pa2[-1] == pb2[-1]:
+ if cnDBG: print("--->Near match: %s" % b)
+ return False
+
+ except Exception as e:
+ #print("Problem with compareNames %s , %s" % (a,b))
+ #print(e)
+ return False
+
+ if len(pa2[0])>3 and len(pb2[0])>3:
+ if pa2[0][0] == pb2[0][0]:
+ if pa2[0][1] == pb2[0][1]:
+ if pa2[0][2] == pb2[0][2]:
+ if cnDBG: print("===> Near match (first 3): %s, %s, %s, %s" % (a, b, pa2[0], pb2[0]))
+ pass
+
+ b = b.lower()
+ a = a.lower()
+
+ #if verbose: print("searching: %s / %s" % (a,b))
+ if re.search( b, a):
+ #print("REGEX MATCH: %s | %s" % (a,b))
+ return True
+ if re.search( a, b):
+ #print("REGEX MATCH: %s | %s" % (a,b))
+ return True
+ return False
+
+def find_ilearn_record(ilearn_records,manual_records, othername,verbose=0):
+ # manual records are ('name':'canvas_id')
+ #print(ilearn_records)
+ if not othername: return ""
+ if type(othername) == type(1.25): return ""
+ #if math.isnan(othername): return False
+
+ if othername in manual_records:
+ a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[othername]) ))
+ if a:
+ return a['name']
+
+ for x in ilearn_records:
+ #print('f_i_r')
+ #print(othername)
+ #print(x)
+ if compareNames(othername,x['name'],verbose):
+ return x['name']
+
+ for k,v in manual_records.items():
+ #print(k)
+ #print(othername)
+ #print(type(othername))
+ b = re.search( k, othername)
+ if b:
+ a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[k]) ))
+ if a:
+ return a['name']
+ return ""
+
+
+def manualNamesAndDept():
+ # copied from // getTeachersInfoMain ....
+
+ schedule_one_yr = oneYearSchedule()
+ from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \
+ json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) )
+ manual_names = manualNames()
+ names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names)
+ teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn, names_lookup)
+ # till here
+
+
+ # the staff directory
+ dr = staff_dir(False)
+ print(dr)
+ print(dr.columns)
+ print( dr['department'].unique() )
+
+ # now to reconcile and combine these....
+ #
+ # we want:
+ # - alternate names of academic / other depts, with one preferred
+ # - some people are PT Fac, FT Fac, Director, assistant, spec, and some titles are unknown.
+ # - sometimes the hierarchy is of departments, and sometimes of people. try not to confuse that.
+ #
+
+
+ # eventually, want to get pics or other info from other sources too, o365, cranium cafe, etc
+ #
+
+
+
+def manualNames():
+ mm = dict([ x.strip().split(',') for x in \
+ open('cache/teacherdata/teacher_manual_name_lookup.csv','r').readlines()])
+ mz = {}
+ for k,v in mm.items():
+ mz[k] = v
+ mz[k.lower()] = v
+ parts = k.split(" ")
+ if len(parts)==2:
+ mz[ parts[1] + ", " + parts[0] ] = v
+ mz[ parts[1] + "," + parts[0] ] = v
+ #print(mz)
+ return mz
+
+# given a list of class codes, return the most common (academic) department
+def guessDept(d_list):
+ li = str(d_list.code).split(" ")
+ count = defaultdict(int)
+ #print(str(d_list.code))
+ for i in li:
+ m = re.search(r'^([A-Z]+)$',i)
+ if m:
+ count[m.group(1)] += 1
+ mmax = 0
+ max_L = ''
+ for k,v in count.items():
+ #print(" %s:%i, " % (k,v), end='')
+ if v > mmax:
+ mmax = v
+ max_L = k
+ print("")
+ return max_L
+
+"""
+# Faculty Info Plans
+
+
+
+bootcamp_active.csv Started bootcamp. Remind them to finish it?
+
+bootcamp_passed.csv Badge'd for BC. Online and Hybrid teachers not on this list need reminding.
+
+courses_taught.csv x
+
+course_teacher_combos.csv Teachers who share the teaching of a course. Courses in common.
+
+emails_deans+chairs.txt Just a email list
+
+FA2017 Faculty Survey.csv Look at answers for video, helpful formats, and comments
+
+faculty_main_info.csv Has percentage mix of a teachers' online/hybrid/lecture history
+
+historical_shells_used.json x
+
+SP2019 Faculty Survey.csv Look at rate tech skills, topics interested in, would add video, and comments
+
+committees 2018 2019.csv Committees people serve on.
+
+
+
+Not so useful:
+
+teacher_by_semester.csv precursor to faculty_main_info. Has semesters separated.
+
+"""
+#
+#
+#
+# Call all the teacher info / CRM gathering stuff
+# Make one big csv file of everything I know about a teacher
+def getTeachersInfoMain():
+
+ schedule_one_yr = oneYearSchedule()
+ #print(schedule_one_yr)
+ #if input('q to quit ')=='q': return
+
+ # comes from teacherRolesUpdateCache ... search for @gavilan.edu in email address
+ from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \
+ json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) )
+ #names_from_ilearn = list( [x.lower() for x in map( str, sorted(list(funcy.pluck('name',from_ilearn)))) ] )
+ from_ilearn_df = pd.DataFrame(from_ilearn)
+
+
+ manual_names = manualNames()
+ names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names)
+ #print(from_ilearn_df)
+ #if input('q to quit ')=='q': return
+
+
+ #print(schedule_one_yr)
+ #print("This is one year schedule.")
+ #input('\npress enter to continue')
+
+ teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn_df, names_lookup)
+ #print(teacher_info)
+ #input('\nThis is teacher info.\npress enter to continue')
+
+ modality_history = teacherModalityHistory(schedule_one_yr,names_lookup)
+ print(modality_history)
+ #print("This is teacher modality history.")
+ #input('\npress enter to continue')
+
+
+ master = pd.merge( modality_history, teacher_info, on='teacher', how='outer')
+ print(master)
+ master.to_csv('cache/trash/joined1.csv')
+ print(master.columns)
+ #input('\nThis is Joined 1.\npress enter to continue')
+
+ wp = read_bootcamp1('cache/teacherdata/bootcamp_passed.csv')
+ #print(wp)
+ master2 = pd.merge( master, wp, left_on='canvasid_x', right_on='bc1canvasid', how='outer')
+ master2.to_csv('cache/trash/joined2.csv')
+ print(master2)
+ print(master2.columns)
+ #input('\nThis is Joined 2.\npress enter to continue')
+
+
+ wp = read_bootcamp2('cache/teacherdata/bootcamp_active.csv')
+ master3 = pd.merge( master2, wp, left_on='canvasid_x', right_on='bc2canvasid', how='outer')
+ master3.to_csv('cache/trash/joined3.csv')
+ print(master3)
+ print(master3.columns)
+ #input('\nThis is Joined 3.\npress enter to continue')
+
+
+ # THE VIEWS / HISTORY. UPDATE with get_recent_views() .... check it for appropriate dates....
+ views = json.loads( codecs.open('cache/teacherdata/activitysummary.json','r','utf-8').read() )
+ vdf = pd.DataFrame.from_dict(views,orient='index',columns=['cid','cname','views','goo','dates','dateviews'])
+ print(vdf)
+ #input('k')
+
+ #master3.set_index('canvasid_x')
+ master3 = pd.merge(master3, vdf, left_on='canvasid_x', right_on='cid',how='outer')
+
+ dir_records = pd.DataFrame(staff_dir())
+ dir_records['email'] = dir_records['email'].str.lower()
+ master3['email'] = master3['email'].str.lower()
+
+ print(dir_records)
+ master3 = pd.merge(master3, dir_records, on='email',how='outer')
+ print(master3)
+ #if input('q to quit ')=='q': return
+
+ #master3.fillna(0, inplace=True)
+ #master3['views'] = master3['views'].astype(int)
+ #master3['num_sections_last_year'] = master3['num_sections_last_year'].astype(int)
+
+
+ #cmte = pd.read_csv('cache/teacherdata/committees_2018_2019.csv')
+ cmte,cmte_by_name = read_cmte(names_lookup)
+ cmte_str_by_name = {}
+ for k in cmte_by_name.keys():
+ #print(k)
+ #print(cmte_by_name[k])
+ cmte_str_by_name[k] = ",".join(cmte_by_name[k])
+ cc = pd.DataFrame.from_dict(cmte_str_by_name,orient='index',columns=['committees']) # 'teacher',
+ cc.reset_index(inplace=True)
+ master4 = pd.merge(master3, cc, left_on='name', right_on='index', how='outer')
+ master4.to_csv('cache/trash/joined4.csv')
+
+ master4.drop(['teacher','ilearn_name','canvasid_y','bc1canvasid','bc2canvasid','cid','cname','index_y'],axis=1,inplace=True)
+
+ # Exclude surveys for now
+ """
+ survey_2017 = pd.read_csv('cache/teacherdata/FA2017 Faculty Survey.csv')
+ survey_2017 = add_realnames(survey_2017,names_lookup)
+ survey_2017.to_csv('cache/trash/survey1.csv')
+ master5 = pd.merge(master4, survey_2017, left_on='name', right_on='ilearn_name', how='left')
+ master5.to_csv('cache/trash/joined5.csv')
+
+ survey_2019 = pd.read_csv('cache/teacherdata/SP2019 Faculty Survey.csv')
+ survey_2019 = add_realnames(survey_2019,names_lookup)
+ master6 = pd.merge(master5, survey_2019, left_on='name', right_on='ilearn_name', how='left')
+ master6.to_csv('cache/trash/joined6.csv')
+
+
+ newnames = [ x.strip() for x in open('cache/poll_question_names.txt','r').readlines() ]
+ namedict = {}
+ for i,n in enumerate(newnames):
+ if i%3==1: newname = n
+ if i%3==2: namedict[oldname] = newname
+ if i%3==0: oldname = n
+ master6 = master6.rename(columns=namedict)
+ master6.to_csv('cache/teacherdata/staff_main_table.csv')
+ master6.to_csv('cache/teacherdata/staff_main_table.csv')
+ """
+
+
+ master4.to_csv('cache/teacherdata/staff_main_table.csv')
+ master4.to_csv('gui/public/staff_main_table.csv')
+
+ other_training_records = read_training_records()
+ #print(json.dumps(other_training_records,indent=2))
+ #print("This is misc workshops.")
+ tt = pd.DataFrame.from_dict(other_training_records,orient='index')
+ tt = tt.fillna("")
+ #print(tt)
+ #input('\npress enter to continue')
+
+
+
+ #teacherSharedCourses(schedule_one_yr)
+ #getAllTeachersInTerm()
+
+
+
+def enroll_staff_shell():
+ staff = users_with_gavilan_email()
+ for i,s in staff.iterrows():
+ print(s['canvasid'],s['name'])
+ u = url + '/api/v1/courses/8528/enrollments'
+ param = {
+ 'enrollment[user_id]':s['canvasid'],
+ 'enrollment[type]': 'StudentEnrollment',
+ 'enrollment[enrollment_state]': 'active',
+ }
+
+ res = requests.post(u, headers = header, data=param)
+ print(res.text)
+
+#"Jun 28 2018 at 7:40AM" -> "%b %d %Y at %I:%M%p"
+#"September 18, 2017, 22:19:55" -> "%B %d, %Y, %H:%M:%S"
+#"Sun,05/12/99,12:30PM" -> "%a,%d/%m/%y,%I:%M%p"
+#"Mon, 21 March, 2015" -> "%a, %d %B, %Y"
+#"2018-03-12T10:12:45Z" -> "%Y-%m-%dT%H:%M:%SZ"
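+#
+# a quick sanity check of the last mapping (dt here is datetime.datetime):
+#   dt.strptime("2018-03-12T10:12:45Z", "%Y-%m-%dT%H:%M:%SZ")
+#   -> datetime.datetime(2018, 3, 12, 10, 12, 45)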
+
+
+# take a list of raw hits.
+def activity_summary(hits):
+ #infile = "cache/teacherdata/activity/G00101483.json"
+ #data = json.loads(open(infile,'r').read())
+ #hits = data['raw']
+ if not hits:
+ return [ [], [], ]
+ dt_list = []
+
+    two_weeks = datetime.timedelta(days=14)
+    today = dt.now().replace(tzinfo=pytz.timezone('UTC'))
+
+    target = today - two_weeks
+
+ for h in hits:
+ the_stamp = parser.parse(h['created_at'])
+ if the_stamp > target:
+ dt_list.append(the_stamp)
+ df = pd.DataFrame(dt_list, columns=['date',])
+ df.set_index('date', drop=False, inplace=True)
+ df.rename(columns={'date':'hits'}, inplace=True)
+ #df.resample('1D').count().plot(kind='bar')
+ #return df.resample('1D').count().to_json(date_format='iso')
+ #print(hits)
+ #print(df)
+ if not df.size:
+ return [ [], [], ]
+ bins = df.resample('1D').count().reset_index()
+ bins['date'] = bins['date'].apply(str)
+ #print(bins)
+ return [bins['date'].to_list(), bins['hits'].to_list()]
+
+ #plt.show()
+
+ #df = df.groupby([df['date'].dt.to_period('D')]).count().unstack()
+ #df.groupby(TimeGrouper(freq='10Min')).count().plot(kind='bar')
+ #df.plot(kind='bar')
+
+
+
+# next step
+# 1. save timestamp of the fetch
+#
+# 2. parse it and only fetch since then. afterwards, pull out non-hits. Summarize day/week/month stats.
+#
+# 2a. merge old and new records, and re-summarize.
+#
+# 3. Next improvements in GUI. hook up to python server backend.
+#
+# Get views counts on current teachers. todo: month is hardcoded here
+def get_recent_views(id=1):
+ dt_format = "%Y-%m-%dT%H:%M:%SZ"
+ default_start_time = dt.strptime("2020-08-14T00:00:00Z", dt_format)
+ default_start_time = default_start_time.replace(tzinfo=pytz.timezone('UTC'))
+ end_time = dt.now(pytz.utc)
+ print("End time is: %s" % str(end_time))
+ myheaders = "x,teacher,prct_online,prct_lecture,prct_hybrid,num_sections_last_year,canvasid_x,name,sortable_name,goo,email,index_x,courses,dept,ilearn_name_x,canvasid_y,canvasid_x,bootcamp_grade,bootcamp_date_x,canvasid_y,bootcamp_progress,bootcamp_date_y,index_y,committees".split(",")
+
+ teachers = [row for row in csv.reader(open('cache/teacherdata/staff_main_table.csv','r'))][1:]
+
+ #tt = teachers[6:10]
+
+ summary = {}
+
+ for t in teachers:
+ name = t[1]
+ if name=="" or name=="TBA": continue
+ if not t[6]: continue
+ the_id = int(float(t[6]))
+ if the_id == 290: continue # STAFF STAFF
+ goo = t[9]
+ print(goo)
+
+ # read log of this person:
+ try:
+ prev_logf = codecs.open('cache/teacherdata/activity/%s.json' % goo,'r','utf-8')
+ prev_log = json.loads(prev_logf.read())
+ prev_logf.close()
+ except:
+ print("Exception happened on reading previous temp logs.")
+ prev_log = ''
+
+ if type(prev_log) == dict:
+ lastfetch = dt.strptime(prev_log['meta']['lastfetch'], dt_format)
+ lastfetch = lastfetch.replace(tzinfo=pytz.timezone('UTC'))
+ print("last fetch is: " + str(lastfetch))
+ print("Hits BEFORE was: %i" % len(prev_log['raw']))
+ else:
+ lastfetch = default_start_time
+ prev_log = { "raw":[], }
+
+ end_time = dt.now(pytz.utc)
+ u = url + "/api/v1/users/%s/page_views?start_time=%s&end_time=%s&per_page=100" % (str(the_id),lastfetch.strftime(dt_format), end_time.strftime(dt_format))
+ #print(u)
+ #input('getting this url')
+
+ print(name + "\t",end='\n')
+ if 1: # get fresh data?
+ r = fetch(u)
+ prev_log['raw'].extend( r )
+ summ = activity_summary(prev_log['raw'])
+ mydata = {'meta':{'lastfetch':end_time.strftime(dt_format)},'summary':summ,'raw':prev_log['raw']}
+ codecs.open('cache/teacherdata/activity/%s.json' % goo,'w','utf-8').write( json.dumps(mydata,indent=2))
+ summary[the_id] = [the_id, name, len(prev_log['raw']),goo, summ ,mydata['meta']]
+ print("Hits AFTER is: %i" % len(prev_log['raw']))
+ codecs.open('cache/teacherdata/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) )
+ codecs.open('gui/public/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) )
+
+
+
+# Have they taught online or hybrid classes?
+def categorize_user(u):
+ global role_table, term_courses
+ their_courses = get_enrlmts_for_user(u, role_table)
+ num_s = 0
+ num_t = 0
+ type = 's'
+ online_only = 1
+ is_online = []
+ #print their_courses
+ for x in their_courses.iterrows():
+ if len(x):
+ ttype = x[1]['type']
+ if ttype=='StudentEnrollment': num_s += 1
+ if ttype=='TeacherEnrollment': num_t += 1
+ cid = x[1]['course_id']
+ current_term = term_courses[lambda x: x['id']==cid]
+ if not current_term.empty:
+ is_online.append(current_term['is_online'].values[0])
+ else: online_only = 0
+ else: online_only = 0
+ if num_t > num_s: type='t'
+ if len(is_online)==0: online_only = 0
+
+ for i in is_online:
+ if i==0: online_only = 0
+ #print "Type: " + type + " All online: " + str(online_only) + " Number courses this term: " + str(len(is_online))
+ return (u[0],type, online_only, len(is_online))
+
+
+
+##########
+##########
+########## PHOTOS
+##########
+########## # todo: threaded
+
+# Does the account have a photo loaded?
+def checkForAvatar(id=2):
+ try:
+ t = url + '/api/v1/users/%s?include[]=last_login' % str(id)
+ r2 = requests.get(t, headers = header)
+ result = json.loads(r2.text)
+ codecs.open('cache/users/%s.txt' % str(id),'w','utf-8').write( json.dumps(result,indent=2) )
+
+ if 'avatar_url' in result:
+ if re.search(r'avatar\-50',result['avatar_url']): return 0
+ else: return (result['login_id'], result['avatar_url'], result['name'])
+ except Exception as e:
+ print("Looking for an avatar / profile pic had a problem: %s" % str(e))
+ return 0
+
+# Grab em. Change the first if when continuing after problems....
+def downloadPhoto():
+ pix_dir = 'cache/picsCanvas2022/'
+ # Update the list of all ilearn users?
+ i_last_ix = '-1'
+ photo_log_f = ''
+    if 0: ## set to 1 for a fresh fetch of all users; leave at 0 when resuming after a crash
+ ii = fetchAllUsers()
+ photo_log_f = open("cache/fotolog.txt", "w")
+ else:
+ ii = json.loads(codecs.open('cache/allusers_ids.json','r').read())
+ photo_log_f = open("cache/fotolog.txt", "r+")
+ i_last_ix = -1
+ try:
+ ab = photo_log_f.read()
+ print(ab)
+ ac = ab.split("\n")
+ print(ac)
+ i_last_ix = ac[-2]
+ print(i_last_ix)
+ except:
+ i_last_ix = -1
+ i_last_ix = int(i_last_ix)
+
+
+ print("Last user index checked was: %s, which is id: %s" % \
+ (i_last_ix, ii[i_last_ix] ))
+
+ print("Max index is: %i" % len(ii))
+
+
+ i_last_ix += 1
+ for index in range(i_last_ix, len(ii)):
+ i = ii[index]
+ photo_log_f.write("\n%i" % i )
+
+ a = checkForAvatar(i)
+ if a:
+ print(str(i) + ":\t" + str(a[0]) + "\t" + str(a[2]) )
+
+ try:
+ r = requests.get(a[1], stream=True)
+ if r.status_code == 200:
+ r.raw.decode_content = True
+ h=r.raw
+ with open(pix_dir + a[0].lower(), 'wb') as f:
+ shutil.copyfileobj(h, f)
+ # rename to right file extension
+ img_type = imghdr.what(pix_dir + a[0].lower())
+ if img_type == 'jpeg': img_type = 'jpg'
+ try:
+ shutil.move(pix_dir + a[0].lower(),pix_dir + a[0].lower()+'.'+img_type)
+ except Exception as e:
+ print(" \tCouldn't rewrite file")
+ else:
+ print(str(i) + ":\t didn't get expected photo")
+ except Exception as e:
+ print(" \tProblem with download " + str(e))
+ else:
+ print(str(i) + ":\tno user or no photo")
+ pass
+
+
+def mergePhotoFolders():
+
+ staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ]
+
+ headers = staff[0]
+ staff = staff[1:]
+
+ activestaff = []
+
+ for i,h in enumerate(headers):
+ #print("%i. %s" % (i,h) )
+ pass
+
+ for S in staff:
+ if S[7] and S[15]: # if teacher (name present) and sp20crns (taught in sp20)
+ activestaff.append(S[9].lower())
+ activestaffset=set(activestaff)
+
+ #return
+
+ a = 'cache/picsCanvas'
+ b = 'gui/public/picsCanvas2018'
+ c = 'gui/public/picsCanvasAll'
+
+
+ # I want a big list of who has an avatar pic.
+
+ # and i want to know how many updated since last DL, and how many are in only one or the other.
+
+
+ old = os.listdir(b)
+ count = defaultdict(int)
+
+ oldset = set()
+ newset = set()
+
+ for O in old:
+ if O.endswith('.jpg') or O.endswith('.png'):
+ g = O.split(r'.')[0]
+ oldset.add(g)
+
+ for N in os.listdir(a):
+ if N.endswith('.jpg') or N.endswith('.png'):
+ g = N.split(r'.')[0]
+ newset.add(g)
+
+ """print("Active SP20 Teachers")
+ print(activestaffset)
+
+ print("Old Avatars")
+ print(oldset)
+
+ print("New Avatars")
+ print(newset)"""
+
+ updated_set = oldset.union(newset)
+
+ tch_set = updated_set.intersection(activestaffset)
+
+ only_old = oldset.difference(newset)
+
+ only_new = newset.difference(oldset)
+
+ print("Tch: %i Old: %i New: %i" % (len(activestaffset),len(oldset),len(newset)))
+
+ print("All avatars: %i Teachers: %i Only in old: %i Only in new: %i" % ( len(updated_set), len(tch_set), len(only_old), len(only_new)))
+
+ allpics = os.listdir(c)
+
+ haveapic = {}
+ for A in allpics:
+ if A.endswith('.jpg') or A.endswith('.png'):
+ g = (A.split(r'.')[0]).upper()
+
+ haveapic[g] = A
+ outie = codecs.open('gui/public/pics.json','w').write( json.dumps( haveapic,indent=2))
+
+
+def mergePhotoFolders2():
+
+ staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ]
+
+ headers = staff[0]
+ staff = staff[1:]
+
+ activestaff = []
+
+ for i,h in enumerate(headers):
+ #print("%i. %s" % (i,h) )
+ pass
+
+ for S in staff:
+ if S[5]:
+ activestaff.append(S[9].lower())
+
+ a = 'cache/picsCanvas'
+ b = 'gui/public/picsCanvas2018'
+ c = 'gui/public/picsCanvasAll'
+
+ old = os.listdir(b)
+ count = defaultdict(int)
+ for N in os.listdir(a):
+ if N.endswith('.jpg') or N.endswith('.png'):
+ g = N.split(r'.')[0]
+ if g in activestaff:
+ count['s'] += 1
+ if N in old:
+ #print( "Y - %s" % N)
+ count['y'] += 1
+ else:
+ #print( "N - %s" %N )
+ count['n'] += 1
+ else:
+ #print("x - %s" % N)
+ count['x'] += 1
+ print("Of the 2020 avatars, %i are in the 2018 folder, and %i are new." % (count['y'],count['n']))
+ print("Of %i active teachers, %i have avatars." % (len(activestaff),count['s']))
+ #print(json.dumps(count,indent=2))
+
+
+
+# Go through my local profile pics, upload any that are missing.
+def uploadPhoto():
+ files = os.listdir('pics2017')
+ #print json.dumps(files)
+ pics_i_have = {}
+ #goo = "g00188606"
+ canvas_users = json.loads(open('canvas/users.json','r').read())
+ t = url + '/api/v1/users/self/files'
+ i = 0
+ j = 0
+ pics_dir = 'pics2017/'
+
+ for x in canvas_users:
+ j += 1
+ if x['login_id'].lower() + '.jpg' in files:
+ #print x['login_id'] + " " + x['name']
+ i += 1
+ pics_i_have[x['id']] = x
+
+ print('Canvas users: ' + str(j))
+ print('Pic matches: ' + str(i))
+    account_count = 0
+    ids_i_uploaded = []
+    # 'domain' is not imported from pipelines; deriving it from the base url (assumption)
+    domain = re.sub(r'^https?://', '', url).strip('/')
+
+ for id, target in list(pics_i_have.items()):
+ #if account_count > 50:
+ # print 'Stopping after 5.'
+ # break
+
+ print('trying ' + target['name'] + '(' + str(id) + ')')
+ if checkForAvatar(id):
+ print("Seems to have avatar loaded.")
+ continue
+
+ goo = target['login_id'].lower()
+ local_img = pics_dir + goo + '.jpg'
+ inform_parameters = {
+ 'name':goo + '.jpg',
+ 'size':os.path.getsize(local_img), # read the filesize
+ 'content_type':'image/jpeg',
+ 'parent_folder_path':'profile pictures',
+ 'as_user_id':'{0}'.format(id)
+ }
+
+ res = requests.post(t, headers = header, data=inform_parameters)
+ print("Done prepping Canvas for upload, now sending the data...")
+ json_res = json.loads(res.text,object_pairs_hook=collections.OrderedDict)
+        file_payload = {'file':open(local_img,'rb').read()}  # renamed so it doesn't shadow the directory listing above
+
+        _data = list(json_res.items())
+        _data[1] = ('upload_params',list(_data[1][1].items()))
+        print("Pre-flight request accepted, now uploading the file data...")
+        upload_file_response = requests.post(json_res['upload_url'],data=_data[1][1],files=file_payload,allow_redirects=False)
+ # Step 3: Confirm upload
+ print("Done uploading the file, now confirming the upload...")
+ confirmation = requests.post(upload_file_response.headers['location'],headers=header)
+ if 'id' in confirmation.json():
+ file_id = confirmation.json()['id']
+ else:
+ print('no id here')
+ #print(confirmation.json())
+ print("upload confirmed...nicely done!")
+
+ time.sleep(1)
+        # Make an API call to set the avatar image to the token of the uploaded image (file_id)
+ params = { 'as_user_id':'{0}'.format(id)}
+ avatar_options = requests.get("https://%s/api/v1/users/%s/avatars"%(domain,'{0}'.format(id)),headers=header,params=params)
+ #print "\nAvatar options: "
+ #print avatar_options.json()
+ for ao in avatar_options.json():
+ #print ao.keys()
+ if ao.get('display_name')==goo + '.jpg':
+ #print("avatar option found...")
+ #print((ao.get('display_name'),ao.get('token'), ao.get('url')))
+ params['user[avatar][token]'] = ao.get('token')
+ set_avatar_user = requests.put("https://%s/api/v1/users/%s"%(domain,'{0}'.format(id)),headers=header,params=params)
+ if set_avatar_user.status_code == 200:
+ print(('success uploading user avatar for {0}'.format(id)))
+ account_count += 1
+ ids_i_uploaded.append(id)
+ else:
+ print('some problem setting avatar')
+ else:
+                pass  # no avatar option matched the uploaded filename
+ print("Uploaded these guys: " + json.dumps(ids_i_uploaded))
+
+
+
+
+##########
+##########
+########## EMAILING PEOPLE
+##########
+##########
+
+
+
+def test_email():
+ send_z_email("Peter Howell", "Peter", "phowell@gavilan.edu", ['CSIS85','CSIS42'])
+
+
+def create_ztc_list():
+ course_combos = pd.read_csv('cache/teacher_course_oer_email_list.csv')
+ course_combos.fillna('',inplace=True)
+
+    # read the dept-count CSV and turn it into a {dept: count} dict (in one line!)
+ dept_counts = { x[0]:x[1].strip() for x in [ y.split(',') for y in open('cache/teacher_course_oer_deptcount.csv','r').readlines() ][1:] }
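+    # The one-liner above unrolls to roughly this (header row skipped, each data
+    # row being "dept,count"):
+    #   dept_counts = {}
+    #   for y in open('cache/teacher_course_oer_deptcount.csv','r').readlines()[1:]:
+    #       x = y.split(',')
+    #       dept_counts[x[0]] = x[1].strip()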
+
+
+ course_template = "%s "
+ url_template = "https://docs.google.com/forms/d/e/1FAIpQLSfZLQp6wHFEdqsmpZ7jz2Y8HtKLo8XTAhrE2fyvTDOEgquBDQ/viewform?usp=pp_url&entry.783353363=%s&entry.1130271051=%s" # % (FULLNAME, COURSE1)
+
+
+
+ # list depts
+ mydepts = sorted(list(set(course_combos['dept'] )))
+ i = 0
+ outp = open("output/oer_email_list.csv","w")
+ outp.write("fullname,firstname,email,link,courses\n")
+
+ ones_i_did = [ int(x) for x in "40 38 31 21 7 12 24 25 1 13 18 22 44 55 56 51 20 16 2 3 4 5 6 8 9 10 11 14 15 17 23 53 52 50 30 48 39 37 54 49 47 46 45 43 42 41 33 32 29 28 27 26".split(" ") ]
+
+ for D in mydepts:
+ i += 1
+ extra = ''
+ if D in dept_counts:
+ extra = " (%s)" % dept_counts[D]
+ extra2 = ''
+ if i in ones_i_did:
+ extra2 = "xxxx "
+ print("%s %i. %s %s" % (extra2,i,D,extra))
+ choice_list = input("Which department? (for multiple, separate with spaces) ").split(' ')
+
+ all_people_df = []
+
+ for choice in choice_list:
+        is_dept = course_combos['dept']==mydepts[int(choice)-1]
+        filtered = pd.DataFrame(course_combos[is_dept])
+ if len(all_people_df): all_people_df = pd.concat([filtered,all_people_df])
+ else: all_people_df = filtered
+ print(mydepts[int(choice)-1])
+ print(all_people_df)
+ print(' ')
+ all_people_df.sort_values(by=['name'],inplace=True)
+ print(all_people_df)
+
+ b = all_people_df.groupby(['name'])
+ for name,group in b:
+ if name == 'no data': continue
+ nameparts = name.split(', ')
+ fullname = nameparts[1] + ' ' + nameparts[0]
+ firstname = nameparts[1]
+
+ outp.write(fullname + ',' + firstname + ',')
+ email = ''
+ link = ''
+ courses = []
+            flag = 1  # build the prefilled form link only for the teacher's first course
+ for i in group.iterrows():
+            g = i[1]  # iterrows() yields (index, row) tuples; take the row Series
+ this_course = g.dept + ' ' + str(g.codenum) + g.codeletter
+ courses.append( this_course ) #print(g)
+ email = g.email
+ if flag:
+ link = url_template % (fullname, this_course)
+ flag = 0
+
+ outp.write(email + ',' + link + "," + " ".join(courses) + "\n")
+
+ outp.close()
+
+
+##########
+##########
+########## FORENSICS TYPE STUFF
+##########
+##########
+
+# Standard single-user fetch; named get_* so related fetches sort together alphabetically.
+
+def get_user_info(id):
+ u = fetch( '/api/v1/users/%i' % id )
+ ff = codecs.open('cache/users/%i.txt' % id, 'w', 'utf-8')
+ ff.write( json.dumps(u, indent=2))
+ return u
+
+
+# Communication messages: notifications Canvas pushed out to the user's email.
+def comm_mssgs_for_user(uid=0):
+ if not uid:
+ uid = input('Canvas id of the user? ')
+ u = url + '/api/v1/comm_messages?user_id=%s&start_time=%s&end_time=%s' % (uid,'2021-01-01T01:01:01Z','2021-08-01T01:01:01Z') # &filter[]=user_%s' % uid
+ convos = fetch(u,1)
+
+ oo = codecs.open('cache/comms_push_user_%s.txt' % str(uid), 'w')
+ oo.write('USER %s\n' % uid)
+ oo.write(json.dumps(convos, indent=2))
+
+ print(convos)
+
+
+# Dump all of a user's conversations (inbox, archived, sent), then fetch each thread.
+def convos_for_user(uid=0):
+ if not uid:
+ uid = input('Canvas id of the user? ')
+ u = url + '/api/v1/conversations?include_all_conversation_ids=true&as_user_id=%s' % uid # &filter[]=user_%s' % uid
+ convos = fetch(u,1)
+
+ oo = codecs.open('cache/convo_user_%s.txt' % str(uid), 'w')
+ oo.write('USER %s\n' % uid)
+ oo.write(json.dumps(convos, indent=2))
+
+ convo_ids_list = convos["conversation_ids"]
+ print(convo_ids_list)
+
+ u2 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=archived&as_user_id=%s' % uid # &filter[]=user_%s' % uid
+ archived_convos = fetch(u2,1)
+    try:
+        aconvo_ids_list = archived_convos["conversation_ids"]   # same key as the inbox call above
+        print(aconvo_ids_list)
+    except Exception:
+        print("didn't seem to be any archived.")
+        aconvo_ids_list = []
+
+ u3 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=sent&as_user_id=%s' % uid # &filter[]=user_%s' % uid
+ sent_convos = fetch(u3,1)
+    try:
+        sconvo_ids_list = sent_convos["conversation_ids"]
+        print(sconvo_ids_list)
+    except Exception:
+        print("didn't seem to be any sent.")
+        sconvo_ids_list = []
+
+ convo_ids_list.extend(aconvo_ids_list)
+ convo_ids_list.extend(sconvo_ids_list)
+
+
+ ##
+ ## Now get all the messages in each of these conversations
+ ##
+
+ for cid in convo_ids_list:
+ print("Fetching conversation id: %s" % cid)
+ oo.write("\n\n----------------\nconversation id: %s\n\n" % cid)
+
+        u4 = url + '/api/v1/conversations/%s?as_user_id=%s' % (cid,uid)
+        conversation = fetch(u4,1)
+        oo.write("\n%s\n\n" % json.dumps(conversation,indent=2))
+
+
+
+
+
+ """
+ for c in convos:
+ c['participants'] = ", ".join([ x['name'] for x in c['participants'] ])
+ includes = tuple("last_message subject last_message_at participants".split(" "))
+ convos = list( \
+ reversed([ funcy.project(x, includes) for x in convos ]))
+ """
+
+    #print(json.dumps(convos, indent=2))
+
+
+# Fetch quiz/assignment submissions; earlier URL variants are kept, commented,
+# for reference. The live call pulls all student submissions with history.
+def quiz_get_sub(courseid, quizid, subid=0):
+    #u = url + "/api/v1/courses/%s/quizzes/%s/submissions/%s" % ( str(courseid), str(quizid), str(subid) )
+    #u = url + "/api/v1/courses/%s/quizzes/%s/questions?quiz_submission_id=%s" % ( str(courseid), str(quizid), str(subid) )
+    #u = url + "/api/v1/courses/%s/assignments/%s/submissions/%s?include[]=submission_history" % ( str(courseid), str(quizid), str(subid) )
+
+    u = url + "/api/v1/courses/%s/students/submissions?student_ids[]=all&include=submission_history&grouped=true&workflow_state=submitted" % str(courseid)
+    return fetch(u)
+
+# quiz submissions for quiz id x, in course id y
+def quiz_submissions(courseid=9768, quizid=32580):
+ #subs = quiz_get_sub(courseid, quizid)
+ #print( json.dumps( subs, indent=2 ) )
+
+ if 1:
+        # POST a request to generate a student_analysis quiz report (all versions)
+ data = { "quiz_report[includes_all_versions]": "true", "quiz_report[report_type]": "student_analysis" }
+
+ u = url + "/api/v1/courses/%s/quizzes/%s/reports?" % ( str(courseid), str(quizid) )
+ res = requests.post(u, headers = header, data=data)
+ print(res.content)
+
+ #u2 = url + "/api/v1/courses/%s/quizzes/%s/reports" % ( str(courseid), str(quizid) )
+ #res2 = fetch(u2)
+ #print( json.dumps(res2.content, indent=2))
+
+ jres2 = json.loads( res.content )
+        print(jres2)
+        # report generation is async, so 'file' may be missing on the first response
+        if jres2.get('file') and jres2['file'].get('url'):
+ u3 = jres2['file']['url']
+ r = requests.get(u3, headers=header, allow_redirects=True)
+ open('cache/quizreport.txt', 'wb').write(r.content)
+ return
+
+    # NOTE: nothing below this point is reachable (the return above always fires),
+    # and 'res2'/'subs' are never defined in this version; kept from an earlier draft.
+    for R in res2:
+ if R['id'] == 7124:
+ u3 = R['url']
+ r = requests.get(u3, headers=header, allow_redirects=True)
+ open('cache/quizreport.txt', 'wb').write(r.content)
+ return
+
+ u3 = url + "/api/v1/courses/%s/quizzes/%s/reports/%s" % ( str(courseid), str(quizid), res2[''] )
+
+ oo = codecs.open('cache/submissions.json','w', 'utf-8')
+ oo.write('[\n')
+ for s in subs:
+ if len(s['submissions']):
+ j = json.dumps(s, indent=2)
+ print(j)
+ oo.write(j)
+ oo.write('\n')
+
+ oo.write('\n]\n')
+ return 0
+
+
+ #u = url + "/api/v1/courses/%s/quizzes/%s/submissions?include[]=submission" % (str(courseid), str(quizid))
+ u = url + "/api/v1/courses/%s/quizzes/%s/submissions" % (str(courseid), str(quizid))
+ subs = fetch(u, 0)
+ print( json.dumps( subs, indent=1 ) )
+
+ for S in subs['quiz_submissions']:
+ print(json.dumps(S))
+ submis = quiz_get_sub(courseid, quizid, S['id'])
+ print(json.dumps(submis, indent=2))
+
+
+
+# Parse one line of a page-views CSV; returns (timeblock, course_context_id, action),
+# or 0 if the line matches an unwanted path or can't be parsed.
+def requests_line(line,i=0):
+    try:
+        L = line  # may arrive as bytes; decoded just below
+ if type(L) == type(b'abc'): L = line.decode('utf-8')
+ for pattern in unwanted_req_paths:
+ if pattern in L:
+ return 0
+ i = 0
+ line_parts = list(csv.reader( [L] ))[0]
+ #for p in line_parts:
+ # print("%i\t%s" % (i, p))
+ # i += 1
+
+ d = parser.parse(line_parts[7])
+ d = d.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific'))
+ d = timeblock_24hr_from_dt(d)
+
+ #r = re.search('context\'\:\s(\d+)', line_parts[22])
+ #c = 0
+ #if r:
+ # c = r.groups(1)
+ str1 = line_parts[20]
+ str2 = str1.replace("'",'"')
+ str2 = str2.replace("None",'""')
+ #print(str2)
+ j = json.loads(str2 )
+ c = j['context']
+ a = line_parts[5]
+ #print( str( (d, c, a) ))
+ return (d, str(c), a)
+ except Exception as e:
+ #print("Exception: " + str(e))
+ return 0
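+
+# Rough shape of what requests_line() consumes (illustrative; the exact timeblock
+# string comes from timeblock_24hr_from_dt, defined elsewhere):
+#   column 7  -> timestamp, e.g. "2021-03-01T18:22:10Z"
+#   column 20 -> a python-repr'd dict that includes 'context' (the course id)
+#   column 5  -> the action / URL fragment of the hit
+# so a matching row returns something like (<timeblock>, '10531', <action>).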
+
+
+# Count hits per timeblock for every user enrolled in the given course(s).
+def report_logs(id=0):
+ if not id:
+ L = ['10531', ]
+ else:
+ L = [ id, ]
+ report = []
+ for id in L:
+ emt_by_id = course_enrollment(id)
+ for U in emt_by_id.values():
+ user_d = defaultdict( int )
+ print( "Lookin at user: %s" % U['user']['name'] )
+ report.append( "User: %s\n" % U['user']['name'] )
+ log_file_name = 'cache/users/logs/%i.csv' % U['user']['id']
+ if path.exists(log_file_name):
+ print("Log file %s exists" % log_file_name)
+ temp = open(log_file_name, 'r').readlines()
+ for T in temp[1:]:
+ #print(T)
+ result = requests_line(T)
+ if result:
+ (d, c, a) = result
+ if c == id:
+ user_d[d] += 1
+ print(json.dumps(user_d, indent=2))
+ for V in sorted(user_d.keys()):
+ report.append( "\t%s: %i\n" % ( dt_from_24hr_timeblock(V), user_d[V]) )
+ report.append("\n\n")
+ return report
+
+
+def track_users_in_sem():
+ L = users_this_semester_db()
+ sL = list(L)
+ sL.sort(reverse=True)
+ fetch_queue = queue.Queue()
+
+ for i in range(num_threads):
+ worker = Thread(target=track_user_q, args=(i,fetch_queue))
+        worker.daemon = True  # daemon threads won't block interpreter exit
+ worker.start()
+
+ for U in sL:
+ print( "adding %s to the queue" % U )
+ fetch_queue.put( U )
+
+ fetch_queue.join()
+ print("Done.")
+
+
+def track_users_in_class(L=[]):
+ if len(L)==0:
+ #id = '10531'
+ ids = input("Course ids, separated with comma: ")
+        L = [x.strip() for x in ids.split(',')]
+ print("Getting users in: " + str(L))
+
+ fetch_queue = queue.Queue()
+
+ for i in range(num_threads):
+ worker = Thread(target=track_user_q, args=(i,fetch_queue))
+        worker.daemon = True  # daemon threads won't block interpreter exit
+ worker.start()
+
+
+ users_set = set()
+ for id in L:
+ emt_by_id = course_enrollment(id)
+ print(emt_by_id)
+ for U in emt_by_id.values():
+ if not U['user_id'] in users_set:
+ print(U)
+ print( "adding %s to the queue" % U['user']['name'] )
+ fetch_queue.put( U['user_id'] )
+ users_set.add(U['user_id'])
+
+ all_reports = []
+ fetch_queue.join()
+ print("Done with %i users in these courses." % len(users_set))
+ for id in L:
+ rpt = report_logs(id)
+ all_reports.append(rpt)
+ outp = codecs.open('cache/courses/report_%s.txt' % id, 'w', 'utf-8')
+ outp.write(''.join(rpt))
+ outp.close()
+ return all_reports
+
+def track_user_q(id, q):
+ while True:
+ user = q.get()
+ print("Thread %i: Going to download user %s" % (id, str(user)))
+ try:
+ track_user(user, id)
+ except FetchError as e:
+ pass
+ q.task_done()
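+
+# The track_users_in_* functions above drive this worker: they start num_threads
+# daemon threads running track_user_q, put user ids on the queue, and block on
+# queue.join() until every id has been marked task_done().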
+
+
+# Honestly, fetching full histories this way makes little sense if they're
+# already in the Canvas Data tables; it's best for recent hits over a short
+# period. (Live data would be better still.)
+
+# Maintain local logs: if some already exist for a user, only download entries
+# newer than the last one retrieved.
+def track_user(id=0,qid=0):
+ global recvd_date
+ L = [id,]
+ if not id:
+ ids = input("User ids (1 or more separated by comma): ")
+ L = [int(x) for x in ids.split(',')]
+ print("Getting users: " + json.dumps(L))
+
+
+ for id in L:
+ id = int(id)
+        # Open info file if it exists, check for last day retrieved
+ try:
+ infofile = open("cache/users/%i.txt" % id, 'r')
+ info = json.loads( infofile.read() )
+
+            # TODO: create this info file if it isn't there, and check for changes;
+            # track down where it was originally written.
+ infofile.close()
+ except Exception as e:
+ print("failed to open info file for user id %i" % id)
+
+ info = get_user_info(id)
+
+ print("(%i) Student %i Info: " % (qid,id))
+ #print( json.dumps(info, indent=2))
+
+ url_addition = ""
+
+        if 1: # hard-coded date window; while enabled, the last-log logic below never runs
+
+ url_addition = "?start_time=%s&end_time=%s" % ( '2022-06-15T00:00:00-07:00', '2022-12-31T00:00:00-07:00' )
+ elif 'last_days_log' in info:
+ print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id']))
+ print("Last day logged was: %s" % info['last_days_log'])
+ url_addition = "?start_time=%s" % info['last_days_log']
+ the_stamp = parser.parse(info['last_days_log'])
+ the_stamp = the_stamp.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific'))
+ now = dt.now()
+ now = now.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific'))
+ dif = now - the_stamp
+ print("It was %s ago" % str(dif))
+            lds_stamp = parser.parse(log_default_startdate)
+            if the_stamp < lds_stamp:
+                print("Too long, taking default")
+                url_addition = "?start_time=%s" % log_default_startdate
+
+        else:
+            url_addition = "?start_time=%s" % log_default_startdate
+ #if dif.days > 1:
+
+ url = "/api/v1/users/%i/page_views%s" % (id, url_addition)
+ print(url)
+
+ try:
+
+ api_gen = fetch_stream(url,0)
+
+ log_file_name = 'cache/users/logs/%i.csv' % id
+ if path.exists(log_file_name):
+ print("Log file %s exists" % log_file_name)
+ temp = open(log_file_name, 'a', newline='')
+ csv_writer = csv.writer(temp)
+ else:
+ print("Creating new log file: %s" % log_file_name)
+ temp = open(log_file_name, 'w', newline='') ### TODO
+ csv_writer = csv.writer(temp)
+
+
+ count = 0
+ for result in api_gen:
+                if count == 0 and len(result):
+                    csv_header = result[0].keys()  # local name; don't shadow the auth 'header'
+                    csv_writer.writerow(csv_header)
+ # results come in newest first....
+ recvd_date = result[0]['updated_at']
+ print("(%i) Most recent hit is %s" % (qid,recvd_date))
+
+ count += len(result)
+ indent = " " * qid
+ #print("(%i) Got %i records, %i so far" % (qid,len(result),count))
+ print("(%s - %i) %s %i" % (qid, id, indent, count))
+ if count > max_log_count:
+ print("Too many logs, bailing. sorry.")
+ break
+
+ for R in result:
+ csv_writer.writerow(R.values())
+
+ latest = parser.parse(recvd_date)
+ #last_full_day = (latest - timedelta(days=1)).isoformat()
+ info['last_days_log'] = recvd_date #last_full_day
+
+ infofile = open("cache/users/%i.txt" % id, 'w')
+ infofile.write(json.dumps( info, indent=2 ))
+ infofile.close()
+
+ print("(%i) Output to 'cache/users/log/%i.csv'" % (qid,id))
+ except FetchError as e:
+ print("Getting a 502 error.")
+ raise FetchError()
+ except Exception as e2:
+ print("Got an error receiving logs: %s" % str(e2))
+
+#
+def track_users_by_teacherclass():
+ all_teachers = teachers_courses_semester()
+
+ skip_to = "Punit Kamrah"
+ skipping = 1
+
+ grouped = funcy.group_by( lambda x: x[4], all_teachers )
+ g2 = {}
+ for k,v in grouped.items():
+ print(k)
+ if skipping and skip_to != k:
+ print("skipping")
+ continue
+ skipping = 0
+
+ g2[k] = list(funcy.distinct( v, 1 ))
+ print("\n\n\n\n\n")
+ print(k)
+ print("\n\n\n\n\n")
+
+ teacherfile = codecs.open('cache/teacherdata/reports/%s.txt' % k.replace(" ","_"),'w','utf-8')
+ class_ids = funcy.lpluck(1,v)
+ class_names = funcy.lpluck(2,v)
+ print(class_ids)
+ print(class_names)
+
+ rpts = track_users_in_class(class_ids)
+
+ for i, R in enumerate(rpts):
+ teacherfile.write('\n\n\n---\n\n%s \n\n' % class_names[i])
+ teacherfile.write(''.join(R))
+ teacherfile.flush()
+ teacherfile.close()
+
+
+
+ print(json.dumps(g2, indent=2))
+
+
+def nlp_sample():
+ # Stream a training corpus directly from S3.
+ #corpus = corpora.MmCorpus("s3://path/to/corpus")
+
+ stemmer = stem.porter.PorterStemmer()
+
+ strings = [
+ "Human machine interface for lab abc computer applications",
+ "A survey of user opinion of computer system response time",
+ "The EPS user interface management system",
+ "System and human system engineering testing of EPS",
+ "Relation of user perceived response time to error measurement",
+ "The generation of random binary unordered trees",
+ "The intersection graph of paths in trees",
+ "Graph minors IV Widths of trees and well quasi ordering",
+ "Graph minors A survey",
+    ]
+ processed = [ [ stemmer.stem(y) for y in utils.simple_preprocess(x, min_len=4)] for x in strings]
+ print(processed)
+ dictionary = corpora.Dictionary( processed )
+ dct = dictionary
+ print(dictionary)
+
+ corpus = [dictionary.doc2bow(text) for text in processed]
+
+ print(corpus)
+
+    # Train Latent Semantic Indexing with 4 topics.
+ lsi = models.LsiModel(corpus, num_topics=4)
+ print(lsi.print_topics(-1))
+
+ # Convert another corpus to the LSI space and index it.
+ #index = similarities.MatrixSimilarity(lsi[another_corpus])
+
+ tfidf = models.TfidfModel(corpus)
+
+ #index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=12)
+ index = similarities.MatrixSimilarity(lsi[corpus])
+ print(index)
+
+
+ # Compute similarity of a query vs indexed documents.
+ query = "tree graph".split()
+ query_bow = dictionary.doc2bow(query)
+ vec_lsi = lsi[query_bow]
+
+ print(query_bow)
+ print(tfidf[query_bow])
+ print(vec_lsi)
+ print("ok")
+
+ # LdaMulticore
+
+ lda_model = models.LdaModel(corpus=corpus,
+ id2word=dictionary,
+ random_state=100,
+ num_topics=4,
+ passes=40,
+ chunksize=1000,
+ #batch=False,
+ alpha='asymmetric',
+ decay=0.5,
+ offset=64,
+ eta=None,
+ eval_every=0,
+ iterations=100,
+ gamma_threshold=0.001,
+ per_word_topics=True)
+ lda_model.save('cache/lda_model.model')
+ print(lda_model.print_topics(-1))
+ print(lda_model)
+
+ for c in lda_model[corpus]:
+ print("Document Topics : ", c[0]) # [(Topics, Perc Contrib)]
+ print("Word id, Topics : ", c[1][:3]) # [(Word id, [Topics])]
+ print("Phi Values (word id) : ", c[2][:2]) # [(Word id, [(Topic, Phi Value)])]
+ print("Word, Topics : ", [(dct[wd], topic) for wd, topic in c[1][:2]]) # [(Word, [Topics])]
+ print("Phi Values (word) : ", [(dct[wd], topic) for wd, topic in c[2][:2]]) # [(Word, [(Topic, Phi Value)])]
+ print("------------------------------------------------------\n")
+
+
+ sims = index[vec_lsi]
+ print("ok2")
+ print(list(enumerate(sims)))
+
+ for document_number, score in sorted(enumerate(sims), key=lambda x: x[1], reverse=True):
+ print(document_number, score)
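+
+    # With this toy corpus, the "tree graph" query should rank the tree/graph
+    # documents highest; the scores above are cosine similarities in LSI space.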
+
+
+def nlp_sample2():
+ # load english language model
+ nlp = spacy.load('en_core_web_sm',disable=['ner','textcat'])
+
+ text = "This is a sample sentence."
+
+    # run the pipeline to produce a spaCy Doc
+ doc = nlp(text)
+
+ for token in doc:
+ print(token.text,'->',token.pos_)
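+
+    # Expected output (model-dependent; en_core_web_sm tags roughly like this):
+    #   This -> PRON, is -> AUX, a -> DET, sample -> NOUN, sentence -> NOUN, . -> PUNCT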
+
+
+
+
+
+def one_course_enrol():
+
+ users = '96 18771 2693 5863 327'.split()
+ course = '11015'
+ the_type = 'TeacherEnrollment' # 'StudentEnrollment'
+ u = url + '/api/v1/courses/%s/enrollments' % course
+
+ for user in users:
+ param = {
+ 'enrollment[user_id]':user,
+ 'enrollment[type]': the_type,
+ 'enrollment[enrollment_state]': 'active',
+ }
+
+ res = requests.post(u, headers = header, data=param)
+ print(res.text)
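+
+    # Note: per the Canvas enrollments API, enrollment_state 'active' enrolls the
+    # user immediately; omitting it would send the usual course invitation instead.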
+
+
+def find_new_teachers():
+ filename = "cache/fa22_sched.json"
+ jj = json.loads(codecs.open(filename,'r','utf-8').read())
+ for J in jj:
+ print( J['teacher'])
+
+
+
+def user_db_sync():
+ #fetch all personnel dir entries from dir_api.php. PERSL unique emails
+ persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
+ persl_emails = set([x.lower() for x in funcy.pluck('email',persl)])
+ #persl_ids = set([x.lower() for x in funcy.pluck('email',persl)])
+ #
+ #fetch all staff from ilearn ILRN unique emails
+ ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
+ ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
+ #
+ #fetch all conf_users from dir_api.php CONUSR unique emails
+ conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
+ conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
+
+ #fetch all gavi_personnel_ext from dir_api.php GPEREXT must have column 'personnel' or 'c_users' or both.
+ gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")
+
+ all_emails = set(persl_emails)
+ all_emails.update(ilrn_emails)
+ all_emails.update(conusr_emails)
+
+ all_emails = list(all_emails)
+ all_emails.sort()
+
+ fout = codecs.open('cache/db_staff_report.csv','w','utf-8')
+ fout.write('email,personnel_dir,ilearn,conf_user\n')
+ for e in all_emails:
+
+ if e in ilrn_emails and not (e in conusr_emails) and e.endswith('@gavilan.edu'):
+ E = funcy.first(funcy.where(ilrn,email=e))
+ goo = E['login_id'][3:]
+ #print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
+ print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
+
+ # goo (minus G00) email, and name go into conf_users
+
+ fout.write(e+',')
+ if e in persl_emails:
+ fout.write('1,')
+ else:
+ fout.write('0,')
+ if e in ilrn_emails:
+ fout.write('1,')
+ else:
+ fout.write('0,')
+ if e in conusr_emails:
+ fout.write('1,')
+ else:
+ fout.write('0,')
+ fout.write('\n')
+ fout.close()
+ #
+
+ #print( json.dumps( [persl,ilrn,conusr,gperext], indent=2 ) )
+ print('done')
+
+import traceback
+
+
+def find_no_goo():
+
+ DO_DELETE_USERS = 0
+ DO_DELETE_PORTFOLIOS = 0
+
+ output = codecs.open('cache/no_goo_numbers.json','w','utf-8')
+ output2 = codecs.open('cache/wrong_root_acct.json','w','utf-8')
+ output3 = codecs.open('cache/wrong_sis_import_id.json','w','utf-8')
+ output4 = codecs.open('cache/bad_portfolios.json','w','utf-8')
+ #output5 = codecs.open('cache/bad_portfolios_detail.html','w','utf-8')
+ all = []
+ no_root = []
+ no_sis = []
+ port = []
+ i = 0
+ j = 0
+ k = 0
+ p = 0
+ users = json.loads(codecs.open('cache/allusers.json','r','utf-8').read())
+ for u in users:
+        if 'login_id' not in u:
+ print(u['name'])
+ i+=1
+ all.append(u)
+ user_port = []
+ pp = fetch(url + '/api/v1/users/%s/eportfolios' % str(u['id']))
+ for p_user in pp:
+ try:
+ user_port.append( fetch(url+'/api/v1/eportfolios/%s' % str(p_user['id']) ) )
+                if DO_DELETE_PORTFOLIOS:
+                    # re-open output5 above (currently commented out) before enabling this
+                    output5.write(" deleted: %s\n" % str(p_user['id']) )
+                    output5.flush()
+                    del_request = requests.delete(url + "/api/v1/eportfolios/%s" % str(p_user['id']) ,headers=header)
+ print(del_request.text)
+ except Exception as e:
+ traceback.print_exc()
+ p += len(pp)
+ port.append(pp)
+
+ if DO_DELETE_USERS:
+ print("Deleting %s..." % u['name'])
+ del_request = requests.delete(url + "/api/v1/accounts/1/users/%s" % str(u['id']) ,headers=header)
+ print(del_request.text)
+ if 'root_account' in u and u['root_account'] != "ilearn.gavilan.edu":
+ no_root.append(u)
+ j += 1
+ if 'sis_import_id' in u and not u['sis_import_id']:
+ no_sis.append(u)
+ k += 1
+ print("Found %i users without G numbers" % i)
+ print("Found %i users with non gav root account" % j)
+ print("Found %i users without sis id" % k)
+ print("Found %i questionable portfolios" % p)
+ output.write( json.dumps(all,indent=2) )
+ output2.write( json.dumps(no_root,indent=2) )
+ output3.write( json.dumps(no_sis,indent=2) )
+ output4.write( json.dumps(port,indent=2) )
+
+
+def track_a_user():
+ a = input("User ID? ")
+ track_user(a)
+
+
+
+if __name__ == "__main__":
+ print ("")
+ options = { 1: ['Fetch iLearn users with @gavilan.edu email address', teacherRolesUpdateCache],
+ 2: ['Fetch all users',fetchAllUsers],
+ 5: ['Download user avatars', downloadPhoto],
+ 6: ['Merge photo folders', mergePhotoFolders],
+ 7: ['Get all teachers logs 1 month', get_recent_views],
+ 8: ['Gather teacher history, a variety of stats.', getTeachersInfoMain],
+ 9: ['test rtr.', read_training_records],
+ 10: ['Get a users logs', track_user],
+ 11: ['test: oneYearSchedule', oneYearSchedule],
+ 12: ['summarize hit activity', activity_summary],
+ 13: ['Get all users logs in a class', track_users_in_class],
+ 14: ['Get logs for 1 user', track_a_user ],
+ 15: ['Get all users logs in a semester', track_users_in_sem],
+ 16: ['Report on attendance for all classes', track_users_by_teacherclass],
+ 17: ['Show all convos for a user', convos_for_user],
+                18: ['Quiz submissions', quiz_submissions],
+                19: ['NLP Sample', nlp_sample],
+                20: ['Enroll a single user into a class', one_course_enrol],
+                21: ['Show all pushed notifications for a user', comm_mssgs_for_user],
+                22: ['Sync personnel and conference user databases', user_db_sync],
+                23: ['Find non-gnumbers', find_no_goo ],
+                24: ['Teachers new this semester', find_new_teachers],
+ #3: ['Main index, 1 year, teachers and their classes', getAllTeachersInTerm],
+ #5: ['Match names in schedule & ilearn', match_usernames],
+ #6: ['Create Dept\'s ZTC list', create_ztc_list],
+ ##7: ['Build and send ZTC emails', send_ztc_mails],
+ #14: ['investigate the logs', investigate_logs],
+ #12: ['test: match_usernames', match_usernames],
+ #13: ['test: get all names', getAllNames],
+ #13: ['x', users_with_gavilan_email],
+ }
+ if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
+ resp = int(sys.argv[1])
+ print("\n\nPerforming: %s\n\n" % options[resp][0])
+
+ else:
+ print ('')
+ for key in options:
+ print(str(key) + '.\t' + options[key][0])
+
+ print('')
+ resp = input('Choose: ')
+
+ # Call the function in the options dict
+ options[ int(resp)][1]()
+
diff --git a/util.py b/util.py
new file mode 100644
index 0000000..122e929
--- /dev/null
+++ b/util.py
@@ -0,0 +1,156 @@
+import re, csv, json
+import datetime
+import functools
+from datetime import timedelta
+from collections import defaultdict
+
+import pytz
+import dateutil.parser
+from dateutil import tz
+
+# optional debug sink used by partition(); callers may rebind this to a file handle
+dd = None
+
+def print_table(table):
+ longest_cols = [
+ (max([len(str(row[i])) for row in table]) + 3)
+ for i in range(len(table[0]))
+ ]
+ row_format = "".join(["{:>" + str(longest_col) + "}" for longest_col in longest_cols])
+ for row in table:
+ print(row_format.format(*row))
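+
+# e.g. print_table([["name","hits"], ["alice", 3], ["bob", 12]])
+# right-aligns each column to its widest cell plus three spaces of padding.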
+
+def remove_nl(s):
+    return s.rstrip()
+
+# Py2-era helper: decodes bytes keys/values from a DictReader. Under Py3 the csv
+# module already yields str, so this is only needed when feeding it byte rows.
+def UnicodeDictReader(utf8_data, **kwargs):
+ csv_reader = csv.DictReader(utf8_data, **kwargs)
+ for row in csv_reader:
+ yield {str(key, 'utf-8'):str(value, 'utf-8') for key, value in iter(list(row.items()))}
+
+
+def minimal_string(s):
+ s = s.lower()
+ s = re.sub(r'[^a-zA-Z0-9]',' ',s)
+ s = re.sub(r'(\s+)',' ',s)
+ s = s.strip()
+ return s
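+
+# e.g. minimal_string("ENGL-250:  Intro!") -> "engl 250 intro"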
+
+
+def to_file_friendly(st):
+ st = st.lower()
+ st = re.sub( r"[^a-z0-9]+","_",st)
+ return st
+
+def clean_title(st):
+ sq = re.sub( r"[^a-zA-Z0-9\.\-\!]"," ",st )
+ if sq: st = sq
+ if len(st)>50: return st[:50]+'...'
+ return st
+
+
+
+# predicate: does this page-view row belong to course context 7959?
+def match59(x):
+    return x['links']['context'] == 7959
+
+
+def item_2(x): return x[2]
+
+def unix_time_millis(dt):
+    # ms between dt and the Unix epoch (assumes the host clock is US/Pacific)
+    wst = pytz.timezone("US/Pacific")
+ epoch = datetime.datetime.fromtimestamp(0)
+ epoch = wst.localize(epoch)
+ return (dt - epoch).total_seconds() * 1000.0
+
+# ENGL250 returns ENGL
+def dept_from_name(n):
+    m = re.search(r'^([a-zA-Z]+)\s?[\d\/]+',n)
+ if m: return m.group(1)
+ print(("Couldn't find dept from: " + n))
+ return ''
+
+def most_common_item(li):
+ d = defaultdict(int)
+ for x in li:
+ d[x] += 1
+ s = sorted(iter(list(d.items())), key=lambda k_v: (k_v[1],k_v[0]), reverse=True)
+ #pdb.set_trace()
+ return s[0][0]
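+
+# e.g. most_common_item(['a','b','a','c']) -> 'a' (ties break toward the larger item)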
+
+# cmp-style comparator for ISO time strings; under Python 3 it has to be wrapped
+# with functools.cmp_to_key (see partition() below).
+def srt_times(a,b):
+ HERE = tz.tzlocal()
+ da = dateutil.parser.parse(a)
+ da = da.astimezone(HERE)
+ db = dateutil.parser.parse(b)
+ db = db.astimezone(HERE)
+ diff = da - db
+ return diff.seconds + diff.days * 24 * 3600
+
+def how_long_ago(a): # number of hours ago 'a' was...
+ if not a: return 9999
+ HERE = tz.tzlocal()
+ d_now = datetime.datetime.now()
+ d_now = d_now.replace(tzinfo=None)
+ #d_now = d_now.astimezone(HERE)
+ d_then = dateutil.parser.parse(a)
+ d_then = d_then.replace(tzinfo=None)
+ #d_then = d_then.astimezone(HERE)
+ diff = d_now - d_then
+    return (diff.seconds/3600) + (diff.days * 24) + 8 # +8 crudely shifts UTC back to Pacific (ignores DST)
+
+def partition(times_list):
+    # Take a list of times in this format: 2017-02-14T17:01:46Z
+    # and break them into sessions: [formatted_start, hits, minutes, hit_list].
+    # A gap of more than 26 minutes starts a new session; only sessions with
+    # more than 2 hits are kept.
+ global dd
+ mm = ['x','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
+ start = ""
+ last = ""
+ hits = 0
+ minutes_till_new_session = 26
+    delta = timedelta(minutes=minutes_till_new_session)
+ HERE = tz.tzlocal()
+ sessions = []
+
+    sorted_times_list = sorted(times_list, key=functools.cmp_to_key(srt_times))
+ current_set = []
+ timeline_times = []
+
+ for T in sorted_times_list:
+ dt_naive = dateutil.parser.parse(T)
+ dt = dt_naive.astimezone(HERE)
+ timeline_st = unix_time_millis(dt)
+
+ timeline_et = timeline_st + (1 * 60 * 1000) # always end 1 minute later....
+ timeline_dict = {}
+ timeline_dict['starting_time'] = timeline_st
+ timeline_dict['ending_time'] = timeline_et
+ timeline_times.append(timeline_dict)
+
+ month = mm[ int(dt.strftime("%m"))]
+ formatted = month + " " + dt.strftime("%d %H:%M")
+ if not start: # start a new session
+ start = dt
+ start_f = formatted
+ last = dt
+ current_set.append(formatted)
+ hits = 1
+ else: #
+ if dt > last + delta: # too long. save sesh. start another, if hits > 2
+ minutes = (last - start)
+                minutes = (minutes.seconds / 60) + 5  # pad each session by 5 minutes
+ if hits > 2:
+ sessions.append( [start_f, hits, minutes,current_set] )
+ start = dt
+ start_f = formatted
+ last = dt
+ hits = 1
+ current_set = [formatted]
+ else: # put in current session
+ last = dt
+ current_set.append(formatted)
+ hits += 1
+ # save last sesh
+ if (last):
+ minutes = (last - start)
+ minutes = (minutes.seconds / 60) + 5
+ if hits > 2:
+ sessions.append( [start_f,hits,minutes,current_set] )
+
+    if dd:
+        dd.write(json.dumps(timeline_times))
+
+ return sessions
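+
+# Worked example (illustrative, assuming a US/Pacific host clock):
+#   partition(["2017-02-14T17:01:46Z","2017-02-14T17:10:00Z","2017-02-14T17:20:00Z"])
+# -> one session like ["Feb 14 09:01", 3, 23.2, [...]]: three hits spanning
+#    ~18 minutes plus the 5-minute pad, with the start rendered in local time.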