diff --git a/apphelp.py b/apphelp.py index 0d117af..ac61694 100644 --- a/apphelp.py +++ b/apphelp.py @@ -1,1619 +1,1977 @@ - - -import os,re - -output = '' -todos = 0 - -todos_d = {} - -# TODO: Make a second pass, and look for fxn calls inside of -# each function. Draw a graphviz. -# - -def fxns(fname): - global output, todos - lines = open(fname).readlines() - prev_L = "" - for L in lines: - - # is it a todo - a = re.search(r'(TODO|todo)\s*:?\s*(.*)$',L) - if a: - output += "\t\ttodo: " + a.group(2) + "\n" - todos += 1 - if fname in todos_d: - todos_d[fname] += 1 - else: - todos_d[fname] = 1 - - # is it a function def? - if re.search('^\s*def\s',L): - output += "\n" - if re.search('^#',prev_L): output += "\t"+prev_L - output += "\t"+L+"" - prev_L = L - -files = os.listdir('.') -files.sort() - - -for F in files: - if F=='apphelp.py': continue - if re.search('\.py$',F): - output += "\n" + F + "\n" - fxns(F) - -prog_in = open('apphelp.py','r') -prog_out = '' -td = '# Total TODOs remaining: %i' % todos - -td2 = '# TODOs per file: \n#\n' -for k in sorted(todos_d.keys()): - td2 += "#\t%i - %s \n" % (todos_d[k],k) - - -for R in prog_in.readlines(): - if re.search('^##\sF',R): - prog_out += "## Functions\n#\n%s\n#\n%s\n#\n" % (td,td2) - break - prog_out += R - -prog_out += '\n"""\n\n' + output + '\n\n"""\n\n' -prog_in.close() -prog_write = open('apphelp.py','w') -prog_write.write(prog_out) -prog_write.close() - - - - - -## Functions -# -# Total TODOs remaining: 57 -# -# TODOs per file: -# -# 1 - checker.py -# 1 - content.py -# 6 - courses.py -# 3 - curriculum.py -# 5 - depricated.py -# 6 - localcache.py -# 2 - outcomes.py -# 20 - pipelines.py -# 2 - server.py -# 2 - tasks.py -# 1 - tempget.py -# 8 - users.py - -# - -""" - - -__init__.py - -checker.py - todo: make this sweet - - def safe_html(html): - - def _attr_name_whitelisted(attr_name): - - def safe_css(attr, css): - - def plaintext(input): - - def _unescape(text): - - def fixup(m): - - def check_folder(fname,path): - 
- def check_class(folder): - - def check_all(): - -content.py - - def d(s): - - def stripper(s): - - def mycleaner(s): - - def freshdesk(): - - # Build a master file with the entire class content - def accessible_check(id=""): - todo: include linked pages even if they aren't in module - - def pan_testing(): - - # Given course, page url, and new content, upload the new revision of a page - def create_page(course_num,new_title,new_content): - - def md_to_course(): - - # DL pages only - def grab_course_pages(course_num=-1): - - # Appears to not be used - def put_course_pages(): - - # Also not used - def put_revised_pages(): - - # Download, clean html, and reupload page - def update_page(): - - # Given course, page url, and new content, upload the new revision of a page - def upload_page(course_num,pageurl,new_content): - - # Use template to build html page with homegrown subtitles - def build_srt_embed_php(data): - - def yt_title(code): - - def swap_youtube_subtitles(): - - def test_swap(): - - def multiple_downloads(): - -courses.py - todo: - - def int_or_zero(x): - - def float_or_zero(x): - - # Gott 1 Bootcamp - report on who completed it. - def get_gott1_passers(): - - # Plagiarism Module - report on who completed it. - def get_plague_passers(): - - # Who, in a class, passed? - def get_course_passers(course, min_passing, passers_filename, still_active_filename): - - # Change courses to show 2 announcements - def change_course_ann_homepage(id="10458"): - - def scrape_bookstore(): - todo: where does the most recent schedule come from? - - # Input: xxxx_sched.json. Output: xxxx_latestarts.txt - def list_latestarts(): - - # All students enrolled in a class in the given semester. Simpler verson of below. Return SET of course_ids. - def users_in_semester(): - todo: - - # All students in STEM (or any list of depts.. match the course_code). Return SET of canvas ids. 
- def users_in_depts_live(depts=[], termid='171'): - - def course_enrollment(id=''): - - def askForTerms(): - - # Return a list of term names and IDs. Also store in cache/courses/terms.txt - def getTerms(printme=1, ask=1): - todo: unsafe overwrite - - def getCourses(): # a dict - - # Relevant stuff trying to see if its even being used or not - def course_term_summary(): - - # Fetch all courses in a given term - def getCoursesInTerm(term=0,show=1,active=0): # a list - - def getCoursesTermSearch(term=0,search='',v=0): - - def courseLineSummary(c,sections={}): - - def xlistLineSummary(c,sections={}): - - def eslCrosslister(): - - def xlist(parasite='', host=''): # section id , new course id - todo: need to get the section id from each course: - - def unenroll_student(courseid,enrolid): - - def enroll_stem_students_live(): - - def enroll_orientation_students(): - - def summarize_proportion_online_classes(u): - - def summarize_num_term_classes(u): - - def make_ztc_list(sem='sp20'): - - def course_search_by_sis(): - - def add_evals(section=0): - todo: wanted: group shell for each GP (guided pathway) as a basic student services gateway.... - -curriculum.py - todo: These secrets - - def another_request(url,startat): - - def fetch_all_classes(): - - def fetch_all_programs(): - - def sortable_class(li): - - def c_name(c): - - def show_classes(createoutput=1): - - def clean_d_name(d): - - def show_programs(): - - def dd(): return defaultdict(dd) - - def organize_courses(): - - def check_de(): - - def clean_programs(): - - def course_lil_format(s): - - def header_lil_format(s): - - def organize_programs(): - - def divide_courses_list(li,rwd,online): - - def organize_programs2(): - - # sorting by order key of dict - def cmp_2(a): - - def cmp_order(a,b): - - # decipher the grouped up courses line - def split_course(st): - - # Any number gets an X (checked). Blank or zero gets no check. 
- def units_to_x(u): - - def p_block_rule(r,printme,doc,out=0): - - def p_cert_header(type,doc,r='',out=0): - - def p_block_header(r,doc,out=0): - - def p_cert_course_missing(cd,doc,out=0): - - def p_cert_course(cd,history,doc,out=0): - - def p_end_block(out=0): - - def p_end_cert(bigdoc, out=0): - - def ask_for_rule(r): - - def action_to_english(a): - - # Return True if the courses satisfy the rule - def check_a_block(b, courses, verbose=False): - - def read_block_english_to_code(): - - def read_section_online_history(): - todo: this file depends on other fxns. which? - - # This is the 3rd attempt. - def simple_find_online_programs(): - todo: courses with a HYPHEN in NAME get parsed wrong. - - def check_a_block_a(b,verbose=False): - - def smart_find_online_programs(): - - def show_contained_class(c): - - def show_block(c): - - def show_block(c): - - def is_online(c): - - def is_online(c): - - def is_online_inblock(c): - - def is_online_inblock(c): - - # of all the programs, what can be accomplished online? - def find_online_programs(): - - # take a string of all the types of classes offered, return a vector of [tot,lec,hyb,onl] - def string_to_types(st): - - def my_default_counter(): - - # Of the recent schedules, what was actually offered online? 
- def summarize_online_sections(): - - def fibonacci(n): - - def test_pampy(): - - def cq_parse_experiment(root=0, indent=''): - - def cq_start(): - - def cq_pattern_backup1(root=0, indent=''): - - def found(*x): - - def lookForMatch(rules,item): - - def cq_pattern(root=0, indent=''): - - def myprinter(item, indent=''): - - def cq_pattern_start(): - - def baby_int(j): - - def find_deg_in_cluster( clusters, deg ): - - def try_match_deg_programs(): - - def dict_generator(indict, pre=None): - - def print_dict(v, prefix='',indent=''): - - def walk_file(): - - def tag(x,y): return "<%s>%s" % (x,y,x) - - def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) - - def a(t,h): return '%s' % (h,t) - - def server_save(key,value): - - def flask_thread(q): - - def home(): - - def s(key,val): - - def hello(): - - def sd(): - - def serve(): - - def attempt_match8020(rules,item): - - def clever_printer(item, indent=''): - - def print_return(x): - - def cq_8020(root=0, indent=''): - - def cq_8020_start(): - -curriculum2020.py - - def to_md(s): - - def print_return(x): - - def cq_8020(root,indent=0): - - def cq_8021(root,indent=0): - - def cq_8021_start(): - - def cq_8022(root,indent=0): - - def cq_8022_start(): - - def sortable_class(li): - - def c_name(c): - - def show_classes2020(): - - def show_classes2020_start(): - -curriculum_patterns.py - - def div1(a,b): - - def d2(a,b): - - def d3(a,b): - - def pp1(a,b): - - def pp2(a,b): - - def pp3(a,b,c,d,e,f,g): - - def pp4(a,b): - - def pp5(a,b,c): - - def pp6(a,b): - - def pp7(a,b): - - def pp8(a,b): - - def jj1(a,b,c,d,e): - - def jj2(a,b,c,d,e,f): - - def jj3(a,b,c,d,e): - - def jj4(a,b,c,d): - - def jj5(a,b,c,d,e,f): - - def jj6(a,b,c,d): - - def jj2(a,b,c,d): - - def jj2(a,b,c,d): - -depricated.py - - # Don't know - def demo(): - - def stats(): - - def dict_generator(indict, pre=None): - - def print_dict(v, prefix='',indent=''): - - def walk_file(): - - def tag(x,y): return "<%s>%s" % (x,y,x) - - def tagc(x,c,y): return '<%s 
class="%s">%s' % (x,c,y,x) - - def a(t,h): return '%s' % (h,t) - - def server_save(key,value): - - def flask_thread(q): - - def home(): - - def s(key,val): - - def hello(): - - def sd(): - - def serve(): - todo: this duplicates courses.py ?? - - # Prompt for course id, return list of user dicts. TODO this duplicates courses.py ?? - def getUsersInCourse(id=0): # returns list - todo: - - # NO LONGER USED - SEE COURSES - def enroll_stem_students(): - - # unused? - def getAllTeachersInTerm(): # a list - todo: hits in courses by teachers https://gavilan.instructure.com:443/api/v1/users/2/page_views?end_time=Dec%2010%2C%202018 - - def teacherActivityLog(uid=1): ### Next: save results in a hash and return that.... - - def summarize_student_teacher_role(u): - - def user_roles2(): - - def req_to_db(fname_list): - - def has_online(series): - - def has_lecture(series): - - def has_hybrid(series): - - # Wrapper to get 2 schedules at once - def dl_sched(): - todo: these semesters - - # Send a personalized email regarding ZTC - def send_z_email(fullname, firstname, addr, courses_list): - - def getInactiveTeachersInTerm(t=23): # a list - - def course_location(course): - - def course_time(course): - - def course_teacher(course): - - def reg_nums(): - - # In the schedule, is this a class or a continuation of the class above? - def categorize(): - todo: must we open all these files? - - # Deprecated. call perl. 
- def constructSchedule(): - - def fetch_dict(target,params={}): - - def get_schedule(term='201870', sem='fall'): - -interactive.py - - def dict_generator(indict, pre=None): - - def print_dict(v, prefix='',indent=''): - - def walk_file(): - - def tag(x,y): return "<%s>%s" % (x,y,x) - - def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) - - def a(t,h): return '%s' % (h,t) - - def server_save(key,value): - - def flask_thread(q): - - def before_request(): - - def save_post(): - - def restart(): - - def dispatch3(func,arg,arrg): - - def dispatch2(func,arg): - - def dispatch(func): - - def dispatch3j(func,arg,arrg): - - def dispatch2j(func,arg): - - def dispatch1j(func): - - def home(): - - def send_jslib(path): - - def send_cachedata(path): - - def send_js(path): - - def s(key,val): - - def do_sample(): - - def media(file_id): - - def podcast(): - - def weblec(): - - def hello(): - - def sd(): - - def test_message(message): - - def serve(): - - def make_teacher_rel(self, tchr, clss): - - def __init__(self, uri, user, password): - - def close(self): - - def print_greeting(self, message): - - def _create_and_return_greeting(tx, message): - - def make_teacher_rel(g, tchr, clss): - - def testgraph(): - - def Memoize( func): - - def wrapper(*args): - - def startup(self, outfile): - - def set_my_dict(self,d): - - def cycle_color(self, s): - - def ascii_art(self, text): - - def close_window(self, ): - - def suggest(self, word): - - def curses_print_word(self, word,color_pair_code): - - def curses_print_line(self, line,color_pair_code): - - def redraw(self, start_y,end_y,fallback_y,fallback_x): - - def scroll_down(self, noredraw,fallback_y,fallback_x): - - def clear_upside(self, n,y,x): - - def display_suggest(self, y,x,word): - - def inputloop(self, ): - - def setup_command(self,outfile): - - def cleanup_command(self): - - def handle_command(self, cmd): - - def repl_staff(): - - def repl_degs(): - - def repl(): - - def repl(): - -interactivex.py - - def 
dict_generator(indict, pre=None): - - def print_dict(v, prefix='',indent=''): - - def walk_file(): - - def tag(x,y): return "<%s>%s" % (x,y,x) - - def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) - - def a(t,h): return '%s' % (h,t) - - def server_save(key,value): - - def flask_thread(q): - - def before_request(): - - def restart(): - - def dispatch3(func,arg,arrg): - - def dispatch2(func,arg): - - def dispatch(func): - - def dispatch3j(func,arg,arrg): - - def dispatch2j(func,arg): - - def dispatch1j(func): - - def home(): - - def send_jslib(path): - - def send_cachedata(path): - - def send_js(path): - - def s(key,val): - - def do_sample(): - - def hello(): - - def sd(): - - def serve(): - - def make_teacher_rel(self, tchr, clss): - - def __init__(self, uri, user, password): - - def close(self): - - def print_greeting(self, message): - - def _create_and_return_greeting(tx, message): - - def make_teacher_rel(g, tchr, clss): - - def Memoize( func): - - def wrapper(*args): - - def startup(self, outfile): - - def set_my_dict(self,d): - - def cycle_color(self, s): - - def ascii_art(self, text): - - def close_window(self, ): - - def suggest(self, word): - - def curses_print_word(self, word,color_pair_code): - - def curses_print_line(self, line,color_pair_code): - - def redraw(self, start_y,end_y,fallback_y,fallback_x): - - def scroll_down(self, noredraw,fallback_y,fallback_x): - - def clear_upside(self, n,y,x): - - def display_suggest(self, y,x,word): - - def inputloop(self, ): - - def setup_command(self,outfile): - - def cleanup_command(self): - - def handle_command(self, cmd): - - def repl_staff(): - - def repl_degs(): - - def repl(): - - def repl(): - -ipython_log.py - -localcache.py - - def db(): - - def setup_table(table='requests'): - - # Help the next function to upload new users directly to conf database on gavilan. - def employees_refresh_flex(data): - - # Everyone in iLearn DB with an xyz@gavilan.edu email address. 
- def all_gav_employees(): - - # - def teachers_courses_semester(): - - # - def teachers_by_term(): - - # Report for AEC - def aec_su20_report(): - - # Return the most up do date version of the given file. Useful for 'dimensions'. - def most_recent_file_of( target ): - - def finder(st): - - # Given a table schema, parse log file, return a list of dicts. Optionally remove some columns. - def parse_file_with( file, format, with_gid=0 ): - - # I return a list of the read lines if the log dates in the file are within dates (top of this file), or FALSE - def is_requestfile_interesting(fname): - todo: more robust here - todo: - investigate pearson, developer key: 170000000000376 and their ridiculous amounts of hits. - - # Return a 'timeblock'. An integer number of 15 minute blocks from my epoch. Expects a datetime object in PST timezone. - def timeblock_from_dt(dt_obj): - - # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time - def dt_from_timeblock(tb): - - # Twenty Four hour timeblocks - def timeblock_24hr_from_dt(dt_obj): - - # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time - def dt_from_24hr_timeblock(tb): - - # Four hour timeblocks - def timeblock_4hr_from_dt(dt_obj): - - # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time - def dt_from_4hr_timeblock(tb): - - # I make the line into a dict, erase keys with no data, make a DT field called date, make a time_block (int) field. - def requests_line(line,i=0): - - # Bulk insert of requests logs. Too much data to be useful. - def requests_file(fname_list): - todo: select if timeblock exists - - # Insert or update a request line. - def upsert_request(line, vals): - - # Generic insert of a dict into a table. Keys of dict must match table columns. - def dict_to_insert(thisline,table): # a dict - - # This now does tallying by timeblock. 
- def merge_requests(): - - def merge_comm_channel(): - - def merge_pseudonym(): - - def merge_users(): - - def merge_courses(): - - def merge_enrollment(): - - def merge_term(): - - def merge_roles(): - - def merge_convos(): - - # For returning sqlite results as dicts - def dict_factory(cursor, row): - todo: ... approaches to all this data... list requests in order descending time, unique users, and just - - # Attempt to do tallying - def make_views_summarys(): - - # original without time_blocks info. - def make_views_summarys_v1(): - - # Setup my basic db stats base from scratch - def full_reload(): - - def guess_dept(t): - - # Main view of all class / all user overview... - def dept_with_studentviews(dept="", sem=''): - - def f(x): - - # get student count and teacher name from local db - def course_quick_stats(canvasid): - - # What a student has taken / teacher has taught - def user_enrolled_in(userid): - - # All students in this semester ... - def users_this_semester_db(sem=''): - - # Everyone whose first semester is ..... - def users_new_this_semester(sem=''): - - # All student users in STEM - from local db - def user_in_stem(): - - # Get all the classes in one dept - def dept_classes(dept,sem=''): - todo: - - def depts_with_classcounts(sem=''): - todo: - - def f(x): - - def name_with_count(name,li): - - def arrange_data_for_web(dept='', sem=''): - - def f(x): - - # Get enrollments. 
(Best to freshly run pipelines/get_rosters) and put them into DB - def build_tables(headers,name): - - def load_tables(table,headers,row,verbose=0): - - def semester_enrollments(verbose=0): - - def qstrip(txt): return txt.strip('"') - - def more_unused_xreferencing(): - - def user_role_and_online(): - - def comm_channel_file(): - - def pseudonym_file(): - - def users_p_file(): - - def com_channel_dim(): - - def abcd(): - - def crns_to_teachers(): - -main.py - -outcomes.py - - def outcome_overview(term=21): - - def create_acct_lvl_outcomes(src,dept,makefolder='',folder=0): - - def connect_acct_oc_to_course(course_id,oc_group_id): - - def outcome_groups(): - - def outcome_groups_backup(): - - def x_ref_dept_names(): - - def create_course_group(short,parent): - - def create_dept_group(short): - - def outcomes_attached_to_courses(term=65,limitdept=''): - todo: Handle this: CSIS/DM85 WEB DESIGN 40823/24 - - def summarize_course_online_slo(outcome_list): - - def fetch_outcome_details(id): - - # Report on the actual evaluation data? - def outcome_report1(): - todo: - - # For the given course, get all outcome measurements, and display scores and stats. 
- def outcome_report2(): - - def fix_joined_class(str): - - def split_slo_name(str): - - def outcome_report3(): - - def read_slo_source(): - - def slo_source_by_dept(): - -patterns_8020.py - -patterns_topdown.py - - def pp0(a,b,c,d,e): - - def pp1(a,b,c,d,e,f): - - def pp2(a,b,c,d,e): - - def pp3(a,b,c,d): - - def pp4(a,b,c): - - def pp5(a,b,c): - - def pp6(a,b,c): - - def div1(a,b): - - def d2(a,b): - - def d3(a,b): - - def pp1(a,b): - - def pp2(a,b): - - def pp3(a,b,c,d,e,f,g): - - def pp4(a,b): - - def pp5(a,b,c): - - def pp6(a,b): - - def pp7(a,b): - - def pp8(a,b): - - def jj3(a,b,c,d,e): - - def jj5(a,b,c,d,e,f): - - def jj2(a,b,c,d): - - def jj2(a,b,c,d): - -pipelines.py - todo: secrets - todo: all these constants for SSB -- line 1008 - todo: secrets - - def d(s): - - # Main canvas querying fxn - def fetch(target,verbose=0): - - # Main canvas querying fxn - stream version - don't die on big requests - def fetch_stream(target,verbose=0): - - # paging makes problems... example: enrollment_terms - def fetch_collapse(target,collapse='',verbose=0): - - # Teacher name format changed. Remove commas and switch first to last - def fix_t_name(str): - - # Separate dept and code - def split_class_dept(c): - - def split_class_code(c): - - def split_class_code_letter(c): - - # go from sp20 to 2020spring - def shortToLongSem(s): - - # Go to the semesters folder and read the schedule. Return dataframe - def getSemesterSchedule(short='sp21'): # I used to be current_schedule - todo: Some semesters have a different format.... partofday type site xxx i just dL'd them again - - def prep_online_courses_df(): - - def course_is_online(crn): - - def get_crn_from_name(name): - - def get_enrlmts_for_user(user,enrollments): - - # Get something from Canvas Data - def do_request(path): - - # Canvas data, download all new files - def sync_non_interactive(): - - # list files in canvas_data (online) and choose one or some to download. 
- def interactive(): - - def todays_date_filename(): # helper - - def nowAsStr(): # possible duplicate - - def row_has_data(r): # helper - - def row_text(r): # helper - - # Take banner's html and make a csv(?) file - def ssb_to_csv(src): - - def clean_funny(str): - - def clean_funny2(str): - - def clean_funny3(str): - - ### course is a list of 1-3 lists, each one being a line in the schedule's output. First one has section - def course_start(course): - todo: use this to make a early/late/short field and store semester dates w/ other constants - - def time_to_partofday(t): - todo: account for multiple sites/rows - - # Deduce a 'site' field, based on room name and known offsite locations - def room_to_site(room,verbose=0): - todo: account for multiple sites/rows - todo: better way to store these offsite labels - - # take text lines and condense them to one dict per section - def to_section_list(input_text,verbose=0): - todo: no output files - todo: if extra line is different type? - - # Log the history of enrollments per course during registration - def log_section_filling(current_sched_list): - - # Same as above, but compressed, act only - def log_section_filling2(current_sched_list): - - # Use Firefox and log in to ssb and get full schedule. Only works where selenium is installed - def scrape_schedule(): - todo: my data here.... secret - todo: - - # recreate schedule json files with most current online schedule format. - def recent_schedules(): - todo: sems is a global in this file. Is that the right thing to do? - todo: the pipeline is disorganized. Organize it to have - todo: where does this belong in the pipeline? compare with recent_schedules() - - # Take the generically named rosters uploads files and move them to a semester folder and give them a date. - def move_to_folder(sem,year,folder): - - # This relates to enrollment files, not schedule. 
- def convert_roster_files(semester="",year="",folder=""): - - # From instructure sftp site - def fetch_current_rosters(): - todo: secret - - def fetch_current_rosters_auto(): - - # read schedule file with an eye toward watching what's filling up - def schedule_filling(): - todo: hardcoded - - # Upload a json file to www - def put_file(remotepath,localpath, localfile,prompt=1): - todo: remove this secret - todo: these paths - - def sec(t): return "

"+t+"

\n" - - def para(t): return "

"+t+"

\n" - - def ul(t): return "\n" - - def li(t): return "
  • "+t+"
  • \n" - - def question(t,bracket=1): - - def answer(t): - - def read_paragraph_element(element,type="NORMAL_TEXT"): - - def get_doc(docid, bracket=1, verbose=0): - todo: x link, x bold, list, image. - - def read_paragraph_element_2(element,type="NORMAL_TEXT"): - - # t is a string that begins with "Icons: " ... and contains comma(space) separated list - def handle_icons(t): - - # t is a string that begins with "Tags: " ... and contains comma(space) separated list - def handle_tags(t): - - def handle_question(t,bracket=1): - - def handle_answer(t): - - def handle_sec(t): return ('section',t) - - def handle_para(t): return ('paragraph',t) - - def handle_ul(t): return ('unorderdedlist',t) - - def handle_li(t): return ('listitem',t) - - def fetch_doc_image(k,value): - - def get_doc_generic(docid, bracket=1, verbose=0): - - def scrape_schedule_py(): - -server.py - - def tag(x,y): return "<%s>%s" % (x,y,x) - - def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) - - def a(t,h): return '%s' % (h,t) - - def homepage(): - - def orgline(L): - todo: \s\[\#A\](.*)$', L) - - def editor(src): - - def in_form(txt,path): - - def mytime(fname): - - def index(): - - def writing(fname): - - def dashboard(): - - def dash(): - - def mycalendar(): - - def most_recent_file_of( target, folder ): - - def finder(st): - - def news(): - - def randPic(): - - def sample(): - - def sample2(a=""): - - # Filter a stream of loglines for those that match a course's url / id - def has_course(stream,courseid): - - def js(s): - - def sem_from_array_crn(crn): - - def user_courses(uid): - - def user_course_history_summary(usr_id): - - def roster(crn): - - def user_course_hits(usr,courseid): - - def profiles(id=1,b=2,c=3): - - # Departments, classes in each, and students (with hits) in each of those. - def enrollment(a): - - # All the classes in this dept, w/ all the students in each, with count of their views. 
- def dept(d=''): - - def user(canvas_id=None): - - def lectures(): - - def web_lectures(): - todo: update: dept, title, any of the other fields. - - # update a value: dept id of a personnel id - def update_pers_title(pid, tid): - - # update a value: dept id of a personnel id - def update_pers_dept(pid, did): - - def user_edit(canvas_id='2'): - - def staff_dir(search=''): - - def server_save(key,value): - - def server_dispatch_json(function_name,arg='', arg2=''): - - def server_dispatch(function_name,arg='', arg2=''): - -stats.py - - def grades_rundown(): - - def class_logs(): - - def user_logs(): - - def recent_logins(): - - def userHitsThisSemester(uid=2): - - def getCurrentActivity(): # a dict - - def externaltool(): # a list - -tasks.py - - def survey_answer(q=0): - - def survey_organize(): - - def build_quiz(filename=""): - - # Send an email - def send_email(fullname, firstname, addr, subj, content): - - def convert_to_pdf(name1, name2): - - # Build (docx/pdf) certificates for gott graduates - def certificates_gott_build(): - - # Email experiment - def mail_test(): - - # Change LTI Settings. Experimental - def modify_x_tool(): - - # Upload with sftp to www website folder: student/online/srt/classfoldername - def put_file(classfoldername): - todo: ',cnopts=cnopts) as sftp: - - # Switch everyone in a class to a teacher - def switch_enrol(): - - # Change dates & term of a class to unrestrict enrollment - def unrestrict_course(): - - # Bulk enroll users into a course - def enroll_accred(): - - # Calculate attendance stats based on enrollment/participation at 20% of term progressed, then 60% of term progressed. - def twenty_sixty_stats(li): - - # Older positive attendance hours calculation. 
- def hours_calc(): - - def course_2060_dates(crn=""): - todo: - - def course_update_all_users_locallogs(course_id=''): - - def hours_calc_pulldata(course_id=''): - - def xlist_cwe(): - - def pos_atten(): - -temp.py - -tempget.py - - # Use Firefox and log in to ssb and get full schedule - def login(): - todo: my data here.... secret - - def filename_friendly(str): - - def otter(): - -templates.py - - def item_to_masonry(item): - - def try_untemplate(): - - def php_remove(m): - - def php_add(m): - - def do_template(temp,source,side): - - def remove_filetype(f): - - def make(): - - def txt_2_table(): - - def studenttech_faq(): - - # https://docs.google.com/document/d/1tI_b-q75Lzu25HcA0GCx9bGfUt9ccM8m2YrrioDFZcA/edit?usp=sharing - def de_faq(): - - def degwork_faq(): - - def vrc_faq(): - - def counseling_faq(): - - def finaid_faq(): - - def coun_loc(): - - def tutor_faq(): - - def test_repl(): - -timer.py - - def func(a, b): - -users.py - todo: these constants - - # All users to a cache file cache/allusers.json - def fetchAllUsers(): - - # Fetch teacher users objects from local cache - def teacherRolesCache(): # I used to be load_users - - # Canvas: Fetch all people with gavilan.edu email address - def teacherRolesUpdateCache(): # I used to be get_users - - # Fetch preferred email address for a given user id. ( Canvas ) - def getEmail(user_id): - - # All teachers in a particular course - def getAllTeachers(course_id=59): # a list - - # - def classType(t): - todo: fix bug in schedule parser so non-online classes have a type field - - def my_blank_string(): return "no data" - - def my_blank_dict(): return {'name':'NoName','email':'noemail@gavilan.edu'} - - def my_empty_dict(): return defaultdict(my_blank_string) - - def get_email_from_rec(name,name_to_record): - - # Pull the staff directory on the webpage. Convert to pandas dataframe - def staff_dir(get_fresh=False): - todo: lol get fresh again... 
- - def schedForTeacherOverview(long,short): - - # Return a dataframe of the last 4 semester schedules put together - def oneYearSchedule(): - - def num_sections_last_year(line): - - def sec_type_stats(line): - - def prct_online(line): - - def prct_lecture(line): - - def prct_hybrid(line): - - # Given the names of teachers in last year's schedules, fill in email, etc. from ilearn files - def teacher_basic_info(sched, from_ilearn, names): - - def find_that_name(x): - - # Outputs: cache/teacher_by_semester.csv, - def teacherModalityHistory(sched=[],names=[]): - - def teacherCourseHistory(a,names): - todo: sort by dept also - - # Outputs: cache/course_teacher_combos.csv, - def teacherSharedCourses(a=[]): - - # How many courses in each department were taught in the last year? - def departmentCountCourses(a=[]): - - def clean_nonprint(s): - - def read_cmte(names): - - def read_training_records(): - - # open a file and mark the people with their ids given. Return a dataframe - def read_bootcamp1(filename): - - # open a file and mark the people with their ids given. Return a dataframe - def read_bootcamp2(filename): - - def not_blank_or_pound(L): - - def temp1(x): - - def add_realnames(df,names): # the surveys. raw name is in 2nd column - - def compareToughNames(a,b): - - def compareNames(a,b,verbose=0): - - def find_ilearn_record(ilearn_records,manual_records, othername,verbose=0): - - def manualNamesAndDept(): - - def manualNames(): - - # given a list of class codes, return the most common (academic) department - def guessDept(d_list): - - # Make one big csv file of everything I know about a teacher - def getTeachersInfoMain(): - - def enroll_staff_shell(): - - # take a list of raw hits. - def activity_summary(hits): - todo: month is hardcoded here - - # Get views counts on current teachers. todo: month is hardcoded here - def get_recent_views(id=1): - - # Have they taught online or hybrid classes? 
- def categorize_user(u): - todo: threaded - - # Doest the account have a photo loaded? - def checkForAvatar(id=2): - - # Grab em. Change the first if when continuing after problems.... - def downloadPhoto(): - - def mergePhotoFolders(): - - def mergePhotoFolders2(): - - # Go through my local profile pics, upload any that are missing. - def uploadPhoto(): - - def test_email(): - - def create_ztc_list(): - - def get_user_info(id): - - # these are any messages that get pushed out to their email - def comm_mssgs_for_user(uid=0): - - # - def convos_for_user(uid=0): - - # single q sub - def quiz_get_sub(courseid, quizid, subid=0): - - # quiz submissions for quiz id x, in course id y - def quiz_submissions(courseid=9768, quizid=32580): - - # return (timeblock, course, read=0,write=1) - def requests_line(line,i=0): - - # - def report_logs(id=0): - - def track_users_in_sem(): - - def track_users_in_class(L=[]): - - def track_user_q(id, q): - - # Maintain local logs. Look to see if we have some, download logs since then for a user. - def track_user(id=0,qid=0): - todo: set up this info file if it isn't there. check any changes too. it - todo: - - # - def track_users_by_teacherclass(): - - def nlp_sample(): - - def nlp_sample2(): - - def one_course_enrol(): - -util.py - - def print_table(table): - - def remove_nl(str): - - def UnicodeDictReader(utf8_data, **kwargs): - - def minimal_string(s): - - def to_file_friendly(st): - - def clean_title(st): - - def match59(x): - - def item_2(x): return x[2] - - def unix_time_millis(dt): - - # ENGL250 returns ENGL - def dept_from_name(n): - - def most_common_item(li): - - def srt_times(a,b): - - def how_long_ago(a): # number of hours ago 'a' was... - - def partition(times_list): - - -""" - + + +import os,re + +output = '' +todos = 0 + +todos_d = {} + +# TODO: Make a second pass, and look for fxn calls inside of +# each function. Draw a graphviz. 
#

def fxns(fname):
    """Scan one source file and add its TODOs and function defs to the report.

    Appends to the module-level ``output`` string:
      * a ``todo:`` entry for every line that contains ``TODO``/``todo``
        (the text after the marker and an optional colon is kept), and
      * every line that looks like a ``def`` statement, preceded by the
        previous line when that line starts with ``#`` in column 0.

    Also bumps the module-level ``todos`` counter and the per-file tally in
    ``todos_d`` for each TODO found.

    fname -- path of the file to scan.
    Returns nothing; all results are accumulated in the module globals.
    """
    global output, todos

    # Read everything up front and close the handle right away; the previous
    # open(fname).readlines() never closed the file explicitly.
    with open(fname) as src:
        lines = src.readlines()

    prev_L = ""
    for L in lines:

        # is it a todo
        # NOTE: the pattern has no word boundary, so it also fires on
        # identifiers that merely contain "todo".
        a = re.search(r'(TODO|todo)\s*:?\s*(.*)$', L)
        if a:
            output += "\t\ttodo: " + a.group(2) + "\n"
            todos += 1
            todos_d[fname] = todos_d.get(fname, 0) + 1

        # is it a function def?
        if re.search(r'^\s*def\s', L):
            output += "\n"
            # Carry along the comment sitting directly above the def.
            if re.search('^#', prev_L):
                output += "\t" + prev_L
            output += "\t" + L
        prev_L = L


# Index every .py file in the current directory, in name order, skipping
# apphelp.py itself (the file that gets rewritten below).
files = os.listdir('.')
files.sort()

for F in files:
    if F == 'apphelp.py':
        continue
    if re.search(r'\.py$', F):
        output += "\n" + F + "\n"
        fxns(F)

td = '# Total TODOs remaining: %i' % todos

td2 = '# TODOs per file: \n#\n'
for k in sorted(todos_d):
    td2 += "#\t%i - %s \n" % (todos_d[k], k)

# Copy apphelp.py up to (not including) the generated-section marker line,
# then emit a fresh header in its place.  If the marker is missing the whole
# file is copied and no header is written.
prog_out = ''
with open('apphelp.py', 'r') as prog_in:
    for R in prog_in:
        if re.search(r'^##\sF', R):
            prog_out += "## Functions\n#\n%s\n#\n%s\n#\n" % (td, td2)
            break
        prog_out += R

# The collected index goes into a trailing triple-quoted string.
prog_out += '\n"""\n\n' + output + '\n\n"""\n\n'

# The source was fully read and closed above, so overwriting it is safe.
with open('apphelp.py', 'w') as prog_write:
    prog_write.write(prog_out)





## Functions
#
# Total TODOs remaining: 67
#
# TODOs per file:
#
# 6 - content.py
# 6 - courses.py
# 3 - curriculum.py
# 6 - depricated.py
# 7 - localcache.py
# 2 - outcomes.py
# 17 - pipelines.py
# 2 - server.py
# 5 - tasks.py
# 1 - tempget.py
# 12 - users.py

#
the new revision of a page + def create_page(course_num,new_title,new_content): + + def md_to_course(): + + # DL pages only + def grab_course_pages(course_num=-1): + + # Download, clean html, and reupload page + def update_page(): + + # Given course, page url, and new content, upload the new revision of a page + def upload_page(course_num,pageurl,new_content): + + # Use template to build html page with homegrown subtitles + def build_srt_embed_php(data): + + def yt_title(code): + + def swap_youtube_subtitles(): + + def test_swap(): + + def multiple_downloads(): + + def demo_vector_search(): + + def is_complete_sentence(text): + todo: site scraper + todo: find package that extracts text from web page + todo: master list of what to index. + todo: PDFs and DOCXs + todo: fix urls w/ anchors + + def clean_fn(s): + + def format_html(html): + + def visit(self, link, source=None): + + def fail(self, link): + + def crawl(): + + def txt_clean_index(): + + def samples(): + +courses.py + todo: + + # Gott 1 Bootcamp - report on who completed it. + def get_gott1_passers(): + + # Plagiarism Module - report on who completed it. + def get_plague_passers(): + + # Who, in a class, passed? + def get_course_passers(course, min_passing, passers_filename, still_active_filename): + + # Who, in a class and a quiz, passed? + def get_quiz_passers(): + + # Change courses to show 2 announcements + def change_course_ann_homepage(id="10458"): + + # All students enrolled in a class in the given semester. Simpler verson of below. Return SET of course_ids. + def users_in_semester(): + todo: + + # All students in STEM (or any list of depts.. match the course_code). Return SET of canvas ids. + def users_in_depts_live(depts=[], termid='171'): + + def course_enrollment(id=''): + + def askForTerms(): + + # Return a list of term names and IDs. 
Also store in cache/courses/terms.txt + def getTerms(printme=1, ask=1): + todo: unsafe overwrite + + def getCourses(x=0): # a dict + + def update_course_conclude(courseid="13590",enddate='2021-12-23T01:00Z'): + + # Relevant stuff trying to see if its even being used or not + def course_term_summary_local(term="176",term_label="FA22"): + + # Relevant stuff trying to see if its even being used or not + def course_term_summary(term="176",term_label="FA22"): + + # Fetch all courses in a given term + def getCoursesInTerm(term=0,get_fresh=1,show=0,active=0): # a list + + def getCoursesTermSearch(term=0,search='',v=0): + + def courseLineSummary(c,sections={}): + + def xlistLineSummary(c,sections={}): + + def numbers_in_common(L): + + def combined_name(nic,L): + + def semester_cross_lister(): + + def xlist_ii(parasite_id,host_id,new_name,new_code): + + def all_semester_course_sanity_check(): + + def eslCrosslister(): + + def xlist(parasite='', host=''): # section id , new course id + todo: need to get the section id from each course: + + def unenroll_student(courseid,enrolid): + + def enroll_stem_students_live(): + + def enroll_bulk_students_bydept(course_id, depts, the_term="172", cautious=1): # a string, a list of strings + todo: not done here + + def enroll_art_students_live(): + + def enroll_orientation_students(): + + def enroll_o_s_students(): + + def make_ztc_list(sem='sp20'): + + def course_search_by_sis(): + + def mod_eval_visibility( shell_id, visible=True ): + + def instructor_list_to_activate_evals(): + + def add_evals(section=0): + + def course_dates_terms(section=0): + + def remove_n_analytics(section=0): + + def create_sandboxes(): + + def course_term_summary_2(): + + def get_ext_tools(): + + def set_ext_tools(): + todo: wanted: group shell for each GP (guided pathway) as a basic student services gateway.... 
+ +cq_demo.py + + def fetch(target): + +curric2022.py + + def fetch_all_programs(): + + def nothing(x=0): + + def clean(st): + + def recur_matcher(item, depth=0): + + def single_course_parse(c): + + def match_style_test(): + + def single_program_path_parse(c): + + def path_style_prog(): + + def term_txt_to_code(t): + + def all_outcomes(): + + def ddl(): + + def splitclassline(cl, id=''): + + def path_style_2_html(): + + def course_path_style_2_html(): + + def another_request(url,startat): + + def fetch_all_classes(): + + def recur_path_matcher(item, path=[]): + + def x2_path_update(x,y,z): + + def pathstyle(theclass): + + def single_course_path_parse(c): + + def path_style_test(): + + def make_sl(): + + def course_rank(): + +curriculum.py + + def dbg(x): + + def another_request(url,startat): + + def fetch_all_classes(): + + def fetch_all_programs(): + + def sortable_class(li): + + def c_name(c): + + def show_classes(createoutput=1): + + def clean_d_name(d): + + def show_programs(): + + def dd(): return defaultdict(dd) + + def organize_courses(): + + def check_de(): + + def clean_programs(): + + def course_lil_format(s): + + def header_lil_format(s): + + def organize_programs(): + + def divide_courses_list(li,rwd,online): + + def organize_programs2(): + + # sorting by order key of dict + def cmp_2(a): + + def cmp_order(a,b): + + # decipher the grouped up courses line + def split_course(st): + + # Any number gets an X (checked). Blank or zero gets no check. 
+ def units_to_x(u): + + def p_block_rule(r,printme,doc,out=0): + + def p_cert_header(type,doc,r='',out=0): + + def p_block_header(r,doc,out=0): + + def p_cert_course_missing(cd,doc,out=0): + + def p_cert_course(cd,history,doc,out=0): + + def p_end_block(out=0): + + def p_end_cert(bigdoc, out=0): + + def ask_for_rule(r): + + def action_to_english(a): + + # Return True if the courses satisfy the rule + def check_a_block(b, courses, verbose=False): + + def read_block_english_to_code(): + + def read_section_online_history(): + todo: this file depends on other fxns. which? + + # This is the 3rd attempt. + def simple_find_online_programs(): + todo: courses with a HYPHEN in NAME get parsed wrong. + + def check_a_block_a(b,verbose=False): + + def smart_find_online_programs(): + + def show_contained_class(c): + + def show_block(c): + + def show_block(c): + + def is_online(c): + + def is_online(c): + + def is_online_inblock(c): + + def is_online_inblock(c): + + # 9/2021 clean programs to good json + def organize_programs_stage2(): + + # of all the programs, what can be accomplished online? + def find_online_programs(): + + # take a string of all the types of classes offered, return a vector of [tot,lec,hyb,onl] + def string_to_types(st): + + def my_default_counter(): + + # Of the recent schedules, what was actually offered online? 
+ def summarize_online_sections(): + + def fibonacci(n): + + def test_pampy(): + + def cq_parse_experiment(root=0, indent=''): + + def cq_start(): + + def cq_pattern_backup1(root=0, indent=''): + + def found(*x): + + def lookForMatch(rules,item): + + def cq_pattern(root=0, indent=''): + + def myprinter(item, indent=''): + + def cq_pattern_start(): + + def baby_int(j): + + def find_deg_in_cluster( clusters, deg ): + + def try_match_deg_programs(): + + def dict_generator(indict, pre=None): + + def print_dict(v, prefix='',indent=''): + + def walk_file(): + + def tag(x,y): return "<%s>%s" % (x,y,x) + + def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) + + def a(t,h): return '%s' % (h,t) + + def server_save(key,value): + + def flask_thread(q): + + def home(): + + def s(key,val): + + def hello(): + + def sd(): + + def serve(): + + def attempt_match8020(rules,item): + + def clever_printer(item, indent=''): + + def print_return(x): + + def cq_8020(root=0, indent=''): + + def cq_8020_start(): + + def recurse3(sec,path=''): + + def get_id_sortorder(sec): + + def include_exclude(str,inc,exc=[]): + + def pbd3(str): + + def handleField(f): + + def boolToStr(b): + + # Almost final formatting + def prog_info_to_entry(c): + + def cbd_to_entry(c): + + def pc5(str): + + def remove_prefix(str,i): + + def course_to_entry(c,order="0"): + + def courseline_to_pretty(line): + + # restarted oct 2019 and try to simplify + def prog_take_4(program): + todo: + +curriculum2020.py + + def to_md(s): + + def print_return(x): + + def cq_8020(root,indent=0): + + def cq_8021(root,indent=0): + + def cq_8021_start(): + + def cq_8022(root,indent=0): + + def cq_8022_start(): + + def sortable_class(li): + + def c_name(c): + + def show_classes2020(): + + def show_classes2020_start(): + +curriculum_patterns.py + + def div1(a,b): + + def d2(a,b): + + def d3(a,b): + + def pp1(a,b): + + def pp2(a,b): + + def pp3(a,b,c,d,e,f,g): + + def pp4(a,b): + + def pp5(a,b,c): + + def pp6(a,b): + + def pp7(a,b): + + 
def pp8(a,b): + + def jj1(a,b,c,d,e): + + def jj2(a,b,c,d,e,f): + + def jj3(a,b,c,d,e): + + def jj4(a,b,c,d): + + def jj5(a,b,c,d,e,f): + + def jj6(a,b,c,d): + + def jj2(a,b,c,d): + + def jj2(a,b,c,d): + +depricated.py + todo: where does the most recent schedule come from? + + # Input: xxxx_sched.json. Output: xxxx_latestarts.txt + def list_latestarts(): + + def prep_online_courses_df(): + + def course_is_online(crn): + + def get_crn_from_name(name): + + def get_enrlmts_for_user(user,enrollments): + + # Don't know + def demo(): + + def stats(): + + def dict_generator(indict, pre=None): + + def print_dict(v, prefix='',indent=''): + + def walk_file(): + + def tag(x,y): return "<%s>%s" % (x,y,x) + + def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) + + def a(t,h): return '%s' % (h,t) + + def server_save(key,value): + + def flask_thread(q): + + def home(): + + def s(key,val): + + def hello(): + + def sd(): + + def serve(): + + def summarize_proportion_online_classes(u): + + def summarize_num_term_classes(u): + todo: this duplicates courses.py ?? + + # Prompt for course id, return list of user dicts. TODO this duplicates courses.py ?? + def getUsersInCourse(id=0): # returns list + + def recur_look_for_leafs(item,indent=0,show=1): + + def am_i_a_leaf(item): + + def sampleclass(): + + def matchstyle(): + + def user_role_and_online(): + + def more_unused_xreferencing(): + + def users_p_file(): + + def com_channel_dim(): + todo: + + # NO LONGER USED - SEE COURSES + def enroll_stem_students(): + + # unused? + def getAllTeachersInTerm(): # a list + todo: hits in courses by teachers https://gavilan.instructure.com:443/api/v1/users/2/page_views?end_time=Dec%2010%2C%202018 + + def teacherActivityLog(uid=1): ### Next: save results in a hash and return that.... 
+ + def summarize_student_teacher_role(u): + + def user_roles2(): + + def req_to_db(fname_list): + + def has_online(series): + + def has_lecture(series): + + def has_hybrid(series): + + # Wrapper to get 2 schedules at once + def dl_sched(): + todo: these semesters + + # Send a personalized email regarding ZTC + def send_z_email(fullname, firstname, addr, courses_list): + + def getInactiveTeachersInTerm(t=23): # a list + + def course_location(course): + + def course_time(course): + + def course_teacher(course): + + def reg_nums(): + + # In the schedule, is this a class or a continuation of the class above? + def categorize(): + todo: must we open all these files? + + # Deprecated. call perl. + def constructSchedule(): + + def fetch_dict(target,params={}): + + def get_schedule(term='201870', sem='fall'): + + def dates(s): + + def parse_www_csv_sched(): + + def parse_json_test_sched(): + + def put_revised_pages(): + + def put_course_pages(): + + def freshdesk(): + +gpt.py + +graphics.py + +interactive.py + + def dict_generator(indict, pre=None): + + def print_dict(v, prefix='',indent=''): + + def walk_file(): + + def tag(x,y): return "<%s>%s" % (x,y,x) + + def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) + + def a(t,h): return '%s' % (h,t) + + def server_save(key,value): + + def flask_thread(q): + + def before_request(): + + def clears(): + + def dpi(): + + def dpi2(): + + def screenoff_a(): + + def light(): + + def do_image(filename): + + def do_image_crop(filename,x,y,w,h,newname): + + def save_post(): + + def writing_img(fname): + + def restart(): + + def dispatch3(func,arg,arrg): + + def dispatch2(func,arg): + + def dispatch(func): + + def dispatch3j(func,arg,arrg): + + def dispatch2j(func,arg): + + def dispatch1j(func): + + def home(): + + def send_cachedata(path): + + def send_js(path): + + def send_jslib(path): + + def s(key,val): + + def do_sample(): + + def media(file_id): + + def podcast(): + + def weblec(): + + def hello(): + + def sd(): + + def 
test_message(message): + + def serve(): + + def make_teacher_rel(self, tchr, clss): + + def __init__(self, uri, user, password): + + def close(self): + + def print_greeting(self, message): + + def _create_and_return_greeting(tx, message): + + def make_teacher_rel(g, tchr, clss): + + def testgraph(): + + def Memoize( func): + + def wrapper(*args): + + def startup(self, outfile): + + def set_my_dict(self,d): + + def cycle_color(self, s): + + def ascii_art(self, text): + + def close_window(self, ): + + def suggest(self, word): + + def curses_print_word(self, word,color_pair_code): + + def curses_print_line(self, line,color_pair_code): + + def redraw(self, start_y,end_y,fallback_y,fallback_x): + + def scroll_down(self, noredraw,fallback_y,fallback_x): + + def clear_upside(self, n,y,x): + + def display_suggest(self, y,x,word): + + def inputloop(self, ): + + def setup_command(self,outfile): + + def cleanup_command(self): + + def handle_command(self, cmd): + + def repl_staff(): + + def repl_degs(): + + def repl(): + + def repl(): + +interactivex.py + + def dict_generator(indict, pre=None): + + def print_dict(v, prefix='',indent=''): + + def walk_file(): + + def tag(x,y): return "<%s>%s" % (x,y,x) + + def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) + + def a(t,h): return '%s' % (h,t) + + def server_save(key,value): + + def flask_thread(q): + + def before_request(): + + def restart(): + + def dispatch3(func,arg,arrg): + + def dispatch2(func,arg): + + def dispatch(func): + + def dispatch3j(func,arg,arrg): + + def dispatch2j(func,arg): + + def dispatch1j(func): + + def home(): + + def send_jslib(path): + + def send_cachedata(path): + + def send_js(path): + + def s(key,val): + + def do_sample(): + + def hello(): + + def sd(): + + def serve(): + + def make_teacher_rel(self, tchr, clss): + + def __init__(self, uri, user, password): + + def close(self): + + def print_greeting(self, message): + + def _create_and_return_greeting(tx, message): + + def make_teacher_rel(g, 
tchr, clss): + + def Memoize( func): + + def wrapper(*args): + + def startup(self, outfile): + + def set_my_dict(self,d): + + def cycle_color(self, s): + + def ascii_art(self, text): + + def close_window(self, ): + + def suggest(self, word): + + def curses_print_word(self, word,color_pair_code): + + def curses_print_line(self, line,color_pair_code): + + def redraw(self, start_y,end_y,fallback_y,fallback_x): + + def scroll_down(self, noredraw,fallback_y,fallback_x): + + def clear_upside(self, n,y,x): + + def display_suggest(self, y,x,word): + + def inputloop(self, ): + + def setup_command(self,outfile): + + def cleanup_command(self): + + def handle_command(self, cmd): + + def repl_staff(): + + def repl_degs(): + + def repl(): + + def repl(): + +localcache.py + + def db(): + + def setup_table(table='requests'): + + # Help the next function to upload new users directly to conf database on gavilan. + def employees_refresh_flex(data): + + # Everyone in iLearn DB with an xyz@gavilan.edu email address. + def all_gav_employees(): + + # + def teachers_courses_semester(): + + # + def teachers_by_term(): + + # Report for AEC + def aec_su20_report(): + + # Return the most up do date version of the given file. Useful for 'dimensions'. + def most_recent_file_of( target ): + + def finder(st): + + # Given a table schema, parse log file, return a list of dicts. Optionally remove some columns. + def parse_file_with( file, format, with_gid=0 ): + + # I return a list of the read lines if the log dates in the file are within dates (top of this file), or FALSE + def is_requestfile_interesting(fname): + todo: more robust here + todo: - investigate pearson, developer key: 170000000000376 and their ridiculous amounts of hits. + + # Return a 'timeblock'. An integer number of 15 minute blocks from my epoch. Expects a datetime object in PST timezone. + def timeblock_from_dt(dt_obj): + + # Returns a time in PST, given a 'timeblock'. 
Will be used in translating back to human time + def dt_from_timeblock(tb): + + # Twenty Four hour timeblocks + def timeblock_24hr_from_dt(dt_obj): + + # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time + def dt_from_24hr_timeblock(tb): + + # Four hour timeblocks + def timeblock_4hr_from_dt(dt_obj): + + # Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time + def dt_from_4hr_timeblock(tb): + + # I make the line into a dict, erase keys with no data, make a DT field called date, make a time_block (int) field. + def requests_line(line,i=0): + + # Bulk insert of requests logs. Too much data to be useful. + def requests_file(fname_list): + todo: select if timeblock exists + + # Insert or update a request line. + def upsert_request(line, vals): + + # Generic insert of a dict into a table. Keys of dict must match table columns. + def dict_to_insert(thisline,table): # a dict + + # This now does tallying by timeblock. + def merge_requests(): + + def merge_comm_channel(): + + def merge_pseudonym(): + + def merge_users(): + + def merge_courses(): + + def merge_enrollment(): + + def merge_term(): + + def merge_roles(): + + def merge_convos(): + + # For returning sqlite results as dicts + def dict_factory(cursor, row): + todo: ... approaches to all this data... list requests in order descending time, unique users, and just + + # Attempt to do tallying + def make_views_summarys(): + + # original without time_blocks info. + def make_views_summarys_v1(): + + # Setup my basic db stats base from scratch + def full_reload(): + + def guess_dept(t): + + # Main view of all class / all user overview... 
+ def dept_with_studentviews(dept="", sem=''): + + def f(x): + + def get_courses_in_term_local(term="172"): + + # get student count + def course_student_stats(canvasid): + + # get teacher name from local db + def course_quick_stats(canvasid): + + # What a student has taken / teacher has taught + def user_enrolled_in(userid): + + # All students in this semester ... + def users_this_semester_db(sem=''): + + # Everyone whose first semester is ..... + def users_new_this_semester(sem=''): + + # All student users in STEM - from local db + def user_in_stem(): + + # Get all the classes in one dept + def dept_classes(dept,sem=''): + todo: + + def depts_with_classcounts(sem=''): + todo: + + def f(x): + + def name_with_count(name,li): + + def arrange_data_for_web(dept='', sem=''): + + def f(x): + + # Get enrollments. (Best to freshly run pipelines/get_rosters) and put them into DB + def build_tables(headers,name): + + def load_tables(table,headers,row,verbose=0): + + def semester_enrollments(verbose=0): + + def qstrip(txt): return txt.strip('"') + + def comm_channel_file(): + + def pseudonym_file(): + + def abcd(): + + def crns_to_teachers(): + + def all_sem_courses_teachers(): + + def to_sis_sem(s): + + def build_db_schedule(): + + def finder(st): + + def process_enrollment_data(): + + def sem_to_idx(s): + todo: + + def do_encoding(): + +main.py + +myconsole.py + + def handler(signum, frame): + + def mainloop(): + +outcomes.py + + def outcome_overview(term=21): + + def create_acct_lvl_outcomes(src,dept,makefolder='',folder=0): + + def connect_acct_oc_to_course(course_id,oc_group_id): + + def outcome_groups_dump(): + + def outcome_groups_backup(): + + def create_course_group(short,parent): + + def create_dept_group(short): + + def outcomes_attached_to_courses(term=65,limitdept=''): + todo: Handle this: CSIS/DM85 WEB DESIGN 40823/24 + + def summarize_course_online_slo(outcome_list): + + def fetch_outcome_details(id): + + # Report on the actual evaluation data? 
+ def outcome_report1(): + todo: + + # For the given course, get all outcome measurements, and display scores and stats. + def outcome_report2(): + + def fix_joined_class(str): + + def split_slo_name(str): + + def outcome_report3(): + + def read_slo_source(): + + def slo_source_by_dept(): + + def printj(j): + + def writej(o,j): + + # Get root outcome group + def root_og(): + + def recur_og(): + + def recur_main(out,g_url=""): + + def recur2(out,og={}): + + def all_og(): + + def course_slo_getter(q): + + def threaded_getter(): + + def demo_o_fetch(): + + def outcome_groups_2021(): + + def x_ref_dept_names(): + + def all_outcome_results_in_term(termid=''): + + def all_outcome_results_in_term_sub(termid=''): + + def all_linked_outcomes_in_term(termid=''): + + def all_linked_outcomes_in_term_sub(termid=''): + + def assemblerow(g,parent=''): + + def recur_full_fetch(out,g,parent=""): + + # return the appropriate cq course version. + def find_cq_course_version(code): + + def outcome_groups(): + + def summary_string(s): + + def add_outcomes_course_id(canvas_id): + + def add_outcomes_course_code(): + + def add_outcomes_course_code_sub(target_code='AJ184',term=178,fresh=0): + + def add_csis_sp22(): + + def quick_add_course_outcomes(ilearn_course_id, cq_outcome_id_list): + + def stringpad(s,n): + + def code_from_ilearn_name(n,verbose=0): + + def parse_ilearn_course_names_ALLSEMESTERS(): + + def parse_ilearn_course_names(term='178',fresh=1,log=0): + +outcomes2022.py + + def course_slo_getter(q): + + def ilearn_shell_slo_to_csv(shell_slos): + +patterns_8020.py + +patterns_topdown.py + + def pp0(a,b,c,d,e): + + def pp1(a,b,c,d,e,f): + + def pp2(a,b,c,d,e): + + def pp3(a,b,c,d): + + def pp4(a,b,c): + + def pp5(a,b,c): + + def pp6(a,b,c): + + def div1(a,b): + + def d2(a,b): + + def d3(a,b): + + def pp1(a,b): + + def pp2(a,b): + + def pp3(a,b,c,d,e,f,g): + + def pp4(a,b): + + def pp5(a,b,c): + + def pp6(a,b): + + def pp7(a,b): + + def pp8(a,b): + + def jj3(a,b,c,d,e): + + def 
jj5(a,b,c,d,e,f): + + def jj2(a,b,c,d): + + def jj2(a,b,c,d): + +pipelines.py + todo: all these constants for SSB -- line 1008 + todo: https://stackoverflow.com/questions/42656247/how-can-i-use-canvas-data-rest-api-using-python + + def d(s,end=''): + + # Main canvas querying fxn + def fetch(target,verbose=0): + + # Main canvas querying fxn - stream version - don't die on big requests + def fetch_stream(target,verbose=0): + + # paging makes problems... example: enrollment_terms + def fetch_collapse(target,collapse='',verbose=0): + + # Teacher name format changed. Remove commas and switch first to last + def fix_t_name(str): + + # Separate dept and code + def split_class_dept(c): + + def split_class_code(c): + + def split_class_code_letter(c): + + # go from sp20 to 2020spring + def shortToLongSem(s): + + # Go to the semesters folder and read the schedule. Return dataframe + def getSemesterSchedule(short='sp21'): # I used to be current_schedule + todo: Some semesters have a different format.... partofday type site xxx i just dL'd them again + + def get_enrlmts_for_user(user,enrollments): + + # Get something from Canvas Data + def do_request(path): + + # Canvas data, download all new files + def sync_non_interactive(): + + # list files in canvas_data (online) and choose one or some to download. + def interactive(): + + def todays_date_filename(): # helper + + def nowAsStr(): # possible duplicate + + def row_has_data(r): # helper + + def row_text(r): # helper + + # Take banner's html and make a csv(?) file + def ssb_to_csv(src): + + def clean_funny(str): + + def clean_funny2(str): + + def clean_funny3(str): + + ### course is a list of 1-3 lists, each one being a line in the schedule's output. First one has section + def course_start(course): + todo: use this to make a early/late/short field and store semester dates w/ other constants + todo: do these years matter? 
+ + def time_to_partofday(t): + todo: account for multiple sites/rows + + # Deduce a 'site' field, based on room name and known offsite locations + def room_to_site(room,verbose=0): + todo: account for multiple sites/rows + todo: better way to store these offsite labels + + # take text lines and condense them to one dict per section + def to_section_list(input_text,verbose=0): + todo: no output files + todo: if extra line is different type? + + # Log the history of enrollments per course during registration + def log_section_filling(current_sched_list): + + # Same as above, but compressed, act only + def log_section_filling2(current_sched_list): + + # Use Firefox and log in to ssb and get full schedule. Only works where selenium is installed + def scrape_schedule(): + todo: + + def dza_sched(): + + # recreate schedule json files with most current online schedule format. + def recent_schedules(): + todo: sems is a global in this file. Is that the right thing to do? + todo: the pipeline is disorganized. Organize it to have + todo: where does this belong in the pipeline? compare with recent_schedules() + + # Take the generically named rosters uploads files and move them to a semester folder and give them a date. + def move_to_folder(sem,year,folder): + + # This relates to enrollment files, not schedule. + def convert_roster_files(semester="",year="",folder=""): + + # From instructure sftp site + def fetch_current_rosters(): + + def fetch_current_rosters_auto(): + + # read schedule file with an eye toward watching what's filling up + def schedule_filling(): + todo: hardcoded + + # Upload a json file to www + def put_file(remotepath,localpath, localfile,prompt=1): + todo: these paths + + def sec(t): return "

    "+t+"

    \n" + + def para(t): return "

    "+t+"

    \n" + + def ul(t): return "\n" + + def li(t): return "
  • "+t+"
  • \n" + + def question(t,bracket=1): + + def answer(t): + + def read_paragraph_element(element,type="NORMAL_TEXT"): + + def get_doc(docid, bracket=1, verbose=0): + todo: x link, x bold, list, image. + + def read_paragraph_element_2(element,type="NORMAL_TEXT"): + + # t is a string that begins with "Icons: " ... and contains comma(space) separated list + def handle_icons(t): + + # t is a string that begins with "Tags: " ... and contains comma(space) separated list + def handle_tags(t): + + def handle_question(t,bracket=1): + + def handle_answer(t): + + def handle_sec(t): return ('section',t) + + def handle_para(t): return ('paragraph',t) + + def handle_ul(t): return ('unorderdedlist',t) + + def handle_li(t): return ('listitem',t) + + def fetch_doc_image(k,value): + + def get_doc_generic(docid, bracket=1, verbose=0): + + def scrape_schedule_py(): + + def scrape_schedule_multi(): + + def scrape_for_db(): + + def argos_data(): + + def days_times(s): + + def remove_year(s): + + def argos_data_from_cvc(): + + def expand_old_semesters(): + + # Input: xxxx_sched.json. Output: xxxx_latestarts.txt + def list_latestarts(term="su23"): + +server.py + + def mqtt_loop(): + + # called when MQTT server connects + def on_connect(client, userdata, flags, rc): + + # The callback for when a PUBLISH message is received from the server. 
+ def on_message(client, userdata, msg): + + def displaypi_on(): + + def displaypi_off(): + + def desklight(): + + def clearscreens(): + + def screenoff(): + + def tag(x,y): return "<%s>%s" % (x,y,x) + + def tagc(x,c,y): return '<%s class="%s">%s' % (x,c,y,x) + + def a(t,h): return '%s' % (h,t) + + def homepage(): + + def orgline(L): + todo: \s\[\#A\](.*)$', L) + + def editor(src): + + def in_form(txt,path): + + def mytime(fname): + + def index(): + + def writing(fname): + + def dashboard(): + + def dash(): + + def mycalendar(): + + def most_recent_file_of( target, folder ): + + def finder(st): + + def news(): + + def randPic(): + + def do_img_crop(im): + + def sample(): + + def sample2(a=""): + + # Filter a stream of loglines for those that match a course's url / id + def has_course(stream,courseid): + + def js(s): + + def sem_from_array_crn(crn): + + def user_courses(uid): + + def user_course_history_summary(usr_id): + + def roster(crn): + + def user_course_hits(usr,courseid): + + def profiles(id=1,b=2,c=3): + + # Departments, classes in each, and students (with hits) in each of those. + def enrollment(a): + + # All the classes in this dept, w/ all the students in each, with count of their views. + def dept(d=''): + + def user(canvas_id=None): + + def lectures(): + + def web_lectures(): + todo: update: dept, title, any of the other fields. 
+ + # update a value: dept id of a personnel id + def update_pers_title(pid, tid): + + # update a value: dept id of a personnel id + def update_pers_dept(pid, did): + + def user_edit(canvas_id='2'): + + def staff_dir(search=''): + + def find_goo(n): + + def byname(x): + + def fn_to_struct( n, staff ): + + def image_edit(filename=''): + + def image_crop(filename,x,y,w,h,newname=''): + + def server_save(key,value): + + def server_dispatch_json(function_name,arg='', arg2=''): + + def server_dispatch(function_name,arg='', arg2=''): + +stats.py + + def grades_rundown(): + + def class_logs(): + + def user_logs(): + + def recent_logins(): + + def userHitsThisSemester(uid=2): + + def getCurrentActivity(): # a dict + + def externaltool(): # a list + +tasks.py + + def scrape_bookstore(): + + def survey_answer(q=0): + + def survey_organize(): + + def build_quiz(filename=""): + + # Send an email + def send_email(fullname, firstname, addr, subj, content): + + def convert_to_pdf(name1, name2): + + # Build (docx/pdf) certificates for gott graduates + def certificates_gott_build(): + + # Email experiment + def mail_test(): + + # Change LTI Settings. Experimental + def modify_x_tool(): + + # Upload with sftp to www website folder: student/online/srt/classfoldername + def put_file(classfoldername): + todo: ',cnopts=cnopts) as sftp: + + # Switch everyone in a class to a teacher + def switch_enrol(): + + # Change dates & term of a class to unrestrict enrollment + def unrestrict_course(): + + # Bulk enroll users into a course + def enroll_accred(): + + # Calculate attendance stats based on enrollment/participation at 20% of term progressed, then 60% of term progressed. + def twenty_sixty_stats(li): + + # Older positive attendance hours calculation. 
+ def hours_calc(): + + def course_2060_dates(crn=""): + todo: + + def course_update_all_users_locallogs(course_id=''): + + def hours_calc_pulldata(course_id=''): + + def xlist_cwe(): + + def pos_atten(): + + def lname(x): + + def l_initial(x): + + def job_titles2(): + + def job_titles(): + + # an early version, before tearing up... + def job_titles3(): + + def index_pics(): + + def cmtes(): + + def strip(x): return x.strip() + + def esc_comma(x): return re.sub(',','[CMA]',x) + + def by_sem(x): return x['sem'] + + def parse_schedule(): + todo: check if i need to update it + todo: some weird hour offset issue w/ these activities + + def cal(): + todo: > + + def file_renamer(): + +temp.py + +tempget.py + + # Use Firefox and log in to ssb and get full schedule + def login(): + todo: my data here.... secret + + def filename_friendly(str): + + def otter(): + +templates.py + + def item_to_masonry(item): + + def try_untemplate(): + + def php_remove(m): + + def php_add(m): + + def do_template(temp,source,side): + + def remove_filetype(f): + + def make(): + + def txt_2_table(): + + def studenttech_faq(): + + # https://docs.google.com/document/d/1tI_b-q75Lzu25HcA0GCx9bGfUt9ccM8m2YrrioDFZcA/edit?usp=sharing + def de_faq(): + + def degwork_faq(): + + def vrc_faq(): + + def counseling_faq(): + + def finaid_faq(): + + def coun_loc(): + + def tutor_faq(): + + def test_repl(): + +timer.py + + def func(a, b): + +users.py + todo: these constants + + # All users to a cache file cache/allusers.json + def fetchAllUsers(): + + # Fetch teacher users objects from local cache + def teacherRolesCache(): # I used to be load_users + + # Canvas: Fetch all people with gavilan.edu email address + def teacherRolesUpdateCache(): # I used to be get_users + + # Fetch preferred email address for a given user id. 
( Canvas ) + def getEmail(user_id): + + # All teachers in a particular course + def getAllTeachers(course_id=59): # a list + + # + def classType(t): + todo: fix bug in schedule parser so non-online classes have a type field + + def my_blank_string(): return "no data" + + def my_blank_dict(): return {'name':'NoName','email':'noemail@gavilan.edu'} + + def my_empty_dict(): return defaultdict(my_blank_string) + + def get_email_from_rec(name,name_to_record): + + # Pull the staff directory on the webpage. Convert to pandas dataframe + def staff_dir(get_fresh=False): + todo: lol get fresh again... + + def schedForTeacherOverview(long,short): + + # Return a dataframe of the last 4 semester schedules put together + def oneYearSchedule(): + + def num_sections_last_year(line): + + def sec_type_stats(line): + + def prct_online(line): + + def prct_lecture(line): + + def prct_hybrid(line): + + # Given the names of teachers in last year's schedules, fill in email, etc. from ilearn files + def teacher_basic_info(sched, from_ilearn, names): + + def find_that_name(x): + todo: Old and broken + + # Outputs: cache/teacher_by_semester.csv, + def teacherModalityHistory(sched=[],names=[]): + + def teacherCourseHistory(a,names): + todo: sort by dept also + + # Outputs: cache/course_teacher_combos.csv, + def teacherSharedCourses(a=[]): + todo: this is broken + + # How many courses in each department were taught in the last year? + def departmentCountCourses(a=[]): + + def clean_nonprint(s): + + def read_cmte(names): + + def read_training_records(): + + # open a file and mark the people with their ids given. Return a dataframe + def read_bootcamp1(filename): + + # open a file and mark the people with their ids given. Return a dataframe + def read_bootcamp2(filename): + + def not_blank_or_pound(L): + + def temp1(x): + + def add_realnames(df,names): # the surveys. 
raw name is in 2nd column + + def compareToughNames(a,b): + + def compareNames(a,b,verbose=0): + + def find_ilearn_record(ilearn_records,manual_records, othername,verbose=0): + + def manualNamesAndDept(): + + def manualNames(): + + # given a list of class codes, return the most common (academic) department + def guessDept(d_list): + + # Make one big csv file of everything I know about a teacher + def getTeachersInfoMain(): + todo: - broken + + def enroll_staff_shell(): + + # take a list of raw hits. + def activity_summary(hits): + todo: month is hardcoded here + + # Get views counts on current teachers. todo: month is hardcoded here + def get_recent_views(id=1): + todo: broken? + + def categorize_user(u): + todo: threaded + + # Doest the account have a photo loaded? + def checkForAvatar(id=2): + + # Grab em. Change the first if when continuing after problems.... + def downloadPhoto(): + + def mergePhotoFolders(): + + def mergePhotoFolders2(): + + # Go through my local profile pics, upload any that are missing. + def uploadPhoto(): + + def create_ztc_list(): + + def get_user_info(id): + + # these are any messages that get pushed out to their email + def comm_mssgs_for_user(uid=0): + + # + def convos_for_user(uid=0): + + # single q sub + def quiz_get_sub(courseid, quizid, subid=0): + + # quiz submissions for quiz id x, in course id y + def quiz_submissions(courseid=9768, quizid=32580): + + # return (timeblock, course, read=0,write=1) + def requests_line(line,i=0): + + # + def report_logs(id=0): + + def track_users_in_sem(): + + def track_users_in_class(L=[]): + + def track_user_q(id, q): + + # Maintain local logs. Look to see if we have some, download logs since then for a user. + def track_user(id=0,qid=0): + todo: set up this info file if it isn't there. check any changes too. 
it + todo: + + # + def track_users_by_teacherclass(): + + def nlp_sample(): + + def nlp_sample2(): + + def one_course_enrol(): + + def find_new_teachers(): + + def user_db_sync(): + + def find_no_goo(): + + def track_a_user(): + +util.py + + def stripper(s): + + def mycleaner(s): + + def print_table(table): + + def remove_nl(str): + + def UnicodeDictReader(utf8_data, **kwargs): + + def minimal_string(s): + + def to_file_friendly(st): + + def clean_title(st): + + def int_or_zero(x): + + def float_or_zero(x): + + def match59(x): + + def item_2(x): return x[2] + + def unix_time_millis(dt): + + # ENGL250 returns ENGL + def dept_from_name(n): + + def most_common_item(li): + + def srt_times(a,b): + + def how_long_ago(a): # number of hours ago 'a' was... + + def partition(times_list): + + +""" + diff --git a/content.py b/content.py index a855d93..d281bb8 100644 --- a/content.py +++ b/content.py @@ -6,6 +6,7 @@ from pipelines import header, fetch, url, put_file from util import clean_title, to_file_friendly, minimal_string, stripper, mycleaner from bs4 import BeautifulSoup as bs from html.parser import HTMLParser +from collections import defaultdict import tomd, checker import html2markdown as h2m import pypandoc @@ -829,40 +830,83 @@ Schedule an In-Person, Phone or Zoom Appointment""" ## TODO site scraper - -## TODO finde package that extracts text from web page - +## TODO find package that extracts text from web page ### TODO master list of what to index. 
-from pattern.web import URL, plaintext, extension +## TODO PDFs and DOCXs +## TODO fix urls w/ anchors + + + +from pattern.web import plaintext, extension from pattern.web import download -from pattern import URL, MIMETYPE_IMAGE -from pattern.web import Crawler -from util import clean_title +#from pattern import URL, MIMETYPE_IMAGE +from pattern.web import Crawler, DEPTH +import bs4 +import trafilatura save_folder = 'cache/crawl' +clean_folder = 'cache/cleancrawl' + +def clean_fn(s): + s = re.sub(r'[\s:]+','',s) + s = re.sub(r'\/','_',s) + return s + +def format_html(html): + soup = bs4.BeautifulSoup(html, 'html.parser') + return soup.prettify() + class GavCrawl(Crawler): def visit(self, link, source=None): - print 'visited:', repr(link.url), 'from:', link.referrer - txt = plaintext(link.source) ## , keep={'h1':[], 'h2':[], 'strong':[], 'a':['href']}) - codecs.open(save_folder + '/' + clean_title(link.url) + '.txt').write(txt) + print('visited:', repr(link.url), 'from:', link.referrer) + #txt = plaintext(source, keep={'h1':[], 'h2':[], 'h3':[], 'h4':[], 'td':[], 'strong':[], 'b':[], 'a':['href'], 'img':['src'], 'ul':[], 'ol':[], 'li':[], 'dd':[], 'dt':[], 'i':[]}) + #codecs.open(save_folder + '/' + mycleaner(clean_title(link.url)) + '.txt','w','utf-8').write(tomd.convert(txt)) + + codecs.open(save_folder + '/' + clean_fn(link.url) + '.txt','w','utf-8').write(trafilatura.extract(source,include_links=True, deduplicate=True, include_images=True, include_formatting=True)) + def fail(self, link): - print 'failed:', repr(link.url) + print('failed:', repr(link.url)) def crawl(): - p = GavCrawl(links=['http://www.gavilan.edu/'], delay=3) + p = GavCrawl(links=['http://www.gavilan.edu/'], domains=['gavilan.edu'], delay=0.75) while not p.done: - p.crawl(method=DEPTH, cached=False, throttle=3) - + try: + p.crawl(method=DEPTH, cached=False, throttle=0.76) + except Exception as e: + print("Exception: ", e) +def txt_clean_index(): + files = os.listdir(save_folder) + line_freq = 
defaultdict(int) + + # first pass + for f in files: + lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines() + for L in lines: + L = L.strip() + line_freq[L] += 1 + + # second pass + for f in files: + print("\n\n",f) + lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines() + out = codecs.open(clean_folder + '/' + f,'w','utf-8') + for L in lines: + L = L.strip() + if L in line_freq and line_freq[L] > 3: + continue + print(L) + out.write(L + '\n') + out.close() def samples(): crawler = Crawler(links=[], domains=[], delay=20.0, sort=FIFO) url = URL('http://www.clips.ua.ac.bemedia/pattern_schema.gif') - print url.mimetype in MIMETYPE_IMAGE + print(url.mimetype in MIMETYPE_IMAGE) #html = download('http://www.clips.ua.ac.be/', unicode=True) @@ -876,14 +920,14 @@ def samples(): url = URL('http://www.clips.ua.ac.be') dom = DOM(url.download()) for link in dom('a'): - print abs(link.attributes.get('href',''), base=url.redirect or url.string) + print(abs(link.attributes.get('href',''), base=url.redirect or url.string)) # get pdfs from pattern.web import URL, PDF url = URL('http://www.clips.ua.ac.be/sites/default/files/ctrs-002_0.pdf') pdf = PDF(url.download()) - print pdf.string + print(pdf.string) @@ -897,6 +941,8 @@ if __name__ == "__main__": # 5: ['import freshdesk content', freshdesk ], 6: ['download all a courses pages', grab_course_pages], 7: ['demo vector search', demo_vector_search], + 8: ['crawl',crawl], + 9: ['clean text index', txt_clean_index], } for key in options: diff --git a/courses.py b/courses.py index cf8be67..8583511 100644 --- a/courses.py +++ b/courses.py @@ -1086,6 +1086,7 @@ def add_evals(section=0): s = [ x.strip() for x in codecs.open('cache/sp23_eval_sections.csv','r').readlines()] s = list(funcy.flatten(s)) s.sort() + print(s) xyz = input('hit return to continue') #c = getCoursesInTerm(168,0,1) @@ -1306,7 +1307,6 @@ def set_ext_tools(): if __name__ == "__main__": options = { 1: ['Cross check schedule with ztc 
responses',make_ztc_list] , - 30: ['List latestart classes', list_latestarts ], 2: ['Add announcements to homepage', change_course_ann_homepage], 3: ['Cross-list classes', xlist ], 4: ['List students who passed quiz X', get_quiz_passers], @@ -1335,6 +1335,7 @@ if __name__ == "__main__": 27: ['Fine tune term dates and winter session', course_dates_terms], 28: ['Cross list a semester from file', semester_cross_lister], 29: ['Check all courses & their sections in semester', all_semester_course_sanity_check], + #30: ['List latestart classes', list_latestarts ], # TODO wanted: group shell for each GP (guided pathway) as a basic student services gateway.... # }