Merge branch 'master' of http://192.168.1.6/phowell/canvasapp
commit 6807ddd96c

apphelp.py | 496

@@ -71,22 +71,21 @@ prog_write.close()
## Functions
#
# Total TODOs remaining: 57
# Total TODOs remaining: 67
#
# TODOs per file:
#
# 1 - checker.py
# 1 - content.py
# 6 - content.py
# 6 - courses.py
# 3 - curriculum.py
# 5 - depricated.py
# 6 - localcache.py
# 6 - depricated.py
# 7 - localcache.py
# 2 - outcomes.py
# 20 - pipelines.py
# 17 - pipelines.py
# 2 - server.py
# 2 - tasks.py
# 5 - tasks.py
# 1 - tempget.py
# 8 - users.py
# 12 - users.py
#

@@ -95,8 +94,9 @@ prog_write.close()
__init__.py
canvas_secrets.py
checker.py
todo: make this sweet
def safe_html(html):

@@ -120,12 +120,6 @@ content.py
def d(s):
def stripper(s):
def mycleaner(s):
def freshdesk():
# Build a master file with the entire class content
def accessible_check(id=""):
todo: include linked pages even if they aren't in module

@@ -140,12 +134,6 @@ content.py
# DL pages only
def grab_course_pages(course_num=-1):
# Appears to not be used
def put_course_pages():
# Also not used
def put_revised_pages():
# Download, clean html, and reupload page
def update_page():

@@ -163,13 +151,32 @@ content.py
def multiple_downloads():
def demo_vector_search():
def is_complete_sentence(text):
todo: site scraper
todo: find package that extracts text from web page
todo: master list of what to index.
todo: PDFs and DOCXs
todo: fix urls w/ anchors
def clean_fn(s):
def format_html(html):
def visit(self, link, source=None):
def fail(self, link):
def crawl():
def txt_clean_index():
def samples():
courses.py
todo:
def int_or_zero(x):
def float_or_zero(x):
# Gott 1 Bootcamp - report on who completed it.
def get_gott1_passers():

@@ -179,15 +186,12 @@ courses.py
# Who, in a class, passed?
def get_course_passers(course, min_passing, passers_filename, still_active_filename):
# Who, in a class and a quiz, passed?
def get_quiz_passers():
# Change courses to show 2 announcements
def change_course_ann_homepage(id="10458"):
def scrape_bookstore():
todo: where does the most recent schedule come from?
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts():
# All students enrolled in a class in the given semester. Simpler verson of below. Return SET of course_ids.
def users_in_semester():
todo:

@@ -203,13 +207,18 @@ courses.py
def getTerms(printme=1, ask=1):
todo: unsafe overwrite
def getCourses(): # a dict
def getCourses(x=0): # a dict
def update_course_conclude(courseid="13590",enddate='2021-12-23T01:00Z'):
# Relevant stuff trying to see if its even being used or not
def course_term_summary():
def course_term_summary_local(term="176",term_label="FA22"):
# Relevant stuff trying to see if its even being used or not
def course_term_summary(term="176",term_label="FA22"):
# Fetch all courses in a given term
def getCoursesInTerm(term=0,show=1,active=0): # a list
def getCoursesInTerm(term=0,get_fresh=1,show=0,active=0): # a list
def getCoursesTermSearch(term=0,search='',v=0):

@@ -217,6 +226,16 @@ courses.py
def xlistLineSummary(c,sections={}):
def numbers_in_common(L):
def combined_name(nic,L):
def semester_cross_lister():
def xlist_ii(parasite_id,host_id,new_name,new_code):
def all_semester_course_sanity_check():
def eslCrosslister():
def xlist(parasite='', host=''): # section id , new course id

@@ -226,21 +245,93 @@ courses.py
def enroll_stem_students_live():
def enroll_bulk_students_bydept(course_id, depts, the_term="172", cautious=1): # a string, a list of strings
todo: not done here
def enroll_art_students_live():
def enroll_orientation_students():
def summarize_proportion_online_classes(u):
def summarize_num_term_classes(u):
def enroll_o_s_students():
def make_ztc_list(sem='sp20'):
def course_search_by_sis():
def mod_eval_visibility( shell_id, visible=True ):
def instructor_list_to_activate_evals():
def add_evals(section=0):
def course_dates_terms(section=0):
def remove_n_analytics(section=0):
def create_sandboxes():
def course_term_summary_2():
def get_ext_tools():
def set_ext_tools():
todo: wanted: group shell for each GP (guided pathway) as a basic student services gateway....
cq_demo.py
def fetch(target):
curric2022.py
def fetch_all_programs():
def nothing(x=0):
def clean(st):
def recur_matcher(item, depth=0):
def single_course_parse(c):
def match_style_test():
def single_program_path_parse(c):
def path_style_prog():
def term_txt_to_code(t):
def all_outcomes():
def ddl():
def splitclassline(cl, id=''):
def path_style_2_html():
def course_path_style_2_html():
def another_request(url,startat):
def fetch_all_classes():
def recur_path_matcher(item, path=[]):
def x2_path_update(x,y,z):
def pathstyle(theclass):
def single_course_path_parse(c):
def path_style_test():
def make_sl():
def course_rank():
curriculum.py
todo: These secrets
def dbg(x):
def another_request(url,startat):

@@ -335,6 +426,9 @@ curriculum.py
def is_online_inblock(c):
# 9/2021 clean programs to good json
def organize_programs_stage2():
# of all the programs, what can be accomplished online?
def find_online_programs():

@@ -408,6 +502,35 @@ curriculum.py
def cq_8020_start():
def recurse3(sec,path=''):
def get_id_sortorder(sec):
def include_exclude(str,inc,exc=[]):
def pbd3(str):
def handleField(f):
def boolToStr(b):
# Almost final formatting
def prog_info_to_entry(c):
def cbd_to_entry(c):
def pc5(str):
def remove_prefix(str,i):
def course_to_entry(c,order="0"):
def courseline_to_pretty(line):
# restarted oct 2019 and try to simplify
def prog_take_4(program):
todo:
curriculum2020.py
def to_md(s):

@@ -473,6 +596,18 @@ curriculum_patterns.py
def jj2(a,b,c,d):
depricated.py
todo: where does the most recent schedule come from?
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts():
def prep_online_courses_df():
def course_is_online(crn):
def get_crn_from_name(name):
def get_enrlmts_for_user(user,enrollments):
# Don't know
def demo():

@@ -504,10 +639,30 @@ depricated.py
def sd():
def serve():
def summarize_proportion_online_classes(u):
def summarize_num_term_classes(u):
todo: this duplicates courses.py ??
# Prompt for course id, return list of user dicts. TODO this duplicates courses.py ??
def getUsersInCourse(id=0): # returns list
def recur_look_for_leafs(item,indent=0,show=1):
def am_i_a_leaf(item):
def sampleclass():
def matchstyle():
def user_role_and_online():
def more_unused_xreferencing():
def users_p_file():
def com_channel_dim():
todo:
# NO LONGER USED - SEE COURSES

@@ -559,6 +714,22 @@ depricated.py
def get_schedule(term='201870', sem='fall'):
def dates(s):
def parse_www_csv_sched():
def parse_json_test_sched():
def put_revised_pages():
def put_course_pages():
def freshdesk():
gpt.py
graphics.py
interactive.py
def dict_generator(indict, pre=None):

@@ -579,8 +750,24 @@ interactive.py
def before_request():
def clears():
def dpi():
def dpi2():
def screenoff_a():
def light():
def do_image(filename):
def do_image_crop(filename,x,y,w,h,newname):
def save_post():
def writing_img(fname):
def restart():
def dispatch3(func,arg,arrg):

@@ -597,12 +784,12 @@ interactive.py
def home():
def send_jslib(path):
def send_cachedata(path):
def send_js(path):
def send_jslib(path):
def s(key,val):
def do_sample():

@@ -787,8 +974,6 @@ interactivex.py
def repl():
ipython_log.py
localcache.py
def db():

@@ -893,7 +1078,12 @@ localcache.py
def f(x):
# get student count and teacher name from local db
def get_courses_in_term_local(term="172"):
# get student count
def course_student_stats(canvasid):
# get teacher name from local db
def course_quick_stats(canvasid):
# What a student has taken / teacher has taught

@@ -932,24 +1122,37 @@ localcache.py
def qstrip(txt): return txt.strip('"')
def more_unused_xreferencing():
def user_role_and_online():
def comm_channel_file():
def pseudonym_file():
def users_p_file():
def com_channel_dim():
def abcd():
def crns_to_teachers():
def all_sem_courses_teachers():
def to_sis_sem(s):
def build_db_schedule():
def finder(st):
def process_enrollment_data():
def sem_to_idx(s):
todo:
def do_encoding():
main.py
myconsole.py
def handler(signum, frame):
def mainloop():
outcomes.py
def outcome_overview(term=21):

@@ -958,12 +1161,10 @@ outcomes.py
def connect_acct_oc_to_course(course_id,oc_group_id):
def outcome_groups():
def outcome_groups_dump():
def outcome_groups_backup():
def x_ref_dept_names():
def create_course_group(short,parent):
def create_dept_group(short):

@@ -992,6 +1193,74 @@ outcomes.py
def slo_source_by_dept():
def printj(j):
def writej(o,j):
# Get root outcome group
def root_og():
def recur_og():
def recur_main(out,g_url=""):
def recur2(out,og={}):
def all_og():
def course_slo_getter(q):
def threaded_getter():
def demo_o_fetch():
def outcome_groups_2021():
def x_ref_dept_names():
def all_outcome_results_in_term(termid=''):
def all_outcome_results_in_term_sub(termid=''):
def all_linked_outcomes_in_term(termid=''):
def all_linked_outcomes_in_term_sub(termid=''):
def assemblerow(g,parent=''):
def recur_full_fetch(out,g,parent=""):
# return the appropriate cq course version.
def find_cq_course_version(code):
def outcome_groups():
def summary_string(s):
def add_outcomes_course_id(canvas_id):
def add_outcomes_course_code():
def add_outcomes_course_code_sub(target_code='AJ184',term=178,fresh=0):
def add_csis_sp22():
def quick_add_course_outcomes(ilearn_course_id, cq_outcome_id_list):
def stringpad(s,n):
def code_from_ilearn_name(n,verbose=0):
def parse_ilearn_course_names_ALLSEMESTERS():
def parse_ilearn_course_names(term='178',fresh=1,log=0):
outcomes2022.py
def course_slo_getter(q):
def ilearn_shell_slo_to_csv(shell_slos):
patterns_8020.py
patterns_topdown.py

@@ -1041,11 +1310,10 @@ patterns_topdown.py
def jj2(a,b,c,d):
pipelines.py
todo: secrets
todo: all these constants for SSB -- line 1008
todo: secrets
todo: https://stackoverflow.com/questions/42656247/how-can-i-use-canvas-data-rest-api-using-python
def d(s):
def d(s,end=''):
# Main canvas querying fxn
def fetch(target,verbose=0):

@@ -1073,12 +1341,6 @@ pipelines.py
def getSemesterSchedule(short='sp21'): # I used to be current_schedule
todo: Some semesters have a different format.... partofday type site xxx i just dL'd them again
def prep_online_courses_df():
def course_is_online(crn):
def get_crn_from_name(name):
def get_enrlmts_for_user(user,enrollments):
# Get something from Canvas Data

@@ -1110,6 +1372,7 @@ pipelines.py
### course is a list of 1-3 lists, each one being a line in the schedule's output. First one has section
def course_start(course):
todo: use this to make a early/late/short field and store semester dates w/ other constants
todo: do these years matter?
def time_to_partofday(t):
todo: account for multiple sites/rows

@@ -1132,9 +1395,10 @@ pipelines.py
# Use Firefox and log in to ssb and get full schedule. Only works where selenium is installed
def scrape_schedule():
todo: my data here.... secret
todo:
def dza_sched():
# recreate schedule json files with most current online schedule format.
def recent_schedules():
todo: sems is a global in this file. Is that the right thing to do?

@@ -1149,7 +1413,6 @@ pipelines.py
# From instructure sftp site
def fetch_current_rosters():
todo: secret
def fetch_current_rosters_auto():

@@ -1159,7 +1422,6 @@ pipelines.py
# Upload a json file to www
def put_file(remotepath,localpath, localfile,prompt=1):
todo: remove this secret
todo: these paths
def sec(t): return "<h3>"+t+"</h3>\n"

@@ -1205,8 +1467,43 @@ pipelines.py
def scrape_schedule_py():
def scrape_schedule_multi():
def scrape_for_db():
def argos_data():
def days_times(s):
def remove_year(s):
def argos_data_from_cvc():
def expand_old_semesters():
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term="su23"):
server.py
def mqtt_loop():
# called when MQTT server connects
def on_connect(client, userdata, flags, rc):
# The callback for when a PUBLISH message is received from the server.
def on_message(client, userdata, msg):
def displaypi_on():
def displaypi_off():
def desklight():
def clearscreens():
def screenoff():
def tag(x,y): return "<%s>%s</%s>" % (x,y,x)
def tagc(x,c,y): return '<%s class="%s">%s</%s>' % (x,c,y,x)

@@ -1242,6 +1539,8 @@ server.py
def randPic():
def do_img_crop(im):
def sample():
def sample2(a=""):

@@ -1286,6 +1585,16 @@ server.py
def staff_dir(search=''):
def find_goo(n):
def byname(x):
def fn_to_struct( n, staff ):
def image_edit(filename=''):
def image_crop(filename,x,y,w,h,newname=''):
def server_save(key,value):
def server_dispatch_json(function_name,arg='', arg2=''):

@@ -1310,6 +1619,8 @@ stats.py
tasks.py
def scrape_bookstore():
def survey_answer(q=0):
def survey_organize():

@@ -1360,6 +1671,36 @@ tasks.py
def pos_atten():
def lname(x):
def l_initial(x):
def job_titles2():
def job_titles():
# an early version, before tearing up...
def job_titles3():
def index_pics():
def cmtes():
def strip(x): return x.strip()
def esc_comma(x): return re.sub(',','[CMA]',x)
def by_sem(x): return x['sem']
def parse_schedule():
todo: check if i need to update it
todo: some weird hour offset issue w/ these activities
def cal():
todo: >
def file_renamer():
temp.py
tempget.py

@@ -1466,6 +1807,7 @@ users.py
def teacher_basic_info(sched, from_ilearn, names):
def find_that_name(x):
todo: Old and broken
# Outputs: cache/teacher_by_semester.csv,
def teacherModalityHistory(sched=[],names=[]):

@@ -1475,6 +1817,7 @@ users.py
# Outputs: cache/course_teacher_combos.csv,
def teacherSharedCourses(a=[]):
todo: this is broken
# How many courses in each department were taught in the last year?
def departmentCountCourses(a=[]):

@@ -1512,6 +1855,7 @@ users.py
# Make one big csv file of everything I know about a teacher
def getTeachersInfoMain():
todo: - broken
def enroll_staff_shell():

@@ -1521,8 +1865,8 @@ users.py
# Get views counts on current teachers. todo: month is hardcoded here
def get_recent_views(id=1):
todo: broken?
# Have they taught online or hybrid classes?
def categorize_user(u):
todo: threaded

@@ -1539,8 +1883,6 @@ users.py
# Go through my local profile pics, upload any that are missing.
def uploadPhoto():
def test_email():
def create_ztc_list():
def get_user_info(id):

@@ -1583,8 +1925,20 @@ users.py
def one_course_enrol():
def find_new_teachers():
def user_db_sync():
def find_no_goo():
def track_a_user():
util.py
def stripper(s):
def mycleaner(s):
def print_table(table):
def remove_nl(str):

@@ -1597,6 +1951,10 @@ util.py
def clean_title(st):
def int_or_zero(x):
def float_or_zero(x):
def match59(x):
def item_2(x): return x[2]

content.py | 310

@@ -1,17 +1,18 @@
#saved_titles = json.loads( codecs.open('cache/saved_youtube_titles.json','r','utf-8').read() )
import requests, codecs, os, re, json
import requests, codecs, os, re, json, sys, pypandoc
import webbrowser, bs4, trafilatura, pickle, tomd, checker
import html2markdown as h2m
from pipelines import header, fetch, url, put_file
from util import clean_title, to_file_friendly, minimal_string, stripper, mycleaner
from bs4 import BeautifulSoup as bs
from html.parser import HTMLParser
import tomd, checker
import html2markdown as h2m
import pypandoc
import webbrowser
h = HTMLParser()
from collections import defaultdict
from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer, util
h = HTMLParser()
DBG = 1
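
The reworked import block above pulls in several third-party packages (bs4, trafilatura, pypandoc, tomd, html2markdown, pdfminer, sentence_transformers, and scrapy further down in this file). Assuming the usual PyPI package names (not confirmed by the repo), a fresh environment for content.py would be prepared with roughly:

pip install requests beautifulsoup4 trafilatura pypandoc tomd html2markdown pdfminer.six sentence-transformers scrapy

pypandoc also expects a pandoc binary on the PATH; checker, pipelines, and util are local modules in this repository.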

@@ -21,8 +22,6 @@ def d(s):
# Download everything interesting in a course to a local folder
# Build a master file with the entire class content
def accessible_check(id=""):

@@ -828,64 +827,266 @@ Schedule an In-Person, Phone or Zoom Appointment"""
print(f"Vector for the word '{example_word}': {vector}")

def makedir():
files = os.listdir('cache/crawl')
#print(files)
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
print(parts)

def manual_index():
files = os.listdir('cache/crawl')
#print(files)
ii = codecs.open('cache/crawl/index.html','w','utf-8')
ii.write('<html><body><h1>Site index</h1>\n')
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
ii.write('<br /><a href="mirror/'+f+'">'+f+'</a>\n')

def my_site():
files = os.listdir('cache/crawl')
output = []
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
output.append(parts)
return output

## TODO site scraper
## TODO finde package that extracts text from web page
## TODO find package that extracts text from web page
### TODO master list of what to index.
from pattern.web import URL, plaintext, extension
from pattern.web import download
from pattern import URL, MIMETYPE_IMAGE
from pattern.web import Crawler
from util import clean_title

save_folder = 'cache/crawl'

class GavCrawl(Crawler):
def visit(self, link, source=None):
print 'visited:', repr(link.url), 'from:', link.referrer
txt = plaintext(link.source) ## , keep={'h1':[], 'h2':[], 'strong':[], 'a':['href']})
codecs.open(save_folder + '/' + clean_title(link.url) + '.txt').write(txt)

def fail(self, link):
print 'failed:', repr(link.url)
## TODO PDFs and DOCXs
## TODO fix urls w/ anchors

def crawl():
p = GavCrawl(links=['http://www.gavilan.edu/'], delay=3)
while not p.done:
p.crawl(method=DEPTH, cached=False, throttle=3)
import scrapy, logging
from scrapy.crawler import CrawlerProcess

logger = logging.getLogger()
logger.setLevel(level=logging.CRITICAL)
logging.basicConfig(level=logging.CRITICAL)
logger.disabled = True

avoid = ['ezproxy','community\.gavilan\.edu','archive\/tag','archive\/category', 'my\.gavilan\.edu', 'augusoft',
'eis-prod', 'ilearn\.gavilan', 'mailto', 'cgi-bin', 'edu\/old\/schedule', ]

class MySpider(scrapy.Spider):
name = 'myspider'
#start_urls = ['https://gavilan.curriqunet.com/catalog/iq/1826']
start_urls = ['https://www.gavilan.edu']

"""
logging.getLogger("scrapy").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.utils.log").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.extensions.telnet").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.middleware").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.core.engine").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.middleware").setLevel(logging.CRITICAL)
logger.disabled = True"""

def parse(self, response):
print('visited:', repr(response.url), 'status:', response.status)
if re.search(r'\.pdf$', response.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', response.url)
if m:
print("saving to ", save_folder + '/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
text = extract_text(save_folder + '/' + clean_fn(response.url))
codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(text)

for ext in ['doc','docx','ppt','pptx']:
if re.search(r'\.'+ext+'$', response.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', response.url)
if m:
print("saving to ", save_folder + '/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
#text = extract_text(save_folder + '/' + clean_fn(response.url) + '.txt')
output = pypandoc.convert_file(save_folder + '/' + clean_fn(response.url), 'html', extra_args=['--extract-media=%s' % hash ])
txt_output = trafilatura.extract(response.text,include_links=True, deduplicate=True, include_images=True, include_formatting=True)
if txt_output:
codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(txt_output)

for ext in ['jpg','jpeg','gif','webp']:
if re.search(r'\.'+ext+'$', response.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', response.url)
if m:
print("saving to ", save_folder + '/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)

f_out = codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8')
this_output = trafilatura.extract(response.text,include_links=True, deduplicate=True, include_images=True, include_formatting=True)
if this_output:
f_out.write(this_output)
f_out.close()
links = response.css('a::attr(href)').getall()

# Follow each link and parse its contents
for link in links:
go = 1
full_link = response.urljoin(link)
print('++++++ trying ', full_link)
if not re.search(r'gavilan\.edu',full_link):
go = 0
print('--- not gav edu')
else:
if re.search(r'hhh\.gavilan\.edu',full_link):
pass
elif not re.search(r'^https?:\/\/www\.gavilan\.edu',full_link):
# need to add www to gavilan.edu
m = re.search(r'^(https?:\/\/)gavilan\.edu(\/.*)$',full_link)
if m:
full_link = m.group(1) + 'www.' + m.group(2)
for a in avoid:
if re.search(a,full_link):
go = 0
print('--- avoid ', a)
if go: yield scrapy.Request(full_link, callback=self.parse,
headers={"User-Agent": "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"})
else:
print("------ avoiding ", full_link)
# Instantiate a CrawlerProcess object
process = CrawlerProcess()

# Add the MySpider spider to the process
process.crawl(MySpider)

# Start the process
logging.basicConfig(level=logging.CRITICAL)
logging.getLogger('scrapy').propagate = False
logging.getLogger("trafilatura").setLevel(logging.CRITICAL)
logging.getLogger("trafilatura").propagate = False
logging.getLogger("pdfminer").setLevel(logging.CRITICAL)
logging.getLogger("pdfminer").propagate = False
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
logging.getLogger("urllib3").propagate = False
logging.basicConfig(level=logging.CRITICAL)
process.start()

def samples():
crawler = Crawler(links=[], domains=[], delay=20.0, sort=FIFO)
save_folder = 'cache/crawl'
clean_folder = 'cache/cleancrawl'
url = URL('http://www.clips.ua.ac.bemedia/pattern_schema.gif')
print url.mimetype in MIMETYPE_IMAGE
def clean_fn(s):
s = re.sub(r'[\s:]+','',s)
s = re.sub(r'\/','+',s)
return s

def format_html(html):
soup = bs4.BeautifulSoup(html, 'html.parser')
return soup.prettify()

#html = download('http://www.clips.ua.ac.be/', unicode=True)
s = URL('http://www.clips.ua.ac.be').download()
s = plaintext(s, keep={'h1':[], 'h2':[], 'strong':[], 'a':['href']})

# getting absolute urls
from pattern.web import URL, DOM, abs
def txt_clean_index():
files = os.listdir(save_folder)
line_freq = defaultdict(int)
url = URL('http://www.clips.ua.ac.be')
dom = DOM(url.download())
for link in dom('a'):
print abs(link.attributes.get('href',''), base=url.redirect or url.string)
# first pass
for f in files:
lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines()
for L in lines:
L = L.strip()
line_freq[L] += 1

# get pdfs
from pattern.web import URL, PDF
# second pass
for f in files:
print("\n\n",f)
lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines()
out = codecs.open(clean_folder + '/' + f,'w','utf-8')
for L in lines:
L = L.strip()
if L in line_freq and line_freq[L] > 3:
continue
print(L)
out.write(L + '\n')
out.close()

url = URL('http://www.clips.ua.ac.be/sites/default/files/ctrs-002_0.pdf')
pdf = PDF(url.download())
print pdf.string

def search_embeddings():
model = SentenceTransformer('all-MiniLM-L6-v2')
save_embeds = pickle.load( open( "cache/embeddings.p", "rb" ) )
columns = list(zip(*save_embeds))
files = columns[0]
sentences = columns[1]
embeddings = columns[2]

print(files[:20])
print(sentences[:20])
print(embeddings[:20])

s = ''
while s != 'q':
s = input("search or 'q' to quit: ")
if s == 'q':
return
query_embedding = model.encode(s)

# Compute the cosine similarity between the query embedding and the sentence embeddings
cosine_scores = util.cos_sim(query_embedding, embeddings)

# Sort the sentences by their cosine similarity to the query sentence
results = sorted(zip(sentences, cosine_scores, files), key=lambda x: x[1], reverse=True)

# Print the top 5 results
for i, (sentence, score, file) in enumerate(results[:5]):
print(f'Top {i+1}: {file} - {sentence} - (Score: {score})')

def create_embeddings():
model = SentenceTransformer('all-MiniLM-L6-v2')
files = os.listdir('cache/crawl')
output = []
save_embeds = [] # ['file','sentence','embedding']
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
lines = codecs.open('cache/crawl/' + f,'r','utf-8').readlines()
lines = [L.strip() for L in lines]
lines = [L for L in lines if L]
embeddings = model.encode(lines)

print("\n-----", f)

#Print the embeddings
for sentence, embedding in zip(lines, embeddings):
print("Sentence:", sentence)
#print("Embedding:", embedding)

save_embeds.append([f,sentence,embedding])
pickle.dump( save_embeds, open( "cache/embeddings.p", "wb" ) )

if __name__ == "__main__":

@@ -897,8 +1098,19 @@ if __name__ == "__main__":
# 5: ['import freshdesk content', freshdesk ],
6: ['download all a courses pages', grab_course_pages],
7: ['demo vector search', demo_vector_search],
8: ['crawl',crawl],
9: ['clean text index', txt_clean_index],
10: ['make web dir struct', manual_index],
11: ['create search embeddings', create_embeddings],
12: ['do a search', search_embeddings],
}

if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])

else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])

@@ -908,5 +1120,3 @@ if __name__ == "__main__":
# Call the function in the options dict
options[ int(resp)][1]()
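
The __main__ hunks above wire the new crawl and embedding helpers into content.py's numbered options menu and add a sys.argv shortcut so an option can be chosen non-interactively. Assuming the file is run directly as a script, usage would look roughly like:

python content.py          # no argument: print the numbered menu
python content.py 8        # run option 8 (crawl) without the menu
python content.py 11       # build the sentence-transformer embeddings (option 11)
python content.py 12       # vector search over the crawled pages (option 12)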

courses.py

@@ -1086,6 +1086,7 @@ def add_evals(section=0):
s = [ x.strip() for x in codecs.open('cache/sp23_eval_sections.csv','r').readlines()]
s = list(funcy.flatten(s))
s.sort()
print(s)
xyz = input('hit return to continue')
#c = getCoursesInTerm(168,0,1)

@@ -1306,7 +1307,6 @@ def set_ext_tools():
if __name__ == "__main__":
options = { 1: ['Cross check schedule with ztc responses',make_ztc_list] ,
30: ['List latestart classes', list_latestarts ],
2: ['Add announcements to homepage', change_course_ann_homepage],
3: ['Cross-list classes', xlist ],
4: ['List students who passed quiz X', get_quiz_passers],

@@ -1335,6 +1335,7 @@ if __name__ == "__main__":
27: ['Fine tune term dates and winter session', course_dates_terms],
28: ['Cross list a semester from file', semester_cross_lister],
29: ['Check all courses & their sections in semester', all_semester_course_sanity_check],
#30: ['List latestart classes', list_latestarts ],
# TODO wanted: group shell for each GP (guided pathway) as a basic student services gateway....
#
}

depricated.py

@@ -1807,3 +1807,95 @@ def freshdesk():
#### content.py
from pattern.web import plaintext, extension
from pattern.web import download
#from pattern import URL, MIMETYPE_IMAGE
from pattern.web import Crawler, DEPTH, FIFO, MIMETYPE_IMAGE, MIMETYPE_PDF

class GavCrawl(Crawler):
def visit(self, link, source=None):
print('visited:', repr(link.url), 'from:', link.referrer)
print(' ', link.url.mimetype)
#txt = plaintext(source, keep={'h1':[], 'h2':[], 'h3':[], 'h4':[], 'td':[], 'strong':[], 'b':[], 'a':['href'], 'img':['src'], 'ul':[], 'ol':[], 'li':[], 'dd':[], 'dt':[], 'i':[]})
#codecs.open(save_folder + '/' + mycleaner(clean_title(link.url)) + '.txt','w','utf-8').write(tomd.convert(txt))
codecs.open(save_folder + '/' + clean_fn(link.url) + '.txt','w','utf-8').write(trafilatura.extract(source,include_links=True, deduplicate=True, include_images=True, include_formatting=True))

def fail(self, link):
print('failed:', repr(link.url))
if re.search(r'\.pdf$', link.url):
m = re.search(r'\/([^\/]+\.pdf)$', link.url)
if m:
save_file = m.group(1)
print("saving to ", save_folder + '/' + save_file)
pdf_response = requests.get(link.url)
with open(save_folder + '/' + save_file, 'wb') as f:
f.write(pdf_response.content)
text = extract_text(save_folder + '/' + save_file)
#print(text)
codecs.open(save_folder + '/' + save_file + '.txt','w','utf-8').write(text)
else:
print("no match for pdf url: ", link.url)

for ext in ['jpg','jpeg','gif','webp']:
if re.search(r'\.'+ext+'$', link.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', link.url)
if m:
save_file = m.group(1)
print("saving to ", save_folder + '/' + save_file)
pdf_response = requests.get(link.url)
with open(save_folder + '/' + save_file, 'wb') as f:
f.write(pdf_response.content)
else:
print('no match for '+ext+' url: ', link.url)

def crawl2():
#p = GavCrawl(links=['http://www.gavilan.edu/'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
#p = GavCrawl(links=['https://gavilan.edu/finaid/2022-23DirectLoanApplication1.pdf'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
p = GavCrawl(links=['https://gavilan.curriqunet.com/catalog/iq/1826'], domains=['gavilan.edu', 'gavilan.curriqunet.com','www.boarddocs.com'], delay=0.75)
while not p.done:
try:
p.crawl(method=DEPTH, cached=False, throttle=0.76)
except Exception as e:
print("Exception: ", e)

def samples():
crawler = Crawler(links=[], domains=[], delay=20.0, sort=FIFO)
url = URL('http://www.clips.ua.ac.bemedia/pattern_schema.gif')
print(url.mimetype in MIMETYPE_IMAGE)

#html = download('http://www.clips.ua.ac.be/', unicode=True)
s = URL('http://www.clips.ua.ac.be').download()
s = plaintext(s, keep={'h1':[], 'h2':[], 'strong':[], 'a':['href']})

# getting absolute urls
from pattern.web import URL, DOM, abs
url = URL('http://www.clips.ua.ac.be')
dom = DOM(url.download())
for link in dom('a'):
print(abs(link.attributes.get('href',''), base=url.redirect or url.string))

# get pdfs
from pattern.web import URL, PDF
url = URL('http://www.clips.ua.ac.be/sites/default/files/ctrs-002_0.pdf')
pdf = PDF(url.download())
print(pdf.string)

gpt.py | 4

@@ -4,8 +4,8 @@ import openai
from canvas_secrets import openai_org, openai_api_key
openai.organization = "org-66WLoZQEtBrO42Z9S8rfd10M"
openai.api_key = "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"
openai.organization = openai_org
openai.api_key = openai_api_key
#print(openai.Model.list())
my_prompt = "Write a series of texts trying to sell a pen to a stranger."
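
This hunk replaces the hardcoded OpenAI credentials with values read from the repository's canvas_secrets module. A minimal sketch of the two entries gpt.py now expects; the names come from the import line above, the values are placeholders only:

# canvas_secrets.py (kept out of version control; placeholder values)
openai_org = "org-..."       # OpenAI organization id
openai_api_key = "sk-..."    # OpenAI API key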

interactive.py

@@ -1,4 +1,3 @@
import curses
import heapq, re, csv, os, shutil, datetime, urllib
import itertools, time, markdown, csv, json, os.path, webbrowser, threading
from functools import wraps

@@ -15,6 +14,20 @@ import localcache
from server import *
from canvas_secrets import flask_secretkey
from content import my_site
import socket
this_host = socket.gethostname()
print('\n\n' + this_host, '\n\n')
has_curses = 0
if this_host != 'ROGDESKTOP':
import curses
has_curses = 1
else:
print("Skipping curses stuff")
q = Queue()

@@ -25,7 +38,6 @@ PORT_NUMBER = 8080 # Maybe set this to 9000.
datafile = 'lambda.csv'
#writing_path = 'c:/users/peter/Nextcloud/Documents/writing/'
####

@@ -95,6 +107,15 @@ def flask_thread(q):
@app.route('/mirror')
def mirror():
return codecs.open('cache/crawl/index.html','r','utf-8').read()

@app.route('/mirror/<filename>')
def mirror_file(filename):
return markdown.markdown( codecs.open('cache/crawl/'+filename,'r','utf-8').read() ) + \
"<pre>" + codecs.open('cache/crawl/'+filename,'r','utf-8').read() + "</pre>"

@app.route('/clearscreens')
def clears():

@@ -166,6 +187,7 @@ def flask_thread(q):
@app.route('/x/writing/images/<fname>')
def writing_img(fname):
# TODO
img_path = "/media/hd2/peter_home/Documents/writing_img/"
print(img_path + fname + " - writing images folder")
img_ext = fname.split('.')[-1]

server.py | 45

@@ -1,5 +1,5 @@
import json, codecs, re, markdown, os, pypandoc, striprtf, sqlite3, random, urllib
import subprocess, html
import subprocess, html, time
from striprtf.striprtf import rtf_to_text
from flask import render_template, Response
from flask import send_from_directory

@@ -16,8 +16,33 @@ from localcache import arrange_data_for_web, depts_with_classcounts, dept_with_s
from yattag import Doc
import socket
this_host = socket.gethostname()
print('\n\n server host: ' + this_host, '\n\n')

LECPATH = "/media/hd2/peter_home_offload/lecture/"
host = 'http://192.168.1.6:5000'
host = 'http://192.168.1.6:5000'
news_path = '/media/hd2/peter_home/Documents/scripts/browser/'
writing_path = '/media/hd2/peter_home/Documents/writing/'
img_path = '/media/hd2/peter_home/Documents/writing_img/'
pics_path = '/media/hd2/peter_home/misc/'

if this_host == 'ROGDESKTOP':
LECPATH = "d:/peter_home_offload/lecture/"
host = 'http://192.168.1.7:5000'
news_path = 'd:/peter_home/Documents/scripts/browser/'
writing_path = 'd:/peter_home/Documents/writing/'
img_path = 'd:/peter_home/Documents/writing_img/'
pics_path = 'd:/peter_home/misc/'

import paho.mqtt.client as mqtt

@@ -55,8 +80,8 @@ def on_message(client, userdata, msg):
print(" %s mqtt msg: %s data: %s" % (now, msg.topic, msg.payload.decode()))
while(mqtt_offline):
if 0:
while(mqtt_offline):
try:
client = mqtt.Client()
client.on_connect = on_connect

@@ -114,18 +139,6 @@ def screenoff():
######
news_path = '/media/hd2/peter_home/Documents/scripts/browser/'
if platform.system() == 'Windows':
writing_path = 'c:/users/peter/Nextcloud/Documents/writing/'
else:
writing_path = '/media/hd2/peter_home/Documents/writing/'
img_path = '/media/hd2/peter_home/Documents/writing_img/'
if platform.system() == 'Windows':
pics_path = 'c:/users/peter/Nextcloud/misc/'
else:
pics_path = '/media/hd2/peter_home/misc/'
br = "<br />"
nl = "\n"