From d1111d5f27beccc68a323bce79ad45eecf8d3063 Mon Sep 17 00:00:00 2001 From: Peter Howell Date: Thu, 11 Sep 2025 18:02:26 +0000 Subject: [PATCH] fix schedule sql and dean/gp --- content.py | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++ courses.py | 7 ++-- localcache.py | 6 ++- pipelines.py | 2 + schedules.py | 24 ++++++----- 5 files changed, 137 insertions(+), 15 deletions(-) diff --git a/content.py b/content.py index c8b92b4..91eebb6 100644 --- a/content.py +++ b/content.py @@ -1854,6 +1854,119 @@ def get_doc(docid, bracket=1, verbose=0): #print(text) return text + +'''#text = + result = [] + last_type = '' + #answer_text = '' + answer = [] + in_a_list = '' + + # Get all the images + for k,value in doc_objects.items(): + tempout.write( "->" + k + "=" + json.dumps(value,indent=2) + "\n\n\n--\n\n") + fetched = fetch_doc_image(k,value) + + list_stack = [] + list_depth = 0 + last_list_depth = 0 + for value in doc_content: + tempout.write( json.dumps(value,indent=2) + "\n\n\n") + if verbose: print(json.dumps(value, sort_keys=True, indent=4)) + + tag_fxn = handle_para + if 'paragraph' in value: + this_text = '' + + # First we deal with if we're in a list. + if 'bullet' in value['paragraph']: + # either we're (1)starting a new list, (2)in one (do nothing), + # (3)starting a nested one, or (4)finished a nested one. + lid = value['paragraph']['bullet']['listId'] + if not list_stack: # 1 + list_stack.append(lid) + else: + if not lid == list_stack[0]: + if not lid in list_stack: # 3 + list_stack.append(lid) + else: # 4 + x = list_stack.pop() + while x != lid: list_stack.pop() + elif len(list_stack) > 0: + # current para isn't a bullet but we still have a list open. + list_stack = [] + + + list_depth = len(list_stack) + deeper = list_depth - last_list_depth + if deeper > 0: + answer.append("" * deeper) + if len(list_stack): + tag_fxn = handle_li + + # NOW the tag_fxn is either 'para' or 'li'... let's get the styling info next, + elements = value.get('paragraph').get('elements') + if 'paragraphStyle' in value.get('paragraph'): + style = value.get('paragraph').get('paragraphStyle') + if 'namedStyleType' in style: + type = style['namedStyleType'] + + # and FINALLY, the actual contents. + for elem in elements: + # text content + this_text += read_paragraph_element_2(elem,type) + + # image content + if 'inlineObjectElement' in elem: + vpi = elem['inlineObjectElement'] + if 'inlineObjectId' in vpi: + ii = vpi['inlineObjectId'] + if ii in img_lookup: + img = img_lookup[ii] + h = img_heights[ii] + w = img_widths[ii] + this_text += '' % (img,w,h) + + + # Now for something tricky. Call an appropriate handler, based on: + # (a) what is the paragraph style type? + # (b) is it different from the prev one? + + if last_type=='NORMAL_TEXT' and type!=last_type: + if this_text.strip(): + result.append(handle_answer(answer)) + answer = [] + #answer_text = '' + + if type=='HEADING_2' and this_text.strip(): + result.append( handle_sec(this_text) ) + this_text = '' + elif type=='HEADING_3' and this_text.strip(): + result.append(handle_question(this_text,bracket)) + this_text = '' + else: + if this_text.lower().startswith('tags:'): + tag_fxn = handle_tags + if this_text.lower().startswith('icons:'): + tag_fxn = handle_icons + if this_text.strip(): + answer.append(tag_fxn(this_text)) + this_text = '' + last_type = type + last_list_depth = list_depth + + elif 'table' in value: + pass + + + result.append(handle_answer(answer)) + return json.dumps(result,indent=4) + +''' + def get_doc_generic(docid, bracket=1, verbose=0): return get_doc(docid, bracket, verbose) diff --git a/courses.py b/courses.py index b33133f..ce6cb32 100644 --- a/courses.py +++ b/courses.py @@ -1054,7 +1054,7 @@ def enroll_stem_students_live(): def enroll_stem_students_live_semester(the_term, do_removes=0): import localcache2 - depts = "MATH BIO CHEM CSIS PHYS PSCI GEOG ASTR ECOL ENVS ENGR".split(" ") + depts = "MATH BIO CHEM CSIS PHYS PSCI GEOG ASTR ECOL ENVS ENGR STAT".split(" ") users_to_enroll = users_in_by_depts_live(depts, the_term) # term id stem_enrollments = course_enrollment_with_faculty(stem_course_id) # by user_id @@ -2667,13 +2667,13 @@ def enrollment_helper(): # Reset the index to move 'class_name' back to a column pivot_df2.reset_index(inplace=True) - kmeans = try_clustering(pivot_df2.copy()) + '''kmeans = try_clustering(pivot_df2.copy()) pivot_df2.insert(0, "Cluster", kmeans.labels_) print(pivot_df2) pivot_df2.to_csv('cache/section_and_mode_counts_history.csv') - + # Group by teacher class_teacher_counts = df.groupby(['sem', 'code', 'teacher']).size().reset_index(name='class_teacher_count') @@ -2696,6 +2696,7 @@ def enrollment_helper(): #print(df) #df.to_csv('cache/section_and_mode_counts_history_clusters.csv') return kmeans + ''' def unpublish_a_course(course_id=0): diff --git a/localcache.py b/localcache.py index c1f1874..50de697 100644 --- a/localcache.py +++ b/localcache.py @@ -1700,6 +1700,8 @@ ORDER BY u.sortablename;""" def build_db_schedule(): # from the schedule json files + from schedules import campus_dept_hierarchy + (course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name) = campus_dept_hierarchy() target = r"\_sched\_expanded\.json" def finder(st): return re.search(target,st) @@ -1717,8 +1719,8 @@ def build_db_schedule(): parts = S['code'].split(' ') S['dept'] = parts[0] S['num'] = parts[1] - S['gp'] = gp[parts[0]] - S['dean'] = dean[parts[0]] + S['gp'] = course_to_gp[parts[0]] + S['dean'] = area_to_dean[course_to_area[parts[0]]] S['sem'] = F[0:4] S['sem_sis'] = short_to_sis(F[0:4]) if not 'partofday' in S: diff --git a/pipelines.py b/pipelines.py index 159ee41..a604629 100644 --- a/pipelines.py +++ b/pipelines.py @@ -319,6 +319,8 @@ def move_to_folder(sem,year,folder,files): safe_move('cache/rosters/enrollments-%s.csv' % folder, 'cache/rosters/%s/enrollments.%s.csv' % (semester,now)) if 'users.csv' in files: safe_move('cache/rosters/users-%s.csv' % folder, 'cache/rosters/%s/users.%s.csv' % (semester,now)) + if 'login.csv' in files: + safe_move('cache/rosters/login-%s.csv' % folder, 'cache/rosters/%s/login.%s.csv' % (semester,now)) diff --git a/schedules.py b/schedules.py index 1a64ba0..2e51c36 100644 --- a/schedules.py +++ b/schedules.py @@ -133,6 +133,8 @@ Science Technology Engineering and Mathematics,stem,jn,Jennifer Nari''' courses_df = pd.read_csv(StringIO(courses_csv)) areas_df = pd.read_csv(StringIO(areas_csv)) + #print("areas_df") + #print(areas_df) # Recreate gp dictionary course_to_gp = dict(zip(courses_df['Course'], courses_df['GP'])) @@ -151,11 +153,13 @@ Science Technology Engineering and Mathematics,stem,jn,Jennifer Nari''' dean_code_to_name = dict(zip(areas_df['DeanCode'], areas_df['DeanName'])) # Print samples to verify - print("gp:", list(course_to_gp.items())[:5]) - print("area:", list(course_to_area.items())[:5]) - print("areas:", list(areacode_to_area.items())[:5]) - print("dean:", list(course_to_dean.items())[:5]) - print("dean_names:", list(dean_code_to_name.items())[:5]) + if 0: + print("course_to_gp:", list(course_to_gp.items())[:5]) + print("course_to_area:", list(course_to_area.items())[:5]) + print("areacode_to_area:", list(areacode_to_area.items())[:5]) + print("area_to_dean:", list(area_to_dean.items())[:5]) + print("course_to_dean:", list(course_to_dean.items())[:5]) + print("dean_code_to_name:", list(dean_code_to_name.items())[:5]) return (course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name) @@ -692,7 +696,7 @@ def row_has_data(r): # helper return False if len(r.find_all('td')) > 2: return True - if re.search('Note\:', r.get_text()): + if re.search(r'Note\:', r.get_text()): return True return False @@ -713,7 +717,7 @@ def row_text(r): # helper d("\t"+r_text, end=" ") d('') - if len(arr)==1 and re.search('Note\:',arr[0]): + if len(arr)==1 and re.search(r'Note\:',arr[0]): note_line = clean_funny( arr[0] ) note_line = re.sub(r'\n',' ', note_line) note_line = re.sub(r'"','', note_line) @@ -726,11 +730,11 @@ def row_text(r): # helper del arr[2] arr = [ re.sub(r' ','',a) for a in arr] arr = [ re.sub(',','. ',a) for a in arr] - arr = [ re.sub('\(P\)','',a) for a in arr] + arr = [ re.sub(r'\(P\)','',a) for a in arr] arr = [ a.strip() for a in arr] #del arr[-1] r = ','.join(arr)+'\n' - r = re.sub('\n','',r) + r = re.sub(r'\n','',r) r = re.sub('add to worksheet','',r) d("Row Txt Fxn returns: " + r + "\n\n") @@ -1015,7 +1019,7 @@ def download_fresh_schedules(): # clean leading, multiple, and trailing spaces def clean_name(t): t = t.strip() - t = re.sub('\s+', ' ', t) + t = re.sub(r'\s+', ' ', t) return t