From d1111d5f27beccc68a323bce79ad45eecf8d3063 Mon Sep 17 00:00:00 2001
From: Peter Howell <peter.howell@gmail.com>
Date: Thu, 11 Sep 2025 18:02:26 +0000
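Subject: [PATCH] fix schedule sql and dean/gp

localcache.py: build_db_schedule now imports campus_dept_hierarchy from
schedules and fills S['gp'] from course_to_gp and S['dean'] from
area_to_dean[course_to_area[dept]] instead of the gp/dean lookups.

schedules.py: put the sample debug prints behind `if 0:` and label them
with the dict names; use raw strings for the regex patterns that
contain backslashes.

courses.py: add STAT to the STEM dept list; wrap the clustering and
teacher-count code in enrollment_helper in a triple-quoted string.

pipelines.py: move_to_folder also moves login.csv.

content.py: add a triple-quoted (non-executing) block of doc-parsing
code between get_doc and get_doc_generic.
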
---
 content.py    | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
 courses.py    |   7 ++--
 localcache.py |   6 ++-
 pipelines.py  |   2 +
 schedules.py  |  24 ++++++-----
 5 files changed, 137 insertions(+), 15 deletions(-)
diff --git a/content.py b/content.py
index c8b92b4..91eebb6 100644
--- a/content.py
+++ b/content.py
@@ -1854,6 +1854,119 @@ def get_doc(docid, bracket=1, verbose=0):
     #print(text)
     return text
     
+
+'''#text = 
+    result = []
+    last_type = ''
+    #answer_text = ''
+    answer = []
+    in_a_list = ''
+    
+    # Get all the images
+    for k,value in doc_objects.items():
+        tempout.write( "->" + k + "=" + json.dumps(value,indent=2) + "\n\n\n--\n\n")
+        fetched = fetch_doc_image(k,value)
+
+    list_stack = []
+    list_depth = 0
+    last_list_depth = 0
+    for value in doc_content:
+        tempout.write( json.dumps(value,indent=2) + "\n\n\n")
+        if verbose: print(json.dumps(value, sort_keys=True, indent=4))
+
+        tag_fxn = handle_para
+        if 'paragraph' in value:
+            this_text = ''
+            
+            # First we deal with if we're in a list.
+            if 'bullet' in value['paragraph']:
+                # either we're (1)starting a new list, (2)in one (do nothing), 
+                #  (3)starting a nested one, or (4)finished a nested one. 
+                lid = value['paragraph']['bullet']['listId']
+                if not list_stack:  # 1
+                    list_stack.append(lid)
+                else:
+                    if not lid == list_stack[0]:
+                        if not lid in list_stack:   # 3
+                            list_stack.append(lid)
+                        else:                       # 4
+                            x = list_stack.pop()
+                            while x != lid: list_stack.pop()
+            elif len(list_stack) > 0:    
+                #  current para isn't a bullet but we still have a list open.             
+                list_stack = []
+                
+                
+            list_depth = len(list_stack)
+            deeper = list_depth - last_list_depth
+            if deeper > 0:
+                answer.append("<ul>" * deeper)
+            elif deeper < 0:
+                deeper = -1 * deeper
+                answer.append("</ul>" * deeper)
+            if len(list_stack):
+                tag_fxn = handle_li
+            
+            # NOW the tag_fxn is either 'para' or 'li'... let's get the styling info next,
+            elements = value.get('paragraph').get('elements')
+            if 'paragraphStyle' in value.get('paragraph'):
+                style = value.get('paragraph').get('paragraphStyle')
+                if 'namedStyleType' in style:
+                    type = style['namedStyleType']
+            
+            # and FINALLY, the actual contents.
+            for elem in elements:
+                # text content
+                this_text += read_paragraph_element_2(elem,type)
+                
+                # image content
+                if 'inlineObjectElement' in elem:
+                    vpi = elem['inlineObjectElement']
+                    if 'inlineObjectId' in vpi:
+                        ii = vpi['inlineObjectId']
+                        if ii in img_lookup:
+                            img = img_lookup[ii]
+                            h = img_heights[ii]
+                            w = img_widths[ii]
+                            this_text += '<img src="doc_images/%s" width="%i" height="%i" />' % (img,w,h)
+                
+                
+            # Now for something tricky. Call an appropriate handler, based on:
+            #  (a) what is the paragraph style type?
+            #  (b) is it different from the prev one?
+            
+            if last_type=='NORMAL_TEXT' and type!=last_type:
+                if this_text.strip():
+                    result.append(handle_answer(answer))
+                answer = []
+                #answer_text = ''
+            
+            if type=='HEADING_2' and this_text.strip():
+                result.append( handle_sec(this_text) )
+                this_text = ''
+            elif type=='HEADING_3' and this_text.strip(): 
+                result.append(handle_question(this_text,bracket))
+                this_text = ''
+            else:
+                if this_text.lower().startswith('tags:'):
+                    tag_fxn = handle_tags
+                if this_text.lower().startswith('icons:'):
+                    tag_fxn = handle_icons
+                if this_text.strip():
+                    answer.append(tag_fxn(this_text))
+                this_text = ''
+            last_type = type
+            last_list_depth = list_depth
+                
+        elif 'table' in value:
+            pass
+    
+                                      
+    result.append(handle_answer(answer))
+    return json.dumps(result,indent=4)
+
+'''
+
 def get_doc_generic(docid, bracket=1, verbose=0):
     return get_doc(docid, bracket, verbose)
 
diff --git a/courses.py b/courses.py
index b33133f..ce6cb32 100644
--- a/courses.py
+++ b/courses.py
@@ -1054,7 +1054,7 @@ def enroll_stem_students_live():
 
 def enroll_stem_students_live_semester(the_term, do_removes=0):
     import localcache2
-    depts = "MATH BIO CHEM CSIS PHYS PSCI GEOG ASTR ECOL ENVS ENGR".split(" ")
+    depts = "MATH BIO CHEM CSIS PHYS PSCI GEOG ASTR ECOL ENVS ENGR STAT".split(" ")
     users_to_enroll = users_in_by_depts_live(depts, the_term)      # term id
     
     stem_enrollments = course_enrollment_with_faculty(stem_course_id)     # by user_id
@@ -2667,13 +2667,13 @@ def enrollment_helper():
     # Reset the index to move 'class_name' back to a column
     pivot_df2.reset_index(inplace=True)
 
-    kmeans = try_clustering(pivot_df2.copy())
+    '''kmeans = try_clustering(pivot_df2.copy())
 
     pivot_df2.insert(0, "Cluster", kmeans.labels_)
 
     print(pivot_df2)
     pivot_df2.to_csv('cache/section_and_mode_counts_history.csv')
-
+    
 
     # Group by teacher
     class_teacher_counts = df.groupby(['sem', 'code', 'teacher']).size().reset_index(name='class_teacher_count')
@@ -2696,6 +2696,7 @@ def enrollment_helper():
     #print(df)
     #df.to_csv('cache/section_and_mode_counts_history_clusters.csv')
     return kmeans
+    '''
 
 
 def unpublish_a_course(course_id=0):
diff --git a/localcache.py b/localcache.py
index c1f1874..50de697 100644
--- a/localcache.py
+++ b/localcache.py
@@ -1700,6 +1700,8 @@ ORDER BY u.sortablename;"""
 
 def build_db_schedule():
     # from the schedule json files
+    from schedules import campus_dept_hierarchy
+    (course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name) = campus_dept_hierarchy()
     target = r"\_sched\_expanded\.json"
     def finder(st):
         return re.search(target,st)
@@ -1717,8 +1719,8 @@ def build_db_schedule():
             parts = S['code'].split(' ')
             S['dept'] = parts[0]
             S['num'] = parts[1]
-            S['gp'] = gp[parts[0]]
-            S['dean'] = dean[parts[0]]
+            S['gp'] = course_to_gp[parts[0]]
+            S['dean'] = area_to_dean[course_to_area[parts[0]]]
             S['sem'] = F[0:4]
             S['sem_sis'] = short_to_sis(F[0:4])
             if not 'partofday' in S:
diff --git a/pipelines.py b/pipelines.py
index 159ee41..a604629 100644
--- a/pipelines.py
+++ b/pipelines.py
@@ -319,6 +319,8 @@ def move_to_folder(sem,year,folder,files):
         safe_move('cache/rosters/enrollments-%s.csv' % folder, 'cache/rosters/%s/enrollments.%s.csv' % (semester,now))
     if 'users.csv' in files:
         safe_move('cache/rosters/users-%s.csv' % folder,       'cache/rosters/%s/users.%s.csv' % (semester,now))
+    if 'login.csv' in files:
+        safe_move('cache/rosters/login-%s.csv' % folder,       'cache/rosters/%s/login.%s.csv' % (semester,now))
     
 
 
diff --git a/schedules.py b/schedules.py
index 1a64ba0..2e51c36 100644
--- a/schedules.py
+++ b/schedules.py
@@ -133,6 +133,8 @@ Science Technology Engineering and Mathematics,stem,jn,Jennifer Nari'''
 
     courses_df = pd.read_csv(StringIO(courses_csv))
     areas_df = pd.read_csv(StringIO(areas_csv))
+    #print("areas_df")
+    #print(areas_df)
 
     # Recreate gp dictionary
     course_to_gp = dict(zip(courses_df['Course'], courses_df['GP']))
@@ -151,11 +153,13 @@ Science Technology Engineering and Mathematics,stem,jn,Jennifer Nari'''
     dean_code_to_name = dict(zip(areas_df['DeanCode'], areas_df['DeanName']))
 
     # Print samples to verify
-    print("gp:", list(course_to_gp.items())[:5])
-    print("area:", list(course_to_area.items())[:5])
-    print("areas:", list(areacode_to_area.items())[:5])
-    print("dean:", list(course_to_dean.items())[:5])
-    print("dean_names:", list(dean_code_to_name.items())[:5])
+    if 0:
+        print("course_to_gp:", list(course_to_gp.items())[:5])
+        print("course_to_area:", list(course_to_area.items())[:5])
+        print("areacode_to_area:", list(areacode_to_area.items())[:5])
+        print("area_to_dean:", list(area_to_dean.items())[:5])
+        print("course_to_dean:", list(course_to_dean.items())[:5])
+        print("dean_code_to_name:", list(dean_code_to_name.items())[:5])
 
     return (course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name)
 
@@ -692,7 +696,7 @@ def row_has_data(r):      # helper
         return False
     if len(r.find_all('td')) > 2:
         return True
-    if re.search('Note\:', r.get_text()):
+    if re.search(r'Note\:', r.get_text()):
         return True
     return False
 
@@ -713,7 +717,7 @@ def row_text(r):   # helper
         d("\t"+r_text, end=" ")
     d('')
     
-    if len(arr)==1 and re.search('Note\:',arr[0]):
+    if len(arr)==1 and re.search(r'Note\:',arr[0]):
         note_line = clean_funny( arr[0] )
         note_line = re.sub(r'\n',' ', note_line)
         note_line = re.sub(r'"','', note_line)
@@ -726,11 +730,11 @@ def row_text(r):   # helper
     del arr[2]
     arr = [ re.sub(r'&nbsp;','',a) for a in arr]
     arr = [ re.sub(',','. ',a) for a in arr]
-    arr = [ re.sub('\(P\)','',a) for a in arr]
+    arr = [ re.sub(r'\(P\)','',a) for a in arr]
     arr = [ a.strip() for a in arr]
     #del arr[-1]
     r = ','.join(arr)+'\n'
-    r = re.sub('\n','',r)
+    r = re.sub(r'\n','',r)
     r = re.sub('add to worksheet','',r)
     d("Row Txt Fxn returns:  " + r + "\n\n")
 
@@ -1015,7 +1019,7 @@ def download_fresh_schedules():
 # clean leading, multiple, and trailing spaces
 def clean_name(t):
     t = t.strip()
-    t = re.sub('\s+', ' ', t)
+    t = re.sub(r'\s+', ' ', t)
     return t