From a01ef8084d0b923b1da3c6177a3d851d9d923b5e Mon Sep 17 00:00:00 2001
From: Peter Howell
Date: Tue, 7 Oct 2025 19:08:00 +0000
Subject: [PATCH] updates

---
 content.py   | 473 +++++++++++++++++++++++++++++++++++++++------------
 courses.py   | 157 ++++++++++++++---
 schedules.py |   2 +-
 tasks.py     |  23 +--
 4 files changed, 502 insertions(+), 153 deletions(-)

diff --git a/content.py b/content.py
index 91eebb6..a2587f0 100644
--- a/content.py
+++ b/content.py
@@ -6,12 +6,14 @@ from __future__ import annotations
 #import html2markdown as h2m
 from typing import ItemsView
-import requests, codecs, os, re, json, sys, pypandoc
+import requests, codecs, os, re, json, sys, pypandoc, mimetypes, hashlib
 from checker import safe_html
 from pipelines import header, fetch, url
 from util import clean_title, to_file_friendly
+from urllib.parse import quote, urljoin, urlparse
 from bs4 import BeautifulSoup as bs
 from html.parser import HTMLParser
+from datetime import datetime, timezone
 
 pagebreak = '\n\n\n\n<div style="page-break-after: always;"></div>\n\n'
@@ -42,11 +44,10 @@ def test_forums(id=0):
 
     for m in modules:
         items[running_index] = '<h1>%s</h1>%s\n' % ( m['name'], pagebreak )
         running_index += 1
-
         mod_items = fetch('/api/v1/courses/' + str(id) + '/modules/'+str(m['id'])+'/items', verbose)
-
+
         for I in mod_items:
-
+
             if I['type'] in ['SubHeader', 'Page', 'Quiz', 'Discussion', 'ExternalUrl' ] or 'content_id' in I:
 
                 running_index += 1
@@ -99,7 +100,7 @@ def write_message(fd, view, participants):
             write_message(fd, r, participants)
     fd.write("\n")
 
-def extract_forums(id, course_folder, item_id_to_index, verbose=0):
+def extract_forums(id, course_folder, item_id_to_index, verbose=0, discussion_link_map=None):
     ###
     ### FORUMS
     ###
@@ -109,7 +110,6 @@ def extract_forums(id, course_folder, item_id_to_index, verbose=0):
     index = []
     forum_f = course_folder + '/forums'
     headered = 0
-    image_count = 0
     print("\nFORUMS")
     try:
         os.mkdir(forum_f)
@@ -133,6 +133,8 @@ def extract_forums(id, course_folder, item_id_to_index, verbose=0):
             fd.write(message + "\n\n")
             for v in t2['view']:
                 write_message(fd, v, participants)
+        if discussion_link_map is not None:
+            discussion_link_map[p['id']] = f"forums/{easier_filename}.html"
         if not headered:
             index.append( ('<h2>Discussion Forums</h2>') )
             headered = 1
         index.append( ( 'forums/' + easier_filename + '.html', p['title'] ) )
@@ -196,12 +198,18 @@ def course_download(id=""):
     for x in range(9000):
         items.append(0)
     video_link_list = []
+    page_local_map = {}
+    assignment_local_map = {}
+    file_local_map = {}
+    discussion_local_map = {}
+    module_details = []
 
     for m in modules:
         items[running_index] = '<h1>%s</h1>%s\n' % ( m['name'], pagebreak )
         running_index += 1
-
+
         mod_items = fetch('/api/v1/courses/' + str(id) + '/modules/'+str(m['id'])+'/items', verbose)
+        module_entry = {'name': m['name'], 'items': []}
 
         for I in mod_items:
@@ -214,13 +222,13 @@ def course_download(id=""):
             if I['type'] == 'Page':
                 item_id_to_index[ I['page_url'] ] = running_index
-
+
             if I['type'] == 'Quiz':
                 item_id_to_index[ I['content_id'] ] = running_index
-
+
             if I['type'] == 'Discussion':
                 item_id_to_index[ I['content_id'] ] = running_index
-
+
             if I['type'] == 'ExternalUrl':
                 items[running_index] = "<a href='%s'>%s</a>\n\n" % (I['external_url'], I['title'])
@@ -238,6 +246,17 @@ def course_download(id=""):
             # I['page_url']
             # I['type']
            # I['published']
+            module_entry['items'].append({
+                'type': I.get('type'),
+                'title': I.get('title'),
+                'page_url': I.get('page_url'),
+                'content_id': I.get('content_id'),
+                'html_url': I.get('html_url'),
+                'url': I.get('url'),
+                'external_url': I.get('external_url'),
+                'id': I.get('id')
+            })
+        module_details.append(module_entry)
 
     # assignments and files have content_id, pages have page_url
     course_folder = '../course_temps/course_'+id
@@ -281,14 +300,95 @@ def course_download(id=""):
         if not headered:
             index.append( ('<h2>Files</h2>') )
             headered = 1
-        index.append( ('files/' + f['filename'], f['filename']) )
+        relative_path = 'files/' + f['filename']
+        index.append( (relative_path, f['filename']) )
+        file_local_map[f['id']] = relative_path
 
     ###
     ### PAGES
     ###
     pages_f = course_folder + '/pages'
     headered = 0
-    image_count = 0
+    images_f = os.path.join(pages_f, 'images')
+    try:
+        os.makedirs(images_f)
+    except FileExistsError:
+        pass
+    except Exception as e:
+        print(f" * Unable to ensure images folder: {e}")
+
+    image_map = {}
+    image_counter = 0
+
+    def ensure_local_image(src, canvas_override=None):
+        nonlocal image_counter
+        if not src:
+            return (None, None)
+        original_src = src
+        if src.startswith('data:'):
+            return (None, None)
+        if src.startswith('images/'):
+            full_rel = f"pages/{src}"
+            image_map.setdefault(original_src, (src, full_rel))
+            return image_map[original_src], canvas_override
+        if src.startswith('pages/'):
+            page_rel = src.split('pages/', 1)[-1]
+            page_rel = page_rel if page_rel else src
+            full_rel = src
+            image_map.setdefault(original_src, (page_rel, full_rel))
+            return image_map[original_src], canvas_override
+
+        mapped = image_map.get(original_src)
+        if mapped:
+            return mapped, canvas_override or original_src
+
+        absolute_src = src
+        if not absolute_src.lower().startswith('http'):
+            absolute_src = urljoin(url, absolute_src)
+
+        mapped = image_map.get(absolute_src)
+        if mapped:
+            image_map[original_src] = mapped
+            return mapped, canvas_override or absolute_src
+
+        try:
+            response = requests.get(absolute_src, headers=header, stream=True, timeout=30)
+            response.raise_for_status()
+        except Exception as e:
+            d(f" * error downloading image {absolute_src}: {e}")
+            return (None, canvas_override or absolute_src)
+
+        content_type = response.headers.get('content-type', '').split(';')[0]
+        ext = ''
+        if content_type:
+            guessed = mimetypes.guess_extension(content_type)
+            if guessed:
+                ext = guessed
+        if not ext:
+            ext = os.path.splitext(urlparse(absolute_src).path)[1]
+        if not ext:
+            ext = '.bin'
+        ext = ext.lstrip('.')
+
+        local_name = f"img_{image_counter}.{ext}"
+        image_counter += 1
+        local_path = os.path.join(images_f, local_name)
+
+        try:
+            with open(local_path, 'wb') as fd:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        fd.write(chunk)
+        except Exception as e:
+            d(f" * error saving image {absolute_src}: {e}")
+            return (None, canvas_override or absolute_src)
+
+        page_rel = f"images/{local_name}"
+        full_rel = f"pages/{page_rel}"
+        image_map[original_src] = (page_rel, full_rel)
+        if absolute_src != original_src:
+            image_map[absolute_src] = image_map[original_src]
+        return image_map[original_src], canvas_override or absolute_src
     print("\nPAGES")
     try:
         os.mkdir(pages_f)
@@ -296,6 +396,12 @@ def course_download(id=""):
         print(" * Pages folder already exists.")
 
+    page_manifest = {
+        'course_id': str(id),
+        'generated_at': datetime.now(timezone.utc).isoformat(),
+        'pages': {}
+    }
+
     pages = fetch('/api/v1/courses/' + str(id) + '/pages', verbose)
     for p in pages:
         d(' - %s' % p['title'])
@@ -312,119 +418,111 @@ def course_download(id=""):
         index.append( ( 'pages/' + easier_filename + '.html', p['title'] ) )
 
-        if os.path.exists(this_page_filename):
+        t2 = {'title': p['title']}
+        soup_infolder = None
+        soup_in_main = None
+        page_local_map[p['url']] = f"pages/{easier_filename}.html"
+        this_page_content = None
+
+        fetched_page = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
+        if fetched_page and fetched_page.get('body'):
+            t2 = fetched_page
+            soup_infolder = bs(t2['body'], features="lxml")
+            soup_in_main = bs(t2['body'], features="lxml")
+        elif os.path.exists(this_page_filename):
             d(" - already downloaded %s" % this_page_filename)
             this_page_content = codecs.open(this_page_filename,'r','utf-8').read()
-        #elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
-        #elif re.search(r'eis-prod',p['url']):
-        #    d(' * skipping file behind passwords')
-        else:
-            t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
-            if t2 and 'body' in t2 and t2['body']:
-                soup_infolder = bs(t2['body'],features="lxml")
-                soup_in_main = bs(t2['body'],features="lxml")
-                a_links = soup_infolder.find_all('a')
-                for A in a_links:
-                    href = A.get('href')
+            soup_infolder = bs(this_page_content, features="lxml")
+            soup_in_main = bs(this_page_content, features="lxml")
+        else:
+            d(' * nothing returned or bad fetch')
+            continue
 
-                    if href and re.search( r'youtu',href): video_link_list.append( (A.get('href'), A.text, 'pages/'+easier_filename + ".html") )
-
-                # Images
-                page_images = soup_infolder.find_all('img')
-                page_image_paths = {}
-                for I in page_images:
-                    src = I.get('src')
-                    if src:
-                        d(' - %s' % src)
-                        try:
-                            r = requests.get(src,headers=header, stream=True)
-                            mytype = r.headers['content-type']
-                            #print("Response is type: " + str(mytype))
-                            r_parts = mytype.split("/")
-                            ending = r_parts[-1]
+        page_title = (t2.get('title') or p['title']).strip() if isinstance(t2, dict) else p['title']
 
-                            if ending=='jpeg': ending = "jpg"
+        def strip_leading_heading(soup):
+            if not soup:
+                return
+            first_heading = soup.find(['h1', 'h2'])
+            if first_heading and first_heading.get_text(strip=True) == page_title:
+                first_heading.decompose()
 
-                            img_full_path = f"{pages_f}/{str(image_count)}.{ending}"
-                            local_src = f"{str(image_count)}.{ending}"
-                            page_image_paths[src] = f"pages/{local_src}"
-                            I['src'] = local_src
+        strip_leading_heading(soup_infolder)
+        strip_leading_heading(soup_in_main)
 
-                            with open(img_full_path, 'wb') as fd:
-                                for chunk in r.iter_content(chunk_size=128):
-                                    fd.write(chunk)
-                            image_count += 1
-                        except Exception as e:
-                            d( ' * Error downloading page image, %s' % str(e) )
-
-                # Repeat for version for main file
-                page_main_images = soup_in_main.find_all('img')
-                for I in page_main_images:
-                    src = I.get('src')
-                    if src:
-                        I['src'] = page_image_paths[src]
+        a_links = soup_infolder.find_all('a')
+        for A in a_links:
+            href = A.get('href')
+            if href and re.search(r'youtu', href):
+                video_link_list.append((A.get('href'), A.text, 'pages/' + easier_filename + ".html"))
 
+        # Images -> ensure local copies
+        for img in soup_infolder.find_all('img'):
+            mapping, canvas_src = ensure_local_image(img.get('src'), img.get('data-canvas-src'))
+            if mapping:
+                img['src'] = mapping[0]
+            if canvas_src:
+                img['data-canvas-src'] = canvas_src
 
-                # STUDIO VIDEOS
-                # Regex pattern to match "custom_arc_media_id%3D" and capture everything
-                # until the next '&' or end of string
-                pattern = r"custom_arc_media_id%3D([^&]+)"
-                found_ids = []
+        for img in soup_in_main.find_all('img'):
+            mapping, canvas_src = ensure_local_image(img.get('src'), img.get('data-canvas-src'))
+            if mapping:
+                img['src'] = mapping[1]
+            if canvas_src:
+                img['data-canvas-src'] = canvas_src
 
-                replacement_tag = ''''''
-
-                # Iterate over all