updates

This commit is contained in:
parent 776ff0a45b
commit a01ef8084d

content.py (415 lines changed)
@@ -6,12 +6,14 @@ from __future__ import annotations
#import html2markdown as h2m

from typing import ItemsView
import requests, codecs, os, re, json, sys, pypandoc
import requests, codecs, os, re, json, sys, pypandoc, mimetypes, hashlib
from checker import safe_html
from pipelines import header, fetch, url
from util import clean_title, to_file_friendly
from urllib.parse import quote, urljoin, urlparse
from bs4 import BeautifulSoup as bs
from html.parser import HTMLParser
from datetime import datetime, timezone


pagebreak = '\n\n<!-- BREAK -->\n\n<div style="page-break-before: always;"></div>\n\n'
@@ -42,7 +44,6 @@ def test_forums(id=0):
    for m in modules:
        items[running_index] = '<h2>%s</h2>%s\n' % ( m['name'], pagebreak )
        running_index += 1

        mod_items = fetch('/api/v1/courses/' + str(id) + '/modules/'+str(m['id'])+'/items', verbose)

        for I in mod_items:
@@ -99,7 +100,7 @@ def write_message(fd, view, participants):
            write_message(fd, r, participants)
        fd.write("</blockquote>\n")

def extract_forums(id, course_folder, item_id_to_index, verbose=0):
def extract_forums(id, course_folder, item_id_to_index, verbose=0, discussion_link_map=None):
    ###
    ### FORUMS
    ###
@@ -109,7 +110,6 @@ def extract_forums(id, course_folder, item_id_to_index, verbose=0):
    index = []
    forum_f = course_folder + '/forums'
    headered = 0
    image_count = 0
    print("\nFORUMS")
    try:
        os.mkdir(forum_f)
@@ -133,6 +133,8 @@ def extract_forums(id, course_folder, item_id_to_index, verbose=0):
        fd.write(message + "\n\n")
        for v in t2['view']:
            write_message(fd, v, participants)
        if discussion_link_map is not None:
            discussion_link_map[p['id']] = f"forums/{easier_filename}.html"
        if not headered: index.append( ('<br /><b>Discussion Forums</b><br />') )
        headered = 1
        index.append( ( 'forums/' + easier_filename + '.html', p['title'] ) )
@@ -196,12 +198,18 @@ def course_download(id=""):
    for x in range(9000): items.append(0)

    video_link_list = []
    page_local_map = {}
    assignment_local_map = {}
    file_local_map = {}
    discussion_local_map = {}
    module_details = []

    for m in modules:
        items[running_index] = '<h2>%s</h2>%s\n' % ( m['name'], pagebreak )
        running_index += 1

        mod_items = fetch('/api/v1/courses/' + str(id) + '/modules/'+str(m['id'])+'/items', verbose)
        module_entry = {'name': m['name'], 'items': []}

        for I in mod_items:
@@ -238,6 +246,17 @@ def course_download(id=""):
            # I['page_url']
            # I['type']
            # I['published']
            module_entry['items'].append({
                'type': I.get('type'),
                'title': I.get('title'),
                'page_url': I.get('page_url'),
                'content_id': I.get('content_id'),
                'html_url': I.get('html_url'),
                'url': I.get('url'),
                'external_url': I.get('external_url'),
                'id': I.get('id')
            })
        module_details.append(module_entry)
            # assignments and files have content_id, pages have page_url

    course_folder = '../course_temps/course_'+id
@@ -281,14 +300,95 @@ def course_download(id=""):
            if not headered:
                index.append( ('<br /><b>Files</b><br />') )
                headered = 1
            index.append( ('files/' + f['filename'], f['filename']) )
            relative_path = 'files/' + f['filename']
            index.append( (relative_path, f['filename']) )
            file_local_map[f['id']] = relative_path

    ###
    ### PAGES
    ###
    pages_f = course_folder + '/pages'
    headered = 0
    image_count = 0
    images_f = os.path.join(pages_f, 'images')
    try:
        os.makedirs(images_f)
    except FileExistsError:
        pass
    except Exception as e:
        print(f" * Unable to ensure images folder: {e}")

    image_map = {}
    image_counter = 0

    def ensure_local_image(src, canvas_override=None):
        nonlocal image_counter
        if not src:
            return (None, None)
        original_src = src
        if src.startswith('data:'):
            return (None, None)
        if src.startswith('images/'):
            full_rel = f"pages/{src}"
            image_map.setdefault(original_src, (src, full_rel))
            return image_map[original_src], canvas_override
        if src.startswith('pages/'):
            page_rel = src.split('pages/', 1)[-1]
            page_rel = page_rel if page_rel else src
            full_rel = src
            image_map.setdefault(original_src, (page_rel, full_rel))
            return image_map[original_src], canvas_override

        mapped = image_map.get(original_src)
        if mapped:
            return mapped, canvas_override or original_src

        absolute_src = src
        if not absolute_src.lower().startswith('http'):
            absolute_src = urljoin(url, absolute_src)

        mapped = image_map.get(absolute_src)
        if mapped:
            image_map[original_src] = mapped
            return mapped, canvas_override or absolute_src

        try:
            response = requests.get(absolute_src, headers=header, stream=True, timeout=30)
            response.raise_for_status()
        except Exception as e:
            d(f" * error downloading image {absolute_src}: {e}")
            return (None, canvas_override or absolute_src)

        content_type = response.headers.get('content-type', '').split(';')[0]
        ext = ''
        if content_type:
            guessed = mimetypes.guess_extension(content_type)
            if guessed:
                ext = guessed
        if not ext:
            ext = os.path.splitext(urlparse(absolute_src).path)[1]
        if not ext:
            ext = '.bin'
        ext = ext.lstrip('.')

        local_name = f"img_{image_counter}.{ext}"
        image_counter += 1
        local_path = os.path.join(images_f, local_name)

        try:
            with open(local_path, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        fd.write(chunk)
        except Exception as e:
            d(f" * error saving image {absolute_src}: {e}")
            return (None, canvas_override or absolute_src)

        page_rel = f"images/{local_name}"
        full_rel = f"pages/{page_rel}"
        image_map[original_src] = (page_rel, full_rel)
        if absolute_src != original_src:
            image_map[absolute_src] = image_map[original_src]
        return image_map[original_src], canvas_override or absolute_src

    print("\nPAGES")
    try:
        os.mkdir(pages_f)
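For orientation, a minimal sketch of the contract the new helper exposes (not part of the commit; the sample URL and filenames are made up): ensure_local_image returns a (page_relative, course_relative) pair plus the original Canvas URL to stash in data-canvas-src, and repeated calls with the same src reuse the cached image_map entry instead of re-downloading.

    # hypothetical call, assuming the page HTML referenced an absolute Canvas URL
    mapping, canvas_src = ensure_local_image('https://example.instructure.com/files/1/preview', None)
    if mapping:
        page_rel, full_rel = mapping   # e.g. ('images/img_0.png', 'pages/images/img_0.png')
        # page_rel is used inside pages/*.html, full_rel inside the course-level views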
@@ -296,6 +396,12 @@ def course_download(id=""):
        print(" * Pages folder already exists.")


    page_manifest = {
        'course_id': str(id),
        'generated_at': datetime.now(timezone.utc).isoformat(),
        'pages': {}
    }

    pages = fetch('/api/v1/courses/' + str(id) + '/pages', verbose)
    for p in pages:
        d(' - %s' % p['title'])
@@ -312,118 +418,110 @@ def course_download(id=""):
        index.append( ( 'pages/' + easier_filename + '.html', p['title'] ) )


        if os.path.exists(this_page_filename):
            d(" - already downloaded %s" % this_page_filename)
            this_page_content = codecs.open(this_page_filename,'r','utf-8').read()
        #elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
        #elif re.search(r'eis-prod',p['url']):
        #    d(' * skipping file behind passwords')
        else:
            t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
            if t2 and 'body' in t2 and t2['body']:
        t2 = {'title': p['title']}
        soup_infolder = None
        soup_in_main = None
        page_local_map[p['url']] = f"pages/{easier_filename}.html"
        this_page_content = None

        fetched_page = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
        if fetched_page and fetched_page.get('body'):
            t2 = fetched_page
            soup_infolder = bs(t2['body'], features="lxml")
            soup_in_main = bs(t2['body'], features="lxml")
        elif os.path.exists(this_page_filename):
            d(" - already downloaded %s" % this_page_filename)
            this_page_content = codecs.open(this_page_filename,'r','utf-8').read()
            soup_infolder = bs(this_page_content, features="lxml")
            soup_in_main = bs(this_page_content, features="lxml")
        else:
            d(' * nothing returned or bad fetch')
            continue

        page_title = (t2.get('title') or p['title']).strip() if isinstance(t2, dict) else p['title']

        def strip_leading_heading(soup):
            if not soup:
                return
            first_heading = soup.find(['h1', 'h2'])
            if first_heading and first_heading.get_text(strip=True) == page_title:
                first_heading.decompose()

        strip_leading_heading(soup_infolder)
        strip_leading_heading(soup_in_main)

        a_links = soup_infolder.find_all('a')
        for A in a_links:
            href = A.get('href')

            if href and re.search(r'youtu', href):
                video_link_list.append((A.get('href'), A.text, 'pages/' + easier_filename + ".html"))

        # Images
        page_images = soup_infolder.find_all('img')
        page_image_paths = {}
        for I in page_images:
            src = I.get('src')
            if src:
                d(' - %s' % src)
                try:
                    r = requests.get(src,headers=header, stream=True)
                    mytype = r.headers['content-type']
                    #print("Response is type: " + str(mytype))
                    r_parts = mytype.split("/")
                    ending = r_parts[-1]

                    if ending=='jpeg': ending = "jpg"

                    img_full_path = f"{pages_f}/{str(image_count)}.{ending}"
                    local_src = f"{str(image_count)}.{ending}"
                    page_image_paths[src] = f"pages/{local_src}"
                    I['src'] = local_src

                    with open(img_full_path, 'wb') as fd:
                        for chunk in r.iter_content(chunk_size=128):
                            fd.write(chunk)
                    image_count += 1
                except Exception as e:
                    d( ' * Error downloading page image, %s' % str(e) )

        # Repeat for version for main file
        page_main_images = soup_in_main.find_all('img')
        for I in page_main_images:
            src = I.get('src')
            if src:
                I['src'] = page_image_paths[src]
        # Images -> ensure local copies
        for img in soup_infolder.find_all('img'):
            mapping, canvas_src = ensure_local_image(img.get('src'), img.get('data-canvas-src'))
            if mapping:
                img['src'] = mapping[0]
            if canvas_src:
                img['data-canvas-src'] = canvas_src

        for img in soup_in_main.find_all('img'):
            mapping, canvas_src = ensure_local_image(img.get('src'), img.get('data-canvas-src'))
            if mapping:
                img['src'] = mapping[1]
            if canvas_src:
                img['data-canvas-src'] = canvas_src

        # STUDIO VIDEOS
        # Regex pattern to match "custom_arc_media_id%3D" and capture everything
        # until the next '&' or end of string
        pattern = r"custom_arc_media_id%3D([^&]+)"
        found_ids = []

        replacement_tag = '''<video width="480" height="320" controls="controls"><source src="http://serverIP_or_domain/location_of_video.mp4" type="video/mp4"></video>'''

        # Iterate over all <iframe> tags
        for iframe in soup_infolder.find_all("iframe"):
            src = iframe.get("src")
            if src:
                # Search for the pattern in the src
            if not src:
                continue
            match = re.search(pattern, src)
            if match:
                found_ids.append(match.group(1))
                videos_log.write(f"page: {p['url']} arc id: {match.group(1)}\n")
                videos_log.flush()
            videos_log.write(f"page: {p['url']} iframe src: {src}\n")
            videos_log.flush()

            match2 = re.search('instructuremedia\.com', src)
            if match2:
                iframe_response = requests.get(src)
                if iframe_response.status_code != 200:
                    print(f"Failed to retrieve iframe content from: {src}")
            if 'instructuremedia.com' in src:
                try:
                    iframe_response = requests.get(src, timeout=15)
                    iframe_response.raise_for_status()
                except Exception as e:
                    print(f"Failed to retrieve iframe content from: {src} ({e})")
                    continue
                videos_log.write(f"successfully fetched {src}\n")
                videos_log.flush()

                # Parse the iframe's HTML
                iframe_soup = bs(iframe_response.text, 'html.parser')

                video_tag = iframe_soup.find('video')
                if video_tag:
                    # Find the <source> tag(s) within the video
                    source_tags = video_tag.find_all('source')
                    # Extract each 'src' attribute
                    for source_tag in source_tags:
                        print("Video Source found:", source_tag.get('src'))
                for source_tag in iframe_soup.find_all('source'):
                    videos_log.write(f"page: {p['url']} video src: {source_tag.get('src')}\n")
                    videos_log.flush()


        # WRITE out page
        # WRITE out page (always refresh to ensure local paths)
        try:
            this_page_content = f"<h2>{t2['title']}</h2>\n{soup_infolder.prettify()}"
            with codecs.open(this_page_filename, 'w','utf-8') as fd:
                fd.write(this_page_content)
        except:
            d(' * problem writing page content')
        ## TODO include linked pages even if they aren't in module
        else:
            d(' * nothing returned or bad fetch')
        except Exception as e:
            d(f' * problem writing page content: {e}')

        # write to running log of content in order of module
        if p and p['url'] in item_id_to_index:
        if p and p['url'] in item_id_to_index and soup_in_main:
            items[item_id_to_index[p['url']]] = f"<h2>{t2['title']}</h2>\n{soup_in_main.prettify()}\n{pagebreak}"
        else:
            d(' -- This page didnt seem to be in the modules list.')

        if this_page_content is not None:
            page_hash = hashlib.sha256(this_page_content.encode('utf-8')).hexdigest()
            page_manifest['pages'][p['url']] = {
                'title': t2.get('title') or p['title'],
                'filename': f"pages/{easier_filename}.html",
                'hash': page_hash
            }

    manifest_path = os.path.join(course_folder, 'pages_manifest.json')
    with codecs.open(manifest_path, 'w', 'utf-8') as manifest_file:
        manifest_file.write(json.dumps(page_manifest, indent=2))

    ###
    ### ASSIGNMENTS
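The Studio-video handling above keys off the LTI launch URL embedded in each iframe src, where the media id appears URL-encoded as custom_arc_media_id%3D<id>. A standalone sketch of that extraction (the sample URL is invented):

    import re

    pattern = r"custom_arc_media_id%3D([^&]+)"
    sample_src = ("https://example.instructure.com/courses/1/external_tools/retrieve"
                  "?display=borderless&url=https%3A%2F%2Fexample.com%2Flti%3Fcustom_arc_media_id%3D12345-abcd&foo=1")
    m = re.search(pattern, sample_src)
    if m:
        print(m.group(1))   # -> 12345-abcd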
@@ -446,6 +544,7 @@ def course_download(id=""):
        try:
            friendlyfile = to_file_friendly(p['name'])
            this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html'
            assignment_local_map[p['id']] = 'assignments/' + str(p['id'])+"_"+ friendlyfile + '.html'
            if os.path.exists(this_assmt_filename):
                d(" - already downloaded %s" % this_assmt_filename)
                this_assmt_content = open(this_assmt_filename,'r').read()
@@ -469,7 +568,7 @@ def course_download(id=""):
    ### FORUMS
    ###

    index.extend( extract_forums(id, course_folder, item_id_to_index, verbose) )
    index.extend( extract_forums(id, course_folder, item_id_to_index, verbose, discussion_local_map) )

    """

@@ -506,18 +605,60 @@ def course_download(id=""):


    # Create index page of all gathered items
    index.insert(0, ('modules.html', 'Modules Overview'))

    myindex = codecs.open(course_folder+'/index.html','w','utf-8')
    for i in index:
        if len(i)==2: myindex.write("<a href='"+i[0]+"'>"+i[1]+"</a><br />\n")
        else: myindex.write(i)
        if len(i)==2:
            myindex.write(f"<a href='{i[0]}'>{i[1]}</a><br />\n")
        else:
            myindex.write(i)
    myindex.close()

    def resolve_module_item_link(item):
        item_type = (item.get('type') or '').lower()
        if item_type == 'page':
            return page_local_map.get(item.get('page_url')) or item.get('html_url')
        if item_type == 'assignment':
            return assignment_local_map.get(item.get('content_id')) or item.get('html_url')
        if item_type == 'discussion':
            return discussion_local_map.get(item.get('content_id')) or item.get('html_url')
        if item_type == 'file':
            return file_local_map.get(item.get('content_id')) or item.get('html_url')
        if item_type == 'externalurl':
            return item.get('external_url')
        if item_type in ('externaltool', 'quiz', 'assignmentquiz', 'attendance'):
            return item.get('html_url') or item.get('url')
        if item_type == 'subheader':
            return None
        return item.get('html_url') or item.get('url')

    module_index_path = course_folder + '/modules.html'
    with codecs.open(module_index_path, 'w', 'utf-8') as module_index:
        module_index.write('<html><body>\n')
        module_index.write(f"<h1>{courseinfo['name']} - Modules</h1>\n")
        for module in module_details:
            module_index.write(f"<h2>{module['name']}</h2>\n<ul>\n")
            for item in module['items']:
                title = item.get('title') or '(Untitled)'
                item_type = item.get('type') or 'Item'
                link = resolve_module_item_link(item)
                if item_type.lower() == 'subheader':
                    module_index.write(f"<li><strong>{title}</strong></li>\n")
                    continue
                if link:
                    module_index.write(f"<li><a href='{link}'>{title}</a> <em>({item_type})</em></li>\n")
                else:
                    module_index.write(f"<li>{title} <em>({item_type})</em></li>\n")
            module_index.write('</ul>\n')
        module_index.write('</body></html>\n')


    # Full course content in single file
    print("Writing main course files...")
    mycourse = codecs.open(course_folder+'/fullcourse.raw.html','w','utf-8')

    mycourse.write(f"<html><head><base href='file:///C:/Users/phowell/source/repos/course_temps/course_{id}/'></head><body>\n")
    mycourse.write("<html><head></head><body>\n")

    for I in items:
        if I:
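As a rough illustration of how resolve_module_item_link falls back (the values below are invented), a downloaded page resolves to its local copy while anything unmapped keeps the Canvas html_url, and SubHeader items become plain headings:

    resolve_module_item_link({'type': 'Page', 'page_url': 'syllabus', 'html_url': 'https://canvas.example.edu/pages/syllabus'})
    # -> page_local_map['syllabus'] if the page was downloaded, otherwise the html_url
    resolve_module_item_link({'type': 'SubHeader', 'title': 'Week 1'})
    # -> None, rendered as a bold <li> rather than a link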
@@ -562,6 +703,100 @@ def course_download(id=""):
        print(f"couldn't create doc fullcourse page: {e}")


def restore_canvas_image_sources(html_fragment):
    soup = bs(html_fragment, features="lxml")
    changed = False
    for img in soup.find_all('img'):
        canvas_src = img.get('data-canvas-src')
        if canvas_src:
            img['src'] = canvas_src
            del img['data-canvas-src']
            changed = True
    body = soup.body
    if body:
        restored = ''.join(str(child) for child in body.children)
    else:
        restored = soup.decode()
    return restored, changed


def _push_page_update(course_num, page_slug, new_content):
    endpoint = f"{url}/api/v1/courses/{course_num}/pages/{page_slug}"
    data = {'wiki_page[body]': new_content}
    response = requests.put(endpoint, headers=header, params=data)
    if response.status_code >= 400:
        print(f" - Failed to upload {page_slug}: {response.status_code} {response.text}")
        return False
    print(f" - Uploaded {page_slug}")
    return True


def upload_modified_pages(course_id=None, confirm_each=False):
    if not course_id:
        course_id = input("course id> ").strip()
    if not course_id:
        print("No course id provided; aborting.")
        return

    course_folder = f"../course_temps/course_{course_id}"
    manifest_path = os.path.join(course_folder, 'pages_manifest.json')
    if not os.path.exists(manifest_path):
        print(f"No manifest found at {manifest_path}. Run course_download first.")
        return

    with codecs.open(manifest_path, 'r', 'utf-8') as manifest_file:
        manifest = json.loads(manifest_file.read())

    pages = manifest.get('pages', {})
    if not pages:
        print("Manifest contains no page entries.")
        return

    updated = False
    for slug, meta in pages.items():
        local_rel = meta.get('filename')
        local_path = os.path.join(course_folder, local_rel) if local_rel else None
        if not local_rel or not local_rel.startswith('pages/'):
            print(f" - Skipping {slug}: not a downloaded page ({local_rel})")
            continue
        if not local_path or not os.path.exists(local_path):
            print(f" - Skipping {slug}: local file missing ({local_rel})")
            continue

        with codecs.open(local_path, 'r', 'utf-8') as local_file:
            local_html = local_file.read()
        current_hash = hashlib.sha256(local_html.encode('utf-8')).hexdigest()
        if current_hash == meta.get('hash'):
            continue

        restored_html, changed = restore_canvas_image_sources(local_html)
        payload = restored_html if changed else local_html

        do_upload = True
        if confirm_each:
            ans = input(f"Upload changes for {slug}? [y/N]: ").strip().lower()
            do_upload = ans in ('y', 'yes')

        if not do_upload:
            print(f" - Skipped {slug} by user request")
            continue

        if _push_page_update(course_id, slug, payload):
            manifest['pages'][slug]['hash'] = current_hash
            updated = True

    if updated:
        with codecs.open(manifest_path, 'w', 'utf-8') as manifest_file:
            manifest_file.write(json.dumps(manifest, indent=2))
        print("Updated manifest hashes for uploaded pages.")
    else:
        print("No page uploads performed.")


def upload_modified_pages_prompt():
    upload_modified_pages()


def media_testing():
    user_id = 285 #ksmith
    t = f"https://gavilan.instructuremedia.com/api/public/v1/users/{user_id}/media"
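The upload path above is driven by the hashes recorded in pages_manifest.json at download time: a page is only pushed back to Canvas when its on-disk hash has drifted from the manifest, and data-canvas-src attributes are swapped back to the original URLs first. A compact sketch of that check (paths and manifest layout follow the diff, but treat them as assumptions rather than a stable format):

    import codecs, hashlib

    def page_changed_since_download(manifest_entry, local_path):
        # compare the SHA-256 of the current local HTML to the hash stored at download time
        html = codecs.open(local_path, 'r', 'utf-8').read()
        current = hashlib.sha256(html.encode('utf-8')).hexdigest()
        return current != manifest_entry.get('hash')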
@@ -810,9 +1045,8 @@ def make_pages_from_folder(folder='cache/csis6/', course = '20558'):

# Given course, page url, and new content, upload the new revision of a page
def upload_page(course_num,pageurl,new_content):
    print("Repaired page:\n\n")
    print(f"Uploading page: {pageurl}")
    #print new_content
    print(pageurl)
    t3 = url + '/api/v1/courses/' + str(course_num) + '/pages/' + pageurl
    xyz = input('Enter 1 to continue and send back to: ' + t3 + ': ')
    #xyz = '1'
@@ -1984,6 +2218,7 @@ if __name__ == "__main__":
        5: ['course download tester', test_forums ],
        6: ['download all a courses pages', grab_course_pages],
        7: ['quick site downloader', download_web],
        8: ['upload modified pages back to Canvas', upload_modified_pages_prompt],
        17: ['repair ezproxy links', repair_ezproxy_links],
        18: ['create pages from html files', make_pages_from_folder],
        19: ['fetch support page', fetch_support_page],

courses.py (155 lines changed)
@@ -1,8 +1,7 @@
import json, re, requests, codecs, sys, time, funcy, os
import pandas as pd
from datetime import datetime
from datetime import datetime, timedelta, timezone
import pytz
from datetime import datetime
from util import print_table, int_or_zero, float_or_zero, dept_from_name, num_from_name
from pipelines import fetch, fetch_stream, fetch_collapse, header, url
from schedules import get_semester_schedule
@@ -2240,13 +2239,19 @@ def instructor_list_to_activate_evals():

    #print(mylist)



# Toggle the eval tool visibility for all courses in the selected Canvas term.
def add_evals(section=0):
    # show or hide?

    TERM = 287
    SEM = "sp25"
    term_record = find_term(input('term? '))
    if not term_record:
        raise ValueError(f"Unknown term")

    term_id = term_record.get('canvas_term_id')
    if term_id is None:
        raise ValueError(f"Canvas term id missing for {term_record}")

    term_code = term_record.get('code')

    # fetch list of courses?
    GET_FRESH_LIST = 0
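The hard-coded TERM/SEM pair is replaced by a find_term lookup; based on the keys read above, the returned record is assumed to look roughly like this (the values shown are just the old hard-coded ones):

    term_record = {'canvas_term_id': 287, 'code': 'sp25'}  # illustrative shape only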
@@ -2261,16 +2266,16 @@ def add_evals(section=0):
    ASK = 0

    # are we showing or hiding the course eval link?
    HIDE = True
    HIDE = False


    s = [ x.strip() for x in codecs.open(f'cache/{SEM}_eval_sections.txt','r').readlines()]
    s = [ x.strip() for x in codecs.open(f"cache/{term_code}_eval_sections.txt",'r').readlines()]
    s = list(funcy.flatten(s))
    s.sort()
    print(f"Going to activate course evals in these sections: \n{s}\n")
    xyz = input('hit return to continue')

    all_semester_courses = getCoursesInTerm(TERM, GET_FRESH_LIST, 1)
    all_semester_courses = getCoursesInTerm(term_id, GET_FRESH_LIST, 1)
    eval_course_ids = []
    courses = {}
    for C in all_semester_courses:
@@ -2814,21 +2819,119 @@ def quick_sem_course_list(term=180):
        print(C['name'])


# Check Canvas for an existing calendar event that matches the provided metadata.
def find_existing_calendar_event(context_code, title, start_at_iso, description="", tolerance_hours=12):
    def _normalize_iso(value):
        if not value:
            return None
        if value.endswith('Z'):
            value = value[:-1] + '+00:00'
        try:
            return datetime.fromisoformat(value)
        except ValueError:
            return None

    target_start = _normalize_iso(start_at_iso)
    if not target_start:
        return None

    window_start = (target_start - timedelta(hours=tolerance_hours)).date().isoformat()
    window_end = (target_start + timedelta(hours=tolerance_hours)).date().isoformat()

    params = {
        "context_codes[]": context_code,
        "start_date": window_start,
        "end_date": window_end,
    }

    existing_events = fetch("/api/v1/calendar_events", params=params)
    if not isinstance(existing_events, list):
        print(f"Unable to inspect existing events for context {context_code}: unexpected response")
        return None

    normalized_title = title.strip().lower() if isinstance(title, str) else ""
    normalized_description = description.strip().lower() if isinstance(description, str) else ""

    for event in existing_events:
        event_title = (event.get('title') or "").strip().lower()
        event_description = (event.get('description') or "").strip().lower()
        event_start = _normalize_iso(event.get('start_at') or "")
        if not event_start:
            continue
        time_difference = abs((event_start - target_start).total_seconds())
        if time_difference > tolerance_hours * 3600:
            continue
        if event_title == normalized_title:
            return event
        if normalized_description and event_description == normalized_description:
            return event
    return None


# Remove all calendar events attached to a course after user confirmation.
def remove_all_course_events():
    course_id = input("course id> ").strip()
    if not course_id:
        print("No course id provided; aborting.")
        return
    context_code = course_id if course_id.startswith("course_") else f"course_{course_id}"
    today = datetime.now(timezone.utc).date()
    start_date = (today - timedelta(days=730)).isoformat()
    end_date = (today + timedelta(days=365)).isoformat()
    print(f"Fetching existing events for {context_code} between {start_date} and {end_date}...")
    params = {
        "context_codes[]": context_code,
        "per_page": 100,
        "start_date": start_date,
        "end_date": end_date,
    }
    events = fetch("/api/v1/calendar_events", params=params)
    if not events:
        print("No events found for this course.")
        return

    print(f"Found {len(events)} events. Beginning removal...")
    for event in events:
        event_id = event.get("id")
        event_title = event.get("title", "(no title)")
        if not event_id:
            print(f"Skipping event '{event_title}' with missing id")
            continue
        print(f"Deleting event '{event_title}' (id {event_id}) in {context_code}...", end=' ')
        delete_url = f"{url}/api/v1/calendar_events/{event_id}"
        response = requests.delete(delete_url, headers=header)
        if response.ok:
            print("deleted successfully")
        else:
            print(f"failed: {response.status_code} {response.text}")


# Create Canvas calendar events for predefined orientation shells from CSV input.
def create_calendar_event():
    events = codecs.open('cache/academic_calendar_2025.csv','r','utf-8').readlines()

    orientation_shells = ["course_15924","course_19094","course_20862"]
    orientation_shells = ["course_15924","course_19094","course_20862", "course_23313"]

    for ori_shell in orientation_shells:
        for e in events:
            (date, title, desc) = e.split(',')
            if not e.strip():
                continue
            parts = [part.strip() for part in e.split(',', 2)]
            if len(parts) < 3:
                continue
            date, title, desc = parts
            local = pytz.timezone("America/Los_Angeles")
            naive = datetime.strptime(date, "%Y-%m-%d")
            local_dt = local.localize(naive, is_dst=None)
            utc_dt = local_dt.astimezone(pytz.utc).isoformat()


            print(f"Checking event '{title}' ({date}) in {ori_shell}...", end=' ')
            existing_event = find_existing_calendar_event(ori_shell, title, utc_dt, desc)
            if existing_event:
                existing_id = existing_event.get('id')
                print(f"exists as id {existing_id} in {ori_shell}, skipping add")
                continue
            print(f"no existing event in {ori_shell}, attempting add")

            params = {
                "calendar_event[context_code]": ori_shell,
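A small sketch of the duplicate check above: Canvas start_at values such as "2025-01-20T08:00:00Z" are normalized by swapping the trailing Z for +00:00 so datetime.fromisoformat can parse them, and an event counts as already existing when its title matches and its start time falls inside the tolerance window (the timestamps below are invented):

    from datetime import datetime, timedelta

    existing = datetime.fromisoformat("2025-01-20T08:00:00Z".replace("Z", "+00:00"))
    target = datetime.fromisoformat("2025-01-20T00:00:00+00:00")
    is_within_window = abs((existing - target).total_seconds()) <= timedelta(hours=12).total_seconds()
    # True: the two starts are 8 hours apart, inside the default 12-hour tolerance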
@@ -2840,12 +2943,21 @@ def create_calendar_event():

            u = url + "/api/v1/calendar_events"
            res = requests.post(u, headers = header, params=params)
            if res.ok:
                try:
                    result = json.loads(res.text)
            print(title,end=" ")
            if "errors" in result:
                print(result["errors"])
            if "id" in result:
                print("ok, id#", result["id"])
                except json.JSONDecodeError:
                    print(f"add completed for '{title}' in {ori_shell} (status {res.status_code}) but response parse failed")
                    continue
                new_id = result.get("id")
                if new_id:
                    print(f"added successfully as id {new_id} in {ori_shell} (status {res.status_code})")
                elif "errors" in result:
                    print(f"add failed for '{title}' in {ori_shell}: {result['errors']}")
                else:
                    print(f"add attempted for '{title}' in {ori_shell} with unexpected response {result}")
            else:
                print(f"add failed for '{title}' in {ori_shell}: {res.status_code} {res.text}")

def utc_to_local(utc_str):
    if not utc_str: return ""
@@ -3076,10 +3188,11 @@ if __name__ == "__main__":
        20: ['Get a course info by id',getCourses],
        21: ['Reset course conclude date',update_course_conclude],
        22: ['Create calendar events for orientation shells', create_calendar_event],
        23: ['list all assignments', list_all_assignments],
        24: ['Bulk unenroll from course', bulk_unenroll],
        25: ['enrollment helper', enrollment_helper],
        26: ['g number list enroll to shell id', enroll_gnumber_list_to_courseid],
        23: ['Remove all calendar events from a course', remove_all_course_events],
        24: ['list all assignments', list_all_assignments],
        25: ['Bulk unenroll from course', bulk_unenroll],
        26: ['enrollment helper', enrollment_helper],
        27: ['g number list enroll to shell id', enroll_gnumber_list_to_courseid],

        30: ['* Overview semester start dates',overview_start_dates],
        31: ['Fine tune term dates and winter session', course_by_depts_terms],

@@ -1,6 +1,6 @@
# schedule.py
#
# experimenting with manipulating and querying the schedule of courses
# manipulating and querying the schedule of courses


#from telnetlib import GA

tasks.py (23 lines changed)
@@ -23,12 +23,13 @@ from time import mktime
from semesters import human_to_short
from canvas_secrets import badgr_target, badgr_hd

from docxtpl import DocxTemplate

if os.name != 'posix':
    import win32com.client
    import win32com.client as win32
    import pypandoc
    from docxtpl import DocxTemplate

    import xlwt

from pipelines import header, url, fetch, convert_roster_files, move_to_folder
@@ -112,7 +113,7 @@ def build_quiz(filename=""):
            this_q = L.strip()
            state = "answers"
        elif state =="answers":
            m = re.search( '^Answer\:\s(\w)$', L)
            m = re.search( r'^Answer\:\s(\w)$', L)
            if m:
                correct_answer = m.group(1)
                qs.append( [this_q, this_as, correct_answer ] )
@@ -120,7 +121,7 @@ def build_quiz(filename=""):
                this_as = { }
                correct_answer = ""
                continue
            m = re.search( '^(\w)\)\s(.*)$', L)
            m = re.search( r'^(\w)\)\s(.*)$', L)
            if m:
                print(m.group(1))
                print(m.group(2))
@@ -183,7 +184,7 @@ def convert_to_pdf(name1, name2):

# Build (docx/pdf) certificates for gott graduates
def certificates_gott_build():
    course = "gott_1_fa25"
    course = "gott_1_fa25_sept"
    coursedate = "Fall 2025"
    certificate = "gott 1 template.docx"

@@ -202,7 +203,7 @@ def certificates_gott_build():
        name = row[0].strip()
        doc = DocxTemplate(f"cache/certificates/{certificate}")
        doc.render({ 'name' : name, 'coursedate': coursedate })
        name_as_filename = re.sub('\s', '_', name.lower())
        name_as_filename = re.sub(r'\s', '_', name.lower())
        fn = f"cache/certificates/{course}_{name_as_filename}."
        print(fn+'docx')
        doc.save(fn+'docx')
@@ -409,7 +410,7 @@ def hours_calc():
    allout = codecs.open('pa_de_noncred.txt','w','utf-8')

    for f in os.listdir('.'):
        m = re.match('pa(\d+)\.txt',f)
        m = re.match(r'pa(\d+)\.txt',f)
        if m:
            sec = m.group(1)
            # split up the combined sections
@@ -785,21 +786,21 @@ def job_titles():
        lastname = " ".join(parts[1:])
        for fns in first_name_subs:
            fns_parts = fns.split(',')
            subbed = re.sub('^'+fns_parts[0]+'$',fns_parts[1].strip(), first)
            subbed = re.sub(r'^'+fns_parts[0]+'$',fns_parts[1].strip(), first)
            if first != subbed:
                #print("Subbed %s %s for %s %s" % (subbed,lastname, first, lastname))
                name_to_title[ subbed + " " + lastname ] = x[1].strip()
            subbed = re.sub('^'+fns_parts[1].strip()+'$',fns_parts[0], first)
            subbed = re.sub(r'^'+fns_parts[1].strip()+'$',fns_parts[0], first)
            if first != subbed:
                #print("Subbed %s %s for %s %s" % (subbed,lastname, first, lastname))
                name_to_title[ subbed + " " + lastname ] = x[1].strip()
        for lns in last_name_subs:
            fns_parts = lns.split(',')
            subbed = re.sub('^'+fns_parts[0]+'$',fns_parts[1].strip(), lastname)
            subbed = re.sub(r'^'+fns_parts[0]+'$',fns_parts[1].strip(), lastname)
            if lastname != subbed:
                #print("L Subbed %s %s for %s %s" % (first, subbed, first, lastname))
                name_to_title[ first + " " + subbed ] = x[1].strip()
            subbed = re.sub('^'+fns_parts[1].strip()+'$',fns_parts[0], lastname)
            subbed = re.sub(r'^'+fns_parts[1].strip()+'$',fns_parts[0], lastname)
            if lastname != subbed:
                #print("L Subbed %s %s for %s %s" % (first, subbed, first, lastname))
                name_to_title[ first + " " + subbed ] = x[1].strip()
@@ -1280,7 +1281,7 @@ def file_renamer():
    ff = os.listdir(where)

    for F in ff:
        nn = re.sub("\.jpg$","",F)
        nn = re.sub(r"\.jpg$","",F)
        print("Old name: %s. New name: %s" % (F, nn))
        os.rename( where+F, where+nn )
        print("ok")