canvasapp/content.py



#saved_titles = json.loads( codecs.open('cache/saved_youtube_titles.json','r','utf-8').read() )

#from calendar import FRIDAY
#import html2markdown as h2m

from typing import ItemsView
import requests, codecs, os, re, json, sys, pypandoc
from checker import safe_html
from pipelines import header, fetch, url
from util import clean_title, to_file_friendly
from bs4 import BeautifulSoup as bs
from html.parser import HTMLParser


# HTML fragment inserted between sections of the combined course document; the
# styled <div> requests a page break before whatever follows it.
pagebreak = '\n\n<!-- BREAK -->\n\n<div style="page-break-before: always;"></div>\n\n'
# Debug switch read by d(): when truthy, d() prints its argument.
DBG = 1

# Module-level list of HTML fragments in module order. course_download() rebuilds
# it and extract_forums() writes forum content into it by index.
items = []

def d(s):
    """Print `s`, but only while the module-level DBG flag is truthy."""
    if not DBG:
        return
    print(s)

def test_forums(id=0):
    """Export only the discussion forums of one course (test driver for extract_forums).

    Walks the course's modules to build the module-level `items` list and the
    `item_id_to_index` lookup, then calls extract_forums() and prints the index
    it returns as JSON.

    id: Canvas course id (int or str). When falsy, the user is prompted for it.
    """
    # extract_forums() writes into the module-level list, so rebuild that one
    # rather than a local copy.
    global items

    if not id:
        id = input("ID of course to check?  ")
    # Normalise once so an int id can be concatenated into paths and URLs.
    id = str(id)
    verbose = 1

    courseinfo = fetch('/api/v1/courses/' + id, verbose)
    modules = fetch('/api/v1/courses/' + id + '/modules', verbose)

    # Maps a page_url / content_id to that item's slot in `items`.
    item_id_to_index = {}

    # Slot 0 is the course title; the rest are placeholders filled in by index.
    items = ["<font size='24'>" + courseinfo['name'] + "</font>\n\n" + pagebreak]
    items.extend([0] * 9000)
    running_index = 1

    for m in modules:
        items[running_index] = '<h2>%s</h2>%s\n' % (m['name'], pagebreak)
        running_index += 1

        mod_items = fetch('/api/v1/courses/' + id + '/modules/' + str(m['id']) + '/items', verbose)

        for I in mod_items:
            item_type = I['type']
            known = item_type in ('SubHeader', 'Page', 'Quiz', 'Discussion', 'ExternalUrl')
            if not known and 'content_id' not in I:
                print("What is this item? " + str(I))
                continue

            running_index += 1
            if item_type == 'SubHeader':
                # Debug aid: dump the whole module item rather than just its title.
                items[running_index] = '<h3>%s</h3>\n' % str(json.dumps(I, indent=2))
            elif item_type == 'Page':
                item_id_to_index[I['page_url']] = running_index
            elif item_type in ('Quiz', 'Discussion'):
                item_id_to_index[I['content_id']] = running_index
            elif item_type == 'ExternalUrl':
                items[running_index] = "<a href='%s'>%s</a><br />\n\n" % (I['external_url'], I['title'])

    course_folder = '../course_temps/course_' + id
    if os.path.isdir(course_folder):
        print("Course folder exists.")
    else:
        os.makedirs(course_folder)

    index = extract_forums(id, course_folder, item_id_to_index, verbose)
    print(json.dumps(index, indent=2))

def write_message(fd, view, participants):
    """Write one discussion entry and, recursively, its replies as nested blockquotes.

    fd: writable text file object.
    view: entry dict; reads 'user_id', 'message' and the optional 'replies' list.
    participants: dict mapping a user id to a dict that has 'display_name'.

    Entries lacking an author or a message (Canvas omits both for deleted
    entries) are written with placeholders instead of raising KeyError, so a
    single deleted post no longer aborts the export of the whole forum.
    """
    author = participants.get(view.get('user_id'), {}).get('display_name', 'Unknown')
    message = view.get('message') or ''
    fd.write(f"<blockquote>\nfrom <b>{author}</b>:<br />\n{message}\n<br />")
    for reply in view.get('replies') or []:
        write_message(fd, reply, participants)
    fd.write("</blockquote>\n")

def extract_forums(id, course_folder, item_id_to_index, verbose=0):
    """Save each discussion topic of a course as an HTML file and index it.

    id: Canvas course id.
    course_folder: local course folder; a 'forums' subfolder is created in it.
    item_id_to_index: dict mapping a discussion id to its slot in the
        module-level `items` list; forums found in it are also written there.
    verbose: passed through to fetch().

    Returns a list of index entries: one header string followed by
    ('forums/<file>.html', title) tuples, one per forum written. When the
    forums folder already exists the export is skipped and the list is empty.
    """
    global items

    index = []
    forum_f = course_folder + '/forums'
    headered = 0
    print("\nFORUMS")

    # Only the mkdir decides whether forums are skipped; keeping it in its own
    # try stops unrelated errors from being reported as "folder exists".
    try:
        os.mkdir(forum_f)
    except FileExistsError as e:
        print("** Forum folder seems to exist. Skipping those.")
        print(e)
        return index
    except OSError as e:
        print("** Could not create forum folder. Skipping forums.")
        print(e)
        return index

    try:
        forums = fetch('/api/v1/courses/' + str(id) + '/discussion_topics', verbose)
        for p in forums:
            p['title'] = clean_title(p['title'])
            forum_id = p['id']
            easier_filename = p['title']
            for a in 'title,posted_at,published'.split(','):
                print(str(p[a]), "\t", end=' ')
            print("")

            topic = fetch(f"/api/v1/courses/{id}/discussion_topics/{forum_id}", verbose)
            title = topic['title']
            # A topic may have no body; avoid concatenating None below.
            message = topic['message'] or ''

            full_view = fetch(f"/api/v1/courses/{id}/discussion_topics/{forum_id}/view", verbose)
            try:
                participants = {x['id']: x for x in full_view['participants']}
                with codecs.open(forum_f + '/' + easier_filename + '.html', 'w', 'utf-8') as fd:
                    fd.write(f"<h1>{title}</h1>\n")
                    fd.write(message + "\n\n")
                    for v in full_view['view']:
                        write_message(fd, v, participants)
                if not headered:
                    index.append('<br /><b>Discussion Forums</b><br />')
                    headered = 1
                index.append(('forums/' + easier_filename + '.html', p['title']))

                # write to running log of content in order of module
                if p['id'] in item_id_to_index:
                    items[item_id_to_index[p['id']]] = f"<h1>{title}</h1>\n\n{message}\n\n{pagebreak}"
                else:
                    print('  This forum didnt seem to be in the modules list.')
            except Exception as e:
                # Best effort: report and carry on with the next forum.
                print("Error here:", e)
    except Exception as e:
        print("** Problem while exporting forums.")
        print(e)

    return index


#
#
#
#
#
# todo: include front page.
# todo: clean html
# todo: toc
#
#
# Download everything interesting in a course to a local folder
# Build a master file with the entire class content
def course_download(id=""):
    """Download a course's files, pages, assignments and forums to a local folder.

    Everything is saved under ../course_temps/course_<id>/ together with an
    index.html that links each saved item, and a single "fullcourse" document
    (raw HTML, then pandoc-converted HTML, Markdown and DOCX) holding the
    content in module order. Pages and assignments already on disk are reused
    instead of being fetched again.

    id: Canvas course id (str or int). When empty, the user is prompted for it.
    """
    global items

    if not id:
        id = input("ID of course to check?  ")
    # Normalise once: the id is concatenated into paths and URLs below.
    id = str(id)

    verbose = 0
    PAGES_ONLY = 0

    save_file_types = ['application/pdf','application/docx','image/jpg','image/png','image/gif','image/webp','application/vnd.openxmlformats-officedocument.wordprocessingml.document']

    courseinfo = fetch('/api/v1/courses/' + id, verbose)
    modules = fetch('/api/v1/courses/' + id + '/modules', verbose)

    # Reverse lookup into the items list: page_url / content_id -> slot.
    # Assignments use an 'assignment:<id>' key so their ids cannot collide
    # with quiz or discussion ids.
    item_id_to_index = {}

    # Slot 0 is the course title; the rest are placeholders filled in by index.
    items = [f"<h1>{courseinfo['name']}</h1>\n{pagebreak}"]
    items.extend([0] * 9000)
    running_index = 1

    # (href, link text, page file) for every YouTube link found in a page.
    video_link_list = []

    for m in modules:
        items[running_index] = '<h2>%s</h2>%s\n' % (m['name'], pagebreak)
        running_index += 1

        mod_items = fetch('/api/v1/courses/' + id + '/modules/' + str(m['id']) + '/items', verbose)

        for I in mod_items:
            item_type = I['type']
            known = item_type in ('SubHeader', 'Page', 'Quiz', 'Discussion', 'ExternalUrl')
            if not known and 'content_id' not in I:
                print("What is this item? " + str(I))
                continue

            running_index += 1
            if item_type == 'SubHeader':
                items[running_index] = f"<h3>{I['title']}</h3>\n"
            elif item_type == 'Page':
                item_id_to_index[I['page_url']] = running_index
            elif item_type in ('Quiz', 'Discussion'):
                item_id_to_index[I['content_id']] = running_index
            elif item_type == 'ExternalUrl':
                items[running_index] = "<a href='%s'>%s</a><br />\n\n" % (I['external_url'], I['title'])
            elif item_type == 'Assignment':
                # Reaching here with an unlisted type implies 'content_id' is present.
                item_id_to_index['assignment:%s' % I['content_id']] = running_index

    course_folder = '../course_temps/course_' + id

    # list of each item, organized by item type: header strings and (url, title) tuples
    index = []
    if os.path.isdir(course_folder):
        print("Course folder exists.")
    else:
        os.makedirs(course_folder)

    ###
    ### FILES
    ###
    if not PAGES_ONLY:
        files_f = course_folder + '/files'
        headered = 0
        print("\nFILES")
        try:
            os.mkdir(files_f)
        except FileExistsError:
            print(" * Files folder already exists.")

        files = fetch('/api/v1/courses/' + id + '/files', verbose)
        print("LISTING COURSE FILES")
        for f in files:
            if f['content-type'] not in save_file_types:
                continue
            d(' - %s' % f['filename'])

            local_path = files_f + '/' + f['filename']
            if not os.path.exists(local_path):
                r = requests.get(f['url'], headers=header, stream=True)
                with open(local_path, 'wb') as fd:
                    for chunk in r.iter_content(chunk_size=8192):
                        fd.write(chunk)
            else:
                d(" - already downloaded %s" % local_path)

            if not headered:
                index.append('<br /><b>Files</b><br />')
                headered = 1
            index.append(('files/' + f['filename'], f['filename']))

    ###
    ### PAGES
    ###
    pages_f = course_folder + '/pages'
    headered = 0
    image_count = 0
    print("\nPAGES")
    try:
        os.mkdir(pages_f)
    except FileExistsError:
        print(" * Pages folder already exists.")

    pages = fetch('/api/v1/courses/' + id + '/pages', verbose)
    with codecs.open('cache/accessible_check_log.txt', 'w', 'utf-8') as videos_log:
        for p in pages:
            d(' - %s' % p['title'])

            p['title'] = clean_title(p['title'])
            easier_filename = clean_title(p['url'])
            this_page_filename = "%s/%s.html" % (pages_f, easier_filename)

            if not headered:
                index.append('<br /><b>Pages</b><br />')
                headered = 1
            index.append(('pages/' + easier_filename + '.html', p['title']))

            # HTML for this page as it goes into the combined course file.
            # Stays None when no content is available, so a previous page's
            # content is never reused by accident.
            main_content = None

            if os.path.exists(this_page_filename):
                d(" - already downloaded %s" % this_page_filename)
                with codecs.open(this_page_filename, 'r', 'utf-8') as fd:
                    cached_soup = bs(fd.read(), features="lxml")
                # Saved pages reference downloaded images as '<n>.<ext>' relative
                # to pages/; the combined file lives one folder up.
                for img in cached_soup.find_all('img'):
                    src = img.get('src')
                    if src and re.match(r'^\d+\.[\w+]+$', src):
                        img['src'] = 'pages/' + src
                main_content = f"{cached_soup.prettify()}\n{pagebreak}"
            else:
                t2 = fetch('/api/v1/courses/' + id + '/pages/' + p['url'], verbose)
                if t2 and 'body' in t2 and t2['body']:
                    # Two parses: one with image paths relative to pages/ (saved
                    # page file), one relative to the course folder (combined file).
                    soup_infolder = bs(t2['body'], features="lxml")
                    soup_in_main = bs(t2['body'], features="lxml")
                    page_title = t2.get('title', p['title'])

                    for A in soup_infolder.find_all('a'):
                        href = A.get('href')
                        if href and re.search(r'youtu', href):
                            video_link_list.append((href, A.text, 'pages/' + easier_filename + ".html"))

                    page_image_paths, image_count = _save_page_images(soup_infolder, pages_f, image_count)

                    # Repeat for version for main file; images that failed to
                    # download keep their original src.
                    for img in soup_in_main.find_all('img'):
                        src = img.get('src')
                        if src:
                            img['src'] = page_image_paths.get(src, src)

                    _log_studio_videos(soup_infolder, p['url'], videos_log)

                    # WRITE out page
                    try:
                        this_page_content = f"<h2>{page_title}</h2>\n{soup_infolder.prettify()}"
                        with codecs.open(this_page_filename, 'w', 'utf-8') as fd:
                            fd.write(this_page_content)
                    except (OSError, UnicodeError) as e:
                        d(' * problem writing page content: %s' % e)
                        ## TODO include linked pages even if they aren't in module

                    main_content = f"<h2>{page_title}</h2>\n{soup_in_main.prettify()}\n{pagebreak}"
                else:
                    d('   * nothing returned or bad fetch')

            # write to running log of content in order of module
            if p['url'] not in item_id_to_index:
                d(' -- This page didnt seem to be in the modules list.')
            elif main_content is not None:
                items[item_id_to_index[p['url']]] = main_content

    ###
    ### ASSIGNMENTS
    ###
    if not PAGES_ONLY:
        headered = 0
        asm_f = course_folder + '/assignments'
        print("\nASSIGNMENTS")
        try:
            os.mkdir(asm_f)
        except FileExistsError:
            d(" - Assignments dir exists")

        asm = fetch('/api/v1/courses/' + id + '/assignments', verbose)
        for p in asm:
            d(' - %s' % p['name'])

            try:
                friendlyfile = to_file_friendly(p['name'])
                relative_name = str(p['id']) + "_" + friendlyfile + '.html'
                this_assmt_filename = asm_f + '/' + relative_name
                if os.path.exists(this_assmt_filename):
                    d(" - already downloaded %s" % this_assmt_filename)
                    with codecs.open(this_assmt_filename, 'r', 'utf-8') as fd:
                        this_assmt_content = fd.read()
                else:
                    t2 = fetch('/api/v1/courses/' + id + '/assignments/' + str(p['id']), verbose)
                    # Build the content before opening the file so a failure
                    # here does not leave an empty file that looks "downloaded".
                    this_assmt_content = "<h2>%s</h2>\n%s\n\n" % (t2['name'], t2['description'] or '')
                    with codecs.open(this_assmt_filename, 'w', 'utf-8') as fd:
                        fd.write(this_assmt_content)

                # Index cached assignments too, so index.html is complete on re-runs.
                if not headered:
                    index.append('<br /><b>Assignments</b><br />')
                    headered = 1
                index.append(('assignments/' + relative_name, p['name']))

                # write to running log of content in order of module
                slot_key = 'assignment:%s' % p['id']
                if slot_key in item_id_to_index:
                    items[item_id_to_index[slot_key]] = this_assmt_content + '\n\n' + pagebreak
            except Exception as e:
                d(' * Problem %s' % str(e))

        ###
        ### FORUMS
        ###
        index.extend(extract_forums(id, course_folder, item_id_to_index, verbose))

        # TODO: quizzes and an index of external links are not exported yet.

    # Create index page of all gathered items
    with codecs.open(course_folder + '/index.html', 'w', 'utf-8') as myindex:
        for entry in index:
            if isinstance(entry, str):
                # Section header, written verbatim.
                myindex.write(entry)
            else:
                myindex.write("<a href='" + entry[0] + "'>" + entry[1] + "</a><br />\n")

    # Full course content in single file
    print("Writing main course files...")
    with codecs.open(course_folder + '/fullcourse.raw.html', 'w', 'utf-8') as mycourse:
        # NOTE(review): the base href is a machine-specific absolute path.
        mycourse.write(f"<html><head><base href='file:///C:/Users/phowell/source/repos/course_temps/course_{id}/'></head><body>\n")

        for item in items:
            if item:
                mycourse.write(item)

        # The video table belongs inside <body>, so write it before closing.
        if video_link_list:
            mycourse.write('\n<h1>Videos Linked in Pages</h1>\n<table>')
            for (href, txt, pg) in video_link_list:
                mycourse.write("<tr><td><a target='_blank' href='"+href+"'>"+txt+"</a></td><td> on <a target='_blank' href='" + pg + "'>" + pg + "</a></td></tr>\n")
            mycourse.write("</table>\n")

        mycourse.write("\n</body></html>")

    # Debug dump of the structures built above.
    with open('cache/coursedump.txt', 'w') as temp:
        temp.write("items:  " + json.dumps(items, indent=2))
        temp.write("\n\n\n")
        temp.write("index:  " + json.dumps(index, indent=2))
        temp.write("\n\n\n")
        temp.write("item_id_to_index:  " + json.dumps(item_id_to_index, indent=2))

    try:
        pypandoc.convert_file(course_folder+'/fullcourse.raw.html', 'html', outputfile=course_folder+"/fullcourse.html")
    except Exception as e:
        print(f"couldn't create html fullcourse page: {e}")
    try:
        pypandoc.convert_file(course_folder+'/fullcourse.html', 'md', outputfile=course_folder+"/fullcourse.md")
    except Exception as e:
        print(f"couldn't create markdown fullcourse page: {e}")
    try:
        pypandoc.convert_file(course_folder+'/fullcourse.html', 'docx', outputfile=course_folder+"/fullcourse.docx")
    except Exception as e:
        print(f"couldn't create doc fullcourse page: {e}")


def _save_page_images(soup, pages_f, image_count):
    """Download each <img> of `soup` into `pages_f` and point its src at the local copy.

    Returns (page_image_paths, image_count): a dict mapping each original src
    to its 'pages/<file>' path, and the updated running image counter. An
    image that fails to download keeps its original src and gets no entry.
    """
    page_image_paths = {}
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue
        d('   - %s' % src)
        try:
            r = requests.get(src, headers=header, stream=True)
            # Do not store an HTTP error body as if it were the image.
            r.raise_for_status()
            ending = r.headers['content-type'].split("/")[-1]
            if ending == 'jpeg':
                ending = "jpg"
            local_src = f"{image_count}.{ending}"
            with open(f"{pages_f}/{local_src}", 'wb') as fd:
                for chunk in r.iter_content(chunk_size=8192):
                    fd.write(chunk)
        except Exception as e:
            d(' * Error downloading page image, %s' % str(e))
            continue
        # Only re-point the tag once the file is actually on disk.
        page_image_paths[src] = f"pages/{local_src}"
        img['src'] = local_src
        image_count += 1
    return page_image_paths, image_count


def _log_studio_videos(soup, page_url, videos_log):
    """Log every iframe src of a page and, for instructuremedia.com embeds, their video sources."""
    for iframe in soup.find_all("iframe"):
        src = iframe.get("src")
        if not src:
            continue
        videos_log.write(f"page: {page_url}  iframe src: {src}\n")
        videos_log.flush()

        if not re.search(r'instructuremedia\.com', src):
            continue
        try:
            iframe_response = requests.get(src)
        except requests.RequestException as e:
            # One unreachable embed must not abort the whole course download.
            print(f"Failed to retrieve iframe content from: {src} ({e})")
            continue
        if iframe_response.status_code != 200:
            print(f"Failed to retrieve iframe content from: {src}")
            continue
        videos_log.write(f"succesfully fetched {src}\n")
        videos_log.flush()

        video_tag = bs(iframe_response.text, 'html.parser').find('video')
        if video_tag:
            for source_tag in video_tag.find_all('source'):
                print("Video Source found:", source_tag.get('src'))
                videos_log.write(f"page: {page_url} video src: {source_tag.get('src')}\n")
                videos_log.flush()


def media_testing():
    """Fetch the media list of one hard-coded user from the instructuremedia API and print it."""
    ksmith_id = 285
    endpoint = "https://gavilan.instructuremedia.com/api/public/v1/users/%d/media" % ksmith_id
    print(fetch(endpoint, verbose=1, media=1))

def pan_testing():
    """Convert the Markdown full-course file of course 6862 to HTML with pandoc."""
    base = '../course_temps/course_6862'
    source_md = base + '/fullcourse.md'
    target_html = base + "/fullcourse.v2.html"
    pypandoc.convert_file(source_md, 'html', outputfile=target_html)

def create_page(course_num,new_title,new_content):
    """Create a new page in a course after asking for confirmation on the console.

    course_num: Canvas course id.
    new_title: title of the page to create.
    new_content: HTML body of the page.

    Nothing is sent unless the user types 1 at the prompt.
    """
    t3 = url + '/api/v1/courses/' + str(course_num) + '/pages'
    print("Creating page: %s" % new_title)
    xyz = input('type 1 to confirm: ')
    if xyz != '1':
        return
    page_fields = {'wiki_page[title]': new_title, 'wiki_page[body]': new_content}
    # Send the fields as the form body (as create_new_page does): a whole page
    # of HTML in the query string can exceed URL length limits.
    r3 = requests.post(t3, headers=header, data=page_fields)
    print(r3)
    print('ok')


def md_to_course():
    """Convert a course's Markdown export to HTML and upload it page by page.

    Reads cache/pages/course_<id>.md for a hard-coded course id, converts it
    with pandoc, then splits the result at each <h1>: the heading text becomes
    the page title and everything up to the next <h1> becomes the page
    content, sent through upload_page(). Content before the first <h1> is
    attached to the first page.
    """
    id = "11214"
    infile = 'cache/pages/course_%s.md' % id
    output = 'cache/pages/course_%s_fixed.html' % id
    pypandoc.convert_file(infile, 'html', format='md', outputfile=output)

    with codecs.open(output, 'r', 'utf-8') as fd:
        soup = bs(fd.read(), features="lxml")

    if soup.body is None:
        # Empty conversion result: nothing to split or upload.
        print("Done")
        return

    current_page = ""
    current_title = ""

    for child in soup.body.children:
        if child.name == "h1" and not current_title:
            current_title = child.get_text()
        elif child.name == "h1":
            upload_page(id, current_title, current_page)
            current_title = child.get_text()
            current_page = ""
            print("Next page: %s" % current_title)
        elif hasattr(child, 'prettify'):
            # Tags are re-serialised; bare text nodes are appended as they are.
            current_page += child.prettify(formatter="html")
        else:
            current_page += str(child)

    if current_title:
        upload_page(id, current_title, current_page)
    else:
        # No <h1> was found, so there is no page to upload to.
        print("No page title found; nothing uploaded.")
    print("Done")


# DL pages only
def grab_course_pages(course_num=-1):
    """Download every module page of a course, in module order, into two combined files.

    Writes cache/pages/course_<id>.html (the output of safe_html() for each
    page, preceded by a '###' divider line with title and page_url) and
    cache/pages/course_<id>.md (each page converted to Markdown by pandoc).

    course_num: Canvas course id (int or str); a negative int prompts the user.
    """
    global results
    # Only ints can be compared with 0; a str id would raise TypeError here.
    if isinstance(course_num, int) and course_num < 0:
        course_num = input("Id of course? ")
    course_num = str(course_num)

    # We want things in the order of the modules
    t4 = url + '/api/v1/courses/' + course_num + '/modules?include[]=items'
    results = fetch(t4)
    i = 1
    divider = "\n### "
    html_path = 'cache/pages/course_' + course_num + '.html'
    md_path = 'cache/pages/course_' + course_num + '.md'
    with codecs.open(html_path, 'w', 'utf-8') as pageout, codecs.open(md_path, 'w', 'utf-8') as pageoutm:
        for M in results:
            print("Module Name: " + M['name'])
            # Canvas may omit 'items' even when requested (very large modules).
            for I in M.get('items') or []:
                if I['type'] != 'Page':
                    continue
                pageout.write(divider + I['title'] + '### ' + I['page_url'] + '\n')
                print("  " + str(i) + ". " + I['title'])
                t2 = url + '/api/v1/courses/' + course_num + '/pages/' + I['page_url']
                print('Getting: ' + t2)
                mypage = fetch(t2)
                body = (mypage or {}).get('body') or ''
                fixed = safe_html(body) if body else ''
                if fixed:
                    markdown = pypandoc.convert_text("\n<h1>" + I['title'] + "</h1>\n" + body, 'md', format='html')
                    pageout.write(fixed + '\n')
                    pageoutm.write(markdown + '\n')
                    pageout.flush()
                i += 1

# Download, clean html, and reupload page
def update_page():
    """Let the user pick one module page of a hard-coded course, clean its HTML and re-upload it.

    Lists the pages of course 6862 in module order, reads a 1-based choice
    from the console, runs the page body through safe_html() and passes the
    result to upload_page().
    """
    course_num = '6862'

    # We want things in the order of the modules. fetch() returns the data
    # directly, as it is used everywhere else in this file.
    t4 = url + '/api/v1/courses/' + course_num + '/modules?include[]=items'
    mods = fetch(t4)

    modpagelist = []
    modurllist = []
    print("\nWhat page do you want to repair?")
    for M in mods:
        print("Module Name: " + M['name'])
        # Canvas may omit 'items' even when requested (very large modules).
        for I in M.get('items') or []:
            if I['type'] == 'Page':
                modpagelist.append(I['title'])
                modurllist.append(I['page_url'])
                print("  " + str(len(modpagelist)) + ". " + I['title'])

    try:
        choice = int(input("\n> ")) - 1
    except ValueError:
        print("Not a number; nothing done.")
        return
    # Reject 0 and out-of-range numbers instead of wrapping round or crashing.
    if not 0 <= choice < len(modurllist):
        print("No such page; nothing done.")
        return

    chosen_url = modurllist[choice]
    print('Fetching: ' + modpagelist[choice])
    t2 = url + '/api/v1/courses/' + course_num + '/pages/' + chosen_url
    print('From: ' + t2)

    mypage = fetch(t2)
    fixed_page = safe_html(mypage['body'])
    upload_page(course_num, chosen_url, fixed_page)

# given dict of file info (from files api), construct an img tag that works in a page
#def file_to_img_tag(f, alt, course, soup):
#    #tag = f"<img id=\"\" src=\"https://ilearn.gavilan.edu/courses/{course}/files/{f['id']}/preview\" alt=\"{f['filename']}\" "
#    #tag += f"data-api-endpoint=\"https://ilearn.gavilan.edu/api/v1/courses/{course}/files/{f['id']}\" data-api-returntype=\"File\" />"
#    return T


def html_file_to_page(filename, course, tags):
    """Read a local HTML file and rewrite its images to point at course files.

    filename: path of the HTML file to read.
    course: Canvas course id used to build the image URLs.
    tags: dict mapping an image file name to its file-info dict (needs 'id').

    Each <img> whose src base name is a key of `tags` is replaced by a tag
    pointing at that file's preview URL. The rewritten document is also saved
    next to the input as <filename>_mod.html.

    Returns {'title': ..., 'body': ...}: the stripped <title> text ('' if
    absent) and the contents of <body> as a string ('' if absent). Returns
    None when the file cannot be read or parsed.
    """
    try:
        with codecs.open(filename, 'r', 'utf-8') as fd:
            soup = bs(fd.read(), 'html.parser')
    except Exception as e:
        print(f"Exception on {filename}: {e}")
        return None

    # str.strip() returns a new string, so the result has to be stored.
    result = {'title': soup.title.text.strip() if soup.title else ''}

    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            # An <img> without src cannot be matched to a course file.
            continue
        alt = img.get('alt', src)
        orig_filename = os.path.basename(src)
        if orig_filename in tags:
            file_id = tags[orig_filename]['id']
            T = soup.new_tag(name='img', src=f"https://ilearn.gavilan.edu/courses/{course}/files/{file_id}/preview")
            T['id'] = file_id
            T['alt'] = alt
            T['data-api-endpoint'] = f"https://ilearn.gavilan.edu/api/v1/courses/{course}/files/{file_id}"
            T['data-api-returntype'] = "File"
            img.replace_with(T)
            print( f"   replaced image: {src}   alt: {alt}")
        else:
            print( f"   couldn't find replacement image: {src}   alt: {alt}")

    with codecs.open(filename+"_mod.html", 'w', 'utf-8') as outfile:
        outfile.write(soup.prettify())

    result['body'] = ''.join(map(str, soup.body.contents)) if soup.body else ''
    return result

def create_new_page(course_id, title, body):
    """Create a page in a course and print the url of the new page.

    course_id: Canvas course id.
    title: page title.
    body: page HTML.

    Failures are reported on the console and not raised.
    """
    print(f"Creating page: {title}, length: {len(body)}")
    request = f"{url}/api/v1/courses/{course_id}/pages"
    print(request)
    data = { 'wiki_page[title]': title, 'wiki_page[body]': body }
    r3 = requests.post(request, headers=header, data=data)
    try:
        result = json.loads(r3.text)
        print( f"    + ok: {result['url']}")
    except (ValueError, KeyError, TypeError):
        # Not JSON, or JSON without a 'url': show what the server answered.
        print("    - problem creating page?")
        print(f"      HTTP {r3.status_code}: {r3.text[:200]}")

# Given a folder full of html pages and their linked images, create Canvas PAGES of them
def make_pages_from_folder(folder='cache/csis6/', course = '20558'):
    """Create Canvas pages from HTML files in a local folder.

    folder: folder holding the HTML files.
    course: Canvas course id the pages are created in.

    The list of course files is read from cache/csis6filelist.json; its .jpg
    and .png entries are used by html_file_to_page() to re-point images.
    """
    file_list_cache = 'cache/csis6filelist.json'
    # Flip to True to re-download the course file list instead of using the cache.
    refresh_file_list = False

    if refresh_file_list:
        request = f"{url}/api/v1/courses/{course}/files"
        print("Fetching course files")
        files = fetch(request)
        with codecs.open(file_list_cache, 'w', 'utf-8') as fd:
            fd.write(json.dumps(files))
    else:
        with codecs.open(file_list_cache, 'r', 'utf-8') as fd:
            files = json.loads(fd.read())

    # Image files by name, for replacing <img> tags.
    tags = {f['filename']: f for f in files
            if f['filename'].lower().endswith(('.jpg', '.png'))}

    # NOTE(review): only this fixed list is processed, not the folder listing.
    contents = ['welcome.html','welcome2.html', 'welcome3.html']
    print(contents)
    for f in contents:
        m = re.search(r'^(.*)\.(html?)$', f)
        if m:
            print(f"html file: {m.group(1)}, extension: {m.group(2)}")
            newpage = html_file_to_page(os.path.join(folder, f), course, tags)
            if newpage is None:
                # html_file_to_page already reported why the file was unusable.
                continue
            create_new_page(course, newpage['title'], newpage['body'])
        else:
            m = re.search(r'^(.*)\.(.*)$', f)
            if m:
                print(f"other file: {m.group(1)}, extension: {m.group(2)}")
            else:
                print(f"unknown file: {f}")


# Given course, page url, and new content, upload the new revision of a page
def upload_page(course_num,pageurl,new_content):
    """Upload new body content for an existing page after console confirmation.

    course_num: Canvas course id.
    pageurl: url slug of the page, appended to the pages endpoint.
    new_content: new HTML body.

    Nothing is sent unless the user types 1 at the prompt.
    """
    print("Repaired page:\n\n")
    print(pageurl)
    t3 = url + '/api/v1/courses/' + str(course_num) + '/pages/' + pageurl
    xyz = input('Enter 1 to continue and send back to: ' + t3 + ': ')
    if xyz != '1':
        return
    page_fields = {'wiki_page[body]': new_content}
    # Send the body as form data: a whole page of HTML in the query string
    # can exceed URL length limits.
    r3 = requests.put(t3, headers=header, data=page_fields)
    print(r3)
    print('ok')


def multiple_downloads():
    """Prompt for several course ids and run course_download() on each in turn."""
    x = input("What IDs? Separate with one space: ")
    # split() without an argument ignores repeated or stray whitespace, so no
    # empty id reaches course_download() (which would prompt again).
    for id in x.split():
        course_download(id)


def fetch_support_page():
    """Fetch the support-hub page of course 20850 and print its JSON and its body."""
    course_num = 20850
    page_url = "online-student-support-hub"
    endpoint = "%s/api/v1/courses/%s/pages/%s" % (url, course_num, page_url)
    print('Getting: ' + endpoint)
    page = fetch(endpoint)
    print(json.dumps(page, indent=2))
    print(page['body'])


from courses import getCoursesInTerm

def clear_old_page(shell_id,page_name):
    """Delete every page of course `shell_id` whose title equals `page_name`."""
    listing_url = f"{url}/api/v1/courses/{shell_id}/pages"
    for page in fetch(listing_url):
        if page['title'] != page_name:
            continue
        print(f"found a page named {page_name}. Deleting it.")
        target = f"{url}/api/v1/courses/{shell_id}/pages/{page['page_id']}"
        outcome = requests.delete(target, headers=header)
        print(str(outcome))

def add_support_page_full_semester(term=289):
    """Add the support page to the courses of a term, confirming each one.

    term: term id passed to getCoursesInTerm().

    For each course the user is asked to type 1 to add the page; any other
    answer skips the course. Answering 'all' adds the page to that course and
    to every remaining one without further prompts.
    """
    print("Fetching list of all active courses")
    courses = getCoursesInTerm(term,0,0)

    confirm_each = True
    print("answer 'all' to do the rest without confirming")

    for course in courses:
        if confirm_each:
            reply = input(f"Type 1 <enter> to add support page to {course['id']} ({course['name']}) ")
            if reply == 'all':
                confirm_each = False
            elif reply != '1':
                continue
        create_support_page(course['id'])

def create_support_page(shell_id=18297):    # 29):
    """Create the "Online Student Support Hub" page in a course and link it
    as the first item of the course's first module.

    Steps:
      1. Delete any existing page with the same title (clear_old_page).
      2. PUT a new published page whose body is read from
         cache/support_min.html.
      3. Look up the course's modules; if there are none, create and publish
         a module named 'Welcome'.
      4. POST a module item at position 1 pointing at the new page.

    Args:
        shell_id: id of the course, as used in the /courses/ API path.

    Returns:
        None. Progress and problems are reported with print(). If the page
        creation response cannot be read, or no target module can be
        determined, the function stops early instead of sending a request
        that is bound to fail.
    """
    title = "Online Student Support Hub"

    # clear one of same name first.
    clear_old_page(shell_id, title)

    # make new one
    t3 = f"{url}/api/v1/courses/{shell_id}/pages/online-student-support-hub"
    # Context manager so the file handle is closed right after reading.
    with codecs.open("cache/support_min.html","r","utf-8") as html_file:
        new_content = html_file.read()
    data = {'wiki_page[body]':new_content, 'wiki_page[title]':title, 'wiki_page[published]':"true"}
    r3 = requests.put(t3, headers=header, params=data)
    #print(r3.content)

    # The page's 'url' value is required for the module item below, so an
    # unreadable or unexpected reply ends the run here rather than failing
    # later on an undefined name.
    try:
        response = r3.json()
        page_slug = response['url']
    except (ValueError, KeyError, TypeError) as e:
        print(f"Exception: {e}")
        print(f"Could not create page in course {shell_id}: {r3}")
        return

    print('Page Created')
    print(f"page id: {response.get('page_id')}")

    # list modules
    # GET /api/v1/courses/:course_id/modules
    t4 = f"{url}/api/v1/courses/{shell_id}/modules"
    modules = fetch(t4)
    module_id = 0

    # what if there are no modules?
    if len(modules) == 0:
        t6 = f"{url}/api/v1/courses/{shell_id}/modules/"
        mod_data = {'module[name]': 'Welcome', 'module[unlock_at]':"2024-01-01T06:00:00-08:00"}
        r6 = requests.post(t6, headers=header, params=mod_data)
        mod_response = r6.json()
        module_id = mod_response['id']
        print(f"created module, id: {module_id}")

        # publish module
        t7 = f"{url}/api/v1/courses/{shell_id}/modules/{module_id}"
        mod_data2 = {'module[published]':'true'}
        requests.put(t7, headers=header, params=mod_data2)

    for M in modules:
        if M['position'] == 1:
            module_id = M['id']
            print(f"found first module 1: ({module_id}) {M['name']}")
    #print(json.dumps(modules,indent=2))

    # Without a module id the item request would target /modules/0/items.
    if not module_id:
        print(f"No module at position 1 in course {shell_id}; page was created but not added to a module.")
        return

    # create module item
    # POST /api/v1/courses/:course_id/modules/:module_id/items
    t5 = f"{url}/api/v1/courses/{shell_id}/modules/{module_id}/items"
    item_data = {'module_item[title]': title, 'module_item[type]': 'Page', 'module_item[page_url]': page_slug, 'module_item[position]':1}
    requests.post(t5, headers=header, params=item_data)

    print('ok')

def list_modules_and_items(shell_id, verbose=0):
    """Return the modules of course shell_id as delivered by fetch().

    The request asks for each module's items and their content details to be
    included. When verbose is truthy the result is also printed as indented
    JSON.
    """
    endpoint = f"{url}/api/v1/courses/{shell_id}/modules?include[]=items&include[]=content_details"
    result = fetch(endpoint)

    if verbose:
        print(json.dumps(result, indent=2))

    return result

def check_modules_for_old_orientation():
    """List courses that still contain the old student-support module.

    For terms 286 and 287, every course from getCoursesInTerm() has its
    modules loaded and is checked for a module named
    'Online Student Support Services - Summer & Fall 2024'. For each hit the
    titles found inside that module are printed and the course id is
    remembered; the collected ids are printed at the end.
    """
    from util import contains_key_value, find_dict_with_key_value, extract_key_values

    old_module_name = 'Online Student Support Services - Summer & Fall 2024'
    flagged = []

    for term in (286, 287):   # wi25, sp25

        print("Fetching list of all active courses")
        term_courses = getCoursesInTerm(term,0,0)  # sp25 = 287   wi24=182

        for course in term_courses:
            print(f"{course['id']} - {course['name']}")
            course_modules = list_modules_and_items(course['id'])

            # Nothing to report unless the old module is present.
            if not contains_key_value(course_modules, 'name', old_module_name):
                continue

            old_mod = find_dict_with_key_value(course_modules, 'name', old_module_name)

            print("   this course has the old module")
            flagged.append(f"{course['id']}")
            for item_title in extract_key_values(old_mod, 'title'):
                print(f"  {item_title}")

    print("\nCheck these course ids:")
    for course_id in flagged:
        print(course_id)


def repair_ezproxy_links():
    """Print the pages whose body has lines mentioning "ezproxy".

    Despite the name, nothing is modified: this only reports candidates.
    Rows come from localcache2.pages_in_term(). Pages whose title contains
    'Online Library Services' are skipped, as are rows whose title or body
    is not a string.
    """
    from localcache2 import pages_in_term

    # A line containing "ezproxy" with a newline on both sides.
    # Compiled once instead of once per page.
    ezproxy_line = re.compile(r'\n.*ezproxy.*\n')

    # get all pages in term
    all_pages = pages_in_term()

    # c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name , wp.body
    for p in all_pages:
        course = p[1]
        title = p[4]
        page_url = p[5]     # not named `url`, which is the module-level API base
        body = p[7]

        # Rows with a missing title or body cannot be searched. Skip them
        # explicitly rather than hiding every possible error behind a
        # blanket `except Exception: pass`.
        if not isinstance(title, str) or not isinstance(body, str):
            continue

        if 'Online Library Services' in title:
            continue

        hits = ezproxy_line.findall(body)
        if hits:
            print(course, title, page_url)
            print("   ", hits, "\n")


if __name__ == "__main__":

    print ('')
    # Menu: option number -> [description, function called with no arguments].
    options = { 1: ['download a class into a folder / word file', course_download] ,
                2: ['download multiple classes', multiple_downloads ],
                3: ['convert stuff', pan_testing ],
                4: ['convert md to html', md_to_course ],
                5: ['course download tester', test_forums ],
                6: ['download all a courses pages', grab_course_pages],
               17: ['repair ezproxy links', repair_ezproxy_links],
               18: ['create pages from html files', make_pages_from_folder],
               19: ['fetch support page', fetch_support_page],
               20: ['create support page', create_support_page],
               21: ['add support page to all shells in semester', add_support_page_full_semester],
               22: ['fetch all modules / items', check_modules_for_old_orientation],
               30: ['media fetch', media_testing]
              }

    # A first command-line argument made up only of digits picks an option
    # directly. Anything else (including values like "12abc") shows the menu.
    from_cli = len(sys.argv) > 1 and re.fullmatch(r'\d+', sys.argv[1]) is not None

    if from_cli:
        resp = int(sys.argv[1])

    else:
        print ('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])

        print('')
        try:
            resp = int(input('Choose: '))
        except ValueError:
            # Non-numeric answer: exit with a message instead of a traceback.
            sys.exit("Please enter one of the option numbers.")

    # Reject numbers that are not in the menu before looking anything up.
    if resp not in options:
        sys.exit("Unknown option: %s" % resp)

    if from_cli:
        print("\n\nPerforming: %s\n\n" % options[resp][0])

    # Call the function in the options dict
    options[resp][1]()