content and outcomes update
This commit is contained in:
parent
08514ad69e
commit
0e5a62472d
64
content.py
64
content.py
|
|
@ -169,6 +169,59 @@ def extract_forums(id, course_folder, item_id_to_index, verbose=0, discussion_li
|
|||
#
|
||||
# Download everything interesting in a course to a local folder
|
||||
# Build a master file with the entire class content
|
||||
# Adjust image paths in aggregated snippets so they work from the course root.
|
||||
def adjust_fullcourse_image_sources(html_fragment):
    """Rewrite relative image references in an aggregated page snippet.

    Page snippets are collected from per-page folders into one master file at
    the course root, so a reference like ``images/foo.png`` (or
    ``./images/foo.png``) must become ``pages/images/foo.png`` to keep
    resolving.  Handles ``src``, ``data-canvas-src`` and ``srcset`` attributes
    on ``<img>`` tags.

    :param html_fragment: HTML string; empty/None is returned unchanged.
    :returns: the fragment with relative image paths re-rooted under ``pages/``.
    """
    if not html_fragment:
        return html_fragment

    def _strip_dot_slash(path):
        # Remove a single leading "./" only.  str.lstrip('./') strips a
        # character SET, so it would also eat the dots of a "../" prefix and
        # corrupt parent-relative URLs.
        return path[2:] if path.startswith('./') else path

    def _prefix_images(match):
        prefix = match.group(1)
        normalized = _strip_dot_slash(match.group(2))
        if normalized.lower().startswith('pages/'):
            return f"{prefix}{normalized}"
        return f"{prefix}pages/{normalized}"

    # src="images/..." or src="./images/..." — the optional "./" is consumed
    # outside the captured group, so group 2 always starts with "images/".
    src_pattern = re.compile(r'(<img[^>]+?\bsrc\s*=\s*[\'"])(?:\./)?(images/[^\'"]*)', re.IGNORECASE)
    html_fragment = src_pattern.sub(_prefix_images, html_fragment)

    # Canvas keeps the original URL in data-canvas-src; rewrite it the same way.
    canvas_pattern = re.compile(r'(<img[^>]+?\bdata-canvas-src\s*=\s*[\'"])(?:\./)?(images/[^\'"]*)', re.IGNORECASE)
    html_fragment = canvas_pattern.sub(_prefix_images, html_fragment)

    # srcset holds comma-separated "url [descriptor]" entries; rewrite each
    # URL individually and leave the attribute untouched if nothing changed.
    srcset_pattern = re.compile(r'(<img[^>]+?\bsrcset\s*=\s*[\'"])([^\'"]*)([\'"])', re.IGNORECASE | re.DOTALL)

    def _prefix_srcset(match):
        prefix, value, suffix = match.group(1), match.group(2), match.group(3)
        entries = []
        changed = False
        for chunk in value.split(','):
            chunk = chunk.strip()
            if not chunk:
                continue
            parts = chunk.split()
            url = parts[0]
            descriptors = parts[1:]
            normalized = _strip_dot_slash(url)
            if normalized.lower().startswith('images/'):
                new_url = f"pages/{normalized}"
                changed = True
            else:
                # Already under pages/, parent-relative ("../"), absolute, or
                # external URLs pass through untouched.
                new_url = url
            entries.append(f"{new_url} {' '.join(descriptors)}".strip())
        if not changed:
            return match.group(0)
        return f"{prefix}{', '.join(entries)}{suffix}"

    html_fragment = srcset_pattern.sub(_prefix_srcset, html_fragment)

    return html_fragment
|
||||
|
||||
def course_download(id=""):
|
||||
global items
|
||||
|
||||
|
|
@ -203,6 +256,7 @@ def course_download(id=""):
|
|||
file_local_map = {}
|
||||
discussion_local_map = {}
|
||||
module_details = []
|
||||
canvas_host = urlparse(url).hostname if url else None
|
||||
|
||||
for m in modules:
|
||||
items[running_index] = '<h2>%s</h2>%s\n' % ( m['name'], pagebreak )
|
||||
|
|
@ -352,7 +406,9 @@ def course_download(id=""):
|
|||
return mapped, canvas_override or absolute_src
|
||||
|
||||
try:
|
||||
response = requests.get(absolute_src, headers=header, stream=True, timeout=30)
|
||||
target_host = urlparse(absolute_src).hostname
|
||||
request_headers = header if not canvas_host or target_host == canvas_host else None
|
||||
response = requests.get(absolute_src, headers=request_headers, stream=True, timeout=30)
|
||||
response.raise_for_status()
|
||||
except Exception as e:
|
||||
d(f" * error downloading image {absolute_src}: {e}")
|
||||
|
|
@ -662,7 +718,7 @@ def course_download(id=""):
|
|||
|
||||
for I in items:
|
||||
if I:
|
||||
mycourse.write( I )
|
||||
mycourse.write(adjust_fullcourse_image_sources(I))
|
||||
mycourse.write("\n</body></html>")
|
||||
|
||||
|
||||
|
|
@ -684,8 +740,8 @@ def course_download(id=""):
|
|||
if video_link_list:
|
||||
mycourse.write('\n<h1>Videos Linked in Pages</h1>\n<table>')
|
||||
for V in video_link_list:
|
||||
(url, txt, pg) = V
|
||||
mycourse.write("<tr><td><a target='_blank' href='"+url+"'>"+txt+"</a></td><td> on <a target='_blank' href='" + pg + "'>" + pg + "</a></td></tr>\n")
|
||||
video_url, txt, pg = V
|
||||
mycourse.write("<tr><td><a target='_blank' href='"+video_url+"'>"+txt+"</a></td><td> on <a target='_blank' href='" + pg + "'>" + pg + "</a></td></tr>\n")
|
||||
mycourse.write("</table>\n")
|
||||
|
||||
mycourse.close()
|
||||
|
|
|
|||
116
curric2022.py
116
curric2022.py
|
|
@ -116,6 +116,7 @@ def recur_matcher(item, depth=0):
|
|||
|
||||
num_failed_course = 1
|
||||
|
||||
# Capture a single course payload for structured traversal.
|
||||
def single_course_parse(c):
|
||||
global num_failed_course
|
||||
this_course = []
|
||||
|
|
@ -129,14 +130,37 @@ def single_course_parse(c):
|
|||
ooops.close()
|
||||
num_failed_course = num_failed_course + 1
|
||||
return ("-1", [])
|
||||
|
||||
|
||||
# Normalize course file payloads so downstream code always gets a list of instances.
|
||||
def load_course_file(path):
    """Load a cached course JSON file and normalize it to a list of instances.

    The exports come in several shapes: a bare instance dict, a dict wrapping
    ``entityInstances``, a plain list of instances, or a list of wrapper
    blocks each carrying ``entityInstances``.  Downstream code always wants a
    flat list of instance dicts.

    :param path: filesystem path to the JSON file.
    :returns: list of course instances; [] on read/parse failure or if the
        payload is a non-container scalar.
    """
    try:
        # `with` closes the handle deterministically (the previous version
        # leaked it); strict=False tolerates stray control characters that
        # show up in some exported payloads.
        with codecs.open(path, 'r', 'utf-8') as fh:
            raw_data = json.loads(fh.read(), strict=False)
    except Exception as e:
        # Best-effort loader: report and let the caller continue with the
        # remaining files.
        print(f"Unable to read {path}: {e}")
        return []

    if isinstance(raw_data, dict):
        if 'entityInstances' in raw_data:
            return raw_data.get('entityInstances', [])
        return [raw_data]

    if isinstance(raw_data, list):
        # A list of wrapper blocks: flatten every block's entityInstances.
        if raw_data and isinstance(raw_data[0], dict) and 'entityInstances' in raw_data[0]:
            instances = []
            for block in raw_data:
                if isinstance(block, dict) and 'entityInstances' in block:
                    instances.extend(block.get('entityInstances', []))
            return instances
        return raw_data

    # Scalar payloads (string/number/null) are not usable course data.
    return []
|
||||
def match_style_test():
|
||||
classes = {}
|
||||
oo = codecs.open("cache/courses/curric2022test.json","w","utf-8")
|
||||
for f in os.listdir('cache/courses'):
|
||||
if re.search('classes_',f):
|
||||
if re.search(r'classes_',f):
|
||||
print(f)
|
||||
cls = json.loads(codecs.open('cache/courses/'+f,'r','utf-8').read())
|
||||
cls = load_course_file('cache/courses/'+f)
|
||||
for c in cls:
|
||||
id,output = single_course_parse(c)
|
||||
classes[id] = "\n".join(output)
|
||||
|
|
@ -170,7 +194,7 @@ def path_style_prog():
|
|||
classes = {}
|
||||
oo = codecs.open("cache/programs/allprogrampaths.txt","w","utf-8")
|
||||
for f in os.listdir('cache/programs'):
|
||||
if re.search('^programs_',f):
|
||||
if re.search(r'^programs_',f):
|
||||
print(f)
|
||||
cls = json.loads(codecs.open('cache/programs/'+f,'r','utf-8').read())
|
||||
for c in cls:
|
||||
|
|
@ -195,6 +219,30 @@ def all_outcomes():
|
|||
csvwriter = csv.writer(csvfile)
|
||||
csvwriter.writerow('code cqcourseid coursestatus termineffect dept num cqoutcomeid outcome'.split(' '))
|
||||
|
||||
# Export sorted course titles from the raw course path dump.
|
||||
def export_course_titles(source_path='cache/courses/allclasspaths.txt',
                         dest_path='cache/courses/allclasstitles.txt'):
    """Export sorted course titles from the raw course path dump.

    Scans the path-style dump for ``Course/<id>/Course Description/
    entityTitle/<title>`` lines and writes the non-empty titles,
    case-insensitively sorted, one per line to *dest_path*.

    :param source_path: path-style dump to read (defaults to the cache dump,
        so existing no-argument callers are unaffected).
    :param dest_path: output file, overwritten on each run.
    """
    pattern = re.compile(r'^Course/(\d+)/Course Description/entityTitle/(.*)$')
    titles = []

    try:
        with codecs.open(source_path, 'r', 'utf-8') as infile:
            for line in infile:
                match = pattern.match(line.strip())
                if match:
                    title = match.group(2).strip()
                    if title:  # skip entries whose title is empty/whitespace
                        titles.append(title)
    except FileNotFoundError:
        print(f"Source file not found: {source_path}")
        return

    # Case-insensitive sort so 'alpha' and 'Alpha' collate together.
    titles.sort(key=lambda s: s.lower())
    with codecs.open(dest_path, 'w', 'utf-8') as outfile:
        for title in titles:
            outfile.write(title + '\n')
|
||||
|
||||
csvfile2 = codecs.open('cache/courses/all_active_outcomes.csv','w','utf-8')
|
||||
csvwriter2 = csv.writer(csvfile2)
|
||||
csvwriter2.writerow('code cqcourseid coursestatus termineffect dept num cqoutcomeid outcome'.split(' '))
|
||||
|
|
@ -211,7 +259,7 @@ def all_outcomes():
|
|||
count = 0
|
||||
|
||||
for L in rr:
|
||||
a = re.search('Course\/(\d+)',L)
|
||||
a = re.search(r'Course/(\d+)',L)
|
||||
if a:
|
||||
course_num = a.group(1)
|
||||
#print(course_num, current_course_num)
|
||||
|
|
@ -234,25 +282,25 @@ def all_outcomes():
|
|||
current_course['c'] = course_num
|
||||
|
||||
|
||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Course\ Discipline\/(.*)$',L)
|
||||
a = re.search(r'Course/(\d+)/1/Course Description/0/Course Discipline/(.*)$',L)
|
||||
if a:
|
||||
current_course['d'] = a.group(2)
|
||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Course\ Number\/(.*)$',L)
|
||||
a = re.search(r'Course/(\d+)/1/Course Description/0/Course Number/(.*)$',L)
|
||||
if a:
|
||||
current_course['n'] = a.group(2)
|
||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Course\ Title\/(.*)$',L)
|
||||
a = re.search(r'Course/(\d+)/1/Course Description/0/Course Title/(.*)$',L)
|
||||
if a:
|
||||
current_course['T'] = a.group(2)
|
||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Short\ Title\/(.*)$',L)
|
||||
a = re.search(r'Course/(\d+)/1/Course Description/0/Short Title/(.*)$',L)
|
||||
if a:
|
||||
current_course['t'] = a.group(2)
|
||||
a = re.search('Course\ Description\/status\/(.*)$',L)
|
||||
a = re.search(r'Course Description/status/(.*)$',L)
|
||||
if a:
|
||||
current_course['s'] = a.group(1)
|
||||
a = re.search('Course\ Content\/\d+\/Lecture\ Content\/Curriculum\ Approval\ Date:\s*(.*)$',L)
|
||||
a = re.search(r'Course Content/\d+/Lecture Content/Curriculum Approval Date:\s*(.*)$',L)
|
||||
if a:
|
||||
current_course['a'] = a.group(1)
|
||||
a = re.search('Course\ Description\/\d+\/Internal\ Processing\ Term\/(.*)$',L)
|
||||
a = re.search(r'Course Description/\d+/Internal Processing Term/(.*)$',L)
|
||||
if a:
|
||||
t_code = term_txt_to_code(a.group(1))
|
||||
current_course['m'] = t_code
|
||||
|
|
@ -262,20 +310,20 @@ def all_outcomes():
|
|||
|
||||
# Course/3091/1/Course Description/0/Internal Processing Term/Spring 2018
|
||||
|
||||
a = re.search('Learning\ Outcomes\/\d+\/(cqid_\d+)\/Learning\ Outcomes\/Description\/(.*)$',L)
|
||||
a = re.search(r'Learning Outcomes/\d+/(cqid_\d+)/Learning Outcomes/Description/(.*)$',L)
|
||||
if a:
|
||||
current_course['o'].append(a.group(2))
|
||||
current_course['i'] = a.group(1)
|
||||
csvwriter.writerow([current_course['d']+current_course['n'], current_course_num, current_course['s'], current_course['m'], current_course['d'], current_course['n'], current_course['i'], a.group(2)])
|
||||
csvwriter2.writerow([current_course['d']+current_course['n'], current_course_num, current_course['s'], current_course['m'], current_course['d'], current_course['n'], current_course['i'], a.group(2)])
|
||||
if current_course['s']=='Active':
|
||||
csvwriter2.writerow([current_course['d']+current_course['n'], current_course_num, current_course['s'], current_course['m'], current_course['d'], current_course['n'], current_course['i'], a.group(2)])
|
||||
|
||||
|
||||
if re.search('Learning\ Outcomes\/Description\/',L):
|
||||
if re.search(r'Learning Outcomes/Description/',L):
|
||||
ww.write(L)
|
||||
if re.search('Description\/entityTitle\/',L):
|
||||
if re.search(r'Description/entityTitle/',L):
|
||||
ww.write(L)
|
||||
if re.search('Description\/status\/',L):
|
||||
if re.search(r'Description/status/',L):
|
||||
ww.write(L)
|
||||
|
||||
xx = codecs.open("cache/courses/course_cq_index.json","w", "utf-8")
|
||||
|
|
@ -601,20 +649,20 @@ def course_path_style_2_html():
|
|||
active_courses = {}
|
||||
|
||||
lookup_table = { 'entityTitle':'title', 'proposalType':'type',
|
||||
'\/Course\sDescription\/status':'status', 'Course\sDiscipline':'dept',
|
||||
'Course\sNumber':'number', 'Course\sTitle':'name', 'Course Description\/\d\/Justification':'justification',
|
||||
'Short\sTitle':'shortname', 'Course Description\/\d\/Internal\sProcessing\sTerm':'term', 'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
|
||||
'\/Course\sDescription\/\d+\/Course\sDescription\/':'desc',
|
||||
'Minimum\sUnits':'min_units', 'Minimum\sLecture\sHour':'min_lec_hour', 'Minimum\sLab\sHour':'min_lab_hour', 'Course\shas\svariable\shours':'has_var_hours',
|
||||
'Number\sWeeks':'weeks',
|
||||
'Maximum\sUnits':'max_units', 'Credit\sStatus':'credit_status',
|
||||
'TOP\sCode':'top_code', 'Classification':'classification', 'Non\sCredit\sCategory':'noncredit_category', 'Stand-Alone\sClass?':'stand_alone',
|
||||
'Grade\sOption':'grade_option', 'Is\sRepeatable':'repeatable', 'Learning\sOutcomes\/Description':'slo',
|
||||
'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sState\sUniversities\sand\sColleges?':'transfer_csu',
|
||||
'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sUniversity\sof\sCalifornia?':'transfer_uc',
|
||||
'\/Catalog\sCourse\sSummary\sView\/':'catalog',
|
||||
'\/Course\sContent/\d+/Lecture\sContent\/':'content',
|
||||
'\/ASSIST\sPreview\/\d+\/Outcomes\sand\sObjectives\/':'objectives'}
|
||||
r'/Course\sDescription/status':'status', r'Course\sDiscipline':'dept',
|
||||
r'Course\sNumber':'number', r'Course\sTitle':'name', r'Course Description/\d/Justification':'justification',
|
||||
r'Short\sTitle':'shortname', r'Course Description/\d/Internal\sProcessing\sTerm':'term', r'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
|
||||
r'/Course\sDescription/\d+/Course\sDescription/':'desc',
|
||||
r'Minimum\sUnits':'min_units', r'Minimum\sLecture\sHour':'min_lec_hour', r'Minimum\sLab\sHour':'min_lab_hour', r'Course\shas\svariable\shours':'has_var_hours',
|
||||
r'Number\sWeeks':'weeks',
|
||||
r'Maximum\sUnits':'max_units', r'Credit\sStatus':'credit_status',
|
||||
r'TOP\sCode':'top_code', r'Classification':'classification', r'Non\sCredit\sCategory':'noncredit_category', r'Stand-Alone\sClass\?':'stand_alone',
|
||||
r'Grade\sOption':'grade_option', r'Is\sRepeatable':'repeatable', r'Learning\sOutcomes/Description':'slo',
|
||||
r'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sState\sUniversities\sand\sColleges\?':'transfer_csu',
|
||||
r'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sUniversity\sof\sCalifornia\?':'transfer_uc',
|
||||
r'/Catalog\sCourse\sSummary\sView/':'catalog',
|
||||
r'/Course\sContent/\d+/Lecture\sContent/':'content',
|
||||
r'/ASSIST\sPreview/\d+/Outcomes\sand\sObjectives/':'objectives'}
|
||||
|
||||
for C in sorted(list(course_prebuild.keys()),key=int):
|
||||
v = 0
|
||||
|
|
@ -817,9 +865,9 @@ def path_style_test():
|
|||
classes = {}
|
||||
oo = codecs.open("cache/courses/allclasspaths.txt","w","utf-8")
|
||||
for f in os.listdir('cache/courses'):
|
||||
if re.search('^classes_',f):
|
||||
if re.search(r'^classes_',f):
|
||||
print(f)
|
||||
cls = json.loads(codecs.open('cache/courses/'+f,'r','utf-8').read(),strict=False)
|
||||
cls = load_course_file('cache/courses/'+f)
|
||||
for c in cls:
|
||||
id,output = single_course_path_parse(c)
|
||||
classes[id] = "\n".join(output)
|
||||
|
|
@ -976,6 +1024,7 @@ if __name__ == "__main__":
|
|||
6: ['extract de info from class paths', de_classpaths],
|
||||
7: ['build schedule or summary for SLO planning', slo_summary_report],
|
||||
8: ['remove deactivated courses', filter_classes],
|
||||
9: ['export sorted course titles', export_course_titles],
|
||||
10: ['fetch all programs', fetch_all_programs],
|
||||
11: ['process all programs', path_style_prog],
|
||||
12: ['programs - path style to html catalog', path_style_2_html],
|
||||
|
|
@ -997,4 +1046,3 @@ if __name__ == "__main__":
|
|||
|
||||
# Call the function in the options dict
|
||||
options[ int(resp)][1]()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue