content and outcomes update
This commit is contained in:
parent
08514ad69e
commit
0e5a62472d
64
content.py
64
content.py
|
|
@ -169,6 +169,59 @@ def extract_forums(id, course_folder, item_id_to_index, verbose=0, discussion_li
|
||||||
#
|
#
|
||||||
# Download everything interesting in a course to a local folder
|
# Download everything interesting in a course to a local folder
|
||||||
# Build a master file with the entire class content
|
# Build a master file with the entire class content
|
||||||
|
# Adjust image paths in aggregated snippets so they work from the course root.
|
||||||
|
def adjust_fullcourse_image_sources(html_fragment):
    """Re-root relative ``images/...`` URLs in ``<img>`` tags under ``pages/``.

    Rewrites the ``src``, ``data-canvas-src`` and ``srcset`` attributes of
    ``<img>`` tags so that a URL of the form ``images/...`` or ``./images/...``
    becomes ``pages/images/...``. URLs that already start with ``pages/``,
    absolute URLs, root-relative URLs (``/images/...``) and parent-relative
    URLs (``../images/...``) are left untouched.

    Args:
        html_fragment: HTML text to adjust. Falsy values (``None``, ``""``)
            are returned unchanged.

    Returns:
        The fragment with the matching image URLs prefixed by ``pages/``.
    """
    if not html_fragment:
        return html_fragment

    def _prefix_images(match):
        # Group 2 always starts with "images/" (the pattern guarantees it) and
        # any leading "./" is consumed outside the groups, so just prepend.
        return f"{match.group(1)}pages/{match.group(2)}"

    # The attribute name must be preceded by whitespace. A bare ``\bsrc`` also
    # matches the tail of ``data-canvas-src``; when that attribute came first
    # in a tag, the real ``src`` of the same tag was never rewritten.
    for attr in ('src', 'data-canvas-src'):
        attr_pattern = re.compile(
            r'(<img\b[^>]*?\s' + re.escape(attr) + r'\s*=\s*[\'"])(?:\./)?(images/[^\'"]*)',
            re.IGNORECASE,
        )
        html_fragment = attr_pattern.sub(_prefix_images, html_fragment)

    srcset_pattern = re.compile(
        r'(<img[^>]+?\bsrcset\s*=\s*[\'"])([^\'"]*)([\'"])',
        re.IGNORECASE | re.DOTALL,
    )

    def _prefix_srcset(match):
        entries = []
        changed = False
        for chunk in match.group(2).split(','):
            parts = chunk.split()
            if not parts:
                continue
            url, descriptors = parts[0], parts[1:]
            # Drop one leading "./" only. str.lstrip('./') strips a *set* of
            # characters and would also eat "../" and "/", re-rooting
            # parent-relative and root-relative URLs by mistake.
            normalized = url[2:] if url.startswith('./') else url
            if normalized.lower().startswith('images/'):
                url = f"pages/{normalized}"
                changed = True
            entries.append(' '.join([url, *descriptors]))
        if not changed:
            # Keep the attribute byte-identical when nothing needed rewriting.
            return match.group(0)
        return f"{match.group(1)}{', '.join(entries)}{match.group(3)}"

    return srcset_pattern.sub(_prefix_srcset, html_fragment)
|
||||||
|
|
||||||
def course_download(id=""):
|
def course_download(id=""):
|
||||||
global items
|
global items
|
||||||
|
|
||||||
|
|
@ -203,6 +256,7 @@ def course_download(id=""):
|
||||||
file_local_map = {}
|
file_local_map = {}
|
||||||
discussion_local_map = {}
|
discussion_local_map = {}
|
||||||
module_details = []
|
module_details = []
|
||||||
|
canvas_host = urlparse(url).hostname if url else None
|
||||||
|
|
||||||
for m in modules:
|
for m in modules:
|
||||||
items[running_index] = '<h2>%s</h2>%s\n' % ( m['name'], pagebreak )
|
items[running_index] = '<h2>%s</h2>%s\n' % ( m['name'], pagebreak )
|
||||||
|
|
@ -352,7 +406,9 @@ def course_download(id=""):
|
||||||
return mapped, canvas_override or absolute_src
|
return mapped, canvas_override or absolute_src
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.get(absolute_src, headers=header, stream=True, timeout=30)
|
target_host = urlparse(absolute_src).hostname
|
||||||
|
request_headers = header if not canvas_host or target_host == canvas_host else None
|
||||||
|
response = requests.get(absolute_src, headers=request_headers, stream=True, timeout=30)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
d(f" * error downloading image {absolute_src}: {e}")
|
d(f" * error downloading image {absolute_src}: {e}")
|
||||||
|
|
@ -662,7 +718,7 @@ def course_download(id=""):
|
||||||
|
|
||||||
for I in items:
|
for I in items:
|
||||||
if I:
|
if I:
|
||||||
mycourse.write( I )
|
mycourse.write(adjust_fullcourse_image_sources(I))
|
||||||
mycourse.write("\n</body></html>")
|
mycourse.write("\n</body></html>")
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -684,8 +740,8 @@ def course_download(id=""):
|
||||||
if video_link_list:
|
if video_link_list:
|
||||||
mycourse.write('\n<h1>Videos Linked in Pages</h1>\n<table>')
|
mycourse.write('\n<h1>Videos Linked in Pages</h1>\n<table>')
|
||||||
for V in video_link_list:
|
for V in video_link_list:
|
||||||
(url, txt, pg) = V
|
video_url, txt, pg = V
|
||||||
mycourse.write("<tr><td><a target='_blank' href='"+url+"'>"+txt+"</a></td><td> on <a target='_blank' href='" + pg + "'>" + pg + "</a></td></tr>\n")
|
mycourse.write("<tr><td><a target='_blank' href='"+video_url+"'>"+txt+"</a></td><td> on <a target='_blank' href='" + pg + "'>" + pg + "</a></td></tr>\n")
|
||||||
mycourse.write("</table>\n")
|
mycourse.write("</table>\n")
|
||||||
|
|
||||||
mycourse.close()
|
mycourse.close()
|
||||||
|
|
|
||||||
114
curric2022.py
114
curric2022.py
|
|
@ -116,6 +116,7 @@ def recur_matcher(item, depth=0):
|
||||||
|
|
||||||
num_failed_course = 1
|
num_failed_course = 1
|
||||||
|
|
||||||
|
# Capture a single course payload for structured traversal.
|
||||||
def single_course_parse(c):
|
def single_course_parse(c):
|
||||||
global num_failed_course
|
global num_failed_course
|
||||||
this_course = []
|
this_course = []
|
||||||
|
|
@ -130,13 +131,36 @@ def single_course_parse(c):
|
||||||
num_failed_course = num_failed_course + 1
|
num_failed_course = num_failed_course + 1
|
||||||
return ("-1", [])
|
return ("-1", [])
|
||||||
|
|
||||||
|
# Normalize course file payloads so downstream code always gets a list of instances.
|
||||||
|
def load_course_file(path):
    """Load a cached course JSON file and return its course instances as a list.

    Accepted payload shapes:

    * a dict with an ``entityInstances`` key -> that key's value;
    * any other dict -> a one-element list holding the dict;
    * a list whose first element is a dict with ``entityInstances`` -> the
      concatenation of ``entityInstances`` from every such block;
    * any other list -> the list itself.

    Anything else, including an unreadable or unparsable file, yields ``[]``.
    A JSON ``null`` under ``entityInstances`` is treated as an empty list.

    Args:
        path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A list of course instances; empty on error.
    """
    try:
        # Context manager so the handle is closed even when parsing fails.
        with codecs.open(path, 'r', 'utf-8') as infile:
            raw_data = json.loads(infile.read(), strict=False)
    except Exception as e:
        # Deliberately best-effort: report the bad file and keep going.
        print(f"Unable to read {path}: {e}")
        return []

    def _instances(block):
        # "or []" guards against an explicit null under entityInstances.
        return block.get('entityInstances') or []

    if isinstance(raw_data, dict):
        if 'entityInstances' in raw_data:
            return _instances(raw_data)
        return [raw_data]

    if isinstance(raw_data, list):
        first_is_block = (
            bool(raw_data)
            and isinstance(raw_data[0], dict)
            and 'entityInstances' in raw_data[0]
        )
        if not first_is_block:
            return raw_data
        instances = []
        for block in raw_data:
            if isinstance(block, dict) and 'entityInstances' in block:
                instances.extend(_instances(block))
        return instances

    return []
|
||||||
def match_style_test():
|
def match_style_test():
|
||||||
classes = {}
|
classes = {}
|
||||||
oo = codecs.open("cache/courses/curric2022test.json","w","utf-8")
|
oo = codecs.open("cache/courses/curric2022test.json","w","utf-8")
|
||||||
for f in os.listdir('cache/courses'):
|
for f in os.listdir('cache/courses'):
|
||||||
if re.search('classes_',f):
|
if re.search(r'classes_',f):
|
||||||
print(f)
|
print(f)
|
||||||
cls = json.loads(codecs.open('cache/courses/'+f,'r','utf-8').read())
|
cls = load_course_file('cache/courses/'+f)
|
||||||
for c in cls:
|
for c in cls:
|
||||||
id,output = single_course_parse(c)
|
id,output = single_course_parse(c)
|
||||||
classes[id] = "\n".join(output)
|
classes[id] = "\n".join(output)
|
||||||
|
|
@ -170,7 +194,7 @@ def path_style_prog():
|
||||||
classes = {}
|
classes = {}
|
||||||
oo = codecs.open("cache/programs/allprogrampaths.txt","w","utf-8")
|
oo = codecs.open("cache/programs/allprogrampaths.txt","w","utf-8")
|
||||||
for f in os.listdir('cache/programs'):
|
for f in os.listdir('cache/programs'):
|
||||||
if re.search('^programs_',f):
|
if re.search(r'^programs_',f):
|
||||||
print(f)
|
print(f)
|
||||||
cls = json.loads(codecs.open('cache/programs/'+f,'r','utf-8').read())
|
cls = json.loads(codecs.open('cache/programs/'+f,'r','utf-8').read())
|
||||||
for c in cls:
|
for c in cls:
|
||||||
|
|
@ -195,6 +219,30 @@ def all_outcomes():
|
||||||
csvwriter = csv.writer(csvfile)
|
csvwriter = csv.writer(csvfile)
|
||||||
csvwriter.writerow('code cqcourseid coursestatus termineffect dept num cqoutcomeid outcome'.split(' '))
|
csvwriter.writerow('code cqcourseid coursestatus termineffect dept num cqoutcomeid outcome'.split(' '))
|
||||||
|
|
||||||
|
# Export sorted course titles from the raw course path dump.
|
||||||
|
def export_course_titles(source_path='cache/courses/allclasspaths.txt',
                         dest_path='cache/courses/allclasstitles.txt'):
    """Write the course titles found in a class-path dump, sorted, one per line.

    Reads ``source_path`` line by line and keeps the text after
    ``Course/<digits>/Course Description/entityTitle/`` on each matching line.
    Empty titles are skipped. The titles are sorted case-insensitively and
    written to ``dest_path``. Duplicates are kept.

    Args:
        source_path: UTF-8 text file of path-style course lines. Defaults to
            the course cache dump.
        dest_path: File to (over)write with the sorted titles.

    Returns:
        None. If ``source_path`` does not exist, a message is printed and
        ``dest_path`` is not touched.
    """
    pattern = re.compile(r'^Course/(\d+)/Course Description/entityTitle/(.*)$')
    titles = []

    try:
        with codecs.open(source_path, 'r', 'utf-8') as infile:
            for line in infile:
                match = pattern.match(line.strip())
                if not match:
                    continue
                title = match.group(2).strip()
                if title:
                    titles.append(title)
    except FileNotFoundError:
        print(f"Source file not found: {source_path}")
        return

    titles.sort(key=str.lower)
    with codecs.open(dest_path, 'w', 'utf-8') as outfile:
        outfile.writelines(title + '\n' for title in titles)
|
||||||
|
|
||||||
csvfile2 = codecs.open('cache/courses/all_active_outcomes.csv','w','utf-8')
|
csvfile2 = codecs.open('cache/courses/all_active_outcomes.csv','w','utf-8')
|
||||||
csvwriter2 = csv.writer(csvfile2)
|
csvwriter2 = csv.writer(csvfile2)
|
||||||
csvwriter2.writerow('code cqcourseid coursestatus termineffect dept num cqoutcomeid outcome'.split(' '))
|
csvwriter2.writerow('code cqcourseid coursestatus termineffect dept num cqoutcomeid outcome'.split(' '))
|
||||||
|
|
@ -211,7 +259,7 @@ def all_outcomes():
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for L in rr:
|
for L in rr:
|
||||||
a = re.search('Course\/(\d+)',L)
|
a = re.search(r'Course/(\d+)',L)
|
||||||
if a:
|
if a:
|
||||||
course_num = a.group(1)
|
course_num = a.group(1)
|
||||||
#print(course_num, current_course_num)
|
#print(course_num, current_course_num)
|
||||||
|
|
@ -234,25 +282,25 @@ def all_outcomes():
|
||||||
current_course['c'] = course_num
|
current_course['c'] = course_num
|
||||||
|
|
||||||
|
|
||||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Course\ Discipline\/(.*)$',L)
|
a = re.search(r'Course/(\d+)/1/Course Description/0/Course Discipline/(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
current_course['d'] = a.group(2)
|
current_course['d'] = a.group(2)
|
||||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Course\ Number\/(.*)$',L)
|
a = re.search(r'Course/(\d+)/1/Course Description/0/Course Number/(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
current_course['n'] = a.group(2)
|
current_course['n'] = a.group(2)
|
||||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Course\ Title\/(.*)$',L)
|
a = re.search(r'Course/(\d+)/1/Course Description/0/Course Title/(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
current_course['T'] = a.group(2)
|
current_course['T'] = a.group(2)
|
||||||
a = re.search('Course\/(\d+)\/1\/Course\ Description\/0\/Short\ Title\/(.*)$',L)
|
a = re.search(r'Course/(\d+)/1/Course Description/0/Short Title/(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
current_course['t'] = a.group(2)
|
current_course['t'] = a.group(2)
|
||||||
a = re.search('Course\ Description\/status\/(.*)$',L)
|
a = re.search(r'Course Description/status/(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
current_course['s'] = a.group(1)
|
current_course['s'] = a.group(1)
|
||||||
a = re.search('Course\ Content\/\d+\/Lecture\ Content\/Curriculum\ Approval\ Date:\s*(.*)$',L)
|
a = re.search(r'Course Content/\d+/Lecture Content/Curriculum Approval Date:\s*(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
current_course['a'] = a.group(1)
|
current_course['a'] = a.group(1)
|
||||||
a = re.search('Course\ Description\/\d+\/Internal\ Processing\ Term\/(.*)$',L)
|
a = re.search(r'Course Description/\d+/Internal Processing Term/(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
t_code = term_txt_to_code(a.group(1))
|
t_code = term_txt_to_code(a.group(1))
|
||||||
current_course['m'] = t_code
|
current_course['m'] = t_code
|
||||||
|
|
@ -262,20 +310,20 @@ def all_outcomes():
|
||||||
|
|
||||||
# Course/3091/1/Course Description/0/Internal Processing Term/Spring 2018
|
# Course/3091/1/Course Description/0/Internal Processing Term/Spring 2018
|
||||||
|
|
||||||
a = re.search('Learning\ Outcomes\/\d+\/(cqid_\d+)\/Learning\ Outcomes\/Description\/(.*)$',L)
|
a = re.search(r'Learning Outcomes/\d+/(cqid_\d+)/Learning Outcomes/Description/(.*)$',L)
|
||||||
if a:
|
if a:
|
||||||
current_course['o'].append(a.group(2))
|
current_course['o'].append(a.group(2))
|
||||||
current_course['i'] = a.group(1)
|
current_course['i'] = a.group(1)
|
||||||
csvwriter.writerow([current_course['d']+current_course['n'], current_course_num, current_course['s'], current_course['m'], current_course['d'], current_course['n'], current_course['i'], a.group(2)])
|
csvwriter2.writerow([current_course['d']+current_course['n'], current_course_num, current_course['s'], current_course['m'], current_course['d'], current_course['n'], current_course['i'], a.group(2)])
|
||||||
if current_course['s']=='Active':
|
if current_course['s']=='Active':
|
||||||
csvwriter2.writerow([current_course['d']+current_course['n'], current_course_num, current_course['s'], current_course['m'], current_course['d'], current_course['n'], current_course['i'], a.group(2)])
|
csvwriter2.writerow([current_course['d']+current_course['n'], current_course_num, current_course['s'], current_course['m'], current_course['d'], current_course['n'], current_course['i'], a.group(2)])
|
||||||
|
|
||||||
|
|
||||||
if re.search('Learning\ Outcomes\/Description\/',L):
|
if re.search(r'Learning Outcomes/Description/',L):
|
||||||
ww.write(L)
|
ww.write(L)
|
||||||
if re.search('Description\/entityTitle\/',L):
|
if re.search(r'Description/entityTitle/',L):
|
||||||
ww.write(L)
|
ww.write(L)
|
||||||
if re.search('Description\/status\/',L):
|
if re.search(r'Description/status/',L):
|
||||||
ww.write(L)
|
ww.write(L)
|
||||||
|
|
||||||
xx = codecs.open("cache/courses/course_cq_index.json","w", "utf-8")
|
xx = codecs.open("cache/courses/course_cq_index.json","w", "utf-8")
|
||||||
|
|
@ -601,20 +649,20 @@ def course_path_style_2_html():
|
||||||
active_courses = {}
|
active_courses = {}
|
||||||
|
|
||||||
lookup_table = { 'entityTitle':'title', 'proposalType':'type',
|
lookup_table = { 'entityTitle':'title', 'proposalType':'type',
|
||||||
'\/Course\sDescription\/status':'status', 'Course\sDiscipline':'dept',
|
r'/Course\sDescription/status':'status', r'Course\sDiscipline':'dept',
|
||||||
'Course\sNumber':'number', 'Course\sTitle':'name', 'Course Description\/\d\/Justification':'justification',
|
r'Course\sNumber':'number', r'Course\sTitle':'name', r'Course Description/\d/Justification':'justification',
|
||||||
'Short\sTitle':'shortname', 'Course Description\/\d\/Internal\sProcessing\sTerm':'term', 'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
|
r'Short\sTitle':'shortname', r'Course Description/\d/Internal\sProcessing\sTerm':'term', r'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
|
||||||
'\/Course\sDescription\/\d+\/Course\sDescription\/':'desc',
|
r'/Course\sDescription/\d+/Course\sDescription/':'desc',
|
||||||
'Minimum\sUnits':'min_units', 'Minimum\sLecture\sHour':'min_lec_hour', 'Minimum\sLab\sHour':'min_lab_hour', 'Course\shas\svariable\shours':'has_var_hours',
|
r'Minimum\sUnits':'min_units', r'Minimum\sLecture\sHour':'min_lec_hour', r'Minimum\sLab\sHour':'min_lab_hour', r'Course\shas\svariable\shours':'has_var_hours',
|
||||||
'Number\sWeeks':'weeks',
|
r'Number\sWeeks':'weeks',
|
||||||
'Maximum\sUnits':'max_units', 'Credit\sStatus':'credit_status',
|
r'Maximum\sUnits':'max_units', r'Credit\sStatus':'credit_status',
|
||||||
'TOP\sCode':'top_code', 'Classification':'classification', 'Non\sCredit\sCategory':'noncredit_category', 'Stand-Alone\sClass?':'stand_alone',
|
r'TOP\sCode':'top_code', r'Classification':'classification', r'Non\sCredit\sCategory':'noncredit_category', r'Stand-Alone\sClass\?':'stand_alone',
|
||||||
'Grade\sOption':'grade_option', 'Is\sRepeatable':'repeatable', 'Learning\sOutcomes\/Description':'slo',
|
r'Grade\sOption':'grade_option', r'Is\sRepeatable':'repeatable', r'Learning\sOutcomes/Description':'slo',
|
||||||
'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sState\sUniversities\sand\sColleges?':'transfer_csu',
|
r'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sState\sUniversities\sand\sColleges\?':'transfer_csu',
|
||||||
'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sUniversity\sof\sCalifornia?':'transfer_uc',
|
r'Is\sThis\sCourse\sis\sRecommended\sfor\sTransfer\sto\sUniversity\sof\sCalifornia\?':'transfer_uc',
|
||||||
'\/Catalog\sCourse\sSummary\sView\/':'catalog',
|
r'/Catalog\sCourse\sSummary\sView/':'catalog',
|
||||||
'\/Course\sContent/\d+/Lecture\sContent\/':'content',
|
r'/Course\sContent/\d+/Lecture\sContent/':'content',
|
||||||
'\/ASSIST\sPreview\/\d+\/Outcomes\sand\sObjectives\/':'objectives'}
|
r'/ASSIST\sPreview/\d+/Outcomes\sand\sObjectives/':'objectives'}
|
||||||
|
|
||||||
for C in sorted(list(course_prebuild.keys()),key=int):
|
for C in sorted(list(course_prebuild.keys()),key=int):
|
||||||
v = 0
|
v = 0
|
||||||
|
|
@ -817,9 +865,9 @@ def path_style_test():
|
||||||
classes = {}
|
classes = {}
|
||||||
oo = codecs.open("cache/courses/allclasspaths.txt","w","utf-8")
|
oo = codecs.open("cache/courses/allclasspaths.txt","w","utf-8")
|
||||||
for f in os.listdir('cache/courses'):
|
for f in os.listdir('cache/courses'):
|
||||||
if re.search('^classes_',f):
|
if re.search(r'^classes_',f):
|
||||||
print(f)
|
print(f)
|
||||||
cls = json.loads(codecs.open('cache/courses/'+f,'r','utf-8').read(),strict=False)
|
cls = load_course_file('cache/courses/'+f)
|
||||||
for c in cls:
|
for c in cls:
|
||||||
id,output = single_course_path_parse(c)
|
id,output = single_course_path_parse(c)
|
||||||
classes[id] = "\n".join(output)
|
classes[id] = "\n".join(output)
|
||||||
|
|
@ -976,6 +1024,7 @@ if __name__ == "__main__":
|
||||||
6: ['extract de info from class paths', de_classpaths],
|
6: ['extract de info from class paths', de_classpaths],
|
||||||
7: ['build schedule or summary for SLO planning', slo_summary_report],
|
7: ['build schedule or summary for SLO planning', slo_summary_report],
|
||||||
8: ['remove deactivated courses', filter_classes],
|
8: ['remove deactivated courses', filter_classes],
|
||||||
|
9: ['export sorted course titles', export_course_titles],
|
||||||
10: ['fetch all programs', fetch_all_programs],
|
10: ['fetch all programs', fetch_all_programs],
|
||||||
11: ['process all programs', path_style_prog],
|
11: ['process all programs', path_style_prog],
|
||||||
12: ['programs - path style to html catalog', path_style_2_html],
|
12: ['programs - path style to html catalog', path_style_2_html],
|
||||||
|
|
@ -997,4 +1046,3 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# Call the function in the options dict
|
# Call the function in the options dict
|
||||||
options[ int(resp)][1]()
|
options[ int(resp)][1]()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue