import requests,json,os,re, bisect, csv, codecs
import sortedcontainers as sc
from collections import defaultdict
from toolz.itertoolz import groupby
#from docx.shared import Inches
#from docx import Document
#import docx
from durable.lang import *
from durable.engine import *
from pampy import match, _
from bs4 import BeautifulSoup as bs
import pandas as pd
import sys, locale, re
from pipelines import getSemesterSchedule
from secrets import cq_url, cq_user, cq_pasw
#sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
# Trace log for raw API responses; dbg() and another_request() append to it.
TRACING = codecs.open('cache/progdebug.txt','w','utf-8')
# Base query string for the curriculum API's course listing.
param = "?method=getCourses"
def dbg(x):
    """Append one ' + value' line to the TRACING debug log when tracing is on."""
    if TRACING:
        TRACING.write(' + %s\n' % str(x))
# Semesters of interest, newest first (sp=spring, su=summer, fa=fall).
sems = ['sp20','fa19', 'su19','sp19']
# Output-file page counter; NOTE the fetch_* functions shadow this with a local.
filen = 1
def another_request(url, startat):
    """Fetch one page of API results beginning at record number ``startat``.

    The raw response text is appended to the TRACING log. Returns a tuple
    (result-set size, last record number, entity list); on any write or
    JSON-parse failure the problem is reported and (0, 0, []) is returned.
    """
    global cq_user, cq_pasw, TRACING
    paged_url = url + "&skip=" + str(startat)
    print(paged_url)
    resp = requests.get(paged_url, auth=(cq_user, cq_pasw))
    try:
        TRACING.write(resp.text + "\n\n")
        TRACING.flush()
        payload = json.loads(resp.text)
    except Exception as err:
        print("Couldn't read that last bit")
        print(resp.text)
        print(err)
        return 0, 0, []
    meta = payload['resultSetMetadata']
    size = meta['ResultSetSize']
    endn = meta['EndResultNum']
    items = payload['entityInstances']
    print(' Got ' + str(size) + ' instances, ending at item number ' + str(endn))
    return size, endn, items
def fetch_all_classes():
    """Page through the getCourses API and dump each page of entities to
    cache/courses/classes_<n>.txt as indented JSON.

    Pages until a short page (< 100 items) signals the end of the result set.
    """
    global cq_url, param
    size = 100
    endn = 0
    filen = 1
    while size > 99:
        size, endn, items = another_request(cq_url + param, endn)
        # with-block guarantees the page file is closed/flushed even if dumps fails
        with open('cache/courses/classes_' + str(filen) + '.txt', 'w') as out:
            out.write(json.dumps(items, indent=2))
        filen += 1
    print("Written to 'cache/classes....")
def fetch_all_programs():
    """Page through the getPrograms API (Active only) and dump each page to
    cache/programs/programs_<n>.txt as indented JSON.

    Pages until a short page (< 100 items) signals the end of the result set.
    """
    global cq_url
    size = 100
    endn = 0
    filen = 1
    param = "?returnFormat=json&method=getPrograms&status=Active"
    while size > 99:
        size, endn, items = another_request(cq_url + param, endn)
        # with-block guarantees the page file is closed/flushed even if dumps fails
        with open('cache/programs/programs_' + str(filen) + '.txt', 'w') as out:
            out.write(json.dumps(items, indent=4))
        filen += 1
    print("Written to 'cache/programs....")
def sortable_class(li):
    """Build a lexicographically sortable key for a course row.

    li[1] is the department, li[2] the course number, which may carry a
    letter prefix ('AB12' -> number 12, prefix dropped) or a letter suffix
    ('12B' -> number 12, suffix kept). The number is zero-padded to three
    digits so string order matches numeric order.

    Returns e.g. 'MATH012B'. Mutates li[2] for one known bad record.
    """
    dept = li[1]
    rest = ''
    print(li)
    # data-entry error workaround: this one course is stored as "ASTR 1L"
    if li[2] == "ASTR 1L":
        li[2] = "1L"
    prefixed = re.match(r'([A-Za-z]+)(\d+)', li[2])
    if prefixed:
        # a letter prefix is discarded; only the numeric part is kept
        num = int(prefixed.group(2))
    else:
        suffixed = re.match(r'(\d+)([A-Za-z]+)$', li[2])
        if suffixed:
            num = int(suffixed.group(1))
            rest = suffixed.group(2)
        else:
            num = int(li[2])
    # zfill(3) replaces the original manual '00'/'0' padding chain
    return dept + str(num).zfill(3) + rest
def c_name(c):
    """Flatten one CurricUNET course entity into a tuple of course facts.

    Walks the entity's rootSections and extracts discipline, number, title,
    description, the unit range, distance-education delivery modes, the
    hybrid-online percentage and SLO descriptions.

    Returns:
        (id, dept, num, active, title, min_units, max_units,
         'mode1/mode2', hybrid_pct, description, [slo, ...])

    NOTE(review): dept/num/title/desc are only bound when the matching
    fields exist in the record; a malformed record raises NameError at the
    return -- presumably the data always has them. TODO confirm.
    """
    delivery = set()
    units = []        # accumulates [min_units, max_units]
    slos = []
    hybridPct = ''
    active = 'Active'
    course_id = c['entityMetadata']['entityId']
    if c['entityMetadata']['status'] != 'Active':
        active = 'Inactive'
    for r in c['entityFormData']['rootSections']:
        if r['attributes']['sectionName'] == 'Course Description':
            for ss in r['subsections']:
                for f in ss['fields']:
                    if f['attributes']['fieldName'] == 'Course Discipline':
                        dept = f['lookUpDisplay']
                    if f['attributes']['fieldName'] == 'Course Number':
                        num = f['fieldValue']
                    if f['attributes']['fieldName'] == 'Course Title':
                        title = f['fieldValue']
                    if f['attributes']['fieldName'] == 'Course Description':
                        # flatten embedded newlines so rows stay one line each
                        desc = re.sub(r'\n', ' ', f['fieldValue'])
        if r['attributes']['sectionName'] == 'Units/Hours/Status':
            for ss in r['subsections']:
                if ss['attributes']['sectionName'] == '':
                    for f in ss['fields']:
                        if f['attributes']['fieldName'] == 'Minimum Units' and f['fieldValue'] not in units:
                            units.insert(0, f['fieldValue'])
                        if f['attributes']['fieldName'] == 'Maximum Units' and f['fieldValue'] and f['fieldValue'] not in units:
                            units.append(f['fieldValue'])
        # Newer entered courses record delivery in a dedicated nested section
        if r['attributes']['sectionName'] == 'Distance Education Delivery':
            for ss in r['subsections']:
                if ss['attributes']['sectionName'] == 'Distance Education Delivery':
                    for ssa in ss['subsections']:
                        for f in ssa['fields']:
                            if f['attributes']['fieldName'] == 'Delivery Method':
                                delivery.add(f['lookUpDisplay'])
                if ss['attributes']['sectionName'] == "":
                    if ss['fields'][0]['attributes']['fieldName'] == "If this course is Hybrid, what percent is online?":
                        hybridPct = str(ss['fields'][0]['fieldValue'])
        # Older ones seem to record it as a method-of-instruction value
        if r['attributes']['sectionName'] == 'Distance Education':
            for ss in r['subsections']:
                for f2 in ss['fields']:
                    if 'fieldName' in f2['attributes'] and f2['attributes']['fieldName'] == 'Methods of Instruction':
                        if f2['fieldValue'] == 'Dist. Ed Internet Delayed':
                            delivery.add('Online')
        # Student learning outcomes
        if r['attributes']['sectionName'] == 'Student Learning Outcomes':
            for ss in r['subsections']:
                if 'subsections' in ss:
                    if ss['attributes']['sectionName'] == 'Learning Outcomes':
                        for s3 in ss['subsections']:
                            for ff in s3['fields']:
                                if ff['attributes']['fieldName'] == 'Description':
                                    slos.append(ff['fieldValue'])
    if len(units) == 1:
        units.append('')
    if len(delivery) == 0:
        delivery.add('')
    u0 = 0
    try:
        u0 = units[0]
    except IndexError:
        pass
    u1 = 0
    try:
        # BUG FIX: was units[2], which is always out of range (units never
        # holds more than two entries), so max units was silently always 0.
        u1 = units[1]
    except IndexError:
        pass
    return course_id, dept, num, active, title, u0, u1, '/'.join(delivery), hybridPct, desc, slos
def show_classes(createoutput=1):
    """Pick the newest version of every cached course and write dept files.

    Reads every cache/courses/classes_*.txt page, keeps only the highest
    entity-id version of each course key, then (unless createoutput is
    falsy) writes:
      - cache/courses/index.json : sorted department list
      - cache/courses/<dept>.json: that department's course rows
      - cache/courses/slos.json  : SLO lists for the included course ids
    """
    max_active = {}          # course key -> highest entity id seen so far
    used_course = {}         # course key -> the course row we will actually use
    slo_by_id = {}           # entity id -> list of SLO strings
    slo_by_id_included = {}  # only the ids of active / most recent versions
    for f in os.listdir('cache/courses'):
        if re.search('classes_', f):
            print(f)
            with open('cache/courses/' + f, 'r') as fh:
                cls = json.loads(fh.read())
            for c in cls:
                dir_data = list(c_name(c))
                slo_by_id[dir_data[0]] = dir_data[10]
                info = list(map(str, dir_data[:10]))
                info.append(dir_data[10])
                course_key = sortable_class(info)
                curqnt_id = int(info[0])
                # newer CurricUNET versions get larger ids; keep only the newest
                if course_key in max_active and curqnt_id < max_active[course_key]:
                    continue
                max_active[course_key] = curqnt_id
                used_course[course_key] = info
    if not createoutput:
        return 1
    # now we have the ideal version of each course
    ordered = sc.SortedList(key=sortable_class)
    for key, crs in list(used_course.items()):
        ordered.add(crs)
    by_dept = groupby(1, ordered)
    with open('cache/courses/index.json', 'w') as t:
        t.write(json.dumps(sorted(by_dept.keys())))
    with open('cache/courses/slos.json', 'w') as u:
        for d in list(by_dept.keys()):
            # (removed a dead try/except here: it indexed slo_by_id[int(d[0])],
            # but int() of a department letter always raised, so it never ran)
            with open('cache/courses/' + d.lower() + '.json', 'w') as s:
                s.write(json.dumps(by_dept[d], indent=2))
            for c in by_dept[d]:
                ss = slo_by_id[int(c[0])]
                slo_by_id_included[int(c[0])] = ss
        u.write(json.dumps(slo_by_id_included, indent=2))
def clean_d_name(d):
    """Normalize a department/program name into a filename-safe slug."""
    lowered = d.lower()
    # strip punctuation first, then collapse runs of spaces/hyphens to '_'
    no_punct = re.sub(r'[\&\(\)\.\/\:]', '', lowered)
    return re.sub(r'[\s\-]+', '_', no_punct)
def show_programs():
    """Group cached program pages by department and write per-dept JSON files
    plus cache/programs/index.json.

    NOTE(review): relies on prog_take_4() and organize_programs_stage2(),
    which are not defined in this portion of the file -- presumably they
    live elsewhere in the module; confirm before running.
    """
    allprogs = defaultdict(list)     # dept name -> list of program dicts
    # seed with Liberal Arts so the dept exists even if no program claims it
    dept_index = set([('Liberal Arts','liberal_arts'),])
    prog_index = defaultdict(list)   # dept name -> sorted (title, type, path) tuples
    for f in os.listdir('cache/programs'):
        if re.search('programs_',f):
            print(f)
            pro = json.loads(open('cache/programs/'+f,'r').read())
            for c in pro:
                this_prog = prog_take_4(c)
                # default missing fields instead of dropping the program
                if not 'dept' in this_prog: this_prog['dept'] = 'Liberal Arts'
                if not 'type' in this_prog:
                    this_prog['type'] = ''
                this_prog['key'] = clean_d_name(this_prog['title']+'_'+this_prog['type'])
                dept_index.add( (this_prog['dept'],clean_d_name(this_prog['dept'] )) )
                # insort keeps each department's program list sorted as we insert
                bisect.insort(prog_index[this_prog['dept']], (this_prog['title'], this_prog['type'], clean_d_name(this_prog['dept'])+'/'+clean_d_name(this_prog['title'])+'/'+clean_d_name(this_prog['type'])))
                allprogs[this_prog['dept']].append( this_prog )
    # one JSON file per department, programs sorted by title
    for D,li in list(allprogs.items()):
        dept = clean_d_name(D)
        s = open('cache/programs/'+dept+'.json','w')
        s.write( json.dumps(sorted(li,key=lambda x: x['title']),indent=2) )
        s.close()
    s = open('cache/programs/index.json','w')
    s.write( json.dumps({'departments':sorted(list(dept_index)), 'programs':prog_index}, indent=2) )
    s.close()
    organize_programs_stage2( )
def dd():
    """Factory for arbitrarily nested (autovivifying) defaultdicts."""
    return defaultdict(dd)
def organize_courses():
    """Collapse the per-department course JSON files into one nested dict
    (dept -> course number -> course record) and write cache/courses_org.json.

    Each input row is a positional list matching ``keys``; it is zipped into
    a labelled dict. Also prints delivery mode, hybrid pct and title per row.
    """
    keys = "id,dept,num,active,title,low_unit,hi_unit,is_online,hybrid_pct,desc,slos".split(",")
    depts = defaultdict(dd)
    for f in os.listdir('cache/courses'):
        # index/slos are outputs of show_classes, not per-dept course files
        if f == 'index.json':
            continue
        if f == 'slos.json':
            continue
        with open('cache/courses/' + f, 'r') as u:
            w = json.loads(u.read())
        for A in w:
            # zip the positional row against the column names (was a manual
            # index-counter loop)
            course = dict(zip(keys, A))
            depts[course['dept']][course['num']] = course
            print((A[7], "\t", A[8], "\t", A[4]))
    with open('cache/courses_org.json', 'w') as o:
        o.write(json.dumps(depts, indent=2))
def check_de():
    """Print delivery mode, hybrid pct and title for every cached course row."""
    for fname in os.listdir('cache/courses'):
        # skip the two index files produced by show_classes
        if fname in ('index.json', 'slos.json'):
            continue
        handle = open('cache/courses/' + fname, 'r')
        rows = json.loads(handle.read())
        for row in rows:
            print((row[7], "\t", row[8], "\t", row[4]))
def clean_programs():
    """Interactively audit the requirement phrases attached to program blocks.

    Loads req_phrases.txt ("phrase|action|count"), the degrees-attained CSV
    and cache/programs.json, counts how often each phrase appears, and for
    any phrase coded 'q' shows its context and prompts the operator.
    """
    req_what_do = {}                   # phrase -> one-letter action code
    last_times_seen = {}               # phrase -> count recorded in the file
    req_times_seen = defaultdict(int)  # phrase -> occurrences seen this run
    with open('req_phrases.txt', 'r') as fh:
        for raw in fh:
            parts = raw.strip().split('|')
            req_what_do[parts[0]] = parts[1]
            req_times_seen[parts[0]] = 0
            last_times_seen[parts[0]] = parts[2]
    attained = csv.DictReader(open("cache/degrees_attained.csv"))
    att_keys = [row['Program'] for row in attained]
    progs = json.loads(open('cache/programs.json', 'r').read())
    # phrases that describe requirements
    # (BUG FIX: was `Set()` -- a Python 2 leftover that raised NameError)
    reqs = set()
    prog_keys = []
    for k in progs:
        if not 'title' in k or not 'type' in k or not 'dept' in k:
            pass  # malformed program record -- skip silently
        else:
            ty = re.sub('Degree', '', k['type'])
            ty = re.sub(r'\.', '', ty)
            prog_title = k['dept'] + ": " + k['title'] + " " + ty
            prog_keys.append(prog_title)
            for b in k['blocks']:
                rule = ''
                if 'courses' in b and len(b['courses']):
                    if 'rule' in b and not b['rule'] == ' ':
                        reqs.add(b['rule'])
                        rule = b['rule']
                        req_times_seen[rule] += 1
                        if req_what_do[rule] == 'q':
                            print(("\nIn Program: " + prog_title))
                            print(("What does this rule mean? " + rule))
                            print(("(I see it " + last_times_seen[rule] + " times.)"))
                            for C in b['courses']: print((" " + C))
                            # BUG FIX: was eval(input()) -- a 2to3 artifact
                            # that executed whatever the operator typed
                            z = input()
                    for c in b['courses']:
                        # embedded headers also act as requirement phrases
                        if re.search('header2', c):
                            parts = c.split('|')
                            reqs.add(parts[1])
                            rule = parts[1]
                            req_times_seen[rule] += 1
                            if req_what_do[rule] == 'q':
                                print(("\nIn Program: " + prog_title))
                                print(("What does this rule mean? " + rule))
                                print(("(I see it " + last_times_seen[rule] + " times.)"))
                                for C in b['courses']: print((" " + C))
                                z = input()
    # Action-code legend:
    # q - ask whats up with this rule     u1 - u99 - that many units required (minimum; ignore max)
    # n1..n4 - that many classes required a - all of them
    # x - ignore   s - special or more logic needed   e - recommended electives
def course_lil_format(s):
# "02-125706|THEA12B - Acting II 3.000 *Historical*"
parts = s.split('|')
parts2 = parts[1].split(' - ')
parts3 = parts2[1].split(' ')[0:-3]
return parts2[0], parts3 ### code, name
def header_lil_format(s):
    """Return the human-readable text after the '|' in a header line like
    "04-125802header2|Choose 2 courses from following list:"."""
    return s.split('|')[1]
def organize_programs():
    """First-pass program audit: expand each program's blocks into a text
    worksheet (program_worksheet.txt) with the action code for every rule,
    the grouped course codes, and a per-program summary of rule codes.

    Prompts the operator (action code 'q') when a rule's meaning is unknown.
    Prints totals of programs with and without special ('s') logic.
    """
    req_what_do = {'': 'x'}            # phrase -> action code; blank means ignore
    last_times_seen = {}
    req_times_seen = defaultdict(int)
    num_programs = 0
    num_w_special_logic = 0
    num_okay = 0
    fout = open('program_worksheet.txt', 'w')
    with open('req_phrases.txt', 'r') as fh:
        for raw in fh:
            parts = raw.strip().split('|')
            req_what_do[parts[0]] = parts[1]
            req_times_seen[parts[0]] = 0
            last_times_seen[parts[0]] = parts[2]
    progs = json.loads(open('cache/programs.json', 'r').read())
    prog_keys = []
    output = ''
    for k in progs:
        rule_sequence = []
        if not 'title' in k or not 'type' in k or not 'dept' in k:
            pass  # malformed program record -- skip silently
        else:
            num_programs += 1
            ty = re.sub('Degree', '', k['type'])
            ty = re.sub(r'\.', '', ty)
            prog_title = k['dept'] + ": " + k['title'] + " " + ty
            output += "\n" + prog_title + "\n"
            prog_keys.append(prog_title)
            for b in k['blocks']:
                rule = ''
                if 'courses' in b and len(b['courses']):
                    if 'rule' in b and not b['rule'] == ' ':
                        rule = b['rule']
                        output += " Rule: (" + req_what_do[rule] + ") " + b['rule'] + "\n"
                        rule_sequence.append(req_what_do[rule])
                        req_times_seen[rule] += 1
                        if req_what_do[rule] == 'q':
                            print(("\nIn Program: " + prog_title))
                            print(("What does this rule mean? " + rule))
                            print(("(I see it " + last_times_seen[rule] + " times.)"))
                            for C in b['courses']: print((" " + C))
                            # BUG FIX: was eval(input()) -- a 2to3 artifact
                            # that executed whatever the operator typed
                            z = input()
                    # Walk courses newest-header-first; each header closes out
                    # the miniblock accumulated so far.
                    miniblocks = []
                    this_miniblock = {'courses': [], 'header': ''}
                    for c in sorted(b['courses'])[::-1]:
                        if re.search('header2', c):
                            parts = c.split('|')
                            if this_miniblock['courses'] or req_what_do[this_miniblock['header']] != 'x':
                                miniblocks.append(this_miniblock)
                                rule_sequence.append(req_what_do[this_miniblock['header']])
                            rule = parts[1]
                            this_miniblock = {'header': rule, 'courses': []}
                            req_times_seen[rule] += 1
                            if req_what_do[rule] == 'q':
                                print(("\nIn Program: " + prog_title))
                                print(("What does this rule mean? " + rule))
                                print(("(I see it " + last_times_seen[rule] + " times.)"))
                                for C in b['courses']: print((" " + C))
                                z = input()  # was eval(input()) -- see above
                        else:
                            code, name = course_lil_format(c)
                            this_miniblock['courses'].append(code)
                    # headerless trailing courses are listed inline
                    if not this_miniblock['header']:
                        output += " "
                        for ccc in this_miniblock['courses']:
                            output += ccc + " "
                        output += "\n"
                    # final course, final miniblock append
                    if this_miniblock['courses']:
                        miniblocks.append(this_miniblock)
                        rule_sequence.append(req_what_do[this_miniblock['header']])
                    if miniblocks:
                        for m in miniblocks:
                            if m['header']:
                                # NOTE(review): `rule` here is whatever rule or
                                # header was processed last, not necessarily
                                # m['header'] -- preserved as-is; confirm intent.
                                output += " Miniblock rule: (" + req_what_do[rule] + ") " + m['header'] + "\n"
                            output += " "
                            for c in m['courses']:
                                output += c + " "
                            output += "\n"
            if 's' in rule_sequence:
                num_w_special_logic += 1
            else:
                num_okay += 1
            output += " Summary: [" + " ".join(rule_sequence) + " ]" + "\n"
    fout.write(output)
    fout.close()  # ensure the worksheet is flushed (original never closed it)
    print(("Number of programs: " + str(num_programs)))
    print(("Number without special logic: " + str(num_okay)))
    print(("Number with special logic: " + str(num_w_special_logic)))
# Key for the list
# q - ask whats up with this rule
# n1 - single class required
# n2 - two classes required
# n3
# n4
# u1 - u99 - that many units required (minimum. ignore max)
# a - all of them
# x - ignore
# s - special or more logic needed
# e - recommended electives
def divide_courses_list(li, rwd, online):
    """Split a block's course lines into groups, one per header line.

    li: raw course/header lines; rwd: header text -> rule code;
    online: course code -> online marker ('' when never online).
    Returns a list of lists; entries whose rule code is 'x' are dropped.
    """
    groups = []
    current = []
    for entry in sorted(li):
        if re.search('header2', entry):
            # a header starts a new group; flush the one in progress first
            if current:
                groups.append(current)
            current = []
            text = entry.split('|')[1]            # inlined header_lil_format
            entry = rwd[text] + ": " + text
        else:
            detail = entry.split('|')[1]          # inlined course_lil_format
            entry = detail.split(' - ')[0]
            if online[entry]:
                entry = entry + " " + online[entry]
        # rule code 'x' means "ignore this line"
        if entry[0] != 'x':
            current.append(entry)
    groups.append(current)
    return groups
def organize_programs2():
    """Second-pass worksheet: write each program's rule-coded course groups.

    Builds a course-code -> is_online lookup from cache/courses_org.json
    (indexed under both 'DEPT NUM' and 'DEPTNUM'), dumps the lookup to
    cache/classes_online.json, then writes cache/program_worksheet.txt with
    one section per program listing every non-ignored block's rule code and
    its grouped course lists.
    """
    with open('req_phrases.txt', 'r') as fh:
        re_list = fh.readlines()
    with open('cache/courses_org.json', 'r') as fh:
        classes = json.loads(fh.read())
    classes_bycode = {}
    for d in list(classes.keys()):
        for c in list(classes[d].keys()):
            # index under both spaced and compact spellings of the code
            classes_bycode[d + " " + c] = classes[d][c]['is_online']
            classes_bycode[d + c] = classes[d][c]['is_online']
    req_what_do = {'': 'x', ' ': 'x'}  # blank rules mean "ignore"
    last_times_seen = {}
    req_times_seen = defaultdict(int)
    num_programs = 0
    num_w_special_logic = 0
    num_okay = 0
    fout = open('cache/program_worksheet.txt', 'w')
    with open('cache/classes_online.json', 'w') as cout:
        cout.write(json.dumps(classes_bycode))
    for L in re_list:
        L = L.strip()
        parts = L.split('|')
        req_what_do[parts[0]] = parts[1]
        req_times_seen[parts[0]] = 0
        last_times_seen[parts[0]] = parts[2]
    progs = json.loads(open('cache/programs.json', 'r').read())
    output = ''
    for k in progs:
        if not 'title' in k or not 'type' in k or not 'dept' in k:
            pass  # malformed program record -- skip silently
        else:
            num_programs += 1
            ty = re.sub('Degree', '', k['type'])
            ty = re.sub(r'\.', '', ty)
            prog_title = k['dept'] + ": " + k['title'] + " " + ty
            output += "\n" + prog_title + "\n"
            for b in sorted(k['blocks'], key=lambda x: x['order']):
                if 'courses' in b and len(b['courses']) and 'rule' in b and req_what_do[b['rule']] != 'x':
                    output += " " + req_what_do[b['rule']] + ": " + b['rule'] + "\n"
                    output += json.dumps(divide_courses_list(b['courses'], req_what_do, classes_bycode), indent=2) + "\n"
    # NOTE: a large Python-2-era duplicate of organize_programs() used to sit
    # here inside a dead triple-quoted string expression; removed -- it never ran.
    fout.write(output)
    fout.close()  # ensure the worksheet is flushed (original never closed it)
    print(("Number of programs: " + str(num_programs)))
    # num_okay / num_w_special_logic were only updated by the removed dead
    # code, so (as before) these always report 0.
    print(("Number without special logic: " + str(num_okay)))
    print(("Number with special logic: " + str(num_w_special_logic)))
# Key for the list
# q - ask whats up with this rule
# n1 - single class required
# n2 - two classes required
# n3
# n4
# u1 - u99 - that many units required (minimum. ignore max)
# a - all of them
# x - ignore
# s - special or more logic needed
# e - recommended electives
# sorting by order key of dict
def cmp_2(a):
    """Sort key: a block's catalog 'order' field."""
    return a['order']
def cmp_order(a, b):
    """Old-style three-way comparator on the 'order' field: -1, 0 or 1."""
    if a['order'] == b['order']:
        return 0
    return 1 if a['order'] > b['order'] else -1
# decipher the grouped up courses line
def split_course(st):
    """Decipher a packed course line.

    e.g. "01-127153|SOC1A - Introduction to Sociology 3.000 *Active*"
    Header lines return just their text; course lines return a dict with
    cn_code / code / name / units / status / or; unparseable lines return 0.
    """
    if 'header2' in st:
        return st.split("|")[1]
    parsed = re.search(r'^(.*)\|(.+?)\s-\s(.+?)\s([\d|\.|\s|\-]+)\s+(\*.+\*)([\s\|\sOR]*)$', st)
    if not parsed:
        print("*** Didn't match that class")
        return 0
    course_name = parsed.group(3)
    # normalize unit strings: 3.000 -> 3, .500 -> .5
    unit_text = re.sub(r'(\d)\.000', r'\1', parsed.group(4))
    unit_text = re.sub(r'\.500', r'.5', unit_text)
    if unit_text == '1500 3 ':
        unit_text = 3
        course_name += " 1500"  # hack for HIST 4
    return {'cn_code': parsed.group(1), 'code': parsed.group(2), 'name': course_name,
            'units': unit_text, 'status': parsed.group(5), 'or': parsed.group(6)}
# Any number gets an X (checked). Blank or zero gets no check.
def units_to_x(u):
    """Checkbox mark: 'X' for any truthy unit count, a blank otherwise."""
    return 'X' if u else ' '
def p_block_rule(r,printme,doc,out=0):
    """Emit a block-rule row into the report and the document.

    r: rule text. printme: truthy -> write a 5-column header row (rule,
    Units, and the three term columns) on a new or existing table; falsy ->
    append a spacer row plus a labelled row to the document's last table.
    doc: document object with .tables/.add_table (python-docx style --
    NOTE(review): the docx imports at the top of the file are commented
    out, so confirm where this object comes from).
    out: optional text-report file handle (0 = no text output).
    """
    if printme:
        if out: out.write("\t".join([r,'Units','Spring 19','Summer 19','Fall 19']) + "\n")
        # reuse the last table if one already exists, else start a new grid
        if not len(doc.tables):
            t = doc.add_table(1, 5, style='Table Grid')
        else:
            t = doc.tables[-1]
        t.rows[0].cells[0].text = r
        t.rows[0].cells[1].text = 'Units'
        t.rows[0].cells[2].text = 'Spring 19'
        t.rows[0].cells[3].text = 'Summer 19'
        t.rows[0].cells[4].text = 'Fall 19'
    else:
        if out: out.write("\t" + r + "\n")
        # two add_row calls: the first acts as a blank spacer row,
        # the second receives the rule label
        t = doc.tables[-1].add_row()
        t = doc.tables[-1].add_row()
        t.cells[0].text = r
def p_cert_header(type,doc,r='',out=0):
    """Add a degree/cert heading (level 2) and start its 5-column table.

    type: degree type text (shadows the builtin); r: optional rule text
    appended in parens; out: optional text-report handle.

    NOTE(review): `Inches` is undefined at runtime -- the
    `from docx.shared import Inches` at the top of the file is commented
    out, so the width line raises NameError when this is called. Confirm
    the intended import before using.
    """
    if out: out.write("DEGREE: " + type + " (" + r + ")" + "\n")
    if r: doc.add_heading(type + " (" + r + ")", 2)
    else: doc.add_heading(type , 2)
    t = doc.add_table(1, 5, style='Table Grid')
    t.rows[0].cells[0].width = Inches(3.0)
def p_block_header(r, doc, out=0):
    """Append a spacer row plus a header row labelled ``r`` to the last
    table of ``doc``; mirror the header into the text report when ``out``."""
    doc.tables[-1].add_row()            # blank spacer row
    labelled = doc.tables[-1].add_row()
    labelled.cells[0].text = r
    if out:
        out.write("\t" + r + "\n")
def p_cert_course_missing(cd, doc, out=0):
    """Row for a course with no section history: code/name plus units,
    with the three term columns left blank."""
    label = cd['code'] + " - " + cd['name']
    if out:
        out.write(label + "\t" + cd['units'] + "\n")
    row = doc.tables[-1].add_row()
    row.cells[0].text = label
    row.cells[1].text = cd['units']
def p_cert_course(cd, history, doc, out=0):
    """Full course row: code/name, units, and an 'X' mark per term the
    course ran online (per ``history``'s sp19/su19/fa19 fields)."""
    label = cd['code'] + " - " + cd['name']
    if out:
        marks = "\t" + units_to_x(history['sp19']) + "\t" \
            + units_to_x(history['su19']) + "\t" + units_to_x(history['fa19'])
        out.write(label + "\t" + cd['units'] + marks + "\n")
    row = doc.tables[-1].add_row()
    row.cells[0].text = label
    if cd['or']:
        row.cells[0].text += " OR "     # flag alternative-course chains
    row.cells[1].text = str(cd['units'])
    row.cells[2].text = units_to_x(history['sp19'])
    row.cells[3].text = units_to_x(history['su19'])
    row.cells[4].text = units_to_x(history['fa19'])
def p_end_block(out=0):
    """Blank separator line between blocks in the text report (no doc output)."""
    if out:
        out.write("\n")
def p_end_cert(bigdoc, out=0):
    """Finish one cert/degree: blank lines in the text report, page break
    in the document."""
    if out:
        out.write("\n\n\n")
    bigdoc.add_page_break()
def ask_for_rule(r):
    """Interactively classify an unknown requirement phrase.

    Prints the action-code legend, prompts the operator for a code, appends
    "<phrase>|<code>|1" to cache/req_phrases.txt and returns the code.
    """
    print(("Can't find this rule: " + r))
    print("""Possible answers:
# q - ask whats up with this rule # u1 - u99 - that many units required (minimum. ignore max)
# n1 - single class required a - all of them
# n2 - two classes required x - ignore
# n3 s - special or more logic needed
# n4 e - recommended electives""")
    answer = input("What should it be? ").strip()
    # append mode preserves existing phrases; with-block guarantees the flush
    with open("cache/req_phrases.txt", "a+", encoding="utf-8") as f:
        f.write("\n" + r + "|" + answer + "|1")
    return answer
def action_to_english(a):
    """Translate a rule action code into display text.

    Fixed codes: x (ignore -> 0), e, s, a. Parameterized codes: uNN
    (minimum units) and nNN (minimum courses). Unknown codes return 0.
    """
    fixed = {
        'x': 0,
        'e': 'Electives',
        's': 'More logic needed / Special rule',
        'a': "Required - Complete ALL of the following courses:",
    }
    if a in fixed:
        return fixed[a]
    m = re.search(r'^([a-z])([\d\.]+)$', a)
    if m:
        letter, count = m.group(1), m.group(2)
        if letter == 'u':
            return "Choose %s units from the following courses: " % count
        if letter == 'n':
            return "Choose %s courses from the following: " % count
    return 0
# block = { rule, num } and courses is a DataFrame
# Return True if the courses satisfy the rule
def check_a_block(b, courses, verbose=False):
    """Decide whether block ``b``'s ticked course rows satisfy its rule.

    b: dict with 'rule', 'englrule' and 'courses' (rows where row[2] is the
    unit count and row[3] is truthy when the course counts); 'num' holds the
    threshold for the min_* rules. ``courses`` is accepted but never read.
    Returns True/False for all/min_units/min_courses, 1 for elective or
    special blocks, and True (with a warning) for anything unrecognized.
    """
    indent = " "
    if verbose:
        print((indent + "Trying the rule: " + b['englrule']))
    kind = b['rule']
    if kind == 'all':
        # every row must be ticked
        for row in b['courses']:
            if verbose:
                print(row)
            if not row[3]:
                if verbose:
                    print((indent + "Failed."))
                return False
        return True
    if kind == 'min_units':
        target = float(b['num'])
        total = 0.0
        for row in b['courses']:
            if row[3]:
                total += row[2]
        return total >= target
    if kind == 'min_courses':
        target = float(b['num'])
        ticked = 0
        for row in b['courses']:
            if row[3]:
                ticked += 1
        if not ticked >= target:
            if verbose:
                print((indent + "Failed."))
        return ticked >= target
    if kind in ['elective', 'special']:
        return 1
    print("I didn't understand the rule")
    return True
def read_block_english_to_code():
    """Load cache/req_phrases.txt into {phrase: [action_code, times_seen]}.

    Each line is "phrase|code|count".
    """
    blockrules = {}
    # with-block closes the handle promptly (the original leaked it)
    with open('cache/req_phrases.txt', 'r', encoding='utf-8') as fh:
        for line in fh:
            parts = line.strip().split('|')
            blockrules[parts[0]] = [parts[1], parts[2]]
    return blockrules
def read_section_online_history():
    """Load cache/one_year_course_modes.csv indexed by course code and add a
    'was_online' column (1 if the course ran online in any of the three
    terms, else 0).

    NOTE(review): assumes the sp19/su19/fa19 columns hold numeric section
    counts whose truthiness means "offered online that term" -- confirm
    against whatever writes this CSV (see the todo below).
    """
    sections = pd.read_csv('cache/one_year_course_modes.csv') # todo: this file depends on other fxns. which?
    # the unnamed first CSV column is the course code; use it as the index
    sections.set_index('Unnamed: 0',inplace=True)
    sections.sort_values('Unnamed: 0', inplace=True)
    for i, course in sections.iterrows():
        # truthy in any term -> course counts as having been offered online
        if course['sp19'] or course['su19'] or course['fa19']:
            sections.loc[i,'was_online'] = 1
        else:
            sections.loc[i,'was_online'] = 0
    return sections
# Use an easy data structure (dataframes and dicts) and functions that operate on them.
# This is the 3rd attempt.
def simple_find_online_programs():
    """Report, per program, which required courses recently ran online.

    Third attempt at this pipeline, using plain dataframes and dicts:
      1. Gather: the per-course online-section history, the rule-phrase ->
         action-code table, and the department list from the program index.
      2. First pass: for each department file, walk each program's blocks in
         catalog order; parse every course line, attach its was_online flag,
         and collect each block into a DataFrame (printed for inspection).
      3. (Not implemented here) a second pass would emit documents/web output.

    Output is currently print-only.
    """
    # 1. Gathering data
    section_history = read_section_online_history() # a dataframe indexed by course codename.
    blockrules = read_block_english_to_code()
    alldepts = [x[1] for x in json.loads( open('cache/programs/index.json','r').read() )['departments']]
    # todo: courses with a HYPHEN in NAME get parsed wrong.
    # 2. First pass: Process blocks, certs.
    for prog in alldepts:
        fname = 'cache/programs/'+prog+'.json'
        print(("Reading %s" % fname))
        inp = open(fname,'r')
        filedata = inp.read()
        p_info = json.loads(filedata)
        for p in p_info:
            print((" "+p['dept'] + "\t" + p['type'] + "\t" + p['title']))
            b = p['blocks']
            b.sort(key=cmp_2)  # blocks come back unordered; sort by catalog 'order'
            for block in b:
                if 'rule' in block:
                    ### RIGHT HERE - fxn to extract block to DF
                    print((" " + block['rule']))
                    for_df = []
                    for crs in block['courses']:
                        c_data = split_course(crs)
                        if type(c_data) is dict:
                            # normalize 'MATH 1A' -> 'MATH1A' to match the history index
                            c_data['code'] = re.sub(r'\s','',c_data['code'])
                            try:
                                c_data['was_online'] = section_history.loc[ c_data['code'] , 'was_online' ]
                            except KeyError:
                                # no section history for this course -- assume not online
                                c_data['was_online'] = 0
                            for_df.append(c_data)
                        else:
                            # split_course returned a header string or 0 (parse failure)
                            print((" ", c_data))
                    if len(for_df):
                        this_df = pd.DataFrame(for_df)
                        print(this_df)
    #input("\n\nPress enter to continue...\n\n")
def check_a_block_a(b, verbose=False):
    """Variant of check_a_block without the unused ``courses`` parameter.

    b: dict with 'rule', 'englrule' and 'courses' (rows where row[2] is the
    unit count and row[3] is truthy when the course counts); 'num' holds the
    threshold for the min_* rules. Returns True/False for the counting
    rules, 1 for elective/special, True (with a warning) otherwise.
    """
    indent = " "
    if verbose:
        print((indent + "Trying the rule: " + b['englrule']))
    kind = b['rule']
    if kind == 'all':
        # every row must be ticked
        for row in b['courses']:
            if verbose:
                print(row)
            if not row[3]:
                if verbose:
                    print((indent + "Failed."))
                return False
        return True
    if kind == 'min_units':
        target = float(b['num'])
        total = 0.0
        for row in b['courses']:
            if row[3]:
                total += row[2]
        return total >= target
    if kind == 'min_courses':
        target = float(b['num'])
        ticked = 0
        for row in b['courses']:
            if row[3]:
                ticked += 1
        if not ticked >= target:
            if verbose:
                print((indent + "Failed."))
        return ticked >= target
    if kind in ['elective', 'special']:
        return 1
    print("I didn't understand the rule")
    return True
def smart_find_online_programs():
big_block_list = []
with ruleset('curriculum'):
# COURSES in BLOCKS
@when_all( (m.relationship == 'contains') & (+m.course) )
def show_contained_class(c):
#print( str(c.m.block) + " is a block that contains " \
# + str(c.m.course) + " with " + str(c.m.units) + " units" )
pass
# BLOCK Rule/Condition with and without numbers
@when_all( (+m.blockrule) & (+m.number) )
def show_block(c):
#print( str(c.m.block) + " is a block that needs " + str(c.m.blockrule) + " of " + c.m.number )
big_block_list.append( [ c.m.block, "rule", c.m.blockrule, c.m.number, c.m.englrule ] )
@when_all( (+m.blockrule) & (-m.number) )
def show_block(c):
#print( str(c.m.block) + " is a block that needs " + str(c.m.blockrule) )
print(("++RULE: " + str(c.m)))
big_block_list.append( [ c.m.block, "rule", c.m.blockrule, 0, c.m.englrule ] )
# Has course historically been OFFERED ONLINE
@when_all(m.sem1>0 or m.sem2>0 or m.sem3>0)
def is_online(c):
#print("Class counts as online: " + str(c.m.course))
c.assert_fact('curriculum', { 'course': c.m.course, 'status': 'was_offered_online', 'value': True })
# Or NEVER ONLINE
@when_all(m.sem1==0 and m.sem2==0 and m.sem3==0)
def is_online(c):
#print("Class was never online: " + str(c.m.course))
c.assert_fact('curriculum', { 'course': c.m.course, 'status': 'was_offered_online', 'value': False })
# Has course in the block OFFERED ONLINE?
@when_all( c.zero << +m.blockrule,
c.first << (m.relationship == 'contains') & (m.block==c.zero.block),
c.second << (m.course == c.first.course ) & (m.status == 'was_offered_online') & (m.value==True) )
def is_online_inblock(c):
#print(" and it was online! " + c.first.block + " / " + c.second.course)
#print(c.first.block + "\t" + c.first.course['code'] + "\t Yes online")
print(" Yes online")
big_block_list.append( [ c.first.block, c.first.course, c.first.units, True, c.first ] )
# Has course in the block *NOT OFFERED ONLINE?
@when_all( c.three << +m.blockrule,
c.four << (m.relationship == 'contains') & (m.block==c.three.block),
c.five << (m.course == c.four.course ) & (m.status == 'was_offered_online') & (m.value==False) )
def is_online_inblock(c):
#print(" and it was online! " + c.four.block + " / " + c.five.course)
#print(c.first.block + "\t" + c.first.course['code'] + "\t NOT online")
print(" NOT online")
big_block_list.append( [ c.four.block, c.four.course, c.four.units, False, c.four ] )
sections = pd.read_csv('cache/one_year_course_modes.csv')
sections.set_index('Unnamed: 0',inplace=True)
sections.sort_values('Unnamed: 0', inplace=True)
alldepts = [x[1] for x in json.loads( open('cache/programs/index.json','r').read() )['departments']]
#history = sections.df.to_dict('index')
print('starting...')
for i, course in sections.iterrows():
try:
assert_fact('curriculum', { 'course': str(i), 'sem1': int(course['sp19']), 'sem2': int(course['su19']), 'sem3':int(course['fa19']) })
except Exception as e:
pass
blockrules = {}
for L in open('cache/req_phrases.txt','r',encoding='utf-8').readlines():
parts = L.strip().split('|')
blockrules[ parts[0] ] = [ parts[1], parts[2] ]
blockindex = 0
for prog in alldepts:
p_info = json.loads(open('cache/programs/'+prog+'.json','r').read())
for p in p_info:
deg_longname = p['dept'] + ' - ' + p['type'] + ' - ' + p['title']
print(deg_longname)
big_block_list.append( [ deg_longname ] )
for block in sorted(p['blocks'],key=cmp_2):
if not 'rule' in block: continue
# Look up code for what is needed with this block of classes.
the_rule = block['rule'].strip()
if not the_rule in blockrules:
blockrules[ the_rule ] = [ ask_for_rule( the_rule ), 1 ]
action = blockrules[ the_rule][0]
engl = action_to_english(action)
if not engl: continue
print((" + " + engl))
blockindex += 1
blocklabel = 'block_' + str(blockindex)
# Assert if the courses make the block qualify
#print(action)
# needs to be a rule too....... # Required - Complete ALL of the following courses:
#print("\n\n")
try:
match = re.search(r'^([a-z])([\d\.]+)$',action)
if action == 'a':
assert_fact('curriculum', { 'block':blocklabel, 'degree': deg_longname, 'blockrule': 'all', 'englrule':engl})
elif action == 'x':
pass
elif action == 'e':
assert_fact('curriculum', { 'block':blocklabel, 'degree': deg_longname, 'blockrule': 'elective', 'englrule':engl})
elif action == 's':
assert_fact('curriculum', { 'block':blocklabel, 'degree': deg_longname, 'blockrule': 'special', 'englrule':engl})
elif match and match.group(1) == 'u':
assert_fact('curriculum', { 'block':blocklabel, 'degree': deg_longname, 'blockrule': 'min_units', 'number': match.group(2), 'englrule':engl })
elif match and match.group(1) == 'n':
assert_fact('curriculum', { 'block':blocklabel, 'degree': deg_longname, 'blockrule': 'min_courses', 'number': match.group(2), 'englrule':engl })
except MessageNotHandledException as e:
pass
#print(e)
for crs in block['courses']:
if re.search(r'header2',crs):
descr = crs.split("|")[1]
big_block_list.append( [ 'header', descr ] )
continue
c_data = split_course(crs)
#c_data['code'] = re.sub(r'\s','',c_data['code'])
try:
if 'code' in c_data and c_data['code']:
fixed_code = re.sub(r'\s','',c_data['code'])
history = sections.loc[fixed_code]
else:
msg = "BAD COURSE DATA: " + str(crs)
data = {'code':'?','name':'?','units':'?'}
continue
except Exception as e:
msg = "COULDNT FIND ONLINE DATA for " + c_data['code'] + " - " + c_data['name']
continue
#p_cert_course(c_data,history,output,doc)
# Handle the class
#print("\t" + str(c_data))
try:
print((" Asserting " + blocklabel + "\t" + json.dumps({ 'block':blocklabel, 'course': fixed_code,
'relationship': 'contains', 'units':float(c_data['units']),
'code': fixed_code, 'name': c_data['name'],
'status': c_data['status'], 'or': c_data['or'] })))
assert_fact('curriculum', { 'block':blocklabel, 'course': fixed_code,
'relationship': 'contains', 'units':float(c_data['units']),
'code': fixed_code, 'name': c_data['name'],
'status': c_data['status'], 'or': c_data['or'] })
except Exception as e:
pass
#print(e)
# END block of courses
#print("Finished reading "+deg_longname)
# END cert or degree
eval(input('hit return...'))
# Big Structure of all degrees
degs_main = {}
this_deg = ''
for R in big_block_list:
if R[0] == 'header': # its a funny header, not quite a rule....
#print(R)
degs_main[this_deg]['blocks'].append( {'rule':'', 'englrule':'', 'courses':[], 'header':R[1] } )
elif not R[0].startswith('block'): # everything starts with block except new degrees
degs_main[R[0]] = { 'deg':R[0], 'blocks':[] }
this_deg = R[0]
#print(this_deg)
elif R[1] == 'rule':
degs_main[this_deg]['blocks'].append( {'rule':R[2], 'englrule':R[4], 'courses':[], 'header':'' } )
#print(" "+R[4])
if len(R) > 3:
degs_main[this_deg]['blocks'][-1]['num'] = R[3]
else:
degs_main[this_deg]['blocks'][-1]['courses'].append(R)
#print(" "+str(R))
# Print them
bigdoc = Document()
for k,v in list(degs_main.items()):
print((v['deg']))
qualifies = True
if not re.search(r'chievement',v['deg']):
qualifies = False ## JUST DOING CAs
print(" Skipping because not a CA")
if not qualifies: continue
for vv in v['blocks']:
for CC in vv['courses']:
print((" " + "\t".join([ CC[0], CC[1], str(CC[3]), CC[4]['name']])))
if not check_a_block_a(vv,1):
qualifies = False
break
if not qualifies: continue
print(" + OK, including this one.")
bigdoc.add_heading('Gavilan College', 2)
#bigdoc.add_heading(v['deg'], 2)
p_cert_header(v['deg'],bigdoc)
print_headers = 1
for vv in v['blocks']:
p_block_rule(vv['englrule'],print_headers,bigdoc)
print_headers = 0
more = ''
if 'num' in vv: more = ' / ' + str( vv['num'] )
#print( " " + vv['rule'] + more )
if vv['header']:
p_block_header(vv['header'],bigdoc)
#print(" ("+vv['header']+")")
for vvv in vv['courses']:
#print(vvv[4])
#print(vvv)
#print(" " + json.dumps(vvv))
p_cert_course(vvv[4], sections.loc[ vvv[1] ],bigdoc)
p_end_cert(bigdoc)
bigdoc.save('output/onlinecerts/all_cert_achievement.docx')
# 9/2021 clean programs to good json
def organize_programs_stage2():
    """Condense every cached per-department program file into cache/deg_certs.json.

    Reads cache/programs/index.json for the department list, then rewrites each
    program as {title, dept, desc?, type?, groups:[{header, courses:[[code,name,units]]}]}.
    NOTE(review): indentation reconstructed from a whitespace-mangled copy --
    verify nesting against the original before relying on edge cases.
    """
    alldepts = [x[1] for x in json.loads( open('cache/programs/index.json','r').read() )['departments']]
    output = codecs.open('cache/deg_certs.json','w','utf-8')
    all_progs = []
    for prog in alldepts:
        fname = 'cache/programs/'+prog+'.json'
        print(("Reading %s" % fname))
        filedata = open(fname,'r').read()
        p_info = json.loads(filedata)
        for p in p_info:
            pretty_p = {}
            print(p['dept'] + "\t" + p['type'] + "\t" + p['title'])
            pretty_p['title'] = p['title']
            pretty_p['dept'] = p['dept']
            # Optional fields: not every program record carries these.
            if 'desc' in p: pretty_p['desc'] = p['desc']
            if 'type' in p: pretty_p['type'] = p['type']
            print(" - %s\n - %s\n" % (p['dept'],p['title']))
            pretty_p['groups'] = []
            b = p['blocks']
            b.sort(key=cmp_2)
            for block in b:
                this_block = {'courses':[],'header':""}
                if 'rule' in block:
                    #print("\t"+block['order'] + "\t" + block['rule'])
                    #p_block_rule(block['rule'],output,print_headers,doc)
                    this_block['header'] = block['rule']
                for crs in sorted(block['courses']):
                    # A 'header2' marker line starts a new course group:
                    # flush the current group (if it has courses) and retitle.
                    if re.search(r'header2',crs):
                        if len(this_block['courses']):
                            pretty_p['groups'].append(this_block)
                            this_block = {'courses':[],'header':""}
                        parts = crs.split("|")
                        #print(parts)
                        this_block['header'] = parts[1]
                        continue
                    c_data = split_course(crs)  # a string or a dict:
                    # {'cn_code':..., 'code':..., 'name':..., 'units':..., 'status':..., 'or':...}
                    if type({})==type(c_data) and 'code' in c_data:
                        code = c_data['code']
                        # Trailing "or" marks an alternative within the group.
                        if type({})==type(c_data) and 'or' in c_data and c_data['or']: code += " or"
                        if c_data:
                            this_block['courses'].append( [ code,c_data['name'],c_data['units'] ])
                pretty_p['groups'].append(this_block)
            all_progs.append(pretty_p)
    output.write(json.dumps( all_progs,indent=2))
# of all the programs, what can be accomplished online?
def find_online_programs():
    """Of all the programs, what can be accomplished online?

    Joins each program's course list against the per-course online-offering
    history in cache/one_year_course_modes.csv, writing a per-program text
    report and (for 'Achievement' awards only) a combined Word document.
    NOTE(review): indentation reconstructed from a whitespace-mangled copy.
    """
    #sections = summarize_online_sections()
    sections = pd.read_csv('cache/one_year_course_modes.csv')
    sections.set_index('Unnamed: 0',inplace=True)
    bigdoc = Document()
    #bigdoc.styles.add_style('Table Grid', docx.styles.style._TableStyle, builtin=True)
    alldepts = [x[1] for x in json.loads( open('cache/programs/index.json','r').read() )['departments']]
    for prog in alldepts:
        #prog = 'administration_of_justice'
        fname = 'cache/programs/'+prog+'.json'
        print(("Reading %s" % fname))
        input = open(fname,'r')  # NOTE(review): shadows the builtin input()
        filedata = input.read()
        p_info = json.loads(filedata)
        #print p_info
        output = open('output/onlinecerts/'+prog+'.txt','w')
        for p in p_info:
            #print(p['dept'] + "\t" + p['type'] + "\t" + p['title'])
            # Only Achievement awards go into the combined docx.
            if re.search(r'chievement',p['type']):
                use_bigdoc = 1
                bigdoc.add_heading('Gavilan College', 2)
                bigdoc.add_heading(p['dept'], 2)
                p_cert_header(p['type'],bigdoc,p['title'],output)
            else:
                use_bigdoc = 0
            #doc = Document()
            #doc.add_heading('Gavilan College', 2)
            #p_cert_header(p['type'],p['title'],output,doc)
            b = p['blocks']
            b.sort(key=cmp_2)
            print_headers = 1
            for block in b:
                if 'rule' in block:
                    #print("\t"+block['order'] + "\t" + block['rule'])
                    #p_block_rule(block['rule'],output,print_headers,doc)
                    if use_bigdoc: p_block_rule(block['rule'],output,print_headers,bigdoc)
                    print_headers = 0
                for crs in block['courses']:
                    # 'header2' marker lines are group headers, not courses.
                    if re.search(r'header2',crs):
                        parts = crs.split("|")
                        #p_block_header(parts[1],output,doc)
                        if use_bigdoc: p_block_header(parts[1],output,bigdoc)
                        continue
                    c_data = split_course(crs)
                    try:
                        if 'code' in c_data and c_data['code']:
                            fixed_code = re.sub(r'\s','',c_data['code'])
                            history = sections.loc[fixed_code]
                        else:
                            print(("BAD COURSE DATA: " + str(crs)))
                            #p_cert_course_missing({'code':'?','name':'?','units':'?'},output,doc)
                            if use_bigdoc: p_cert_course_missing({'code':'?','name':'?','units':'?'},output,bigdoc)
                            continue
                    except Exception as e:
                        # No offering history indexed under this course code.
                        #print("COULDNT FIND ONLINE DATA for " + c_data['code'] + " - " + c_data['name'])
                        #p_cert_course_missing(c_data,output,doc)
                        if use_bigdoc: p_cert_course_missing(c_data,output,bigdoc)
                        #print(e)
                        continue
                    #p_cert_course(c_data,history,output,doc)
                    if use_bigdoc: p_cert_course(c_data,history,output,bigdoc)
            #p_end_block(output)
            if use_bigdoc: p_end_cert(output,bigdoc)
        #doc_title = re.sub(r'\/','_',p['title'])
        #doc.save('output/onlinecerts/'+prog+'_' + doc_title + '.docx')
    bigdoc.save('output/onlinecerts/all_ca.docx')
# take a string of all the types of classes offered, return a vector of [tot,lec,hyb,onl]
def string_to_types(st):
    """Tally a comma-joined list of delivery modes.

    Returns [total, face-to-face, hybrid, online] counts; unrecognized
    modes still count toward the total.
    """
    total = lec = hyb = onl = 0
    for mode in st.split(','):
        total += 1
        if mode == 'online':
            onl += 1
        elif mode == 'face to face':
            lec += 1
        elif mode == 'hybrid':
            hyb += 1
    return [total, lec, hyb, onl]
def my_default_counter():
    """Return a fresh per-semester counter: {semester: 0} for each entry in `sems`."""
    return {semester: 0 for semester in sems}
# Of the recent schedules, what was actually offered online?
def summarize_online_sections():
    """Of the recent schedules, what was actually offered online?

    Writes three CSVs under cache/ and returns a DataFrame of
    [sections, lec, hyb, online] per course code, or None if a schedule
    is missing the 'type' column.
    """
    scheds = list(map(getSemesterSchedule,sems))
    all = pd.concat(scheds,sort=True)  # NOTE(review): shadows the builtin all()
    selected = all[['code','type','sem']]
    selected.to_csv('cache/one_year_course_sections.csv')
    # Count the online sections offered by semester
    counter = defaultdict(my_default_counter)
    for index,row in selected.iterrows():
        # print(row)
        code = row['code']
        code = re.sub('\s','',code)  # normalize codes: strip all whitespace
        entry = counter[code]
        if row['type'] == 'online':
            entry[ row['sem'] ] += 1
    df_counter = pd.DataFrame.from_dict(counter,orient='index')
    #print(df_counter)
    df_counter.to_csv('cache/one_year_course_modes.csv')
    #return df_counter
    # Per-course totals by delivery mode: [total, lecture, hybrid, online].
    bycode = selected.groupby('code')
    try:
        ff = bycode.agg( lambda x: string_to_types(','.join(x)) )
    except Exception as e:
        print("There was a problem with the schedules. One may not have the 'type' column.")
        print("Check 'cache/one_year_course_modes.csv' for details")
        return
    types_by_course = {}
    for row_index, row in ff.iterrows():
        types_by_course[row_index.replace(" ","")] = row['type']
    df = pd.DataFrame.from_dict(types_by_course,orient='index',columns=['sections','lec','hyb','online'])
    #print(df)
    df.to_csv('cache/one_year_online_courses.csv')
    print("Saved to cache/one_year_online_courses.csv")
    return df
def fibonacci(n):
    """Return the n-th Fibonacci number (1, 1, 2, 3, 5, ...) by naive recursion.

    Replaces the pampy `match` dispatch with plain conditionals (removes a
    third-party dependency) and adds a guard: the original recursed forever
    for n <= 0, since only 1 and 2 were base cases.

    Raises:
        ValueError: if n < 1.
    """
    if n < 1:
        raise ValueError("fibonacci is defined for n >= 1")
    if n <= 2:
        return 1
    return fibonacci(n - 1) + fibonacci(n - 2)
def test_pampy():
    """Smoke-test fibonacci on a handful of sample values, printing each result."""
    for sample in (1, 2, 3, 4, 5, 7, 9, 15):
        print(("fib(%i) is: %i" % (sample, fibonacci(sample))))
def cq_parse_experiment(root=0, indent=''):
    """Render a parsed-JSON structure as an indented debug string.

    Dicts become '{key: value, ...}' with `indent` woven in, lists become
    '[el][el]...', and every scalar is stringified.
    """
    child_indent = indent + " "
    if type(root) == dict:
        pieces = [indent + "{"]
        for key, value in list(root.items()):
            pieces.append(key + ": " + cq_parse_experiment(value, child_indent) + ", " + indent)
        pieces.append("}")
        return ''.join(pieces)
    if type(root) == list:
        return ''.join("[" + cq_parse_experiment(el, child_indent) + "]" for el in root)
    # Strings pass through as-is; every other scalar is stringified --
    # which is also what str() does for strings, so one path covers all.
    return str(root)
def cq_start():
    """Parse cached programs_1.txt and write its debug rendering to cache/test_prog.txt."""
    tree = json.loads(open('cache/programs/programs_1.txt','r').read())
    sink = open('cache/test_prog.txt','w')
    sink.write(cq_parse_experiment(tree,'\n'))
"""my first pattern
"dataTypeDetails": {
"scale": 2,
"type": "numeric",
"precision": 6
},
def cq_pattern_backup1(root=0, indent=''):
# call this on anything that's a list. It'll recurse on each element of it.
# if the value was false, roll it back up and dont
# display
ret = ''
# xxxx Rules here catches them top-down
if type(root) == type({}):
ret += indent + "{"
for K,V in list(root.items()):
ret += '"'+K+'"' + ": " + \
str(cq_pattern(V,indent+" "))+ ", " +indent
ret += "}"
elif type(root) == type([]):
for K in root:
ret += "[" + str(cq_pattern(K, indent+" ")) + "]"
elif type(root) == type("abc"): ret += '"'+root+'"'
elif type(root) == type(55): ret += str(root)
elif type(root) == type(5.5): ret += str(root)
elif type(root) == type(False):
if root == False: return "False"
elif root == True: return "True"
else:
result = lookForMatch(pat,rule)
if result: ret = str(result)
else: ret += '"'+str(root)+'"'
return ret
"""
def found(*x):
    """Debug handler for pattern matches: print the captured args, return their repr."""
    print(x)
    return str(x)
def lookForMatch(rules,item):
    """Try `item` against a flat [pattern, handler, pattern, handler, ...] list.

    Returns the first handler's result on a match, else False. Bumps the
    module-level `labels` counter keyed by the matching pattern's index
    in `rules` (display code divides by 2 to get the pair number).

    Fixes vs. previous version:
      * `labels[i-1 / 2]` parsed as `i - 0.5` (operator precedence), so hit
        counts were keyed by floats; now keyed by the pattern's index `i - 1`.
      * `a` was unbound (NameError) when `rules` was empty; now initialized.
    """
    a = ''  # most recent match() result; '' means "no match yet"
    var1 = ''
    for i,x in enumerate(rules):
        if i % 2 == 1:
            # Odd slots are handlers; var1 holds the preceding pattern.
            a = match(item, var1, x, default='')
            if a:
                labels[i - 1] += 1
                break
        else:
            var1 = x
    #a = match(root,*curic_patterns,default='')
    if a:
        #print("Matched: " + str(a))
        return a
    #print("Didn't match: " + str(item) + "\n")
    return False
#from curriculum_patterns import pat
from patterns_topdown import pat
# Module-level hit counter used by lookForMatch and reported by
# cq_pattern_start: maps a slot in `pat` to how many times it matched.
labels = defaultdict(int)
def cq_pattern(root=0, indent=''):
    """Recursively rebuild `root`, then try to pattern-match the whole node.

    Each dict value / list element is processed first; afterwards the original
    node is run through lookForMatch(pat, ...) and a hit replaces the rebuilt
    value. Falsy results fall back to returning `root` unchanged.
    """
    deeper = indent + " "
    if type(root) == dict:
        rebuilt = {key: cq_pattern(value, deeper) for key, value in list(root.items())}
    elif type(root) == list:
        rebuilt = [cq_pattern(element, deeper) for element in root]
    elif type(root) in (str, int, float, bool):
        rebuilt = root
    else:
        rebuilt = ''
    # Children handled; now try to match this node as a whole.
    hit = lookForMatch(pat, root)
    if hit:
        rebuilt = hit
    if rebuilt:
        return rebuilt
    return root
def myprinter(item, indent=''):
    """Render nested dicts/lists/(label, dict) pairs as an indented debug string."""
    sep = '\n' + indent + ' '
    deeper = indent + " "
    # A ("label", {...}) pair gets the special [[label: ...]] framing.
    if type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == dict:
        inner = sep.join([K + ":> " + myprinter(V, deeper) for K, V in item[1].items()])
        return "[[" + item[0] + ": " + inner + "]]"
    if type(item) == dict:
        return "{" + sep.join([K + ": " + myprinter(V, deeper) for K, V in item.items()]) + "}"
    if type(item) == list:
        return "[" + sep.join([myprinter(I, deeper) for I in item]) + "]"
    return '"|' + str(item) + '|"'
def cq_pattern_start():
    """Run cq_pattern over programs_2.txt, dump the results, report pattern hits."""
    tree = json.loads( open('cache/programs/programs_2.txt','r').read())
    outt = open('cache/test_prog.txt','w')
    for entry in cq_pattern(tree,'\n'):
        outt.write(myprinter(entry)+"\n")
    # Summarize how often each pattern slot fired during the run.
    for slot in sorted(labels.keys()):
        print(" Slot %i:\t%i hits" % (slot/2, labels[slot]))
def baby_int(j):
    """Best-effort int conversion: None / empty / whitespace-only input -> 0.

    Generalizes the original (which only special-cased '') so None and
    blank strings no longer raise; anything else goes through int().
    """
    if j is None or str(j).strip() == '':
        return 0
    return int(j)
def find_deg_in_cluster( clusters, deg ):
    """Return the pathway/cluster key whose member list contains `deg`.

    Falls back to the sentinel string "pathway_not_found" when no cluster
    lists the degree.
    """
    return next(
        (name for name, members in clusters.items() if deg in members),
        "pathway_not_found",
    )
def try_match_deg_programs():
    """Consolidate curricunet programs, the name-variant table, guided-pathway
    clusters, and the degrees-attained counts into cache/attainment_masterlist.csv.

    Fixes vs. previous version:
      * 'cache\\degrees_attained.csv' used a backslash path (works only on
        Windows, and '\\d' is a deprecated escape); now forward-slash like
        every other path in this file.
      * the '' sentinel row in by_medium held ints, so len() on its longname
        column raised TypeError; it now holds empty strings.
      * an attained row whose medium name is absent from the variants table
        raised KeyError; it now falls back to the sentinel (counts as unmatched).
    """
    # my index, from curricunet, is the "longname". The 'attained' file has medium. kind of.
    type_lookup = { "Certificate of Proficiency":"CP", "A.A. Degree":"AA", "A.S. Degree":"AS", "Certificate of Achievement":"CA", "A.S.-T Degree":"AS_T", "A.A.-T Degree":"AA_T", "NC-Cmptncy: NC Certificate of Competency":"COMP", "NC-Complet: NC Certificate of Completion":"COMP" }
    # Curricunet: collect every longname; value flips to 1 once matched below.
    curicunet_version = {}
    for f in os.listdir('cache/programs'):
        if not re.search('index|programs',f):
            pro = json.loads(open('cache/programs/'+f,'r').read()) # blocks title dept key type desc
            for c in pro:
                longname = c['dept'] + " | " + c['title'] + " | " + c['type']
                curicunet_version[longname] = 0
                abbrev = "??"
                if c['type'] in type_lookup:
                    abbrev = type_lookup[ c['type'] ]
    # Guided-pathway clusters: '# <name>' lines start a cluster, the
    # following non-blank lines are its member degrees.
    gp_clusters = {}
    current_cluster = "X"
    gp_file = open('cache/g_path_cluster2020.txt','r')
    for L in gp_file:
        L = L.strip()
        if L:
            if L.startswith('#'):
                mch = re.search(r'^\#\s(.*)$',L)
                if mch:
                    current_cluster = mch.group(1)
                    gp_clusters[ current_cluster ] = []
            else:
                gp_clusters[ current_cluster ].append( L )
    # Name-variant table: index each row by every naming scheme it carries.
    matchers = csv.reader(open('cache/deg_name_variants.csv','r'),delimiter=",")
    by_long = {}
    by_medium = {}
    by_med_unmatched = {}
    by_gp_name = {}
    line = 0
    for row in matchers: # variants
        if line > 0:  # skip the header row
            by_long[ row[3] ] = row
            by_gp_name[ row[2] ] = row
            by_medium[ row[1] ] = row
            by_med_unmatched[ row[1] ] = row
            # remove from curricunet list so i can see whats left
            if row[3] in curicunet_version:
                curicunet_version[ row[3] ] = 1
        line += 1
    # Sentinel for blank/unknown medium names. Must hold strings (not ints)
    # so len() on its longname column works below.
    sentinel = ['' for _ in range(10)]
    by_medium[''] = sentinel
    # Attained List: prepend derived columns, then match each row by medium name.
    attained = csv.reader(open('cache/degrees_attained.csv','r'),delimiter=",") # 1 6 22 17
    line = 0
    matched = {}
    unmatched = {}
    for row in attained:
        if line == 0:
            attained_columns = row
            attained_columns.append("total")
            attained_columns.insert(0,"shortname")
            attained_columns.insert(0,"pathway")
            attained_columns.insert(0,"dept")
            attained_columns.insert(0,"type")
            attained_columns.insert(5,"longname")
        else:
            # Placeholders: type / dept / pathway / shortname (+ longname at 5).
            row.insert(0,'sn')
            row.insert(0,'p')
            row.insert(0,'d')
            row.insert(0,'t')
            row.insert(5,'')
            # row[4] is now the medium name from the attained file.
            variant = by_medium.get(row[4], sentinel)
            matching_longname = variant[3]
            if len(matching_longname):
                row[5] = matching_longname ### THE matching longname
                m_parts = matching_longname.split(" | ")
                dept = m_parts[0]
                ttype = m_parts[2]
                row[1] = dept
                row[0] = ttype
                matched[row[4]] = row
                row[3] = variant[0] # shortname
                row[2] = find_deg_in_cluster(gp_clusters, variant[2])
                print("OK: " + str(row))
            else:
                row[0] = ''
                row[1] = ''
                row[2] = ''
                row[3] = ''
                row[5] = ''
                print("XX: " + str(row))
                unmatched[row[4]] = row
        line += 1
    print("matched %i and missed %i." % (len(matched),len(unmatched)))
    print("\nLeftover degrees:")
    for k,v in curicunet_version.items():
        if not v: print(k)
    # Merge matched + unmatched attained rows into the master spreadsheet.
    mash_cols = "type dept pathway shortname mediumname longname grad09 10 11 12 13 14 15 16 17 18 total".split(" ")
    mash_rows = []
    for xrow in matched.values():    # attained / matched
        mash_rows.append(xrow)
    for xrow in unmatched.values():  # attained / unmatched
        mash_rows.append(xrow)
    mydf = pd.DataFrame(mash_rows, columns=mash_cols)
    mydf.to_csv('cache/attainment_masterlist.csv',index=False)
    return
# open('cache/programs/programs_1.txt','r').read()
""" SEE serve.py .... i mean ... interactive.py
def dict_generator(indict, pre=None):
pre = pre[:] if pre else []
if isinstance(indict, dict):
for key, value in indict.items():
if isinstance(value, dict):
for d in dict_generator(value, pre + [key]):
yield d
elif isinstance(value, list) or isinstance(value, tuple):
for v in value:
for d in dict_generator(v, pre + [key]):
yield d
else:
yield str(pre) + " " + str([key, value]) + "\n"
else:
yield pre + [indict]
yield str(pre) + " " + str([indict]) + "\n"
def print_dict(v, prefix='',indent=''):
if isinstance(v, dict):
return [ print_dict(v2, "{}['{}']".format(prefix, k) + " ", indent+" " ) for k, v2 in v.items() ]
elif isinstance(v, list):
return [ print_dict( v2, "{}[{}]".format(prefix , i) + " ", indent+" ") for i, v2 in enumerate(v) ]
else:
return '{} = {}'.format(prefix, repr(v)) + "\n"
def walk_file():
j = json.loads(open('cache/programs/programs_2.txt','r').read())
return print_dict(j)
from flask import Flask
from flask import request
def tag(x,y): return "<%s>%s%s>" % (x,y,x)
def tagc(x,c,y): return '<%s class="%s">%s%s>' % (x,c,y,x)
def a(t,h): return '%s' % (h,t)
def server_save(key,value):
codecs.open('cache/server_data.txt','a').write( "%s=%s\n" % (str(key),str(value)))
def flask_thread(q):
app = Flask(__name__)
@app.route("/")
def home():
return tag('h1','This is my server.') + " " + a('want to shut down?','/sd')
@app.route("/save//")
def s(key,val):
server_save(key,val)
return tag('h1','Saved.') + " " + tag('p', 'Saved: %s = %s' % (str(key),str(val)))
@app.route("/crazy")
def hello():
r = ''
r += tag('style', 'textarea { white-space:nowrap; }')
r += tag('body', \
tagc('div','container-fluid', \
tagc('div','row', \
tagc( 'div', 'col-md-6', tag('pre', walk_file() ) ) + \
tagc( 'div', 'col-md-6', 'Column 2' + a('Shut Down','/shutdown' ) ) ) ) )
return r
@app.route("/sd")
def sd():
print('SIGINT or CTRL-C detected. Exiting gracefully')
func = request.environ.get('werkzeug.server.shutdown')
if func is None:
raise RuntimeError('Not running with the Werkzeug Server')
func()
return "Server has shut down."
app.run()
from queue import Queue
q = Queue()
def serve():
import webbrowser
import threading
x = threading.Thread(target=flask_thread, args=(q,))
x.start()
webbrowser.open_new_tab("http://localhost:5000")
#s = open('cache/programs/index.json','w')
#s.write( json.dumps({'departments':sorted(list(dept_index)), 'programs':prog_index}, indent=2) )
#s.close()
"""
# feb 2020 goal:
# - treemap of graduates in each division, dept, pathway, degree type, major
# - rollover or other interactive explorer of pathways
# sept short term goals:
# 1. viable presentation on web pages w/ good overview
# 1a. - will necessarily include courses, learning outcomes cause it depends on them.
# 2. show progs close to 50% limit
# 3. foundation for visualization, model degree attainment, and simulation
# 4. prep for work on iLearn -> SLO -> any useful contributions I can make
#
# sept 8, 2020 approach:
# 1 hr: pull labels, types, interesting notes, discern structures that are most
# important. The 20% that gets me 80%.
#
# 1 hr: 2-3 short experiments for different ways of pattern matching them.
#
# 1 hr: best (rushed) effort to condense it all into accurate (if incomplete)
# compact data structure.
#
# 1 hr: php to fetch and display for a given prog, deg, dept, cert, or overview.
#
# show draft live on wed sept 10.
#
"""
def attempt_match8020(rules,item):
var1 = ''
for i,x in enumerate(rules):
if i % 2 == 1:
a = match(item, var1, x, default='')
if a:
labels8020[i-1 / 2] += 1
else:
var1 = x
#a = match(root,*curic_patterns,default='')
if a:
print("Matched: " + str(a))
return a
print("Didn't match: " + str(item) + "\n")
return False
"""
def clever_printer(item, indent=''):
    """Same formatting contract as myprinter; child rendering delegates to myprinter."""
    sep = '\n' + indent + ' '
    deeper = indent + " "
    if type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == dict:
        inner = sep.join([K + ":> " + myprinter(V, deeper) for K, V in item[1].items()])
        return "[[" + item[0] + ": " + inner + "]]"
    if type(item) == dict:
        return "{" + sep.join([K + ": " + myprinter(V, deeper) for K, V in item.items()]) + "}"
    if type(item) == list:
        return "[" + sep.join([myprinter(I, deeper) for I in item]) + "]"
    return '"|' + str(item) + '|"'
def print_return(x):
    """Pass-through pattern handler: announce the hit, then return `x` unchanged."""
    print('got a hit')
    print()
    return x
from patterns_8020 import pat8020
# Hit counter for the 80/20 pattern experiment (parallels `labels`).
labels8020 = defaultdict(int)
def cq_8020(root=0, indent=''):
    """Collect every pat8020 pattern match against `root` (whole-tree only).

    Try to match the root, and if no match, try to break it up (dicts, lists)
    and recurse on those parts; if no matches below this point in the tree,
    return [] -- the recursive half is currently disabled (see the dead
    string-literal block below).
    NOTE(review): pampy's match() with no default raises on a non-match --
    confirm the pat8020 patterns are total, or this loop may abort early.
    """
    ret = []
    for pattern in pat8020:
        m = match( root, pattern, print_return )
        if m:
            print('case 1')
            print('this: ' + str(m))
            print('matched this pattern: ' + str(pattern))
            print(print_return)
            xyz = input('enter to continue')  # interactive pause for debugging
            ret.append(m)
    # Dead code preserved as a string literal (disabled recursion step):
    """
    if type(root) == type({}):
    for K,V in list(root.items()):
    m = cq_8020(V)
    if m:
    print('case 2')
    ret.append(m)
    elif type(root) == type([]):
    for V in root:
    m = cq_8020(V)
    if m:
    print('case 3')
    ret.append(m)"""
    return ret
def cq_8020_start():
    """Run the 80/20 matcher over programs_2.txt and dump matches as JSON.

    Field notes on the raw data:
    (programs) entityType entityTitle status proposalType sectionName lastUpdated lastUpdatedBy
    fieldName displayName lookUpDisplay fieldValue instanceSortOrder
    lookUpDataset (array of dicts, each has keys: name, value, and corresponding values.)
    subsections or fields (arrays) - ignore for now just takem in order
    (courses) same as above?
    html values: markdown convert?
    """
    tree = json.loads( open('cache/programs/programs_2.txt','r').read())
    report = open('cache/test_prog8020.txt','w')
    matches = cq_8020(tree,'\n')
    report.write( json.dumps( matches, indent=2 ) )
##### Restored from an earlier version
def recurse3(sec,path=''):
    """Flatten a curricunet section tree into 'path | fieldName | value' lines.

    Recurses through 'subsections' (extending `path` with each section's name
    and sort id), then renders this section's own 'fields' one per line.
    Field-level failures pause interactively rather than aborting the walk.
    """
    output = ''
    if 'subsections' in sec and len(sec['subsections']):
        for subsec in sec['subsections']:
            #pdb.set_trace()
            id = get_id_sortorder(subsec)
            output += recurse3(subsec, path + subsec['attributes']['sectionName'] + " ("+id+") | ")
    if 'fields' in sec and len(sec['fields']):
        for subfld in sec['fields']:
            try:
                fld = handleField(subfld)
                if fld:
                    dbg('Field: %s' % str(fld))
                    output += path + subfld['attributes']['fieldName'] + " | " + fld + "\n"
            except Exception as e:
                # Interactive triage: show the offending field and wait.
                print("Problem in field: %s"% str(e))
                print(subfld)
                x = input('enter to continue')
    return output
def get_id_sortorder(sec):
    """Build a zero-padded '<order>-<id>' label from a section's attributes.

    sectionSortOrder wins over instanceSortOrder for the order part; the id
    part prefers instanceId, then sectionSortOrder, else just the order.
    """
    attrs = sec['attributes']
    order = ''
    if 'instanceSortOrder' in attrs:
        order = str(attrs['instanceSortOrder'])
    if 'sectionSortOrder' in attrs:
        order = str(attrs['sectionSortOrder'])
    # Pad single digits so lexicographic sorts match numeric order.
    if order and int(order) < 10:
        order = '0' + order
    if 'instanceId' in attrs:
        return order + '-' + str(attrs['instanceId'])
    if 'sectionSortOrder' in attrs:
        return order + '-' + str(attrs['sectionSortOrder'])
    return order
def include_exclude(str,inc,exc=[]):
    """True when `str` contains at least one substring from `inc` and none from `exc`."""
    if not any(token in str for token in inc):
        return False
    return not any(token in str for token in exc)
def pbd3(str):
    """Get the id from the 'Program Block Definitions' marker in the 3rd pipe field.

    Returns the id string, or 0 when the marker is missing or the id is '0'.
    """
    pieces = str.split("|")
    if len(pieces) > 3:
        str = pieces[2]
    hit = re.search(r'Program\sBlock\sDefinitions\s\(([\-\d]+)\)',str)
    if hit and hit.group(1) != '0':
        return hit.group(1)
    return 0
def handleField(f):
    """Format one field record as 'name: lookup / value'.

    Returns False when both the lookup display and the field value are empty,
    so callers can skip blank fields. Newlines in the value are flattened.
    """
    lud = boolToStr(f['lookUpDisplay']) if 'lookUpDisplay' in f else ''
    #fv = unicode(f['fieldValue']).replace('\n', ' ').replace('\r', '')
    fv = str(f['fieldValue']).replace('\n', ' ').replace('\r', '')
    if not lud and not fv:
        return False
    return f['attributes']['fieldName'] + ': ' + lud + " / " + fv
def boolToStr(b):
    """Map True/False to their string names; pass any non-bool through unchanged."""
    if isinstance(b,bool):
        return "True" if b else "False"
    return b
# Almost final formatting
def prog_info_to_entry(c):
    """Translate one flattened program line into a partial program dict.

    Dispatches on the 3rd ' | ' field: Program Title -> {'title': ...},
    Department -> {'dept': ...}, Award Type -> {'type': ...},
    Description -> {'desc': <cleaned html>}; anything else -> {}.
    The fixed [18:]/[12:]/[16:] slices skip the repeated field-name prefix.
    """
    fields = c.split(" | ")
    label = fields[2]
    if label == "Program Title":
        title = fields[3][18:]
        print(title)
        return {'title': title}
    if label == "Department":
        return {'dept': fields[3][12:].split(" / ")[0]}
    if label == "Award Type":
        return {'type': fields[3][12:].split(' /')[0]}
    if label == "Description":
        soup = bs(fields[3][16:], 'html.parser')
        # Strip span wrappers and all attributes, keep the bare markup.
        for s in soup.find_all('span'):
            s.unwrap()
        for e in soup.find_all(True):
            e.attrs = {}
        dd = str(soup)
        dd = re.sub('\u00a0',' ',dd)  # replace non-breaking spaces
        return {'desc': dd}
    return {}
def cbd_to_entry(c):
    """Pull the block rule out of a 'Course Block Definition' line, else {}."""
    fields = c.split(" | ")
    if fields[3] == 'Course Block Definition':
        # The rule text is the middle ' / '-separated piece of field 5.
        return { 'rule': fields[4].split(" / ")[1] }
    return {}
def pc5(str):
    """Get the id from the 'Program Courses' marker in the 5th pipe field.

    Returns the id string, or 0 when the marker is missing or the id is '0'.
    """
    pieces = str.split("|")
    if len(pieces) > 5:
        str = pieces[4]
    hit = re.search(r'Program\sCourses\s\(([\-\d]+)\)',str)
    if hit and hit.group(1) != '0':
        return hit.group(1)
    return 0
def remove_prefix(str,i):
    """Drop the first `i` ' | '-separated fields; return `str` unchanged if too short."""
    fields = str.split(" | ")
    if len(fields) > i:
        return " | ".join(fields[i:])
    return str
def course_to_entry(c,order="0"):
    """Turn one flattened course line into a sortable course-list fragment.

    Returns '<order>|<name>' for a Course line, ' | OR ' for an 'or'
    Condition, '<order>header2|<text>' for a Non-Course Requirements
    header, and '' for anything else.
    """
    fields = c.split(" | ")
    dbg(" c2e: %s" % str(c))
    if fields[1] == "Course":
        detail = fields[2].split(" / ")
        origname = order+"|"+detail[0][8:]
        id = detail[1]
        #return {'id':id,'origname':origname}
        dbg(" c2e is course: %s" % str(origname))
        return origname
    if fields[1] == "Condition":
        #print p1[2][11:13]
        if fields[2][11:13] == 'or':
            #return {'ornext':1}
            dbg(" c2e is OR")
            return " | OR "
    if fields[0] == "Non-Course Requirements":
        #pdb.set_trace()
        dbg(" c2e is header: %s" % str(fields[1][28:]))
        return order + "header2" + "|" + fields[1][28:]
    return ''
def courseline_to_pretty(line):
    """Format a packed course line for display (currently the identity function).

    Input examples:
      01-125780|THEA1 - Theatre History: Greece to Restoration 3.000 *Active*
      09-125764|THEA19 - Acting and Voice for TV/Film/Media 3.000 *Historical* | OR
      11-129282header2|Choose additional units from the courses below ...

    Fix: removed the HTML-formatting code that followed the early `return line`
    -- it was unreachable, and its string literals were corrupted (the markup
    had been stripped, leaving unterminated strings). Behavior is unchanged:
    the line is returned as-is. Restore the table formatting from version
    control if it is ever needed again.
    """
    return line
# restarted oct 2019 and try to simplify
def prog_take_4(program):
    """Reduce one raw curricunet program record to {title, dept, type, desc, blocks}.

    Pipeline: flatten the section tree (recurse3), keep only interesting lines
    (include_exclude), group them into Program Block Definitions (pbd3), then
    group each block's lines into courses (pc5) and pretty-print them.
    NOTE(review): indentation reconstructed from a whitespace-mangled copy --
    verify the loop nesting against the original before trusting edge cases.
    """
    fullyProcessed = ''
    for r in program['entityFormData']['rootSections']:
        dbg('a recurse3 call...')
        fullyProcessed += recurse3(r,program['entityMetadata']['entityTitle']+" | ")
    taken = []
    for L in (program['entityMetadata']['entityTitle'] + fullyProcessed).split('\n'):
        if include_exclude(L,['Description','Department','Award Type','Program Title','Course Block Definition','Program Courses','Outcome | Outcome | Outcome | Outcome'], ['Map SLO to']):
            taken.append(L)
    program_struct = { 'blocks':[]}
    # start dividing up course blocks
    blocks = groupby(pbd3,taken)
    for k,v in blocks.items(): # each of the PDBs
        block = { 'order':str(k) }
        for a in v:
            dbg('block: ' + str(k))
            course_list = []
            if k == 0:
                # Block id 0 carries program-level info (title/dept/type/desc).
                program_struct.update(prog_info_to_entry(a))
            else:
                #pdb.set_trace()
                block.update(cbd_to_entry(a))
                courses = groupby(pc5,blocks[k])
                for C,cval in courses.items(): # each of the courses
                    df = [remove_prefix(x,5) for x in cval]
                    #my_c = { 'order':str(C) }
                    courseline = ''
                    for K in df:
                        c2e = course_to_entry(K,C)
                        dbg(" c2e: %s" % str(c2e))
                        # A header2 fragment closes the current courseline.
                        if re.search('header2',c2e):
                            course_list.append( courseline_to_pretty(courseline))
                            courseline = c2e
                            continue
                        if re.search('3\sUnit\sMin',c2e):
                            dbg(" --courseline: %s" % str(courseline))
                            courseline = re.sub('1\.000\s+\-\s+2\.000','3.000',courseline)
                            dbg(" ---courseline changed: %s" % str(courseline))
                            continue # hack for span non native opt 2
                        # TODO
                        courseline += c2e
                        dbg(" courseline: %s" % str(courseline))
                    #if courseline:
                    #    my_c.update(courseline)
                    #    #if 'id' in my_c and my_c['id'] in ids:
                    #    #    my_c['reference'] = ids[my_c['id']]
                    dbg('--new courseline--')
                    if courseline: course_list.append( courseline_to_pretty(courseline))
                block['courses'] = sorted(course_list)
        if block: program_struct['blocks'].append(block)
    #jsonout.write( json.dumps(program_struct,indent=2) )
    #return '\n'.join(taken)
    return program_struct
if __name__ == "__main__":
    #cq_8020_start()
    #exit()
    print ('')
    # Menu: choice number -> [description, zero-argument handler].
    options = { 1: ['Fetch all class data from curricunet',fetch_all_classes] ,
                2: ['Fetch all program data from curricunet', fetch_all_programs] ,
                3: ['Translate class data to condensed json files', show_classes] ,
                4: ['Translate program data to condensed json files', show_programs] ,
                5: ['Try to consolidate lists of programs and degrees and # attained', try_match_deg_programs] ,
                #5: ['Check DE', check_de] ,
                #6: ['Sort courses', organize_courses] ,
                #7: ['Clean up degree/program entries', clean_programs] ,
                #8: ['Reorganize degree/program entries', organize_programs] ,
                #9: ['Reorganize degree/program entries, take 2', organize_programs2] ,
                10:['Find online programs', find_online_programs],
                11:['Which courses were scheduled as online?', summarize_online_sections],
                12:['First try with logic rules', smart_find_online_programs],
                13:['Another try, simplified', simple_find_online_programs],
                14:['Parse programs with pattern matching', cq_start],
                15:['Parse programs with pattern matching, take 2', cq_pattern_start],
                #16:['Baby web server', serve],
                16:['80 20 effort. Sept 2020', cq_8020_start],
                17:['Organize programs stage 2 (2021)', organize_programs_stage2],
                }
    # A numeric command-line argument selects the action directly;
    # otherwise show the menu and prompt interactively.
    if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print ('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')
    # Call the function in the options dict
    options[ int(resp)][1]()