"""Describe college courses, their number of units, and any other courses that
are prerequisites, as well as various degrees which consist of groups of
courses that must be taken.

The groups have rules associated with them, such as:

  - take all (the following courses)
  - take n (where n=3 or 5 or something)
  - take at least n units from the following list (so the taken course's unit
    value must add up to n or greater)
  - and so on.
"""

import codecs
import json
import re
import sys

from lark import Lark, Transformer, v_args

# Trace file for the DSL transformer callbacks.  Opened (and truncated) at
# import time; the "cache" directory is expected to exist already.
debug_out = codecs.open("cache/degrees_debug.txt", "w", "utf-8")


def d(s):
    """Append ``str(s)`` followed by a newline to the transformer trace file."""
    debug_out.write(str(s) + "\n")


class CourseOr:
    """A set of interchangeable courses ("A or B") inside a course list."""

    def __init__(self, subs):
        # ``subs`` must be an iterable of strings because it is joined below.
        self.names = subs
        self.name = " or ".join(self.names)

    def __repr__(self):
        return f"[{self.name}]"


class Course:
    """A single course, identified only by its code (e.g. ``"CSIS1"``)."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f"{self.name}"


class CourseList:
    """A named, reusable list of courses declared with ``list <name>: ...``."""

    def __init__(self, name, courses):
        self.name = name
        self.courses = courses

    def __iter__(self):
        return iter(self.courses)

    def __len__(self):
        return len(self.courses)

    def __repr__(self):
        # Each member is rendered inside double quotes, comma separated.
        return ", ".join(f'"{x}"' for x in self.courses)


class DegreeRule:
    """One requirement of a degree: a rule type applied to a list of courses."""

    # Class-wide counter; every instance takes the next number so that the
    # array name emitted by ``__repr__`` is unique.
    rule_count = 1

    def __init__(self, rule_type, course_list):
        self.rule_type = rule_type
        # Only rule types that carry a quantity expose ``n``.  ``hasattr``
        # also copes with objects that have no instance ``__dict__``.
        if hasattr(rule_type, 'n'):
            self.n = rule_type.n
        self.course_list = course_list
        self.rule_number = DegreeRule.rule_count
        DegreeRule.rule_count += 1

    def __repr__(self):
        members = ",".join(str(x) for x in self.course_list)
        header = f"% {self.rule_type}\n"
        return header + (
            f"array[1..{len(self.course_list)}] of string: "
            f"rule_{self.rule_number}_courses = [{members}];"
        )


class Degree:
    """A named degree (or certificate) made up of ``DegreeRule`` objects."""

    def __init__(self, name, degree_rules):
        self.name = name
        self.degree_rules = degree_rules

    def __repr__(self):
        # Header line, then one rule per line, each terminated by a newline.
        rules = "".join(str(r) + "\n" for r in self.degree_rules)
        return f"% Degree: {self.name}\n" + rules


class RuleType():
    """Generic rule descriptor holding a kind (``what``) and a quantity ``n``."""

    def __init__(self, what, n=0):
        self.what = what
        self.n = n

    def __repr__(self):
        # Deliberately renders as an empty string.
        return ""
# Disabled alternative for RuleType.__repr__: f"RuleType({self.what}, {self.n})"


class Rule:
    """Base class for the rule kinds produced by the DSL transformer."""
    pass


class TakeAll(Rule):
    """Every course in the list is required."""

    def __repr__(self):
        return "Take all:"


class TakeN(Rule):
    """At least ``n`` courses from the list are required."""

    def __init__(self, n):
        self.n = n

    def __repr__(self):
        return f"Take {self.n} courses:"


class TakeNUnits(Rule):
    """Courses worth at least ``n`` units from the list are required."""

    def __init__(self, n):
        self.n = n

    def __repr__(self):
        return f"Take {self.n} units:"


@v_args(inline=True)
class DSLTransformer(Transformer):
    """Turn a parse tree of the degree DSL into Degree/CourseList objects.

    Results are collected on the instance:
      * ``courses`` - course code -> {'units': ..., 'prerequisites': [...]}
      * ``degrees`` - program name -> Degree
      * ``lists``   - list name -> CourseList
    Every callback also writes a trace through the module-level ``d``.
    """

    def __init__(self):
        super().__init__()
        self.courses = {}
        self.degrees = {}
        self.lists = {}

    def course_declaration(self, name, units, *prerequisites):
        """Record a ``course CODE UNITS units [prerequisites ...]`` line.

        The grammar may pass prerequisite course codes after ``units``; they
        are accepted variadically so that such declarations do not fail.
        """
        d("\ncourse_declaration")
        d(name)
        d(units)
        # Optional-group placeholders may arrive as None; drop them.
        prereqs = [str(p) for p in prerequisites if p is not None]
        if prereqs:
            d(prereqs)
        record = {'units': str(units), 'prerequisites': prereqs}
        self.courses[str(name)] = record
        return record

    def take_all(self):
        d("\ntake_all")
        return TakeAll()

    def take_n_courses(self, n):
        d("\ntake_n_courses")
        d(n)
        return TakeN(n)

    def take_n_units(self, n):
        d("\ntake_n_units")
        d(n)
        return TakeNUnits(n)

    def degree_rule(self, rule_type, course_list):
        d("\ndegree_rule")
        d(rule_type)
        d(course_list)
        return DegreeRule(rule_type, course_list)

    def list_ref(self, name):
        """Resolve a list name; raises KeyError if the list was not declared."""
        d("\nlist_ref")
        d(name)
        return self.lists[name]

    def course(self, c):
        d("\ncourse")
        d(c)
        return Course(c.value)

    def course_or(self, *items):
        d("\ncourse_or")
        d(items)
        return CourseOr(items)

    def course_list(self, *items):
        """Return the courses of a list as a tuple, without placeholders."""
        # Optional grammar slots can yield None; remove them wherever they
        # appear (this is also safe when no items are passed at all).
        items = tuple(item for item in items if item is not None)
        d("\ncourse_list")
        d(items)
        return items

    def list_declaration(self, name, course_list):
        d("\nlist_declaration")
        d(name)
        d(course_list)
        c = CourseList(name, course_list)
        self.lists[name] = c
        return c

    def program(self, name, *rules):
        d("\nprogram")
        d(name)
        d(rules)
        dg = Degree(name, rules)
        self.degrees[name] = dg
        return dg


# Lark grammar of the degree DSL.  Whitespace is ignored between tokens.
grammar = """
start: _spec+

_spec: program | course_declaration | list_declaration

program: "program" PROGRAMNAME degree_rule*

degree_rule: "take" rule_type [course_list | list_ref]

list_ref: LISTNAME

rule_type: "all from" -> take_all
    | "at least" INT "units from" -> take_n_units
    | "at least" INT "courses from" -> take_n_courses
    | "at least" INT "course from" -> take_n_courses

list_declaration: "list" LISTNAME ":" course_list

course_declaration: "course" COURSECODE UNITAMOUNT "units" ["prerequisites" (COURSECODE ","?)*]

course_list: [course | course_or] ["," [course | course_or]]*

course: COURSECODE

course_or: COURSECODE "or" COURSECODE

COURSECODE: ("A".."Z")+ INT+ ["A".."Z"]*
PROGRAMNAME: ("A".."Z" | "a".."z")+
LISTNAME: ("a".."z"["A".."Z" | "a".."z" | INT]+)
UNITAMOUNT: NUMUNITS | NUMUNITS "-" NUMUNITS
NUMUNITS: INT | INT "." INT

%import common.INT
%import common.WS
%ignore WS
"""


def parser():
    """Parse and print a small built-in example program."""
    dsl = """
list a1: CMUN1A, CMUN5, CMUN8, CMUN10
list a2: ENGL1A
list a3: PHIL2, PHIL4, ENGL1C, CMUN3
list c1: BUS1, ENGL250, ACCT120, BOT100, ACCT121, BOT105B

program BusinessAccountingOption
take all from CSIS1, CSIS2
take at least 6 units from ACCT120, ACCT20 or ECON20
take at least 3 courses from MUS5, MUS10, BUS1, ENGL250, ACCT120, BOT100, ACCT121, BOT105B
take at least 1 course from c1

program CSUGenEd
take at least 1 course from a1
take at least 1 course from a2
take at least 1 course from a3
"""
    do_parse(dsl)


def do_parse(dsl):
    """Parse ``dsl`` with the degree grammar and print what was collected.

    Prints the transformer's courses, degrees and lists, followed by each
    course key, each degree rendered via ``str`` and each list name.
    """
    # Build the parser once; constructing it is the expensive step.
    lark_parser = Lark(grammar)
    transformer = DSLTransformer()
    transformer.transform(lark_parser.parse(dsl))

    print(transformer.courses)
    print()
    print(transformer.degrees)
    print()
    print(transformer.lists)
    print()
    for course_name in transformer.courses:
        print(course_name)
    print()
    print("\nDEGREES: ")
    for degree in transformer.degrees.values():
        print(str(degree))
    print()
    for list_name in transformer.lists:
        print(list_name)


# Number words found in catalog headings.  Lookups are lower-cased first, so
# the capitalised 'One' entry is never hit; it is kept for compatibility.
word_to_num = {'course':1, 'One':1, 'one':1, 'two':2, 'three':3, 'four':4,
               'five':5, 'six':6, 'seven':7, 'eight':8, 'nine':9, 'ten':10,
               'eleven':11,}


def word2num(word, verbose=0):
    """Map a number word to an int; unknown words come back lower-cased."""
    word = word.lower()
    ret = word_to_num.get(word, word)
    if verbose:
        print(f" word2num({word}) -> {ret}")
    return ret


# Cache of course codes (dept + number) that have lab hours.
lab_classes = {}
# Set once the catalog has been read, so an empty result is cached too.
_lab_classes_loaded = False


def load_lab_classes():
    """Return the dict of lab course codes, reading the catalog only once."""
    global _lab_classes_loaded
    if _lab_classes_loaded or lab_classes:
        return lab_classes
    with codecs.open('cache/courses/courses_built.json', 'r', 'utf-8') as f:
        catalog = json.loads(f.read())
    for c in catalog.values():
        if 'min_lab_hour' in c and float(c['min_lab_hour']) > 0:
            lab_classes[c['dept'] + c['number']] = 1
    _lab_classes_loaded = True
    print(lab_classes)
    return lab_classes


def is_lab_class(c):
    """Return True if course code ``c`` is listed as having lab hours."""
    return c in load_lab_classes()


def is_noncourse_new_section(noncourse_line):
    """Decide whether a non-course line starts a new rule/section.

    Patterns in ``note_false_section`` win over ``note_true_section``.  Lines
    matching neither are reported on stdout and treated as "no".
    """
    from degree_vars import note_true_section, note_false_section
    if any(re.search(non, noncourse_line) for non in note_false_section):
        return False
    if any(re.search(yes, noncourse_line) for yes in note_true_section):
        return True
    print(f" -> should this start a new rule/section? [{noncourse_line}]")
    return False


# Rule kind -> regular expressions recognising the catalog heading for it.
# Order matters: lookup_rule returns the first pattern that matches, scanning
# the kinds and their patterns in the order written here.
# NOTE(review): some earlier patterns are broad (e.g. 'Select (1)',
# 'Choose (one|two|three)') and can win over later, more specific unit
# patterns; confirm the intended priority against real headings.
rule_lookup = {
    'take_all_prereq': [
        'RN PROGRAM PREREQUISITES',
        'PREREQUISITES',
    ],
    'take at least n courses': [
        r'(\d+) courses total',
        'SELECT (ONE|TWO) OF THE FOLLOWING',
        'Select (one|two|three)',
        r'Select (\d+) courses',
        'Choose (one) or more',
        'Choose (one|two|three) of the classes listed',
        r'Choose (\w+) of the following',
        'Choose (one|two|three)',
        r'Choose ([\d\w]+) courses from',
        'ANY (COURSE) NOT USED IN',
        'Select (1)',
        'Select (one) of the following REQUIRED CORE',
        '(One) of the following:',
        r'LIST [AB]: Select (\d)',
        'Choose (One) Course:',
    ],
    'take at least n units': [
        r'LIST A \((\d+) units\)',
        r'LIST B \((\d+) units\)',
        r'LIST C \- Any course .*\((\d+) units\)',
        r'(\d+) units total',
        r'Select (\d+) units',
        r'Any combination totaling (\d+) units',
        r'Choose (\w+) units from classes listed',
        r'Choose a minimum of ([\w\d]+) units from',
        r'Choose any combination of courses for a minimum of ([\w\d]+) units',
        r'Choose ([\w\d]+) units',
        r'Choose courses for at least ([\w\d]+) units',
        r'Choose a minimum of (\d+) units',
        r'Select any (\d+)\-\d+ units from the following',
    ],
    'electives': [
        'Electives',
        'Recommended electives?:',
    ],
    'take_all': [
        'RN PROGRAM',
        'REQUIRED CORE',
        'CORE COURSES',
        'ADDITIONAL REQUIREMENTS',
        'REQUIREMENTS:',
        'Requirements',
        'Core Requirements',
        'Required Core',
        'REQUIRED',
        'LVN PROGRAM',
        'Student Teaching Practicum',
        '^LIST A:?$',
        '^LIST B:$',
        'Program Requirements',
        'Required Courses:',
        # Parentheses are escaped so the literal heading text matches; left
        # unescaped they form a regex group and the heading never matches.
        r'PROGRAM REQUIREMENTS \(5 Units\)',
        r'PROGRAM REQUIREMENTS \(162 Hours\)',
    ],
}


def lookup_rule(line):
    """Classify a heading line.

    Returns ``(rule_kind, quantity)`` for the first matching pattern, where
    ``quantity`` is the text of the pattern's first group or None when the
    pattern has no group (or the group is empty).  Returns ``(None, None)``
    when nothing matches.
    """
    verbose = 0
    for key, patterns in rule_lookup.items():
        for each in patterns:
            m = re.search(each, line)
            if not m:
                continue
            # Only patterns with a capture group carry a quantity.
            num = (m.group(1) or None) if m.re.groups else None
            if verbose:
                print(f"line: {line} matched: {each} with {num}")
            return key, num
    return None, None


def examine(li, award, verbose=0):
    """Report (when verbose) a window whose middle entry is 'and'/'or'."""
    summary = [x[0] for x in li]
    if len(summary) > 1 and summary[1] in ['and', 'or']:
        if verbose:
            print(" - ", summary)


def check_ands_ors_pbd(award, pbd, verbose=0):
    """Slide a window of three over a program block and examine each window.

    ``pbd`` is a sequence of entries whose first element is the entry kind.
    Nothing is printed unless ``verbose`` is truthy.
    """
    if verbose:
        print(f"check_ands_ors_pbd({award}, ...)")
    summary = [x[0] for x in pbd]
    if verbose:
        print(" ", summary)
    # iterate through in groups of 3, from 0/1/2 up to 3/4/5
    # (for length 6.  length n: n-2, n-1, n)
    for i in range(len(pbd) - 2):
        examine(pbd[i:i + 3], award, verbose)
    if verbose:
        print()


def _build_course_spec(courses):
    """Build DSL ``course ...`` lines from the course catalog.

    Returns ``(spec_lines, skipped)`` where ``skipped`` counts the records
    that could not be converted.
    """
    spec = []
    skipped = 0
    for c in courses.values():
        try:
            dept = c['dept']
            number = c['number']
            # NOTE(review): num_units is not defined in the visible part of
            # this file; if it is not provided elsewhere, every record ends
            # up in the skip branch below.
            low = num_units(c['min_units'])
            high = num_units(c['max_units']) if 'max_units' in c else low
            amount = high if high == low else f"{low}-{high}"
            spec.append(f"course {dept}{number} {amount} units")
        except Exception:
            # Best effort: a record that cannot be converted is skipped, but
            # the skip is counted so it no longer goes unnoticed.
            skipped += 1
    return spec, skipped


def _split_program_blocks(lines):
    """Group stripped, non-blank lines into blocks separated by blank lines."""
    blocks = []
    current = []
    for line in lines:
        text = line.strip()
        if text:
            current.append(text)
        elif current:
            blocks.append(current)
            current = []
    # The final block is not followed by a blank line; keep it as well.
    if current:
        blocks.append(current)
    return blocks


def build_program_rules():
    """Derive rule lines for every program and report which ones are clean.

    Reads the course and program JSON caches, writes one block of rule lines
    per program to ``cache/prog_debug.txt``, prints the programs split into
    "okay" and "not okay" (a program is not okay when any of its lines starts
    with ``*``, i.e. a heading no pattern recognised), and finally parses the
    generated course declarations with ``do_parse``.
    """
    cfile = "cache/courses/courses_active_built.json"
    pfile = "cache/programs/programs_built.json"
    debug_path = 'cache/prog_debug.txt'

    with codecs.open(cfile, 'r', 'utf-8') as f:
        courses = json.loads(f.read())
    with codecs.open(pfile, 'r', 'utf-8') as f:
        programs = json.loads(f.read())

    course_spec, skipped = _build_course_spec(courses)
    if skipped:
        print(f"skipped {skipped} course record(s) that could not be converted")

    with codecs.open(debug_path, 'w', 'utf-8') as d_out:

        def dbg(s):
            d_out.write(str(s) + "\n")

        for p in programs.values():
            v2 = 0  # print debugging stuff

            # Each award (degree or certificate)
            award = p['award'] + " " + p['program_title']
            # The leading blank line separates programs in the debug file.
            dbg("\n" + award)
            print("\n" + award)

            this_rule = ""
            course_count = 0
            r = p['requirements']

            # Each numbered chunk (k) in the requirements section
            for k in sorted(r.keys()):
                # Each 'program block definition'
                # 1st is dict with unit totals, rest are lists.
                block = sorted(r[k][1:], key=lambda x: float(x[1]))
                check_ands_ors_pbd(award, block)

                for each_r in block:
                    if not isinstance(each_r, list):
                        continue
                    kind = each_r[0]

                    if kind == 'h3' or (kind == 'noncourse' and is_noncourse_new_section(each_r[2])):
                        # This is a rule title: flush the rule collected so far.
                        if this_rule and course_count:
                            dbg(this_rule)
                        course_count = 0
                        raw_rule = each_r[2]
                        good_rule, num = lookup_rule(raw_rule)
                        if good_rule:
                            n = word2num(num) if num else ""
                            if v2:
                                print(f"\tn = {n}")
                            # Substitute the quantity for the " n " placeholder.
                            actual_rule = re.sub(r'\sn\s', f' {n} ', good_rule)
                            if v2:
                                print(f"\tactual rule is: {actual_rule}")
                            this_rule = f"{actual_rule} from "
                        else:
                            # Unrecognised heading: mark with '*' for review.
                            this_rule = " * " + raw_rule + " " + "from "

                    elif kind == 'course':
                        # course in a rule
                        if not this_rule:
                            this_rule = "take_all from "
                        is_lab = '[L]' if is_lab_class(each_r[2]['code']) else ''
                        this_rule += f"{each_r[2]['code']}{is_lab}, "
                        if v2:
                            print(f"\t\tthis rule is now: {this_rule}")
                        course_count += 1

            # Flush the last rule of this program.
            if course_count:
                dbg(this_rule)

    with codecs.open(debug_path, 'r', 'utf-8') as d_in:
        progs = _split_program_blocks(d_in.readlines())

    okay = []
    notokay = []
    for prog in progs:
        # Each program is classified exactly once.
        if any(line.startswith('*') for line in prog):
            notokay.append(prog)
        else:
            okay.append(prog)

    print("\n\n\n\nThese programs are okay:")
    for prog in okay:
        for line in prog:
            print(line)
        print()

    print("\n\nThese programs are not okay:")
    for prog in notokay:
        for line in prog:
            print(line)
        print()

    print(f"okay: {len(okay)}")
    print(f"not okay: {len(notokay)}")

    if not course_spec:
        # The grammar requires at least one declaration, so there is nothing
        # to parse.
        print("no course declarations were built; skipping DSL parse")
        return None
    return do_parse('\n'.join(course_spec))


if __name__ == "__main__":
    options = {
        1: ['parsing example', parser],
        2: ['build program rules', build_program_rules],
    }
    print('')

    # A purely numeric first argument selects the option directly; anything
    # else falls back to the interactive menu.
    from_argv = len(sys.argv) > 1 and re.fullmatch(r'\d+', sys.argv[1]) is not None
    if from_argv:
        resp = sys.argv[1]
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')

    try:
        label, action = options[int(resp)]
    except (ValueError, KeyError):
        sys.exit(f"Unknown option: {resp!r}")

    if from_argv:
        print("\n\nPerforming: %s\n\n" % label)

    # Call the function in the options dict
    action()