stats refine data prep
This commit is contained in:
parent
e6ffe7a28d
commit
34dccd0a2b
123
stats.py
123
stats.py
|
|
@ -51,6 +51,33 @@ from collections import defaultdict
|
||||||
all_grades_file = f"cache/grades_all.csv"
|
all_grades_file = f"cache/grades_all.csv"
|
||||||
all_courses_file = f"cache/course_grades_all.csv"
|
all_courses_file = f"cache/course_grades_all.csv"
|
||||||
|
|
||||||
|
def sem_num_to_code(sem_num):
    """Convert a six-digit term number such as '202370' into a short code such as 'fa23'.

    The first four digits are the year (only the last two are kept) and the
    last two digits select the term: 10 -> wi, 30 -> sp, 50 -> su, 70 -> fa.

    Returns an empty string when ``sem_num`` is not exactly six digits or
    when its two-digit suffix is not one of the known terms.
    """
    terms = {'10': 'wi', '30': 'sp', '50': 'su', '70': 'fa'}
    m = re.search(r'^(\d\d\d\d)(\d\d)$', sem_num)
    if not m:
        return ""
    term = terms.get(m.group(2))
    if term is None:
        # Unknown suffix (e.g. '202320'): report it the same way as any other
        # unparseable input instead of raising KeyError.
        return ""
    year = m.group(1)[2:4]
    return f"{term}{year}"
|
||||||
|
|
||||||
|
def sem_code_to_num(sem_code):  # e.g. 'fa23'
    """Convert a short term code such as 'fa23' into a six-digit number such as '202370'.

    The two letters select the term (wi -> 10, sp -> 30, su -> 50, fa -> 70)
    and the two digits are the year, which is always prefixed with '20'.

    Returns an empty string when ``sem_code`` is not two lowercase letters
    followed by two digits, or when the letters are not a known term.
    """
    terms = {'wi': '10', 'sp': '30', 'su': '50', 'fa': '70'}
    m = re.search(r'^([a-z]{2})(\d\d)$', sem_code)
    if not m:
        return ""
    suffix = terms.get(m.group(1))
    if suffix is None:
        # Unknown term letters (e.g. 'xx23'): report it the same way as any
        # other unparseable input instead of raising KeyError.
        return ""
    return f"20{m.group(2)}{suffix}"
|
||||||
|
|
||||||
|
def codetest():
    """Print sample term numbers and term codes next to their converted forms.

    Term numbers are run through sem_num_to_code first, then term codes are
    run through sem_code_to_num; each result is printed as "input: output".
    """
    term_numbers = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
    term_codes = 'fa21 wi22 sp23 su23 fa23 wi24'.split(' ')
    line = "{}: {}"

    # Each sample list is paired with the converter that applies to it.
    for samples, convert in ((term_numbers, sem_num_to_code), (term_codes, sem_code_to_num)):
        for sample in samples:
            print(line.format(sample, convert(sample)))
|
||||||
|
|
||||||
def get_all():
|
def get_all():
|
||||||
terms = '178 177 176 175 174 173 172 171 168 65 64 62 63 61 60 25 26 23 22 21'.split(' ')
|
terms = '178 177 176 175 174 173 172 171 168 65 64 62 63 61 60 25 26 23 22 21'.split(' ')
|
||||||
sems = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
|
sems = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
|
||||||
|
|
@ -120,7 +147,10 @@ def to_crn_fallback(name):
|
||||||
#print("Exception: ", e, name)
|
#print("Exception: ", e, name)
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
def ilearn_name_to_course_code(iname):
    """Return the part of ``iname`` before its first space.

    If ``iname`` contains no space, the whole string is returned.
    """
    head, _sep, _rest = iname.partition(' ')
    return head
|
||||||
|
|
||||||
def short_name_to_crn(name):
|
def short_name_to_crn(name):
|
||||||
#print(name)
|
#print(name)
|
||||||
|
|
@ -168,7 +198,27 @@ def short_name_to_teacher_type_crn_sem(name):
|
||||||
|
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|
||||||
|
# Module-level cache filled on first use by load_pocrs().
# Keys are "<column 0> <column 1>" of cache/pocr_passed.csv, values are column 2.
pocrs = {}


def load_pocrs():
    """Return the POCR lookup table, reading cache/pocr_passed.csv on first use.

    The first row of the file is treated as a header and skipped. Every
    other row with at least three columns adds one entry mapping
    ``row[0] + " " + row[1]`` to ``row[2]``. Blank or short rows are ignored.

    The result is cached in the module-level ``pocrs`` dict, so the file is
    only re-read while that dict is still empty.

    Raises:
        OSError: if the CSV file cannot be opened.
    """
    if not pocrs:
        # newline="" is what the csv module expects for files it reads.
        with open('cache/pocr_passed.csv', newline="") as csvfile:
            csvreader = csv.reader(csvfile)
            next(csvreader, None)  # skip the header; tolerate an empty file
            for row in csvreader:
                if len(row) < 3:
                    continue
                pocrs[row[0] + " " + row[1]] = row[2]
    return pocrs
|
||||||
|
|
||||||
|
def lookup_pocr(teacher, course, sem):
    """Report whether ``teacher`` passed POCR for ``course`` before term ``sem``.

    The table from load_pocrs() is looked up under ``teacher + " " + course``.
    The stored term code and ``sem`` (both short codes such as 'fa23') are
    converted with sem_code_to_num and compared as strings.

    Returns True only when both codes convert successfully and the stored
    term is strictly earlier than ``sem``. Returns False when there is no
    entry, when either code cannot be converted, or when the stored term is
    the same as or later than ``sem``.
    """
    passed_code = load_pocrs().get(teacher + " " + course)
    if passed_code is None:
        return False

    sem_passed = sem_code_to_num(passed_code)
    sem_test = sem_code_to_num(sem)
    # sem_code_to_num returns "" for input it cannot parse. An empty string
    # sorts before every real term number, so without this guard a malformed
    # stored code would be reported as "passed earlier".
    if not sem_passed or not sem_test:
        return False
    return sem_passed < sem_test
|
||||||
|
|
||||||
def nametest():
|
def nametest():
|
||||||
with open(all_courses_file) as csvfile:
|
with open(all_courses_file) as csvfile:
|
||||||
|
|
@ -182,21 +232,22 @@ def nametest():
|
||||||
def above_70(li, maximum):
    """Return the fraction of values in ``li`` that are at least 70% of ``maximum``.

    The result is rounded to three decimal places. An empty ``li`` yields
    0.0 instead of raising ZeroDivisionError.
    """
    total = len(li)
    if total == 0:
        return 0.0
    cutoff = 0.7 * maximum
    passing = sum(1 for x in li if x >= cutoff)
    return round(passing / total, 3)
|
||||||
|
|
||||||
def process_one_course_grades(block, output):
|
def process_one_course_grades(block, output, teacher_to_code, course_to_code):
|
||||||
fxns = [mean, median, stdev, min, max, len]
|
fxns = [mean, median, stdev, min, max, len]
|
||||||
c_id = block[0][0]
|
c_id = block[0][0]
|
||||||
sem = block[0][1]
|
sem = block[0][1]
|
||||||
course_code = block[0][2]
|
course_code = block[0][2]
|
||||||
cur_scores = [num(x[6]) for x in block]
|
cur_scores = [num(x[6]) for x in block]
|
||||||
final_scores = [num(x[7]) for x in block]
|
final_scores = [num(x[7]) for x in block]
|
||||||
print(course_code)
|
|
||||||
teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code)
|
teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code)
|
||||||
if not teacher:
|
if not teacher:
|
||||||
return
|
return
|
||||||
#print(cur_scores)
|
tch_code = teacher_to_code[teacher]
|
||||||
#print(final_scores)
|
crs_code = course_to_code[course_code]
|
||||||
|
if len(final_scores) < 2:
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
(cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count) = [round(f(cur_scores)) for f in fxns]
|
(cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count) = [round(f(cur_scores)) for f in fxns]
|
||||||
(final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns]
|
(final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns]
|
||||||
|
|
@ -204,19 +255,62 @@ def process_one_course_grades(block, output):
|
||||||
cur_pct_passed = above_70(cur_scores, cur_max)
|
cur_pct_passed = above_70(cur_scores, cur_max)
|
||||||
final_pct_passed = above_70(final_scores, final_max)
|
final_pct_passed = above_70(final_scores, final_max)
|
||||||
|
|
||||||
print("Course % > 70 mean median stdev min max count")
|
if final_max == 0: return
|
||||||
print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count))
|
|
||||||
print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count))
|
scaled_final_scores = [ x / final_max for x in final_scores]
|
||||||
print()
|
(scl_mean, scl_median, scl_stdev, scl_min, scl_max, scl_count) = [round(f(scaled_final_scores),2) for f in fxns]
|
||||||
|
|
||||||
|
good_code = ilearn_name_to_course_code(course_code)
|
||||||
|
pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0
|
||||||
|
|
||||||
|
|
||||||
|
#print("Course % > 70 mean median stdev min max count")
|
||||||
|
#print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count))
|
||||||
|
#print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count))
|
||||||
|
#print()
|
||||||
#output.writerow( [sem2, crn, course_code, "current score", teacher, mode, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count] )
|
#output.writerow( [sem2, crn, course_code, "current score", teacher, mode, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count] )
|
||||||
output.writerow( [sem2, crn, course_code, "final score", teacher, mode, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count] )
|
#output.writerow( [crs_code, pocr, tch_code, mode, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count] )
|
||||||
|
output.writerow( [crs_code, good_code, pocr, tch_code, mode, final_pct_passed, scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count] )
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Exception:", e)
|
print("Exception:", e)
|
||||||
|
|
||||||
def process_grades():
|
def process_grades():
|
||||||
|
# first loop to get all names
|
||||||
|
courses_labeled = {}
|
||||||
|
teacher_to_code = {}
|
||||||
|
code_to_teacher = {}
|
||||||
|
|
||||||
|
course_to_code = {}
|
||||||
|
code_to_course = {}
|
||||||
|
|
||||||
|
index = 1001
|
||||||
|
crs_index = 4001
|
||||||
|
|
||||||
|
with open(all_grades_file, newline="") as csvfile:
|
||||||
|
csvreader = csv.reader(csvfile)
|
||||||
|
next(csvreader)
|
||||||
|
for row in csvreader:
|
||||||
|
crn_sem = row[0] + '_' + row[1]
|
||||||
|
if not crn_sem in courses_labeled:
|
||||||
|
teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(row[2])
|
||||||
|
courses_labeled[crn_sem] = teacher
|
||||||
|
|
||||||
|
if not row[2] in course_to_code:
|
||||||
|
course_to_code[row[2]] = crs_index
|
||||||
|
code_to_course[crs_index] = row[2]
|
||||||
|
crs_index += 1
|
||||||
|
|
||||||
|
if teacher:
|
||||||
|
if not teacher in teacher_to_code:
|
||||||
|
teacher_to_code[teacher] = index
|
||||||
|
code_to_teacher[index] = teacher
|
||||||
|
index += 1
|
||||||
|
codecs.open('cache/teacher_lookup_codes.json','w','utf-8').write( json.dumps( [teacher_to_code, code_to_teacher], indent=2) )
|
||||||
|
codecs.open('cache/course_lookup_codes.json','w','utf-8').write( json.dumps( [course_to_code, code_to_course], indent=2) )
|
||||||
|
|
||||||
with open(all_courses_file, "w", newline="") as output_f:
|
with open(all_courses_file, "w", newline="") as output_f:
|
||||||
output = csv.writer(output_f)
|
output = csv.writer(output_f)
|
||||||
output.writerow("sem crn shortname score_type teacher mode percent_passed mean median stdev min max count".split(" "))
|
output.writerow("course_code course pocr_status teacher_code mode percent_passed scl_mean scl_median scl_stdev scl_min scl_max count".split(" "))
|
||||||
|
|
||||||
with open(all_grades_file, newline="") as csvfile:
|
with open(all_grades_file, newline="") as csvfile:
|
||||||
csvreader = csv.reader(csvfile)
|
csvreader = csv.reader(csvfile)
|
||||||
|
|
@ -230,14 +324,14 @@ def process_grades():
|
||||||
|
|
||||||
if index != current_index:
|
if index != current_index:
|
||||||
if block:
|
if block:
|
||||||
process_one_course_grades(block, output)
|
process_one_course_grades(block, output, teacher_to_code, course_to_code)
|
||||||
block = []
|
block = []
|
||||||
current_index = index
|
current_index = index
|
||||||
|
|
||||||
block.append(row)
|
block.append(row)
|
||||||
|
|
||||||
if block:
|
if block:
|
||||||
process_one_course_grades(block, output)
|
process_one_course_grades(block, output, teacher_to_code, course_to_code)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -247,6 +341,7 @@ if __name__ == "__main__":
|
||||||
options = { 1: ['get all historical grades from ilearn',get_all] ,
|
options = { 1: ['get all historical grades from ilearn',get_all] ,
|
||||||
2: ['process grades csv file',process_grades] ,
|
2: ['process grades csv file',process_grades] ,
|
||||||
3: ['test shortname parse',nametest] ,
|
3: ['test shortname parse',nametest] ,
|
||||||
|
4: ['test sem codes',codetest] ,
|
||||||
}
|
}
|
||||||
print ('')
|
print ('')
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue