916 lines
33 KiB
Python
916 lines
33 KiB
Python
# statistics
|
|
|
|
"""
|
|
## Investigate: Success rates (grades) of students in:
|
|
|
|
- online courses (over all)
|
|
- sync and async and online live
|
|
- teachers/courses that have passed POCR (are all async?)
|
|
- teachers that have done more than the minimum training in online teaching
|
|
- in person classes, if grades are available
|
|
|
|
|
|
|
|
## Data collection
|
|
|
|
- Choose how many semesters (10?)
|
|
- Script 1 - given a CRN and Semester, download all grades
|
|
- Check if grades were used and make sense
|
|
- Compute mean, % > 70, median, etc.
|
|
|
|
- Anonymization steps
|
|
- replace teacher names w/ id number
|
|
- replace student names w/ id number
|
|
- replace course names w/ course code
|
|
|
|
- Script 2 - given all semester schedules, generate lists of:
|
|
- CRNs which are online, online live, hybrid, in-person, excluded
|
|
- CRNs in which teacher and course have passed pocr (and semester is greater than their pass date)
|
|
- CRNs in which teacher passed pocr for a different course (and semester is greater than their pass date)
|
|
- CRNs to exclude, for example SP20, because of covid. Possibly SU20 and FA20
|
|
|
|
- CRNs which are POCR approved
|
|
- CRNs in which teacher has done more than the minimum training in online teaching
|
|
|
|
- Student ids which have participated in the online orientation over a certain threshold
|
|
|
|
- Next steps: generate the x-reference for what categories teachers are in, and
|
|
integrate into the main data file.
|
|
|
|
|
|
- Next steps (June/July 2023)
|
|
- add campus, time of day, and sem_order (which semester in their college career did they take it) columns
|
|
- Organize rows by students
|
|
+ Develop a way to categorize them: by course set and/or score set (clustering: kmeans, forest, etc)
|
|
|
|
- Goals
|
|
- display and summarize clusters of students on a dashboard
|
|
- ongoing categorization (implying course recommendations and interventions) based on it
|
|
-
|
|
|
|
|
|
## Hypothesis Testing
|
|
|
|
-
|
|
"""
|
|
import codecs, os
|
|
import json, csv, requests, sys, re
|
|
from multiprocessing import Semaphore
|
|
from statistics import mean, median, stdev
|
|
from pipelines import fetch, url
|
|
from courses import getCoursesInTerm, course_enrollment
|
|
from localcache import get_course_enrollments
|
|
from collections import defaultdict
|
|
|
|
# Output/cache file locations. Plain string literals: the originals were
# f-strings with no placeholders, which is misleading, so the prefixes
# are dropped.
all_grades_file = "cache/grades_all.csv"                # raw per-student grade rows
all_courses_file = "cache/course_grades_all.csv"        # v1: one stats row per course
all_courses_file2 = "cache/course_grades_compact.csv"   # v1: compact stats per course
all_courses_file3 = "cache/course_grades_full.csv"      # v2: one row per student/course
all_courses_file4 = "cache/course_grades_full_bystudent.csv"
all_courses_file5 = "cache/courses_passed_bystudent.csv"
student_courses_scores = "cache/courses_student_scores.csv"
student_orientation_participation = 'cache/participation_orientation_courses.json'
|
|
|
|
|
|
|
|
def num(s):
    """Parse a CSV grade cell into a number.

    Returns 0 for an empty string, an int when the value is integral
    (including values exported as e.g. "85.0"), otherwise a float.
    """
    if s == '':
        return 0
    # Strip only a *trailing* ".0" so integral exports become ints.
    # BUG FIX: the previous unanchored r'\.0' removed the first ".0"
    # anywhere in the string, corrupting values like "10.05" -> "105".
    s = re.sub(r'\.0$', '', s)
    try:
        return int(s)
    except ValueError:
        return float(s)
|
|
|
|
|
|
|
|
def sem_num_to_code(sem_num):
    """Convert a numeric term id like '202330' to a short code like 'sp23'.

    Returns "" when the input is not a six-digit term number.
    """
    match = re.search(r'^(\d\d\d\d)(\d\d)$', sem_num)
    if not match:
        return ""
    season_codes = {'10': 'wi', '30': 'sp', '50': 'su', '70': 'fa'}
    year_suffix = match.group(1)[2:4]
    return season_codes[match.group(2)] + year_suffix
|
|
|
|
def sem_code_to_num(sem_code):  # e.g. 'fa23'
    """Convert a short semester code like 'fa23' to a numeric term id '202370'.

    Returns "" when the input is not two lowercase letters plus two digits.
    """
    match = re.search(r'^([a-z]{2})(\d\d)$', sem_code)
    if not match:
        return ""
    season_nums = {'wi': '10', 'sp': '30', 'su': '50', 'fa': '70'}
    return "20" + match.group(2) + season_nums[match.group(1)]
|
|
|
|
def codetest():
    """Spot-check the two semester-code converters by printing both directions."""
    sem_nums = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
    sem_codes = 'fa21 wi22 sp23 su23 fa23 wi24'.split(' ')

    for sem_num in sem_nums:
        print("{}: {}".format(sem_num, sem_num_to_code(sem_num)))

    for sem_code in sem_codes:
        print("{}: {}".format(sem_code, sem_code_to_num(sem_code)))
|
|
|
|
def get_all():
    """Script 1 (see module docstring): download all historical grades.

    For each (Canvas term id, Banner semester number) pair, fetch every
    active course in the term and append its students' grades to
    all_grades_file as CSV rows.
    """
    # Canvas term ids, positionally paired with the Banner semester numbers
    # on the next line (newest first).
    terms = '178 177 176 175 174 173 172 171 168 65 64 62 63 61 60 25 26 23 22 21'.split(' ')
    sems = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
    # Save grades to a CSV file
    with open(all_grades_file, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["crn", "sem", "coursecode", "s_can_id","g","name", "current", "final"])
        for (term,sem) in zip(terms,sems):
            print(term,sem,"\n")
            courses = getCoursesInTerm(term,get_fresh=0,show=0,active=1)
            for c in courses:
                print(c['name'])
                c_code = c['course_code']
                grades(writer, sem, c['id'], c_code)
                # flush after each course so a crash keeps partial progress
                # NOTE(review): original indentation was lost; confirm the
                # flush belongs per-course rather than per-term
                csvfile.flush()
|
|
|
|
|
|
def grades(writer, sem, COURSE_ID, course_code):
    """Fetch one course's users (with enrollment grades) and write a CSV row
    per student enrollment via `writer`."""
    params = { "include[]": ["enrollments", "current_grading_period_scores"] }
    users = fetch(url + f"/api/v1/courses/{COURSE_ID}/users",0, params)

    for user in users:
        try:
            student_id = user["id"]
            name = user["name"]
            login = user["login_id"]
            print("\t", name)
            enrollment = user['enrollments'][0]
            # only StudentEnrollment rows carry grades we care about
            if enrollment['type'] == 'StudentEnrollment':
                current = enrollment["grades"]["current_score"]
                final = enrollment["grades"]["final_score"]
                writer.writerow([COURSE_ID, sem, course_code, student_id, login, name, current, final])
        except Exception as e:
            # best-effort: skip users missing any expected field
            print("Exception:", e)
|
|
|
|
|
|
def get_student_orientations():
    """Total orientation-course page views per student.

    Fetches (and caches per-course) Canvas analytics activity for every
    known orientation course shell, then writes the summed page views per
    student to student_orientation_participation.
    """
    # Orientation course shells: name -> Canvas course id; the trailing
    # comments are enrollment counts at the time of writing.
    courses = {'iLearn Student Orientation 2022':'9768', # 8170 students
        'Kickstart Online Orientation - Transfer':'36', # 6149
        'Kickstart Online Orientation - New to College':'35', # 5392
        'LIB732 SP18':'3295', # 2193
        'LIB732 FA17':'2037', # 1868
        'LIB732 SP17':'69', # 1645
        'Kickstart Online Orientation - Returning':'37', # 1463
        'iLearn Student Orientation 2023':'15924', # 1292
        'LIB732 SU17':'1439' # 1281
        }

    views_bycourse = {}
    all_student_ids = set()

    # get pageviews of each orientation course
    for c,i in courses.items():
        print(c)
        cache_file_name = f'cache/participation_course_{i}.json'
        # enrollment tuples: index 1 is the student id
        student_ids = [x[1] for x in get_course_enrollments(i)]
        all_student_ids.update(student_ids)
        if os.path.exists(cache_file_name):
            # NOTE(review): JSON round-trip makes dict keys strings; if the
            # fresh path yields int ids, cached and fresh lookups may differ
            pv = json.loads(codecs.open(cache_file_name,'r','utf-8').read())
        else:
            pv = get_student_page_views(i, student_ids)
            codecs.open(cache_file_name,'w','utf-8').write(json.dumps(pv,indent=2))
        views_bycourse[i] = pv

    # add up pageviews for each student
    views_bystudent = {}
    for student_id in all_student_ids:
        views_bystudent[student_id] = sum([views_bycourse[i].get(student_id,0) for i in courses.values()])
    codecs.open(student_orientation_participation,'w','utf-8').write(json.dumps(views_bystudent,indent=2))
|
|
|
|
def get_student_page_views(course_id, student_ids):
    """Return {student_id: total page views} for one course, via the Canvas
    per-user analytics endpoint."""
    verbose = 0
    page_views = {}

    for student_id in student_ids:
        endpoint = f'/api/v1/courses/{course_id}/analytics/users/{student_id}/activity'
        activity = fetch(url + endpoint, verbose)
        # 'page_views' maps timestamp buckets to counts; total them
        page_views[student_id] = sum(activity.get('page_views', {}).values())

    if verbose: print(page_views)
    return page_views
|
|
|
|
# Lazily-populated module caches (see load_schedules / load_orientations).
schedules = {}
orientations = {}
|
|
|
|
def load_schedules():
    """Populate the module-level `schedules` cache (keyed by semester code)
    from cache/schedule/*_sched_expanded.json. No-op when already loaded."""
    global schedules
    if schedules:
        return
    for fname in os.listdir('cache/schedule'):
        match = re.search(r'(\w\w\d\d)_sched_expanded\.json', fname)
        if not match:
            continue
        raw = codecs.open('cache/schedule/' + fname, 'r', 'utf-8').read()
        schedules[match.group(1)] = json.loads(raw)
|
|
|
|
def load_orientations():
    """Load (once) and return the per-student orientation page-view totals
    written by get_student_orientations()."""
    global orientations
    if not orientations:
        raw = codecs.open(student_orientation_participation,'r','utf-8').read()
        orientations = json.loads(raw)
    return orientations
|
|
|
|
|
|
def to_crn_fallback(name):
    """Best-effort extraction of (crn, sem) from a free-form course name.

    Returns (None, None) when either a 5-digit CRN or a semester-looking
    code (e.g. 'fa23') cannot be found.
    """
    name = name.lower()
    try:
        crn_match = re.search(r'(\d\d\d\d\d)', name)
        if not crn_match:
            return None, None
        # NOTE: the character class matches any two of these letters
        # (e.g. 'ss23'), not only wi/sp/su/fa — kept loose on purpose so
        # odd shell names still resolve.
        # (Also removed a redundant second .lower() call; `name` is
        # already lowercased above.)
        sem_match = re.search(r'([wispufa][wispufa]\d\d)', name)
        if not sem_match:
            return None, None
        return crn_match.group(1), sem_match.group(1)
    except Exception:
        # e.g. non-string input; treat as unparseable
        return None, None
|
|
|
|
def ilearn_name_to_course_code(iname):
    """Return the catalog course code: the first space-separated token of an
    iLearn shell name like 'MATH101 FA23 12345'."""
    return iname.split(' ')[0]
|
|
|
|
def short_name_to_crn(name):
    """Parse a 'CODE SEM CRN' short name into (crn, sem).

    Falls back to to_crn_fallback() for anything that doesn't match the
    standard layout (including cross-listed '12345/12346' CRNs where the
    first part isn't a clean 5-digit number).
    """
    try:
        tokens = name.split(' ')
        code, sem, crn = tokens[0], tokens[1], tokens[2]
        if not re.search(r'^(\w\w\d\d)$', sem):
            return to_crn_fallback(name)
        if re.search(r'^(\d\d\d\d\d)$', crn):
            return crn, sem
        # cross-listed sections look like '12345/12346': keep the first CRN
        first_part = crn.split('/')[0]
        if re.search(r'^(\d\d\d\d\d)$', first_part):
            return first_part, sem
        return to_crn_fallback(name)
    except Exception:
        # too few tokens, non-string input, etc.
        return to_crn_fallback(name)
|
|
|
|
|
|
def fixname(n):
    """Collapse runs of whitespace to single spaces and trim the ends."""
    collapsed = re.sub(r'\s+', ' ', n)
    return collapsed.strip()
|
|
|
|
|
|
def short_name_to_teacher_type_crn_sem(name):
    """Resolve a course short name to (teacher, type, crn, sem) using the
    cached semester schedules.

    Returns (None, None, None, None) when the name can't be parsed or the
    CRN isn't found in that semester's schedule.
    """
    load_schedules()
    crn, sem = short_name_to_crn(name)

    try:
        if sem:
            sem = sem.lower()
            # winter-session shells are filed under the spring schedule
            if sem[0:2]=='wi':
                sem = 'sp' + sem[2:]
            for course in schedules[sem]:
                if course['crn'] == crn:
                    return fixname(course['teacher']), course['type'], crn, sem
    except Exception as e:
        # e.g. KeyError: no schedule file loaded for this semester
        return None, None, None, None

    return None, None, None, None
|
|
|
|
# Lazy cache for load_pocrs(): "teacher course" -> semester code when POCR passed.
pocrs = {}
|
|
|
|
def load_pocrs():
    """Load (once) the POCR pass list from cache/pocr_passed.csv.

    Returns a dict keyed by "teacher course" mapping to the semester code
    in which POCR was passed.
    """
    global pocrs
    if not pocrs:
        with open('cache/pocr_passed.csv') as csvfile:
            reader = csv.reader(csvfile)
            next(reader)  # skip the header row
            for row in reader:
                key = "{} {}".format(row[0], row[1])
                pocrs[key] = row[2]
    return pocrs
|
|
|
|
def lookup_pocr(teacher, course, sem):
    """True when `teacher` passed POCR for `course` strictly before
    semester `sem` (both compared as numeric term ids)."""
    passed = load_pocrs()
    key = teacher + " " + course
    if key not in passed:
        return False
    return sem_code_to_num(passed[key]) < sem_code_to_num(sem)
|
|
|
|
def nametest():
    """Smoke-test short-name parsing against the per-course summary file."""
    with open(all_courses_file) as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)

        for row in csvreader:
            print(row[0], "-", short_name_to_teacher_type_crn_sem(row[0]))
            # skip every other row — NOTE(review): original indentation was
            # lost; this is assumed to sit inside the loop (placed after it,
            # the call would raise StopIteration on the exhausted reader).
            # Also note it raises StopIteration on an odd row count.
            next(csvreader)
|
|
|
|
def above_70(li, maximum):
    """Fraction of scores in `li` at or above 70% of `maximum`, rounded to
    three places.

    Returns 0.0 for an empty list (previously raised ZeroDivisionError).
    """
    if not li:
        return 0.0
    cutoff = 0.7 * maximum
    passing = [score for score in li if score >= cutoff]
    return round(len(passing) / len(li), 3)
|
|
|
|
|
|
|
|
|
|
|
|
# v1, does a row of averages for each course
def process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code):
    """Summarize one course's grade rows into a single anonymized stats row.

    `block` is a list of raw CSV rows for one course; columns are
    [course_id, sem, course_code, student_id, login, name, current, final].
    Writes one full-stats row to `output` and one compact row to `out_c`.
    Skips courses with no resolvable teacher or fewer than two scores.
    """
    fxns = [mean, median, stdev, min, max, len]
    c_id = block[0][0]
    sem = block[0][1]
    course_code = block[0][2]
    cur_scores = [num(x[6]) for x in block]
    final_scores = [num(x[7]) for x in block]
    teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code)
    if not teacher:
        return
    # anonymized codes assigned in process_grades()
    tch_code = teacher_to_code[teacher]
    crs_code = course_to_code[course_code]
    # stdev needs at least two data points
    if len(final_scores) < 2:
        return
    try:
        (cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count) = [round(f(cur_scores)) for f in fxns]
        (final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns]

        # fraction of students at/above 70% of the course's top score
        cur_pct_passed = above_70(cur_scores, cur_max)
        final_pct_passed = above_70(final_scores, final_max)

        # a course where every final score is 0 has unusable grades
        if final_max == 0: return

        # rescale so the course's top final score is 1.0
        scaled_final_scores = [ x / final_max for x in final_scores]
        (scl_mean, scl_median, scl_stdev, scl_min, scl_max, scl_count) = [round(f(scaled_final_scores),2) for f in fxns]

        good_code = ilearn_name_to_course_code(course_code)
        pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0

        output.writerow( [crs_code, good_code, pocr, tch_code, mode, final_pct_passed, scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count] )
        out_c.writerow([crs_code, good_code, pocr, tch_code, mode, final_pct_passed, scl_mean, scl_median, scl_stdev, final_count])
    except Exception as e:
        # best-effort: skip courses whose grade data can't be summarized
        print("Exception:", e)
|
|
|
|
|
|
|
|
|
|
# v2, one line per student/course
def process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code):
    """Write one anonymized row per student for one course's grade block.

    Row layout matches the header written in process_grades():
    course_code, catalog code, POCR flag, orientation page views,
    teacher code, delivery mode, student id, final score scaled to the
    course max. Skips courses with no teacher or fewer than two scores.
    """
    fxns = [mean, median, stdev, min, max, len]
    c_id = block[0][0]
    sem = block[0][1]
    course_code = block[0][2]
    cur_scores = [num(x[6]) for x in block]
    final_scores = [num(x[7]) for x in block]
    teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code)
    if not teacher:
        return
    tch_code = teacher_to_code[teacher]
    crs_code = course_to_code[course_code]
    # stdev needs at least two data points
    if len(final_scores) < 2:
        return
    try:

        # "course_code course pocr_status orientation_status teacher_code mode student_id scaled_score"

        (final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns]
        final_pct_passed = above_70(final_scores, final_max)

        # a course where every final score is 0 has unusable grades
        if final_max == 0: return

        scaled_final_scores = [ x / final_max for x in final_scores]
        (scl_mean, scl_median, scl_stdev, scl_min, scl_max, scl_count) = [round(f(scaled_final_scores),2) for f in fxns]

        good_code = ilearn_name_to_course_code(course_code)
        pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0

        # orientation page-view totals keyed by student id
        o = load_orientations()

        for row in block:
            student_id = row[3]
            orientation = o[student_id] if student_id in o else 0
            scaled_score = round(num(row[7]) / final_max, 2)
            out_f.writerow([crs_code, good_code, pocr, orientation, tch_code, mode, student_id, scaled_score])
        print(course_code)
    except Exception as e:
        # best-effort: skip courses whose grade data can't be processed
        print("Exception:", e)
|
|
|
|
def process_grades():
    """Anonymize and summarize all_grades_file in two passes.

    Pass 1 assigns numeric codes to teachers (from 1001) and course shells
    (from 4001) and saves the lookup tables. Pass 2 re-reads the file,
    groups consecutive rows by course id, and writes the v1 per-course and
    v2 per-student output files.
    """
    # first loop to get all names
    courses_labeled = {}
    teacher_to_code = {}
    code_to_teacher = {}

    course_to_code = {}
    code_to_course = {}

    # anonymization code counters
    index = 1001
    crs_index = 4001

    with open(all_grades_file, newline="") as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # header
        for row in csvreader:
            # row: [crn/course_id, sem, coursecode, ...]
            crn_sem = row[0] + '_' + row[1]
            if not crn_sem in courses_labeled:
                teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(row[2])
                courses_labeled[crn_sem] = teacher

                if not row[2] in course_to_code:
                    course_to_code[row[2]] = crs_index
                    code_to_course[crs_index] = row[2]
                    crs_index += 1

                if teacher:
                    if not teacher in teacher_to_code:
                        teacher_to_code[teacher] = index
                        code_to_teacher[index] = teacher
                        index += 1
    # persist both lookup directions for later de-anonymization
    codecs.open('cache/teacher_lookup_codes.json','w','utf-8').write( json.dumps( [teacher_to_code, code_to_teacher], indent=2) )
    codecs.open('cache/course_lookup_codes.json','w','utf-8').write( json.dumps( [course_to_code, code_to_course], indent=2) )

    # v2 output: one line per student/course
    out_fullrows = codecs.open(all_courses_file3,'w','utf-8')
    out_f = csv.writer(out_fullrows)
    out_f.writerow("course_code course pocr_status orientation_status teacher_code mode student_id scaled_score".split(" "))

    # v1 compact output
    out_compact = codecs.open(all_courses_file2,'w','utf-8')
    out_c = csv.writer(out_compact)
    out_c.writerow("course_code course pocr_status teacher_code mode percent_passed scl_mean scl_median scl_stdev count".split(" "))
    with open(all_courses_file, "w", newline="") as output_f:
        output = csv.writer(output_f)
        output.writerow("course_code course pocr_status teacher_code mode percent_passed scl_mean scl_median scl_stdev scl_min scl_max count".split(" "))

        with open(all_grades_file, newline="") as csvfile:
            csvreader = csv.reader(csvfile)
            block = []
            current_index = None

            next(csvreader)  # header

            # rows are grouped by course id (column 0); flush each group to
            # both processors when the id changes
            for row in csvreader:
                index = row[0]

                if index != current_index:
                    if block:
                        process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code)
                        process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code)
                    block = []
                    current_index = index

                block.append(row)

            # flush the final group
            if block:
                process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code)
                process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code)
|
|
|
|
|
|
def reorganize_grades_student():
    """Regroup the per-student rows of all_courses_file3 by student id.

    Writes three views: the full rows grouped by student (file4), one
    'student,courses' line per student (file5), and one
    'student,course|score,...' line per student (student_courses_scores).
    """
    with open(all_courses_file3, newline="") as csvfile:
        csvreader = csv.reader(csvfile)
        bystudent = defaultdict(list)

        next(csvreader)  # header

        for row in csvreader:
            st = row[6]  # student_id column
            bystudent[st].append(row)

    students = sorted(bystudent.keys())
    with open(all_courses_file4, "w", newline="") as output_f:
        with open(all_courses_file5, "w", newline="") as output_s:
            with open(student_courses_scores,'w') as output_scs:
                output_s.write("student,courses\n")
                output = csv.writer(output_f)
                output.writerow("course_code course pocr_status orientation_status teacher_code mode student_id scaled_score".split(" "))
                # student id 0 has no courses
                output.writerow([0,])
                for st in students:
                    # column 1 is the catalog course code, column 7 the scaled score
                    courses = [r[1] for r in bystudent[st]]
                    scores = [r[7] for r in bystudent[st]]
                    zipped = zip(courses,scores)
                    output_scs.write(st + ",")
                    for c,s in zipped:
                        output_scs.write(f"{c}|{s},")
                    output_scs.write("\n")
                    output_s.write(st + "," + " ".join(courses) + "\n")
                    for row in bystudent[st]:
                        output.writerow(row)
|
|
|
|
|
|
def all_course_names_setup():
    """One-off audit: print course shell names that aren't in the built
    course catalog (cache/courses/courses_built.json)."""
    cc = json.loads(codecs.open('cache/courses/courses_built.json','r','utf-8').read())
    courses = {}
    for C in cc.values():
        # canonical catalog name, e.g. dept 'MATH' + number '101'
        name = C['dept'] + C['number']
        courses[ name ] = C

    # (one-time export of the sorted name list, kept for reference)
    #co = codecs.open('cache/courses/names.json','w','utf-8')
    #for c in sorted(courses.keys()):
    #    co.write(c + "\n")

    # NOTE(review): opened but never read/closed — left over from the
    # commented-out export above
    cr = codecs.open('cache/courses/names.json','r','utf-8')

    from_data = codecs.open('cache/courses_student_scores.csv','r','utf-8').readlines()
    unknown = {}
    for line in from_data:
        # line format: stu_id,course|score,course|score,...
        parts = line.split(',')
        stu_id = parts[0]
        ea = parts[1:]
        for C in ea:
            each = C.split('|')
            name = each[0]
            if not name in courses:
                unknown[name] = name
            #data_courses[each[0]] += 1
    for c in sorted(unknown.keys()):
        print(c)

    #co.write( json.dumps( {'unknown':unknown, 'coursenames':courses}, indent=2 ))
|
|
|
|
|
|
# Lazy caches for shell2course(): shell-name -> real course name mapping,
# and the set of known real course names.
lookup = {}
names = {}
|
|
|
|
def shell2course(shell):
    """Map an iLearn shell name to a canonical catalog course name.

    Uses cache/courses/names.json (loaded once into module-level caches):
    'coursenames' is the set of real names, 'unknown' maps odd shell names
    to their real name. Returns "" when the shell can't be resolved.
    """
    global lookup, names
    if not lookup:
        cr = json.loads(codecs.open('cache/courses/names.json','r','utf-8').read())
        lookup = cr['unknown']
        allcourses = cr['coursenames']
        names = allcourses.keys()

    # already a canonical name
    if shell in names:
        return shell
    # known alias — but only trust it if it resolves to a real name
    if shell in lookup:
        c = lookup[shell]
        if c in names:
            return c
    #print(f"Can't find course: {shell}")
    return ""
|
|
|
|
|
|
|
|
|
|
def stu_record_line(line):
    """Split a 'stu_id,course|score,...' line into (stu_id, [[course, score], ...])."""
    trimmed = line.strip().strip(',')
    fields = trimmed.split(',')
    pairs = [entry.split('|') for entry in fields[1:]]
    return fields[0], pairs
|
|
|
|
def stu_record_to_vector(line, boolean=0):
    """Turn one 'stu,course|score,...' line into a fixed-width course vector.

    Vector positions follow cache/courses/course_main_record.json. An entry
    is 1 (or "true" when boolean) if the student took the course; the score
    itself is not stored. Returns (student_id, vector, courses).
    """
    id, courses = stu_record_line(line)

    yesval = "true" if boolean else 1
    noval = "false" if boolean else 0

    # NOTE(review): the template file is re-read for every line; consider
    # caching (cf. course_main_record()).
    template = json.loads(codecs.open('cache/courses/course_main_record.json','r','utf-8').read())
    lookup = {}
    for i,c in enumerate(template):
        lookup[c] = i
    vector = [noval for x in range(len(template))]
    for C in courses:
        goodname = shell2course(C[0])
        if goodname:
            vector[lookup[goodname]] = yesval # C[1] # score
    return id,vector,courses
|
|
|
|
|
|
def grades_to_vectors(boolean=0, verbose=0):
    """Yield (student_id, course_vector, courses) for every line of the
    per-student scores file."""
    lines = codecs.open('cache/courses_student_scores.csv','r','utf-8').readlines()
    for line in lines:
        stu_id, vector, courses = stu_record_to_vector(line, boolean)
        if verbose:
            print(stu_id, vector)
        yield stu_id, vector, courses
|
|
|
|
def course_main_record():
    """Return the master ordered list of course names from the cache."""
    raw = codecs.open('cache/courses/course_main_record.json','r','utf-8').read()
    return json.loads(raw)
|
|
|
|
|
|
def courses_to_vector_ordered(course_list):
    """Build a vector over the master course record where each taken course
    holds its semester order (as a string) and everything else is '0'.

    Each element of `course_list` is (name, semester_order, score); the
    score is currently unused.
    """
    template = course_main_record()
    position = {course: i for i, course in enumerate(template)}
    vector = ['0'] * len(template)
    for course, order, score in course_list:
        goodname = shell2course(course)
        if goodname:
            vector[position[goodname]] = str(order)
    return vector
|
|
|
|
def courses_to_vector(course_list, boolean=1):
    """One-hot vector over the master course record for `course_list`.

    Entries are "true"/"false" when boolean, else 1/0. Course names are
    assumed to already be canonical (no shell2course translation).
    """
    yesval = "true" if boolean else 1
    noval = "false" if boolean else 0
    template = course_main_record()
    position = {course: i for i, course in enumerate(template)}
    vector = [noval] * len(template)
    for entry in course_list:
        vector[position[entry.strip()]] = yesval
    return vector
|
|
|
|
def course_vector_to_names(vector):
    """Return the master-record course names at truthy positions of `vector`."""
    template = course_main_record()
    return [template[i] for i, flag in enumerate(vector) if flag]
|
|
|
|
|
|
def all_course_names():
    """Rebuild cache/courses/course_main_record.json from the built course
    catalog (non-Draft courses), sorted and de-duplicated."""
    ac = json.loads(codecs.open('cache/courses/courses_built.json','r','utf-8').read())
    master_record = []
    for C in ac.values():
        if C['status'] == 'Draft':
            continue
        name = C['dept'] + C['number']
        master_record.append(name)
    # dedupe and order deterministically
    master_record = set(master_record)
    master_record = list(master_record)
    master_record = sorted(master_record)

    ## Extract from all 'accomplished courses'...
    # (disabled alternative: derive the master record from courses students
    # actually completed rather than from the catalog)
    if 0:
        complete_list = {}
        missing_names = {}
        with open(student_courses_scores,'r') as input_f:
            for L in input_f:
                stu_id, courses = stu_record_line(L)
                for C in courses:
                    real_name = shell2course(C[0])
                    if real_name:
                        complete_list[real_name] = 1
                    else:
                        missing_names[C[0]] = 1
        master_record = sorted(complete_list.keys())
        print(f"Found {len(master_record)} courses")
        print(master_record)
        print(f"Missing {len(missing_names)} courses")
        print(missing_names)
    mr = codecs.open('cache/courses/course_main_record.json','w','utf-8')
    mr.write(json.dumps(master_record,indent=2))
|
|
|
|
|
|
from semesters import semester_list, canvas_label
|
|
from semesters import code as semester_order
|
|
from localcache import all_students_history
|
|
from datetime import datetime, timedelta
|
|
|
|
def semester_dates():
    """Print each semester's start and computed end date.

    Summer sessions run 5 weeks, regular semesters 15; the end date is
    start + length + 5 days (through the final weekend).
    """
    for c in canvas_label:
        print(semester_list[c])

        # summer sessions are short
        length = 15
        if semester_list[c]['code'][0:2] == 'su':
            length = 5
        start_date = semester_list[c]['start']
        # Convert the date string to a datetime object
        date_object = datetime.strptime(start_date, '%m/%d/%y')
        start_fmt = date_object.strftime('%a %b %d, %Y')

        # Add the semester length plus 5 days to the start date.
        # BUG FIX: previously hard-coded timedelta(weeks=15), ignoring the
        # 5-week summer `length` computed above.
        new_date = date_object + timedelta(weeks=length, days=5)

        end_fmt = new_date.strftime('%a %b %d, %Y')

        # Print the new date
        print(f"start: {start_fmt}, end: {end_fmt}")
|
|
|
|
|
|
|
|
# Mutable streaming state shared by course_line_process() and
# normalize_course_histories(): the history dump arrives grouped by student,
# and each student's rows are accumulated into a block before being flushed
# onto normalized_blocks.
current_student = ""
current_student_block = []
current_student_info = {'first':'', 'last':''}
normalized_blocks = []

# Course shells to skip entirely, plus shells already reported as unknown.
ignore_courses = "El,zACCT20,GASPAR".split(",")
seen_courses = []
|
|
|
|
def course_line_process(line):
    """Streaming callback for all_students_history(): normalize one
    enrollment row into the shared per-student block state.

    Skips non-academic terms, ignored shells, and shells already seen as
    unknown. When the student id changes, the previous student's block
    (with its first/last-semester info dict appended) is flushed onto
    normalized_blocks.
    """
    global current_student, current_student_block, seen_courses, normalized_blocks, current_student_info

    sem = line['term_name']
    m1 = re.search(r'^(\d\d\d\d)\s(\w+)$', sem)
    if not m1: # is NOT an academic semester, skip
        return

    uid = line['canvasid']
    if uid != current_student:
        # flush the previous student's accumulated block
        if current_student_block:
            current_student_block.append(current_student_info)
            normalized_blocks.append(current_student_block)
        current_student_block = []
        # rows arrive in semester order, so the first row gives 'first'
        current_student_info = {'first':semester_list[sem]['code'], 'last':''}
        current_student = uid
        #print(f"Student: {uid} ({line['user_name']})")

    # line is a dict
    current_student_info['last'] = semester_list[sem]['code']
    year, season = m1.group(1), m1.group(2)
    date_format = "%Y-%m-%d %H:%M:%S.%f"
    create_dt = datetime.strptime(line['created'], date_format)
    update_dt = datetime.strptime(line['updated'], date_format)
    sem_start = datetime.strptime(semester_list[sem]['start'], '%m/%d/%y')

    course = line['course_name']
    c_parts = course.split(' ')
    if c_parts[0] in ignore_courses or c_parts[0] in seen_courses:
        return
    classname = shell2course(c_parts[0])
    if not classname:
        # unknown shell: remember it so it's only handled once
        # print empty dict entry for initial setup
        # print(f" \"{c_parts[0]}\": \"\",")
        seen_courses.append(c_parts[0])
    else:
        #
        flow = line['workflow']
        mark = '+'
        if flow == "deleted": mark = '-'
        # normal start & finish, give add date as days before(-)/after(+)
        # the semester start
        add_day = sem_start - create_dt
        add_day = add_day.days
        sign = '-'
        if add_day < 0:
            add_day = -add_day
            sign = '+'
        #print(f" {mark} {classname} added T{sign}{add_day} {semester_list[sem]['code']}")
        # commas stripped from names so block entries stay CSV-safe
        temp_usr_name = re.sub(r',','',line['user_name'])
        current_student_block.append(f"{uid},{temp_usr_name},{classname},add,T{sign}{add_day},{semester_list[sem]['code']}")
        if flow == "deleted":
            # deleted, give delete date relative to semester start
            del_day = sem_start - update_dt
            del_day = del_day.days
            sign = '-'
            if del_day < 0:
                del_day = -del_day
                sign = '+'
            #print(f" {mark} {classname} deleted T{sign}{del_day} {semester_list[sem]['code']}")
            current_student_block.append(f"{uid},{temp_usr_name},{classname},del,T{sign}{del_day},{semester_list[sem]['code']}")
|
|
|
|
|
|
def normalize_course_histories():
    """Build normalized add/drop + grade histories for every student.

    Streams the full enrollment history through course_line_process(), then
    writes three artifacts: the raw per-student blocks (JSON), a per-event
    CSV with grades, and a one-line-per-student course-vector CSV.
    """
    global normalized_blocks, current_student_block, current_student_info
    all_students_history(course_line_process, limit=99910000)
    # course_line_process only flushes on a student-id change, so flush the
    # very last student's block here
    current_student_block.append(current_student_info)
    normalized_blocks.append(current_student_block)

    codecs.open('cache/normalized_student_add_drop.json','w','utf-8').write(json.dumps(normalized_blocks,indent=2))

    # let's see if we can get grades...
    grades_by_student_course = defaultdict(dict)
    print("Doing grades...")
    with codecs.open('cache/courses_student_scores.csv','r','utf-8') as gradesfile:
        for s in gradesfile:
            # line format: stu_id,course|score,course|score,...
            parts = s.split(',')
            stu = int(parts[0])
            for c in parts[1:]:
                try:
                    crs,gra = c.split('|')
                    grades_by_student_course[stu][crs] = gra
                except Exception as e:
                    # trailing/empty fields have no '|' separator; skip them
                    pass

    # go through again
    print("Second pass of grades and student history...")
    student_history = codecs.open('cache/normalized_student_history.csv','w','utf-8')
    student_history.write("studentid,studentname,course,action,when,grade,sem_name,first_sem,last_sem,tenure_length,sem_index\n")
    # semester_order ships newest-first; reversed so .index() is chronological.
    # NOTE(review): this mutates the imported list in place — calling this
    # function twice would flip it back.
    semester_order.reverse()
    for blk in normalized_blocks:
        # the info dict appended at flush time is always the block's last item
        info = blk[-1]
        first = semester_order.index(info['first']) + 1
        last = semester_order.index(info['last']) + 1
        length = last - first + 1

        for course in blk[:-1]:
            parts = course.split(',')
            #print(parts)
            sem = parts[5]
            # semester number within this student's college career
            sem_index = semester_order.index(sem) - first + 2
            stu = int(parts[0])
            crs = parts[2]
            grade = ""
            if stu in grades_by_student_course:
                if crs in grades_by_student_course[stu]:
                    grade = grades_by_student_course[stu][crs]

            student_history.write(",".join([parts[0], parts[1], parts[2], parts[3], parts[4], grade, parts[5], str(first), str(last), str(length), str(sem_index), ]) + '\n')

    # make "unified records" or one line per student
    student_history_2 = codecs.open('cache/normalized_student_history2.csv','w','utf-8')
    allcourse = course_main_record()
    #print(allcourse)
    template = ['studentid', 'studentname', 'tenure_length']
    template.extend(allcourse)
    #print(template)
    student_history_2.write( ",".join(template) + "\n" )

    for blk in normalized_blocks:
        student_block = []
        info = blk[-1]
        first = semester_order.index(info['first']) + 1
        last = semester_order.index(info['last']) + 1
        length = last - first + 1

        temp_course_holder = {}
        temp_course_grade_holder = {}

        for course in blk[:-1]:
            parts = course.split(',')
            #print(parts)
            sem = parts[5]
            sem_index = semester_order.index(sem) - first + 2
            stu = int(parts[0])
            crs = parts[2]
            # an 'add' followed by a 'del' cancels out, leaving only courses
            # the student kept
            if parts[3] == 'add':
                temp_course_holder[crs] = sem_index
            elif parts[3] == 'del' and crs in temp_course_holder:
                del temp_course_holder[crs]

        # now the temp_course_holder has the courses and semesters
        # NOTE(review): `stu` and `parts` below carry over from the last loop
        # iteration — an empty block would raise NameError; confirm blocks
        # always contain at least one event row
        for crs,sem_index in temp_course_holder.items():
            grade = ""
            if stu in grades_by_student_course:
                if crs in grades_by_student_course[stu]:
                    grade = grades_by_student_course[stu][crs]
            this_record = (crs, sem_index, grade)
            student_block.append(this_record)

        student_vector = [ parts[0], parts[1], str(length) ]
        student_vector.extend(courses_to_vector_ordered(student_block))

        student_history_2.write(",".join(student_vector) + '\n')
        #print(student_vector)
|
|
|
|
def cluster_student_histories():
    """WIP (see "Next steps" in the module docstring): cluster students by
    their course/score vectors. Currently only loads the data frame; the
    kmeans/silhouette work is not implemented yet."""
    infile = 'cache/courses_student_scores.csv'

    # heavy third-party deps imported lazily so the rest of the script runs
    # without them installed
    import pandas as pd
    import matplotlib.pyplot as plt
    from kneed import KneeLocator
    from sklearn.datasets import make_blobs
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score
    from sklearn.preprocessing import StandardScaler

    df = pd.read_csv(infile)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Numbered task menu: run with a numeric argument (e.g. `python file.py 2`)
    # or with no argument for an interactive prompt.
    options = { 1: ['get all historical grades from ilearn',get_all] ,
        2: ['process grades csv file',process_grades] ,
        3: ['reorganize full grades file by student', reorganize_grades_student],
        4: ['test shortname parse',nametest] ,
        5: ['test sem codes',codetest] ,
        6: ['get student data from orientations', get_student_orientations],
        7: ['manage course master list', all_course_names],
        8: ['grades to vectors', grades_to_vectors],
        9: ['semester startdates list', semester_dates],
        10: ['normalize course histories', normalize_course_histories],
        11: ['cluster student histories', cluster_student_histories],
        }
    print ('')

    if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
        # task chosen on the command line
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])

    else:
        # interactive: list the tasks and prompt
        print ('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])

        print('')
        resp = input('Choose: ')

    # Call the function in the options dict
    options[ int(resp)][1]()
|