commit 09fb62577258d23701710ee2dc71dd55abcc6951
Author: Coding with Peter

diff --git a/checker.py b/checker.py
+import os, re, glob, html, subprocess
+from bs4 import BeautifulSoup as bs
+
+def safe_html(s):
+ REMOVE_ATTRIBUTES = [
+ 'lang','language','onmouseover','onmouseout','script','style','font',
+ 'dir','face','size','color','style','class','width','height','hspace',
+ 'border','valign','align','background','bgcolor','text','link','vlink',
+ 'alink','cellpadding','cellspacing']
+ soup = bs(s, features='lxml')
+ for tag in soup.recursiveChildGenerator():
+ try:
+ tag.attrs = {key:value for key,value in tag.attrs.items()
+ if key not in REMOVE_ATTRIBUTES}
+ except AttributeError:
+ # 'NavigableString' object has no attribute 'attrs'
+ pass
+ return soup.prettify()
+
+def mycleaner(s):
+ s = re.sub(r'<br>','\n',s)
+ s = re.sub(r' +',' ',s)
+ return s
+
+def para(t): return "<p>"+t+"</p>\n"
+
+def li(t): return "<li>"+t+"</li>\n"
+
+def check_folder(fname, path):
+ report = '<h2>' + fname + '</h2>\n'
+ number = -1
+ count = 0
+ try:
+ for F in os.listdir(path+fname): #'assignments'):A
+ cmd = "/usr/bin/node " + \
+ "/home/phowell/Documents/access/node_modules/pa11y/bin/pa11y.js --standard Section508 " + \
+ path + fname + "/" + F
+ print(("" + path + fname + "/" + F))
+ output = subprocess.run(cmd, stdout=subprocess.PIPE,
+ universal_newlines=True, shell=True, check=False)
+
+ report += "<h2>" + F + "</h2>\n"
+ line = output.stdout.split('\n')[-3]
+ if re.search('No\sissues',line):
+ pass
+ #print("Got zero")
+ else:
+ m = re.search('(\d+)\sErr',line)
+ if m:
+ count += int(m.group(1))
+ lines = output.stdout.split("\n")
+ #pdb.set_trace()
+ lines = lines[4:]
+ report += "<pre>" + html.escape("\n".join(lines)) + "</pre>\n\n\n"
+ except Exception as e:
+ print('finished with error or folder missing')
+ print(e)
+ return int(count), report
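A minimal sketch of the error-count parsing that check_folder() relies on: it assumes pa11y prints a summary line containing something like "12 Errors". The sample line below is illustrative, not captured pa11y output.

    import re
    sample_line = "12 Errors"                  # assumed shape of pa11y's summary line
    m = re.search(r'(\d+)\sErr', sample_line)
    error_count = int(m.group(1)) if m else 0  # -> 12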
+
+def check_class(folder):
+ path = "/home/phowell/hdd/SCRIPTS/everything-json/course_temps/" + folder + "/"
+ class_report = "<h1>Report on course: " + folder + "</h1>\n\n"
+ (cnt_a,rep_a) = check_folder('assignments',path)
+ (cnt_p,rep_p) = check_folder('pages',path)
+ class_report += rep_a
+ class_report += rep_p
+
+ #oo = open(path+'report.html','w')
+ #oo.write(class_report)
+ #oo.close()
+ #print(class_report)
+ return cnt_a+cnt_p, class_report
+
+def check_all():
+ hd_path = '/home/phowell/hdd/SCRIPTS/everything-json/'
+
+ rep_f = open(hd_path+'report.html','w')
+ rep_s = open(hd_path+'summary.html','w')
+
+ rep_f.write('\n')
+
+ listt = os.listdir('/home/phowell/hdd/SCRIPTS/everything-json/course_temps')
+ #listt = ['course_4341',] # for testing
+ for L in listt:
+ print(('Directory is: '+L))
+ m = glob.glob('/home/phowell/hdd/SCRIPTS/everything-json/course_temps/' +L+'/*.txt')
+ if m: name = m[0]
+ else: name = 'unknown.txt'
+ name = name.split('.')[0]
+ name = name.split('/')[-1]
+
+ print(('name is: ' + name))
+ (cnt,rep) = check_class(L)
+ rep_f.write("<h1>"+name+"</h1>\n"+rep+"\n\n<hr>\n\n")
+ rep_f.flush()
+ rep_s.write("("+str(cnt)+") Class: "+name+"<br>\n")
+ rep_s.flush()
+
+if __name__ == "__main__":
+ check_all()
+
+ #print(('arguments: '+str(sys.argv)))
+
+ # test
+ """
+ file = 'course_temps/course_6862/pages/choose-the-right-browser.html'
+ dir = 'course_temps/course_6862/pages/'
+ #ff = open(file,'r').read()
+ #print safe_html(ff)
+
+ for file in os.listdir(dir):
+ if re.search('_cleaned\.html',file):
+ os.remove(dir+file)
+
+ for file in os.listdir(dir):
+ if file.endswith(".html"):
+ newfname = re.sub('\.html','_cleaned.html',file)
+ ff = codecs.open(dir+file,'r','utf-8').read()
+ print(file)
+ print(newfname)
+ newf = codecs.open(dir+newfname,'w','utf-8')
+ newf.write(safe_html(ff))
+ newf.close()
+ """
+
+
diff --git a/content.py b/content.py
new file mode 100644
index 0000000..be5553d
--- /dev/null
+++ b/content.py
@@ -0,0 +1,860 @@
+
+
+#saved_titles = json.loads( codecs.open('cache/saved_youtube_titles.json','r','utf-8').read() )
+import requests, codecs, os, re, json
+from pipelines import header, fetch, url
+from util import clean_title, to_file_friendly
+from bs4 import BeautifulSoup as bs
+from html.parser import HTMLParser
+import tomd, checker
+import html2markdown as h2m
+import pypandoc
+h = HTMLParser()
+
+
+DBG = 1
+
+def d(s):
+ global DBG
+ if DBG: print(s)
+
+def stripper(s):
+ REMOVE_ATTRIBUTES = [
+ 'lang','language','onmouseover','onmouseout','script','style','font',
+ 'dir','face','size','color','style','class','width','height','hspace',
+ 'border','valign','align','background','bgcolor','text','link','vlink',
+ 'alink','cellpadding','cellspacing']
+
+ #doc = '''<p>This is paragraph one.</p><p>This is paragraph two.</p>'''
+ soup = bs(s, features='lxml')
+ for tag in soup.recursiveChildGenerator():
+ try:
+ tag.attrs = {key:value for key,value in tag.attrs.items() if key not in REMOVE_ATTRIBUTES}
+ except AttributeError:
+ pass
+ return soup.prettify()
+
+def mycleaner(s):
+ s = re.sub(r'<br>','\n',s)
+ s = re.sub(r'<\/?b>','',s)
+ s = re.sub(r' +',' ',s)
+ s = re.sub(r'^[\s\t\r\n]+$','',s,flags=re.MULTILINE)
+ s = re.sub('^ ','',s)
+ return s
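freshdesk() below chains these helpers: strip presentation attributes, convert the remaining HTML to Markdown with tomd, then normalize whitespace with mycleaner. A minimal sketch of that chain; the sample HTML is invented:

    sample = '<div style="color:red"><b>Q:</b> How do I reset my password?<br>Visit the portal.</div>'
    step1 = stripper(sample)      # presentation attributes dropped
    step2 = tomd.convert(step1)   # HTML -> Markdown
    print(mycleaner(step2))       # stray <b>/<br> and extra spaces cleaned up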
+
+def freshdesk():
+ path = "C:\\Users\\peter\\Downloads\\freshdesk\\Solutions.xml"
+ soup = bs( codecs.open(path,'r','utf-8').read() ,features="lxml")
+
+ outpt = codecs.open('cache/faqs.txt','w')
+ out = ""
+ for a in soup.find_all('solution-article'):
+
+ print("TITLE\n"+a.find('title').get_text())
+ out += a.find('title').get_text()
+
+ """for d in a.find_all('description'):
+ #print(d)
+ if d:
+ d = h.unescape(d.get_text())
+ e = stripper(d)
+ m = tomd.convert( e )
+ m = mycleaner(m)
+ print("\nDESCRIPTION\n"+m)"""
+
+ #print("\nWHAT IS THIS?\n" +
+ hh = a.find('desc-un-html').get_text()
+ d = h.unescape(hh)
+ e = stripper(d)
+ m = tomd.convert( e )
+ m = mycleaner(m)
+ print("\nDESCRIPTION\n"+m)
+ out += "\n\n" + m + "\n\n"
+
+ print("-----------\n\n")
+ outpt.write(out)
+
+# Download everything interesting in a course to a local folder
+# Build a master file with the entire class content
+def accessible_check(id=""):
+ if not id:
+ id = input("ID of course to check? ")
+ pagebreak = '\n\n\n\n'
+ verbose = 1
+
+ save_file_types = ['application/pdf','application/docx','image/jpg','image/png','image/gif','image/webp','application/vnd.openxmlformats-officedocument.wordprocessingml.document']
+
+ courseinfo = fetch('/api/v1/courses/' + str(id), verbose )
+
+ item_id_to_index = {}
+ items_inorder = ["<h1>" + courseinfo['name'] + "</h1>\n\n" + pagebreak,]
+ running_index = 1
+
+ modules = fetch('/api/v1/courses/' + str(id) + '/modules',verbose)
+
+ items = []
+ for x in range(9000): items.append(0)
+
+ video_link_list = []
+
+ for m in modules:
+ items[running_index] = '<h1>%s</h1>\n%s\n' % ( m['name'], pagebreak )
+ running_index += 1
+
+ mod_items = fetch('/api/v1/courses/' + str(id) + '/modules/'+str(m['id'])+'/items', verbose)
+
+ for I in mod_items:
+
+ if I['type'] in ['SubHeader', 'Page', 'Quiz', 'Discussion', 'ExternalUrl' ] or 'content_id' in I:
+ running_index += 1
+
+ if I['type'] == 'SubHeader':
+ #print('subheader: ' + str(I))
+ items[running_index] = '<pre>%s</pre>\n' % str(json.dumps(I,indent=2))
+
+ if I['type'] == 'Page':
+ item_id_to_index[ I['page_url'] ] = running_index
+
+ if I['type'] == 'Quiz':
+ item_id_to_index[ I['content_id'] ] = running_index
+
+ if I['type'] == 'Discussion':
+ item_id_to_index[ I['content_id'] ] = running_index
+
+ if I['type'] == 'ExternalUrl':
+ items[running_index] = "<a href='%s'>%s</a>\n\n" % (I['external_url'], I['title'])
+
+ # ?
+ #if 'content_id' in I:
+ # item_id_to_index[ I['content_id'] ] = running_index
+ else:
+ print("What is this item? " + str(I))
+
+
+ #items_inorder.append('Not included: '+ I['title'] + '(a ' + I['type'] + ')\n\n\n' )
+
+ # I['title']
+ # I['content_id']
+ # I['page_url']
+ # I['type']
+ # I['published']
+ # assignments and files have content_id, pages have page_url
+
+ course_folder = '../course_temps/course_'+id
+ index = []
+ try:
+ os.mkdir(course_folder)
+ except:
+ print("Course folder exists.")
+ ###
+ ### FILES
+ ###
+ files_f = course_folder + '/files'
+ headered = 0
+ print("\nFILES")
+ try:
+ os.mkdir(files_f)
+ except:
+ print(" * Files folder already exists.")
+
+ files = fetch('/api/v1/courses/' + str(id) + '/files', verbose)
+ print("LISTING COURSE FILES")
+ for f in files:
+ for arg in 'filename,content-type,size,url'.split(','):
+ if arg=='size':
+ f['size'] = str(int(f['size']) / 1000) + 'k'
+
+ if f['content-type'] in save_file_types:
+ d(' - %s' % f['filename'])
+
+ if not os.path.exists(files_f + '/' + f['filename']):
+ r = requests.get(f['url'],headers=header, stream=True)
+ with open(files_f + '/' + f['filename'], 'wb') as fd:
+ for chunk in r.iter_content(chunk_size=128):
+ fd.write(chunk)
+ else:
+ d(" - already downloaded %s" % files_f + '/' + f['filename'])
+
+ if not headered:
+ index.append( ('<h2>Files</h2>') )
+ headered = 1
+ index.append( ('files/' + f['filename'], f['filename']) )
+
+ ###
+ ### PAGES
+ ###
+ pages_f = course_folder + '/pages'
+ headered = 0
+ image_count = 0
+ print("\nPAGES")
+ try:
+ os.mkdir(pages_f)
+ except:
+ print(" * Pages folder already exists.")
+
+
+ pages = fetch('/api/v1/courses/' + str(id) + '/pages', verbose)
+ for p in pages:
+ d(' - %s' % p['title'])
+
+ p['title'] = clean_title(p['title'])
+ easier_filename = clean_title(p['url'])
+ this_page_filename = "%s/%s.html" % (pages_f, easier_filename)
+ #for a in 'title,updated_at,published'.split(','):
+ # print(str(p[a]), "\t", end=' ')
+
+ if not headered:
+ index.append( ('<h2>Pages</h2>') )
+ headered = 1
+ index.append( ( 'pages/' + easier_filename + '.html', p['title'] ) )
+
+
+ if os.path.exists(this_page_filename):
+ d(" - already downloaded %s" % this_page_filename)
+ this_page_content = open(this_page_filename,'r').read()
+ elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
+ d(' * skipping file behind passwords')
+ else:
+ t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
+ if t2 and 'body' in t2 and t2['body']:
+ bb = bs(t2['body'],features="lxml")
+ a_links = bb.find_all('a')
+ for A in a_links:
+ if re.search( r'youtu', A['href']):
+ video_link_list.append( (A['href'], A.text, 'pages/'+easier_filename + ".html") )
+
+
+ page_images = bb.find_all('img')
+ for I in page_images:
+ d(' - %s' % I['src'])
+ if re.search(r'eis-prod',I['src']) or re.search(r'gavilan\.ins',I['src']):
+ d(' * skipping file behind passwords')
+ else:
+ try:
+ r = requests.get(I['src'],headers=header, stream=True)
+ mytype = r.headers['content-type']
+ #print("Response is type: " + str(mytype))
+ r_parts = mytype.split("/")
+ ending = r_parts[-1]
+
+ with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
+ for chunk in r.iter_content(chunk_size=128):
+ fd.write(chunk)
+ image_count += 1
+ except Exception as e:
+ d( ' * Error downloading page image, %s' % str(e) )
+
+ try:
+ with codecs.open(this_page_filename, 'w','utf-8') as fd:
+ this_page_content = "<h1>%s</h1>\n%s" % ( t2['title'], t2['body'] )
+ fd.write(this_page_content)
+ except:
+ d(' * problem writing page content')
+ ## TODO include linked pages even if they aren't in module
+ else:
+ d(' * nothing returned or bad fetch')
+ # write to running log of content in order of module
+ if p and p['url'] in item_id_to_index:
+ items[ item_id_to_index[ p['url'] ] ] = this_page_content +'\n\n'+pagebreak
+ else:
+ d(' -- This page didnt seem to be in the modules list.')
+
+
+ ###
+ ### ASSIGNMENTS
+ ###
+ headered = 0
+ asm_f = course_folder + '/assignments'
+ print("\nASSIGNMENTS")
+ try:
+ os.mkdir(asm_f)
+ except:
+ d(" - Assignments dir exists")
+
+ asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose)
+ for p in asm:
+ d(' - %s' % p['name'])
+
+
+ try:
+ friendlyfile = to_file_friendly(p['name'])
+ this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html'
+ if os.path.exists(this_assmt_filename):
+ d(" - already downloaded %s" % this_assmt_filename)
+ this_assmt_content = open(this_assmt_filename,'r').read()
+ else:
+ t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose)
+ with codecs.open(this_assmt_filename, 'w','utf-8') as fd:
+ this_assmt_content = "<h1>%s</h1>\n%s\n\n" % (t2['name'], t2['description'])
+ fd.write(this_assmt_content)
+ if not headered:
+ index.append( ('<h2>Assignments</h2>') )
+ headered = 1
+ index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) )
+
+ # write to running log of content in order of module
+ if p['id'] in item_id_to_index:
+ items[ item_id_to_index[ p['id'] ] ] = this_assmt_content+'\n\n'+pagebreak
+ except Exception as e:
+ d(' * Problem %s' % str(e))
+
+ ###
+ ### FORUMS
+ ###
+ """forum_f = course_folder + '/forums'
+ headered = 0
+ image_count = 0
+ print("\nFORUMS")
+ try:
+ os.mkdir(forum_f)
+ forums = fetch('/api/v1/courses/' + str(id) + '/discussion_topics', verbose)
+ for p in forums:
+ p['title'] = clean_title(p['title'])
+ forum_id = p['id']
+ easier_filename = p['title']
+ for a in 'title,posted_at,published'.split(','):
+ print(str(p[a]), "\t", end=' ')
+ print("")
+ t2 = fetch('/api/v1/courses/' + str(id) + '/discussion_topics/'+str(forum_id), verbose)
+
+
+ #### REMOVED
+ bb = bs(t2['body'],features="lxml")
+ print("IMAGES IN THIS PAGE")
+ page_images = bb.find_all('img')
+ for I in page_images:
+ r = requests.get(I['src'],headers=header, stream=True)
+ mytype = r.headers['content-type']
+ print("Response is type: " + str(mytype))
+ r_parts = mytype.split("/")
+ ending = r_parts[-1]
+
+ with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
+ for chunk in r.iter_content(chunk_size=128):
+ fd.write(chunk)
+ image_count += 1
+ #### END REMOVED
+
+ try:
+ with codecs.open(forum_f + '/' + easier_filename + '.html', 'w','utf-8') as fd:
+ fd.write("<h1>"+t2['title']+"</h1>\n")
+ fd.write(t2['message'])
+ if not headered: index.append( ('<h2>Discussion Forums</h2>') )
+ headered = 1
+ index.append( ( 'forums/' + easier_filename + '.html', p['title'] ) )
+
+ # write to running log of content in order of module
+ if p['id'] in item_id_to_index:
+ items_inorder[ item_id_to_index[ p['id'] ] ] = '<h1>'+t2['title']+'</h1>\n\n'+t2['message']+'\n\n'+pagebreak
+ else:
+ print(' This forum didnt seem to be in the modules list.')
+ except Exception as e:
+ print("Error here:", e)
+ #print p
+ #print results_dict
+ except Exception as e:
+ print("** Forum folder seems to exist. Skipping those.")
+ print(e)
+
+
+
+
+
+
+ ###
+ ### QUIZZES
+ ###
+
+
+ # get a list external urls
+ headered = 0
+ t = url + '/api/v1/courses/' + str(id) + '/modules'
+ while t: t = fetch(t)
+ mods = results
+ results = []
+ for m in mods:
+ results = []
+ t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items'
+ while t2: t2 = fetch(t2)
+ items = results
+ for i in items:
+ #print i
+ if i['type'] == "ExternalUrl":
+ #print i
+ for j in 'id,title,external_url'.split(','):
+ print unicode(i[j]), "\t",
+ print ""
+ if not headered: index.append( ('<h2>External Links</h2>') )
+ headered = 1
+ index.append( (i['external_url'], i['title']) )
+ """
+
+
+
+ # Create index page of all gathered items
+ myindex = codecs.open(course_folder+'/index.html','w','utf-8')
+ for i in index:
+ if len(i)==2: myindex.write("<a href='"+i[0]+"'>"+i[1]+"</a><br>\n")
+ else: myindex.write(i)
+
+
+
+ # Full course content in single file
+ print("Writing main course files...")
+ mycourse = codecs.open(course_folder+'/fullcourse.raw.html','w','utf-8')
+
+ for I in items:
+ if I:
+ mycourse.write( I )
+
+
+
+ temp = open('cache/coursedump.txt','w')
+ temp.write( "items: " + json.dumps(items,indent=2) )
+ temp.write("\n\n\n")
+ temp.write( "index: " + json.dumps(index,indent=2) )
+ temp.write("\n\n\n")
+ temp.write( "items_inorder: " + json.dumps(items_inorder,indent=2) )
+ temp.write("\n\n\n")
+ temp.write( "item_id_to_index: " + json.dumps(item_id_to_index,indent=2) )
+
+
+
+
+
+
+
+ if video_link_list:
+ mycourse.write('\n<h2>Videos Linked in Pages</h2>\n')
+ for V in video_link_list:
+ (url, txt, pg) = V
+ mycourse.write("<a href='" + url + "'>" + txt + "</a> on " + pg + "<br>\n")
+
+ mycourse.close()
+ output = pypandoc.convert_file(course_folder+'/fullcourse.raw.html', 'html', outputfile=course_folder+"/fullcourse.html")
+ output1 = pypandoc.convert_file(course_folder+'/fullcourse.html', 'md', outputfile=course_folder+"/fullcourse.md")
+ output2 = pypandoc.convert_file(course_folder+'/fullcourse.html', 'docx', outputfile=course_folder+"/fullcourse.docx")
+
+
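A usage sketch for the download-and-assemble flow above; the course id is just an example, and the paths follow the course_folder convention in accessible_check():

    accessible_check("6862")
    # expected artifacts:
    #   ../course_temps/course_6862/index.html            (links to every downloaded item)
    #   ../course_temps/course_6862/fullcourse.raw.html   (module-ordered dump of pages/assignments)
    #   ../course_temps/course_6862/fullcourse.html / .md / .docx  (pypandoc conversions)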
+def pan_testing():
+ course_folder = '../course_temps/course_6862'
+ output3 = pypandoc.convert_file(course_folder+'/fullcourse.md', 'html', outputfile=course_folder+"/fullcourse.v2.html")
+
+# Given course, page url, and new content, upload the new revision of a page
+def create_page(course_num,new_title,new_content):
+ t3 = url + '/api/v1/courses/' + str(course_num) + '/pages'
+ #xyz = raw_input('Enter 1 to continue and send back to: ' + t3 + ': ')
+ #print("Creating page: %s\nwith content:%s\n\n\n" % (new_title,new_content))
+ print("Creating page: %s" % new_title)
+ xyz = input('type 1 to confirm: ') #'1'
+ if xyz=='1':
+ data = {'wiki_page[title]':new_title, 'wiki_page[body]':new_content}
+ r3 = requests.post(t3, headers=header, params=data)
+ print(r3)
+ print('ok')
+
+
+def md_to_course():
+ #input = 'C:/Users/peter/Nextcloud/Documents/gavilan/student_orientation.txt'
+ #output = 'C:/Users/peter/Nextcloud/Documents/gavilan/stu_orientation/student_orientation.html'
+ id = "11214"
+ infile = 'cache/pages/course_%s.md' % id
+ output = 'cache/pages/course_%s_fixed.html' % id
+ output3 = pypandoc.convert_file(infile, 'html', format='md', outputfile=output)
+
+ xx = codecs.open(output,'r','utf-8').read()
+ soup = bs( xx, features="lxml" )
+ soup.encode("utf-8")
+
+ current_page = ""
+ current_title = ""
+
+ for child in soup.body.children:
+ if child.name == "h1" and not current_title:
+ current_title = child.get_text()
+ elif child.name == "h1":
+ upload_page(id,current_title,current_page)
+ current_title = child.get_text()
+ current_page = ""
+ print( "Next page: %s" % current_title )
+ else:
+ #print(dir(child))
+ if 'prettify' in dir(child):
+ current_page += child.prettify(formatter="html")
+ else:
+ current_page += child.string
+
+ upload_page(id,current_title,current_page)
+ print("Done")
+
+
+# DL pages only
+def grab_course_pages(course_num=-1):
+ global results, results_dict, url, header
+ # course_num = raw_input("What is the course id? ")
+ if course_num<0:
+ course_num = input("Id of course? ")
+ else:
+ course_num = str(course_num)
+ modpagelist = []
+ modurllist = []
+ # We want things in the order of the modules
+ t4 = url + '/api/v1/courses/'+str(course_num)+'/modules?include[]=items'
+ results = fetch(t4)
+ i = 1
+ pageout = codecs.open('cache/pages/course_'+str(course_num)+'.html','w','utf-8')
+ pageoutm = codecs.open('cache/pages/course_'+str(course_num)+'.md','w','utf-8')
+ divider = "\n### "
+ for M in results:
+ print("Module Name: " + M['name'])
+ for I in M['items']:
+ if I['type']=='Page':
+ modpagelist.append(I['title'])
+ modurllist.append(I['page_url'])
+ pageout.write(divider+I['title']+'### '+I['page_url']+'\n')
+ easier_filename = clean_title(I['page_url'])
+ print(" " + str(i) + ". " + I['title'])
+ t2 = url + '/api/v1/courses/' + str(course_num) + '/pages/'+I['page_url']
+ print('Getting: ' + t2)
+ mypage = fetch(t2)
+ fixed = checker.safe_html(mypage['body'])
+ if fixed:
+ #markdown = h2m.convert(fixed)
+ #p_data = pandoc.read(mypage['body'])
+ markdown = pypandoc.convert_text(mypage['body'], 'md', format='html')
+ pageout.write(fixed+'\n')
+ pageoutm.write(markdown+'\n')
+ pageout.flush()
+ i += 1
+ pageout.close()
+ pageoutm.close()
+
+# Upload pages. Local copy has a particular format.
+# Appears to not be used
+def put_course_pages():
+ course_num = '6862'
+ filein = codecs.open('cache/pages/course_'+str(course_num)+'.html','r','utf-8')
+ my_titles = []
+ my_urls = []
+ my_bodys = []
+ started = 0
+ current_body = ""
+ for L in filein.readlines():
+ ma = re.search('^###\s(.*)###\s(.*)$',L)
+ if ma:
+ my_titles.append(ma.group(1))
+ my_urls.append(ma.group(2))
+ if started:
+ my_bodys.append(current_body)
+ current_body = ""
+ started = 1
+ else:
+ current_body += "\n" + L
+ my_bodys.append(current_body)
+
+ i = 0
+ for U in my_urls:
+ # and now upload it....lol
+ upload_page(course_num,U,my_bodys[i])
+ i += 1
+
+# Also not used
+def put_revised_pages():
+ course_num = '6862'
+ course_folder = '../course_temps/course_6862'
+ filein = codecs.open(course_folder+'/fullcourse.v2.html','r','utf-8')
+ my_titles = []
+ my_urls = []
+ my_bodys = []
+ started = 0
+ current_body = ""
+ for L in filein.readlines():
+ ma = re.search(r'^<h1>(.*)</h1>(.*)$',L)
+ if ma:
+ my_titles.append(ma.group(1))
+ my_urls.append(ma.group(2))
+ if started:
+ my_bodys.append(current_body)
+ current_body = ""
+ started = 1
+ else:
+ current_body += "\n" + L
+ my_bodys.append(current_body)
+
+ i = 0
+ for U in my_urls:
+ # and now upload it....lol
+ upload_page(course_num,U,my_bodys[i])
+ i += 1
+
+# Download, clean html, and reupload page
+def update_page():
+ global results, results_dict, url, header
+ # course_num = raw_input("What is the course id? ")
+ course_num = '6862'
+ t = url + '/api/v1/courses/' + str(course_num) + '/pages'
+ while t: t = fetch(t)
+ pages = results
+ results = []
+ mypagelist = []
+ myurllist = []
+ modpagelist = []
+ modurllist = []
+ for p in pages:
+ p['title'] = clean_title(p['title'])
+ mypagelist.append(p['title'])
+ myurllist.append(p['url'])
+ easier_filename = clean_title(p['url'])
+ #for a in 'title,updated_at,published'.split(','):
+ # print unicode(p[a]), "\t",
+ #print ""
+
+ # We want things in the order of the modules
+ t4 = url + '/api/v1/courses/'+str(course_num)+'/modules?include[]=items'
+ while t4: t4 = fetch(t4)
+ mods = results
+ results = []
+ i = 1
+ print("\nWhat page do you want to repair?")
+ for M in mods:
+ print("Module Name: " + M['name'])
+ for I in M['items']:
+ if I['type']=='Page':
+ modpagelist.append(I['title'])
+ modurllist.append(I['page_url'])
+ print(" " + str(i) + ". " + I['title'])
+ i += 1
+
+ choice = input("\n> ")
+ choice = int(choice) - 1
+ chosen_url = modurllist[choice]
+ print('Fetching: ' + modpagelist[choice])
+ t2 = url + '/api/v1/courses/' + str(course_num) + '/pages/'+chosen_url
+ print('From: ' + t2)
+
+ results_dict = {}
+ while(t2): t2 = fetch_dict(t2)
+ mypage = results_dict
+ fixed_page = checker.safe_html(mypage['body'])
+ upload_page(course_num,chosen_url,fixed_page)
+
+# Given course, page url, and new content, upload the new revision of a page
+def upload_page(course_num,pageurl,new_content):
+ print("Repaired page:\n\n")
+ #print new_content
+ print(pageurl)
+ t3 = url + '/api/v1/courses/' + str(course_num) + '/pages/' + pageurl
+ xyz = input('Enter 1 to continue and send back to: ' + t3 + ': ')
+ #xyz = '1'
+ if xyz=='1':
+ data = {'wiki_page[body]':new_content}
+ r3 = requests.put(t3, headers=header, params=data)
+ print(r3)
+ print('ok')
+
+# Use template to build html page with homegrown subtitles
+def build_srt_embed_php(data):
+ template = codecs.open('template_srt_and_video.txt','r','utf-8').readlines()
+ result = ''
+ for L in template:
+ L = re.sub('FRAMEID',data['frameid'],L)
+ L = re.sub('TITLE',data['title'],L)
+ L = re.sub('EMBEDLINK',data['embedlink'],L)
+ L = re.sub('SRTFOLDERFILE',data['srtfolderfile'],L)
+ result += L
+ return result
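build_srt_embed_php() only substitutes the four placeholders in the template file, so its data dict needs exactly the keys that swap_youtube_subtitles() below supplies. A small sketch; every value here is an invented example:

    data = {
        'frameid': 'videoframe1',
        'title': 'Welcome Video',
        'embedlink': 'https://www.youtube.com/embed/VIDEO_ID',
        'srtfolderfile': 'welcome.srt',
    }
    page_text = build_srt_embed_php(data)   # returns the filled-in template as a string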
+
+
+
+
+def yt_title(code):
+ global saved_titles
+ if code in saved_titles:
+ return saved_titles[code]
+ a = requests.get('https://www.youtube.com/watch?v=%s' % code)
+ bbb = bs(a.content,"lxml")
+ ccc = bbb.find('title').text
+ ccc = re.sub(r'\s\-\sYouTube','',ccc)
+ saved_titles[code] = ccc
+ codecs.open('saved_youtube_titles.json','w','utf-8').write(json.dumps(saved_titles))
+ return ccc
+
+def swap_youtube_subtitles():
+ # example here: http://siloor.github.io/youtube.external.subtitle/examples/srt/
+
+ # srt folder, look at all filenames
+ srtlist = os.listdir('video_srt')
+ i = 0
+ for V in srtlist:
+ print(str(i) + '. ' + V)
+ i += 1
+ choice = input("Which SRT folder? ")
+ choice = srtlist[int(choice)]
+ srt_folder = 'video_srt/'+choice
+ class_srt_folder = choice
+ srt_files = os.listdir(srt_folder)
+ srt_shorts = {}
+ print("\nThese are the subtitle files: " + str(srt_files))
+ for V in srt_files:
+ if V.endswith('srt'):
+ V1 = re.sub(r'(\.\w+$)','',V)
+ srt_shorts[V] = minimal_string(V1)
+
+ crs_id = input("What is the id of the course? ")
+ grab_course_pages(crs_id)
+ v1_pages = codecs.open('page_revisions/course_'+str(crs_id)+'.html','r','utf-8')
+ v1_content = v1_pages.read()
+
+ # a temporary page of all youtube links
+ tp = codecs.open('page_revisions/links_' + str(crs_id) + '.html', 'w','utf-8')
+
+ # course pages, get them all and look for youtube embeds
+ title_shorts = {}
+ title_embedlink = {}
+ title_list = []
+ print("I'm looking for iframes and youtube links.")
+ for L in v1_content.split('\n'):
+ if re.search('<iframe', L):
+ ma = re.search(r'src="([^"]*youtu[^"]*)"', L)
+ if ma:
+ this_src = ma.group(1)
+ this_title = yt_title(this_src.split('/')[-1].split('?')[0])
+ title_list.append(this_title)
+ title_shorts[this_title] = minimal_string(this_title)
+ title_embedlink[this_title] = this_src
+ tp.write("<p>%s <a href='%s'>%s</a></p>" % (this_title, this_src, this_src) )
+ # match them
+ # lowercase, non alpha or num chars become a single space, try to match
+ # if any srts remain unmatched, ask.
+ tp.close()
+ webbrowser.open_new_tab('file://C:/SCRIPTS/everything-json/page_revisions/links_'+str(crs_id)+'.html')
+
+ matches = {} # key is Title, value is srt file
+ for S,v in list(srt_shorts.items()):
+ found_match = 0
+ print(v, end=' ')
+ for T, Tv in list(title_shorts.items()):
+ if v == Tv:
+ print(' \tMatches: ' + T, end=' ')
+ found_match = 1
+ matches[T] = S
+ break
+ #print "\n"
+
+ print("\nThese are the srt files: ")
+ print(json.dumps(srt_shorts,indent=2))
+ print("\nThese are the titles: ")
+ print(json.dumps(title_shorts,indent=2))
+ print("\nThese are the matches: ")
+ print(json.dumps(matches,indent=2))
+
+ print(("There are %d SRT files and %d VIDEOS found. " % ( len(list(srt_shorts.keys())), len(list(title_shorts.keys())) ) ))
+
+ for S,v in list(srt_shorts.items()):
+ if not S in list(matches.values()):
+ print("\nDidn't find a match for: " + S)
+ i = 0
+ for T in title_list:
+ if not T in list(matches.keys()): print(str(i+1) + ". " + T.encode('ascii', 'ignore').decode('ascii'))
+ i += 1
+ print("Here's the first few lines of the SRT:")
+ print(( re.sub(r'\s+',' ', '\n'.join(open(srt_folder+"/"+S,'r').readlines()[0:10]))+"\n\n"))
+ choice = input("Which one should I match it to? (zero for no match) ")
+ if int(choice)>0:
+ matches[ title_list[ int(choice)-1 ] ] = S
+ print("SRT clean name was: %s, and TITLE clean name was: %s" % (v,title_shorts[title_list[ int(choice)-1 ]] ))
+ print("ok, here are the matches:")
+ print(json.dumps(matches,indent=2))
+
+ # construct subsidiary pages, upload them
+ i = 0
+ for m,v in list(matches.items()):
+ # open template
+ # do replacement
+ i += 1
+ data = {'frameid':'videoframe'+str(i), 'title':m, 'embedlink':title_embedlink[m], 'srtfolderfile':v }
+ print(json.dumps(data,indent=2))
+ file_part = v.split('.')[0]
+ new_php = codecs.open(srt_folder + '/' + file_part + '.php','w','utf-8')
+ new_php.write(build_srt_embed_php(data))
+ new_php.close()
+ #srt_files = os.listdir(srt_folder)
+ put_file(class_srt_folder)
+
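The title/SRT matching above leans on minimal_string(), which is not part of this diff. Going by the comment in swap_youtube_subtitles ("lowercase, non alpha or num chars become a single space"), an equivalent helper might look like this sketch:

    import re
    def minimal_string_sketch(s):
        return re.sub(r'[^a-z0-9]+', ' ', s.lower()).strip()

    minimal_string_sketch("Choose_the-Right Browser!")   # -> 'choose the right browser'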
+
+def test_swap():
+ crs_id = '6923'
+ # swap in embed code and re-upload canvas pages
+ v2_pages = codecs.open('page_revisions/course_'+str(crs_id)+'.html','r','utf-8')
+ v2_content = v2_pages.read()
+ ma = re.compile('(\w+)=(".*?")')
+
+ for L in v2_content.split('\n'):
+ find = re.findall(r'<iframe[^>]*>', L)
canvas id: {{ id }} +{% else %} +
canvas id: {{ id }} +{% else %} +
+This is a page. Vue is: 1. set up your data. fetch json of either 1 item or the whole list. +
+ + + + + + + + + + +1.1 Make your main div with id, and custom tags in it.
+ +2. Make some components
+ + + + +3. Including the one that corresponds to the html / main div above.
+ + + + + diff --git a/templates/sample-simple-vue-starter.html b/templates/sample-simple-vue-starter.html new file mode 100644 index 0000000..90fddb0 --- /dev/null +++ b/templates/sample-simple-vue-starter.html @@ -0,0 +1,194 @@ + +canvas id: {{ id }} +{% else %} +
+This is a page. Vue is: 1. set up your data. fetch json of either 1 item or the whole list. +
+ + + + + +1.1 Make your main div with id, and custom tags in it.
+ +2. Make some components
+ + + + +3. Including the one that corresponds to the html / main div above.
+ + + + + + + + + diff --git a/timer.py b/timer.py new file mode 100644 index 0000000..a084727 --- /dev/null +++ b/timer.py @@ -0,0 +1,35 @@ +from threading import Timer +import time, datetime + +mm = 18 + +t = datetime.datetime.today() +future = datetime.datetime(t.year,t.month,t.day,23,mm) +diff = future - t +delta = diff.total_seconds() + +print("waiting until 11:%i PM, which is %i seconds from now." % (mm,delta)) + + + + + +def func(a, b): + print("Called function") + return a * b + +# Schedule a timer for 5 seconds +# We pass arguments 3 and 4 +t = Timer(delta, func, [3, 4]) + +start_time = time.time() + +# Start the timer +t.start() + +end_time = time.time() + +if end_time - start_time < 5.0: + print("Timer will wait for sometime before calling the function") +else: + print("%i seconds already passed. Timer finished calling func()" % mm) \ No newline at end of file diff --git a/token.pickle b/token.pickle new file mode 100644 index 0000000..f58564b Binary files /dev/null and b/token.pickle differ diff --git a/users.py b/users.py new file mode 100644 index 0000000..bf946aa --- /dev/null +++ b/users.py @@ -0,0 +1,2203 @@ + +import json, codecs, requests, re, pdb, csv, textdistance +import sys, csv, string, funcy, math, shutil, imghdr, os +import pytz, time +import pandas as pd +import matplotlib.pyplot as plt + +#from pandas import TimeGrouper +from collections import defaultdict +from pipelines import fetch, fetch_stream, getSemesterSchedule, header, url, FetchError, put_file +from courses import course_enrollment, users_in_semester +from localcache import users_this_semester_db, unwanted_req_paths, timeblock_24hr_from_dt, dt_from_24hr_timeblock +from localcache import teachers_courses_semester +from util import dept_from_name, most_common_item +from os.path import exists, getmtime + +#from localcache import users_file, com_channel_dim + +from dateutil import parser +from datetime import datetime as dt +from datetime import timedelta +import datetime + +import queue +from threading import Thread +from os import path + +# for NLP +import spacy +from gensim import corpora, models, similarities, downloader, utils +from nltk import stem + + +# todo: these constants + +#last_4_semesters = 'fall2020 summer2020 spring2020 fall2019'.split(' ') +#last_4_semesters_ids = [62, 60, 61, 25] +last_4_semesters = 'spring2021 fall2020 summer2020 spring2020'.split(' ') +last_4_semesters_ids = [168, 65, 64, 62] + +log_default_startdate = "2021-08-23T00:00:00-07:00" +lds_stamp = parser.parse(log_default_startdate) + +recvd_date = '2021-08-23T00:00:00Z' +num_threads = 25 +max_log_count = 250000 + + +########## +########## +########## GETTING USER DATA +########## +########## + +# All users to a cache file cache/allusers.json +def fetchAllUsers(): + + if exists('cache/allusers.json'): + time = date_time = dt.fromtimestamp( getmtime('cache/allusers.json') ) + newname = 'cache/allusers_'+ time.strftime('%Y%m%d') + ".json" + print("renaming old data file to %s" % newname) + os.rename('cache/allusers.json', newname) + + + + out1 = codecs.open('cache/allusers.json','w','utf-8') + out2 = codecs.open('cache/allusers_ids.json','w','utf-8') + all_u = fetch_stream(url + '/api/v1/accounts/1/users?per_page=100', 1) + + ids = [] + main_list = [] + for this_fetch in all_u: + for U in this_fetch: + ids.append(U['id']) + main_list.append(U) + + ids.sort() + out2.write( json.dumps(ids, indent=2)) + out1.write( json.dumps(main_list, indent=2)) + out2.close() + out1.close() + return ids + + + +########## +########## 
+########## TEACHERS LIST AND LOCAL USERS FILE +########## +########## + +# Fetch teacher users objects from local cache +def teacherRolesCache(): # I used to be load_users + users_raw = json.load(open('cache/ilearn_staff.json','r')) + users = {} + users_by_id = {} + for U in users_raw: + users[ U['login_id'] ] = U + users_by_id[ U['id'] ] = U + return users, users_by_id + + + + + + +# Outputs: cache/ilearn_staff.json +# Canvas: Fetch all people with gavilan.edu email address +def teacherRolesUpdateCache(): # I used to be get_users + t = fetch('/api/v1/accounts/1/users?per_page=500&search_term=%40gavilan.edu&include[]=email') + g = open('cache/ilearn_staff.json','w') + g.write( json.dumps(t) ) + g.close() + #put_file('/gavilan.edu/staff/flex/2020/','cache/','ilearn_staff.json') + print("Wrote to 'cache/ilearn_staff.json'") + return teacherRolesCache() + + +# Fetch preferred email address for a given user id. ( Canvas ) +def getEmail(user_id): + results = fetch("/api/v1/users/" + str(user_id) + "/communication_channels") + for r in results: + if r['type']=='email': + return r['address'] + return '' + + +########## +########## +########## TEACHERS AND OTHER STAFF +########## +########## +# +# Gather all my info, CRM style, in the folder teacherdata +# +# +# Typical actions: For everyone with a teacher role: +# - What are the courses they taught for the last X semesters? +# - What's their activity level each semester? +# - Which of those courses are Online, Hybrid or Face2face? +# + column for each semester: OHLOHL +# - How many online classes have they taught in the past? +# - Are they brand new, or brand new online?# further... +# - what's their department? +# - what's their badges and 'tech level?' +# - + + +# All teachers in a particular course +def getAllTeachers(course_id=59): # a list + qry = '/api/v1/courses/' + str(course_id) + '/search_users?enrollment_type=teacher' + t = url + qry + while(t): t = fetch(t) +# +def classType(t): + if t == 'lecture': return 'L' + if t == 'online': return 'O' + if t == 'hours': return 'R' + if t == 'lab': return 'A' + if t == 'hybrid': return 'H' + else: return 'L' # todo: fix bug in schedule parser so non-online classes have a type field + +def my_blank_string(): return "no data" +def my_blank_dict(): return {'name':'NoName','email':'noemail@gavilan.edu'} +def my_empty_dict(): return defaultdict(my_blank_string) + +def get_email_from_rec(name,name_to_record): + #print "Looking up: " + name + try: + return name_to_record[name]['email'] + except Exception as e: + print("Missing Teacher %s" % name) + return 'noemail@gavilan.edu' + + + + +# Pull the staff directory on the webpage. Convert to pandas dataframe +def staff_dir(get_fresh=False): + """ + if get_fresh: + url = "http://www.gavilan.edu/staff/dir.php" + regex = "var\slist=(\[.*\]);" + response = requests.get(url).text + m = re.search(regex,response) + if m: + output = '{"staff":' + m.group(1) + '}' + of = open('cache/teacherdata/staff_dir.json','w') + of.write(output) + js = json.loads(output) + df = pd.DataFrame(js['staff']) + return df + print("Wrote cache/teacherdata/staff_dir.json") + else: + print("Failed on staff directory scrape") + return '' + else: + input = json.loads(open('cache/teacherdata/staff_dir.json','r').read()) + df = pd.DataFrame(input['staff']) + return df + """ + + # TODO lol get fresh again... 
+ + old_dir = csv.reader(open('cache/personnel2020_04_12.csv'), delimiter=',') + dept1_crxn = {r[0]:r[1] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') } + dept2_crxn = {r[0]:r[2] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') } + title_crxn = {r[0]:r[3] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') } + revised_dir = [ ] + columns = next(old_dir) + + for r in old_dir: + old_dept = r[2] + if old_dept in dept1_crxn: + new_one = dept1_crxn[old_dept] + if dept2_crxn[old_dept]: new_one += '/' + dept2_crxn[old_dept] + if title_crxn[old_dept]: new_one += '/' + title_crxn[old_dept] + r[2] = new_one + revised_dir.append(r) + print(revised_dir) + return pd.DataFrame(revised_dir,columns=columns) + + +# +# +# +# ### +# ### TEACHER CRM FUNCTIONS +# ### +# + +def schedForTeacherOverview(long,short): + sem = getSemesterSchedule(short) + sem['type'] = sem['type'].apply(classType) + #sem['code'] = sem[['code','type']].apply(' '.join,axis=1) + sem['sem'] = short + sem = sem.drop(['time','loc','name','date','days'],axis=1) # ,'crn' + return sem + + + + +# Return a dataframe of the last 4 semester schedules put together +def oneYearSchedule(): + sp19 = schedForTeacherOverview('2019spring','sp19') + su19 = schedForTeacherOverview('2019summer','su19') + fa19 = schedForTeacherOverview('2019fall','fa19') + sp20 = schedForTeacherOverview('2020spring','sp20') + + # The four-semester schedule + a = pd.concat([sp19,su19,fa19,sp20], sort=True, ignore_index=True) + a = a.drop(['cap','cmp','extra','rem','sec','cred','act'], axis=1) + a.to_csv('cache/one_year_schedule.csv') + return a + +def num_sections_last_year(line): + #if not type(line)=='str': return 0 + parts = line.split(' ') + return len(parts) + +def sec_type_stats(line): + #print(type(line)) + #if not type(line)=='str': return {'fail':1} + #print("in sts: " + str(line)) + parts = line.split(' ') + output = defaultdict(int) + for p in parts: output[p] += 1 + return output + +def prct_online(line): + d = sec_type_stats(line) + #print(d) + total = 0 + my_total = 0 + for k,v in d.items(): + total += v + if k == 'O': my_total += v + return int(100 * ((1.0)*my_total / total)) + +def prct_lecture(line): + #print(line) + d = sec_type_stats(line) + #if 'fail' in d: return 0 + total = 0 + my_total = 0 + for k,v in d.items(): + total += v + if k == 'L': my_total += v + return int(100 * ((1.0)*my_total / total)) + + +def prct_hybrid(line): + d = sec_type_stats(line) + #if 'fail' in d: return 0 + total = 0 + my_total = 0 + for k,v in d.items(): + total += v + if k == 'H': my_total += v + return int(100 * ((1.0)*my_total / total)) + +# Given the names of teachers in last year's schedules, fill in email, etc. 
from ilearn files +def teacher_basic_info(sched, from_ilearn, names): + bi = from_ilearn # pd.DataFrame(from_ilearn) + bi.rename(columns={'id':'canvasid','login_id':'goo'}, inplace=True) + # bi.drop(['name',],axis=1,inplace=True) + + #print(bi) + #input('xx') + + sp20 = schedForTeacherOverview('2020spring','sp20') + + + codes_sp20 = sp20.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) ) + crns_sp20 = sp20.groupby('teacher')['crn'].apply( lambda x: ' '.join( map( str, funcy.distinct(x))) ) + codes_sp20.rename(columns={'code':'sp20code'}, inplace=True) + codes_sp20.to_csv('cache/trash/codes_sp20.csv',header=True) + crns_sp20.rename(columns={'crn':'sp20crn'}, inplace=True) + crns_sp20.to_csv('cache/trash/crns_sp20.csv',header=True) + + + a = sched.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) ) + a = pd.DataFrame(a) + a.reset_index(inplace=True) + a['dept'] = a.apply(guessDept,axis=1) + print(a) + + def find_that_name(x): + #print(x) + if 'teacher' in x: return names(x['teacher']) + #print('name not found?') + return '' + + a['ilearn_name'] = a.apply( find_that_name, axis=1) + + a.rename(columns={'code':'courses'}, inplace=True) + #print(type(a)) + a.reset_index(inplace=True) + + a = pd.merge(a,codes_sp20.rename('sp20courses'), on='teacher') + a = pd.merge(a,crns_sp20.rename('sp20crns'), on='teacher') + a.to_csv('cache/trash/sched_w_sp20.csv',header=True) + print(a) + + a['canvasid'] = a['teacher'].map(names) + #print(a) + c = pd.merge(bi, a, left_on='name', right_on='ilearn_name', how='outer') + c.to_csv('cache/trash/basic.csv',header=True) + #print(c) + return c + + +# what percentage of their sections were online / hybrid /lecture ? +# Consumes: output/semesters/fa19_sched.json and etc for 1 year +# Outputs: cache/teacher_by_semester.csv, +def teacherModalityHistory(sched=[],names=[]): + if not len(sched): + sched = oneYearSchedule() + names = match_username() + + # How many classes a teacher taught lect/online/hybrid/hours + sec_type = sched.groupby(['teacher','sem'])['type'].apply(' '.join) + sec_type.to_csv('cache/teacherdata/teacher_by_semester.csv',header=True) + ## THIS IS THE LIST of how many + ## lecture, hybrid, online they've taught + + #sec_type = pd.read_csv('cache/teacherdata/teacher_by_semester.csv') + + sec_grp = sec_type.groupby('teacher').aggregate( ' '.join ) + #sec_grp.to_csv('cache/trash/sec_grp_3.csv',header=True) + + #sec_grp = sec_grp.iloc[1:] ## I'm seeing bad items on the first 2 + #sec_grp.drop(index='teacher') + #sec_grp.to_csv('cache/trash/sec_grp_0.csv',header=True) + + # + sec_grp = pd.DataFrame(sec_grp) + #print(type(sec_grp)) + sec_grp['prct_online'] = sec_grp['type'].map(prct_online) + + sec_grp['prct_lecture'] = sec_grp['type'].map(prct_lecture) + sec_grp['prct_hybrid'] = sec_grp['type'].map(prct_hybrid) + sec_grp['num_sections_last_year'] = sec_grp['type'].map(num_sections_last_year) + sec_grp.drop('type',axis=1,inplace=True) + sec_grp.reset_index(inplace=True) + sec_grp.to_csv('cache/teacherdata/modality_history.csv') + return sec_grp + + + +def teacherCourseHistory(a,names): + pass + # actually not using this. 
moved to _basic_info + + # YEEEAH + sched = a.groupby(['teacher','code']) + #for name,group in sched: + # print(name) + #print(sched.count()) + return + a['name'] = a.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1) + a['email'] = a.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1) + a.sort_values(by=['dept','teacher','codenum'],inplace=True) + a = a.drop(['teacher'],axis=1) + a.to_csv('cache/teacherdata/courses_taught.csv') + + return a + """ + d = a.groupby(['teacher']) # ,'dept','codenum','codeletter' + + out1 = open('teacherdata/courses_taught.csv','w') + by_dept = {} # x todo: sort by dept also + for name, group in d: + #print name + if re.search(r'^\d+',name) or name=='TBA': + print("Skipping weird name: ", name) + continue + rec = {'email':'xx'} + try: + rec = records_by_sname[name] + #print rec + except Exception as e: + print("Missing Teacher %s" % name) + continue + out1.write(name+"\t"+rec['email']) + s = set() + #print group + for idx,r in group.iterrows(): + s.add( str(r[1]) + str(r[2]) + str(r[3])) + for clas in sorted(s): + d = dept_from_name(clas) + if d in by_dept: + if name in by_dept[d]: + by_dept[d][name].append(clas) + else: + by_dept[d][name] = [ clas, ] + else: + by_dept[d] = { name: [ clas, ] } + + out1.write("\n\t"+str(clas)) + out1.write("\n") + out1.write( json.dumps(by_dept,indent=2))""" + + + +# Consumes: output/semesters/fa19_sched.json and etc for 1 year +# Outputs: cache/course_teacher_combos.csv, +def teacherSharedCourses(a=[]): + if not len(a): a = oneYearSchedule() + + # List of classes. Group by teacher/format. Shows who has historically + # taught a class and who teaches it most often. + c = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter' + c = c.groupby(['dept','codenum','codeletter']) #,'teacher' + c = c.aggregate(lambda x: set(x)) + c.to_csv('teacherdata/course_teacher_combos.csv') ## THIS is the list of teachers who + ## share courses + return c + + + +# Consumes: output/semesters/fa19_sched.json and etc for 1 year +# Outputs: cache/num_courses_per_dept.csv (not teacher_course_oer_deptcount) +# How many courses in each department were taught in the last year? 
+def departmentCountCourses(a=[]): + if not len(a): a = oneYearSchedule() + + tt = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter' + + records_by_sname = defaultdict(my_empty_dict, match_usernames()) + tt.drop_duplicates(keep='first',inplace=True) + tt['name'] = tt.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1) + tt['email'] = tt.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1) + tt = tt.drop(['teacher'],axis=1) + tt.sort_values(by=['dept','name','codenum'],inplace=True) + count = tt['dept'].value_counts() + count.to_csv('cache/num_courses_per_dept.csv', header=True) + + +def clean_nonprint(s): + return re.sub(f'[^{re.escape(string.printable)}]', '', s) + +def read_cmte(names): + output = [] + out2 = defaultdict(list) + input = codecs.open('cache/teacherdata/committees_2018_2019.csv','r','utf-8') + with input as csvfile: + cmtereader = csv.reader(csvfile, delimiter=',', quotechar='"') + for row in cmtereader: + for R in row: + R = R.strip() + R = clean_nonprint(R) + (fname,lname,cmtes) = row + a = re.split(",\s*",cmtes) + if len(a)>1: + cmtes = a + else: + cmtes = a + + name1 = lname + ", " + fname + name2 = fname + " " + lname + name = name1 + realname = names(name1) + if not realname: + realname = names(name2) + name = name2 + if realname: + for cmm in cmtes: + output.append( [realname, cmm] ) + out2[realname].append(cmm) + else: + print("committee participant name failed: %s / %s:\t%s" % (name1,name2,str(a))) + print(type(name1)) + #print(out2) + return output,out2 + +def read_training_records(): + myinput = open('cache/teacherdata/more_2018_2019_training_attendance.txt','r').readlines() + current_sesh = "" + ppl_in_sesh = {} + all_ppl = set() + + for L in myinput: + L = L.strip() + if L: + if L.startswith('#'): + ma = re.search(r'^\#\s(.*)$',L) + if ma: + current_sesh = ma.group(1) + else: + print("-- read_training_records: Couldn't find training set? " + L) + else: + if current_sesh in ppl_in_sesh: + ppl_in_sesh[current_sesh].append(L) + else: + ppl_in_sesh[current_sesh] = [ L, ] + all_ppl.add(L) + if 0: + print(ppl_in_sesh) + print(all_ppl) + + # Want to pivot the dict, so key is a name, value is another dict, where k2 is session name, v2 is Y/N + d_of_d = defaultdict(dict) + + for k,v in ppl_in_sesh.items(): + for user in v: + d_of_d[user][k] = 'Y' + + return d_of_d + +# open a file and mark the people with their ids given. Return a dataframe +def read_bootcamp1(filename): + a = pd.read_csv(filename) + #print(a) + b = a.loc[:, ['canvas_id','grade','last_activity']] + b.rename(columns={'canvas_id':'bc1canvasid','grade':'bootcamp_grade','last_activity':'bootcamp_date'}, inplace=True) + #print(b) + return b + +# open a file and mark the people with their ids given. Return a dataframe +def read_bootcamp2(filename): + a = pd.read_csv(filename) + #print(a) + b = a.loc[:, ['canvas_id','grade','last_activity']] + b.rename(columns={'canvas_id':'bc2canvasid','grade':'bootcamp_progress','last_activity':'bootcamp_date'}, inplace=True) + #print(b) + return b + + +def not_blank_or_pound(L): + if L.startswith("#"): return False + L = L.strip() + if L == "": return False + return True + +def temp1(x): + #print(x[1]) + return x[1] + +def add_realnames(df,names): # the surveys. 
raw name is in 2nd column + df['ilearn_name'] = df.apply( lambda x: names(temp1(x),1), axis=1) + return df + +def compareToughNames(a,b): + # search for a in b + m = re.search(a, b) + if m: return True + return False + + +def compareNames(a,b,verbose=0): + if a == b: return True + + cnDBG = 0 + try: + parts_a = [ W.lower() for W in re.split("[\s,]", a) ] + [ x.strip() for x in parts_a ] + + parts_b = [ W.lower() for W in re.split("[\s,]", b) ] + [ x.strip() for x in parts_b ] + + pa2 = sorted([ parts_a[0], parts_a[-1] ]) + pb2 = sorted([ parts_b[0], parts_b[-1] ]) + + if pa2 == pb2: + if cnDBG: print("->Match: %s, %s" % (a,b)) + return True + if pa2[0] == pb2[0] or pa2[-1] == pb2[-1]: + if cnDBG: print("--->Near match: %s" % b) + return False + + except Exception as e: + #print("Problem with compareNames %s , %s" % (a,b)) + #print(e) + return False + + if len(pa2[0])>3 and len(pb2[0])>3: + if pa2[0][0] == pb2[0][0]: + if pa2[0][1] == pb2[0][1]: + if pa2[0][2] == pb2[0][2]: + if cnDBG: print("===> Near match (first 3): %s, %s, %s, %s" % (a, b, pa2[0], pb2[0])) + pass + + b = b.lower() + a = a.lower() + + #if verbose: print("searching: %s / %s" % (a,b)) + if re.search( b, a): + #print("REGEX MATCH: %s | %s" % (a,b)) + return True + if re.search( a, b): + #print("REGEX MATCH: %s | %s" % (a,b)) + return True + return False + +def find_ilearn_record(ilearn_records,manual_records, othername,verbose=0): + # manual records are ('name':'canvas_id') + #print(ilearn_records) + if not othername: return "" + if type(othername) == type(1.25): return "" + #if math.isnan(othername): return False + + if othername in manual_records: + a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[othername]) )) + if a: + return a['name'] + + for x in ilearn_records: + #print('f_i_r') + #print(othername) + #print(x) + if compareNames(othername,x['name'],verbose): + return x['name'] + + for k,v in manual_records.items(): + #print(k) + #print(othername) + #print(type(othername)) + b = re.search( k, othername) + if b: + a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[k]) )) + if a: + return a['name'] + return "" + + +def manualNamesAndDept(): + # copied from // getTeachersInfoMain .... + + schedule_one_yr = oneYearSchedule() + from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \ + json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) ) + manual_names = manualNames() + names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names) + teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn, names_lookup) + # till here + + + # the staff directory + dr = staff_dir(False) + print(dr) + print(dr.columns) + print( dr['department'].unique() ) + + # now to reconcile and combine these.... + # + # we want: + # - alternate names of academic / other depts, with one preferred + # - some people are PT Fac, FT Fac, Director, assistant, spec, and some titles are unknown. + # - sometimes the hierarchy is of departments, and sometimes of people. try not to confuse that. 
+ # + + + # eventually, want to get pics or other info from other sources too, o365, cranium cafe, etc + # + + + +def manualNames(): + mm = dict([ x.strip().split(',') for x in \ + open('cache/teacherdata/teacher_manual_name_lookup.csv','r').readlines()]) + mz = {} + for k,v in mm.items(): + mz[k] = v + mz[k.lower()] = v + parts = k.split(" ") + if len(parts)==2: + mz[ parts[1] + ", " + parts[0] ] = v + mz[ parts[1] + "," + parts[0] ] = v + #print(mz) + return mz + +# given a list of class codes, return the most common (academic) department +def guessDept(d_list): + li = str(d_list.code).split(" ") + count = defaultdict(int) + #print(str(d_list.code)) + for i in li: + m = re.search(r'^([A-Z]+)$',i) + if m: + count[m.group(1)] += 1 + mmax = 0 + max_L = '' + for k,v in count.items(): + #print(" %s:%i, " % (k,v), end='') + if v > mmax: + mmax = v + max_L = k + print("") + return max_L + +""" +# Faculty Info Plans + + + +bootcamp_active.csv Started bootcamp. Remind them to finish it? + +bootcamp_passed.csv Badge'd for BC. Online and Hybrid teachers not on this list need reminding. + +courses_taught.csv x + +course_teacher_combos.csv Teachers who share the teaching of a course. Courses in common. + +emails_deans+chairs.txt Just a email list + +FA2017 Faculty Survey.csv Look at answers for video, helpful formats, and comments + +faculty_main_info.csv Has percentage mix of a teachers' online/hybrid/lecture history + +historical_shells_used.json x + +SP2019 Faculty Survey.csv Look at rate tech skills, topics interested in, would add video, and comments + +committees 2018 2019.csv Committees people serve on. + + + +Not so useful: + +teacher_by_semester.csv precursor to faculty_main_info. Has semesters separated. + +""" +# +# +# +# Call all the teacher info / CRM gathering stuff +# Make one big csv file of everything I know about a teacher +def getTeachersInfoMain(): + + schedule_one_yr = oneYearSchedule() + #print(schedule_one_yr) + #if input('q to quit ')=='q': return + + # comes from teacherRolesUpdateCache ... 
search for @gavilan.edu in email address + from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \ + json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) ) + #names_from_ilearn = list( [x.lower() for x in map( str, sorted(list(funcy.pluck('name',from_ilearn)))) ] ) + from_ilearn_df = pd.DataFrame(from_ilearn) + + + manual_names = manualNames() + names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names) + #print(from_ilearn_df) + #if input('q to quit ')=='q': return + + + #print(schedule_one_yr) + #print("This is one year schedule.") + #input('\npress enter to continue') + + teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn_df, names_lookup) + #print(teacher_info) + #input('\nThis is teacher info.\npress enter to continue') + + modality_history = teacherModalityHistory(schedule_one_yr,names_lookup) + print(modality_history) + #print("This is teacher modality history.") + #input('\npress enter to continue') + + + master = pd.merge( modality_history, teacher_info, on='teacher', how='outer') + print(master) + master.to_csv('cache/trash/joined1.csv') + print(master.columns) + #input('\nThis is Joined 1.\npress enter to continue') + + wp = read_bootcamp1('cache/teacherdata/bootcamp_passed.csv') + #print(wp) + master2 = pd.merge( master, wp, left_on='canvasid_x', right_on='bc1canvasid', how='outer') + master2.to_csv('cache/trash/joined2.csv') + print(master2) + print(master2.columns) + #input('\nThis is Joined 2.\npress enter to continue') + + + wp = read_bootcamp2('cache/teacherdata/bootcamp_active.csv') + master3 = pd.merge( master2, wp, left_on='canvasid_x', right_on='bc2canvasid', how='outer') + master3.to_csv('cache/trash/joined3.csv') + print(master3) + print(master3.columns) + #input('\nThis is Joined 3.\npress enter to continue') + + + # THE VIEWS / HISTORY. UPDATE with get_recent_views() .... check it for appropriate dates.... 
+ views = json.loads( codecs.open('cache/teacherdata/activitysummary.json','r','utf-8').read() ) + vdf = pd.DataFrame.from_dict(views,orient='index',columns=['cid','cname','views','goo','dates','dateviews']) + print(vdf) + #input('k') + + #master3.set_index('canvasid_x') + master3 = pd.merge(master3, vdf, left_on='canvasid_x', right_on='cid',how='outer') + + dir_records = pd.DataFrame(staff_dir()) + dir_records['email'] = dir_records['email'].str.lower() + master3['email'] = master3['email'].str.lower() + + print(dir_records) + master3 = pd.merge(master3, dir_records, on='email',how='outer') + print(master3) + #if input('q to quit ')=='q': return + + #master3.fillna(0, inplace=True) + #master3['views'] = master3['views'].astype(int) + #master3['num_sections_last_year'] = master3['num_sections_last_year'].astype(int) + + + #cmte = pd.read_csv('cache/teacherdata/committees_2018_2019.csv') + cmte,cmte_by_name = read_cmte(names_lookup) + cmte_str_by_name = {} + for k in cmte_by_name.keys(): + #print(k) + #print(cmte_by_name[k]) + cmte_str_by_name[k] = ",".join(cmte_by_name[k]) + cc = pd.DataFrame.from_dict(cmte_str_by_name,orient='index',columns=['committees']) # 'teacher', + cc.reset_index(inplace=True) + master4 = pd.merge(master3, cc, left_on='name', right_on='index', how='outer') + master4.to_csv('cache/trash/joined4.csv') + + master4.drop(['teacher','ilearn_name','canvasid_y','bc1canvasid','bc2canvasid','cid','cname','index_y'],axis=1,inplace=True) + + # Exclude surveys for now + """ + survey_2017 = pd.read_csv('cache/teacherdata/FA2017 Faculty Survey.csv') + survey_2017 = add_realnames(survey_2017,names_lookup) + survey_2017.to_csv('cache/trash/survey1.csv') + master5 = pd.merge(master4, survey_2017, left_on='name', right_on='ilearn_name', how='left') + master5.to_csv('cache/trash/joined5.csv') + + survey_2019 = pd.read_csv('cache/teacherdata/SP2019 Faculty Survey.csv') + survey_2019 = add_realnames(survey_2019,names_lookup) + master6 = pd.merge(master5, survey_2019, left_on='name', right_on='ilearn_name', how='left') + master6.to_csv('cache/trash/joined6.csv') + + + newnames = [ x.strip() for x in open('cache/poll_question_names.txt','r').readlines() ] + namedict = {} + for i,n in enumerate(newnames): + if i%3==1: newname = n + if i%3==2: namedict[oldname] = newname + if i%3==0: oldname = n + master6 = master6.rename(columns=namedict) + master6.to_csv('cache/teacherdata/staff_main_table.csv') + master6.to_csv('cache/teacherdata/staff_main_table.csv') + """ + + + master4.to_csv('cache/teacherdata/staff_main_table.csv') + master4.to_csv('gui/public/staff_main_table.csv') + + other_training_records = read_training_records() + #print(json.dumps(other_training_records,indent=2)) + #print("This is misc workshops.") + tt = pd.DataFrame.from_dict(other_training_records,orient='index') + tt = tt.fillna("") + #print(tt) + #input('\npress enter to continue') + + + + #teacherSharedCourses(schedule_one_yr) + #getAllTeachersInTerm() + + + +def enroll_staff_shell(): + staff = users_with_gavilan_email() + for i,s in staff.iterrows(): + print(s['canvasid'],s['name']) + u = url + '/api/v1/courses/8528/enrollments' + param = { + 'enrollment[user_id]':s['canvasid'], + 'enrollment[type]': 'StudentEnrollment', + 'enrollment[enrollment_state]': 'active', + } + + res = requests.post(u, headers = header, data=param) + print(res.text) + +#"Jun 28 2018 at 7:40AM" -> "%b %d %Y at %I:%M%p" +#"September 18, 2017, 22:19:55" -> "%B %d, %Y, %H:%M:%S" +#"Sun,05/12/99,12:30PM" -> "%a,%d/%m/%y,%I:%M%p" +#"Mon, 21 March, 
2015" -> "%a, %d %B, %Y" +#"2018-03-12T10:12:45Z" -> "%Y-%m-%dT%H:%M:%SZ" + + +# take a list of raw hits. +def activity_summary(hits): + #infile = "cache/teacherdata/activity/G00101483.json" + #data = json.loads(open(infile,'r').read()) + #hits = data['raw'] + if not hits: + return [ [], [], ] + dt_list = [] + + one_week = datetime.timedelta(days=14) # actually two.... + today = dt.now().replace(tzinfo=pytz.timezone('UTC')) + + target = today - one_week + + for h in hits: + the_stamp = parser.parse(h['created_at']) + if the_stamp > target: + dt_list.append(the_stamp) + df = pd.DataFrame(dt_list, columns=['date',]) + df.set_index('date', drop=False, inplace=True) + df.rename(columns={'date':'hits'}, inplace=True) + #df.resample('1D').count().plot(kind='bar') + #return df.resample('1D').count().to_json(date_format='iso') + #print(hits) + #print(df) + if not df.size: + return [ [], [], ] + bins = df.resample('1D').count().reset_index() + bins['date'] = bins['date'].apply(str) + #print(bins) + return [bins['date'].to_list(), bins['hits'].to_list()] + + #plt.show() + + #df = df.groupby([df['date'].dt.to_period('D')]).count().unstack() + #df.groupby(TimeGrouper(freq='10Min')).count().plot(kind='bar') + #df.plot(kind='bar') + + + +# next step +# 1. save timestamp of the fetch +# +# 2. parse it and only fetch since then. afterwards, pull out non-hits. Summarize day/week/month stats. +# +# 2a. merge old and new records, and re-summarize. +# +# 3. Next improvements in GUI. hook up to python server backend. +# +# Get views counts on current teachers. todo: month is hardcoded here +def get_recent_views(id=1): + dt_format = "%Y-%m-%dT%H:%M:%SZ" + default_start_time = dt.strptime("2020-08-14T00:00:00Z", dt_format) + default_start_time = default_start_time.replace(tzinfo=pytz.timezone('UTC')) + end_time = dt.now(pytz.utc) + print("End time is: %s" % str(end_time)) + myheaders = "x,teacher,prct_online,prct_lecture,prct_hybrid,num_sections_last_year,canvasid_x,name,sortable_name,goo,email,index_x,courses,dept,ilearn_name_x,canvasid_y,canvasid_x,bootcamp_grade,bootcamp_date_x,canvasid_y,bootcamp_progress,bootcamp_date_y,index_y,committees".split(",") + + teachers = [row for row in csv.reader(open('cache/teacherdata/staff_main_table.csv','r'))][1:] + + #tt = teachers[6:10] + + summary = {} + + for t in teachers: + name = t[1] + if name=="" or name=="TBA": continue + if not t[6]: continue + the_id = int(float(t[6])) + if the_id == 290: continue # STAFF STAFF + goo = t[9] + print(goo) + + # read log of this person: + try: + prev_logf = codecs.open('cache/teacherdata/activity/%s.json' % goo,'r','utf-8') + prev_log = json.loads(prev_logf.read()) + prev_logf.close() + except: + print("Exception happened on reading previous temp logs.") + prev_log = '' + + if type(prev_log) == dict: + lastfetch = dt.strptime(prev_log['meta']['lastfetch'], dt_format) + lastfetch = lastfetch.replace(tzinfo=pytz.timezone('UTC')) + print("last fetch is: " + str(lastfetch)) + print("Hits BEFORE was: %i" % len(prev_log['raw'])) + else: + lastfetch = default_start_time + prev_log = { "raw":[], } + + end_time = dt.now(pytz.utc) + u = url + "/api/v1/users/%s/page_views?start_time=%s&end_time=%s&per_page=100" % (str(the_id),lastfetch.strftime(dt_format), end_time.strftime(dt_format)) + #print(u) + #input('getting this url') + + print(name + "\t",end='\n') + if 1: # get fresh data? 
+ r = fetch(u) + prev_log['raw'].extend( r ) + summ = activity_summary(prev_log['raw']) + mydata = {'meta':{'lastfetch':end_time.strftime(dt_format)},'summary':summ,'raw':prev_log['raw']} + codecs.open('cache/teacherdata/activity/%s.json' % goo,'w','utf-8').write( json.dumps(mydata,indent=2)) + summary[the_id] = [the_id, name, len(prev_log['raw']),goo, summ ,mydata['meta']] + print("Hits AFTER is: %i" % len(prev_log['raw'])) + codecs.open('cache/teacherdata/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) ) + codecs.open('gui/public/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) ) + + + +# Have they taught online or hybrid classes? +def categorize_user(u): + global role_table, term_courses + their_courses = get_enrlmts_for_user(u, role_table) + num_s = 0 + num_t = 0 + type = 's' + online_only = 1 + is_online = [] + #print their_courses + for x in their_courses.iterrows(): + if len(x): + ttype = x[1]['type'] + if ttype=='StudentEnrollment': num_s += 1 + if ttype=='TeacherEnrollment': num_t += 1 + cid = x[1]['course_id'] + current_term = term_courses[lambda x: x['id']==cid] + if not current_term.empty: + is_online.append(current_term['is_online'].values[0]) + else: online_only = 0 + else: online_only = 0 + if num_t > num_s: type='t' + if len(is_online)==0: online_only = 0 + + for i in is_online: + if i==0: online_only = 0 + #print "Type: " + type + " All online: " + str(online_only) + " Number courses this term: " + str(len(is_online)) + return (u[0],type, online_only, len(is_online)) + + + +########## +########## +########## PHOTOS +########## +########## # todo: threaded + +# Doest the account have a photo loaded? +def checkForAvatar(id=2): + try: + t = url + '/api/v1/users/%s?include[]=last_login' % str(id) + r2 = requests.get(t, headers = header) + result = json.loads(r2.text) + codecs.open('cache/users/%s.txt' % str(id),'w','utf-8').write( json.dumps(result,indent=2) ) + + if 'avatar_url' in result: + if re.search(r'avatar\-50',result['avatar_url']): return 0 + else: return (result['login_id'], result['avatar_url'], result['name']) + except Exception as e: + print("Looking for an avatar / profile pic had a problem: %s" % str(e)) + return 0 + +# Grab em. Change the first if when continuing after problems.... +def downloadPhoto(): + pix_dir = 'cache/picsCanvas2022/' + # Update the list of all ilearn users? + i_last_ix = '-1' + photo_log_f = '' + if 0: ## CHANGE TO 0 IF CRASHED / RESUMING.... 
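+ # The resume logic below boils down to "read the last index written to
+ # cache/fotolog.txt". A compact sketch, assuming one index per line
+ # (last_checkpoint is a hypothetical name, not used elsewhere in this module):
+ #
+ # def last_checkpoint(path="cache/fotolog.txt", default=-1):
+ #     try:
+ #         lines = [ln for ln in open(path).read().splitlines() if ln.strip()]
+ #         return int(lines[-1]) if lines else default
+ #     except (OSError, ValueError):
+ #         return default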
+ ii = fetchAllUsers() + photo_log_f = open("cache/fotolog.txt", "w") + else: + ii = json.loads(codecs.open('cache/allusers_ids.json','r').read()) + photo_log_f = open("cache/fotolog.txt", "r+") + i_last_ix = -1 + try: + ab = photo_log_f.read() + print(ab) + ac = ab.split("\n") + print(ac) + i_last_ix = ac[-2] + print(i_last_ix) + except: + i_last_ix = -1 + i_last_ix = int(i_last_ix) + + + print("Last user index checked was: %s, which is id: %s" % \ + (i_last_ix, ii[i_last_ix] )) + + print("Max index is: %i" % len(ii)) + + + i_last_ix += 1 + for index in range(i_last_ix, len(ii)): + i = ii[index] + photo_log_f.write("\n%i" % i ) + + a = checkForAvatar(i) + if a: + print(str(i) + ":\t" + str(a[0]) + "\t" + str(a[2]) ) + + try: + r = requests.get(a[1], stream=True) + if r.status_code == 200: + r.raw.decode_content = True + h=r.raw + with open(pix_dir + a[0].lower(), 'wb') as f: + shutil.copyfileobj(h, f) + # rename to right file extension + img_type = imghdr.what(pix_dir + a[0].lower()) + if img_type == 'jpeg': img_type = 'jpg' + try: + shutil.move(pix_dir + a[0].lower(),pix_dir + a[0].lower()+'.'+img_type) + except Exception as e: + print(" \tCouldn't rewrite file") + else: + print(str(i) + ":\t didn't get expected photo") + except Exception as e: + print(" \tProblem with download " + str(e)) + else: + print(str(i) + ":\tno user or no photo") + pass + + +def mergePhotoFolders(): + + staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ] + + headers = staff[0] + staff = staff[1:] + + activestaff = [] + + for i,h in enumerate(headers): + #print("%i. %s" % (i,h) ) + pass + + for S in staff: + if S[7] and S[15]: # if teacher (name present) and sp20crns (taught in sp20) + activestaff.append(S[9].lower()) + activestaffset=set(activestaff) + + #return + + a = 'cache/picsCanvas' + b = 'gui/public/picsCanvas2018' + c = 'gui/public/picsCanvasAll' + + + # I want a big list of who has an avatar pic. + + # and i want to know how many updated since last DL, and how many are in only one or the other. + + + old = os.listdir(b) + count = defaultdict(int) + + oldset = set() + newset = set() + + for O in old: + if O.endswith('.jpg') or O.endswith('.png'): + g = O.split(r'.')[0] + oldset.add(g) + + for N in os.listdir(a): + if N.endswith('.jpg') or N.endswith('.png'): + g = N.split(r'.')[0] + newset.add(g) + + """print("Active SP20 Teachers") + print(activestaffset) + + print("Old Avatars") + print(oldset) + + print("New Avatars") + print(newset)""" + + updated_set = oldset.union(newset) + + tch_set = updated_set.intersection(activestaffset) + + only_old = oldset.difference(newset) + + only_new = newset.difference(oldset) + + print("Tch: %i Old: %i New: %i" % (len(activestaffset),len(oldset),len(newset))) + + print("All avatars: %i Teachers: %i Only in old: %i Only in new: %i" % ( len(updated_set), len(tch_set), len(only_old), len(only_new))) + + allpics = os.listdir(c) + + haveapic = {} + for A in allpics: + if A.endswith('.jpg') or A.endswith('.png'): + g = (A.split(r'.')[0]).upper() + + haveapic[g] = A + outie = codecs.open('gui/public/pics.json','w').write( json.dumps( haveapic,indent=2)) + + +def mergePhotoFolders2(): + + staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ] + + headers = staff[0] + staff = staff[1:] + + activestaff = [] + + for i,h in enumerate(headers): + #print("%i. 
%s" % (i,h) ) + pass + + for S in staff: + if S[5]: + activestaff.append(S[9].lower()) + + a = 'cache/picsCanvas' + b = 'gui/public/picsCanvas2018' + c = 'gui/public/picsCanvasAll' + + old = os.listdir(b) + count = defaultdict(int) + for N in os.listdir(a): + if N.endswith('.jpg') or N.endswith('.png'): + g = N.split(r'.')[0] + if g in activestaff: + count['s'] += 1 + if N in old: + #print( "Y - %s" % N) + count['y'] += 1 + else: + #print( "N - %s" %N ) + count['n'] += 1 + else: + #print("x - %s" % N) + count['x'] += 1 + print("Of the 2020 avatars, %i are in the 2018 folder, and %i are new." % (count['y'],count['n'])) + print("Of %i active teachers, %i have avatars." % (len(activestaff),count['s'])) + #print(json.dumps(count,indent=2)) + + + +# Go through my local profile pics, upload any that are missing. +def uploadPhoto(): + files = os.listdir('pics2017') + #print json.dumps(files) + pics_i_have = {} + #goo = "g00188606" + canvas_users = json.loads(open('canvas/users.json','r').read()) + t = url + '/api/v1/users/self/files' + i = 0 + j = 0 + pics_dir = 'pics2017/' + + for x in canvas_users: + j += 1 + if x['login_id'].lower() + '.jpg' in files: + #print x['login_id'] + " " + x['name'] + i += 1 + pics_i_have[x['id']] = x + + print('Canvas users: ' + str(j)) + print('Pic matches: ' + str(i)) + account_count = 0 + ids_i_uploaded = [] + + for id, target in list(pics_i_have.items()): + #if account_count > 50: + # print 'Stopping after 5.' + # break + + print('trying ' + target['name'] + '(' + str(id) + ')') + if checkForAvatar(id): + print("Seems to have avatar loaded.") + continue + + goo = target['login_id'].lower() + local_img = pics_dir + goo + '.jpg' + inform_parameters = { + 'name':goo + '.jpg', + 'size':os.path.getsize(local_img), # read the filesize + 'content_type':'image/jpeg', + 'parent_folder_path':'profile pictures', + 'as_user_id':'{0}'.format(id) + } + + res = requests.post(t, headers = header, data=inform_parameters) + print("Done prepping Canvas for upload, now sending the data...") + json_res = json.loads(res.text,object_pairs_hook=collections.OrderedDict) + files = {'file':open(local_img,'rb').read()} + + _data = list(json_res.items()) + _data[1] = ('upload_params',list(_data[1][1].items())) + print("Yes! 
Done sending pre-emptive 'here comes data' data, now uploading the file...") + upload_file_response = requests.post(json_res['upload_url'],data=_data[1][1],files=files,allow_redirects=False) + # Step 3: Confirm upload + print("Done uploading the file, now confirming the upload...") + confirmation = requests.post(upload_file_response.headers['location'],headers=header) + if 'id' in confirmation.json(): + file_id = confirmation.json()['id'] + else: + print('no id here') + #print(confirmation.json()) + print("upload confirmed...nicely done!") + + time.sleep(1) + # Make api call to set avatar image to the token of the uploaded imaged (file_id) + params = { 'as_user_id':'{0}'.format(id)} + avatar_options = requests.get("https://%s/api/v1/users/%s/avatars"%(domain,'{0}'.format(id)),headers=header,params=params) + #print "\nAvatar options: " + #print avatar_options.json() + for ao in avatar_options.json(): + #print ao.keys() + if ao.get('display_name')==goo + '.jpg': + #print("avatar option found...") + #print((ao.get('display_name'),ao.get('token'), ao.get('url'))) + params['user[avatar][token]'] = ao.get('token') + set_avatar_user = requests.put("https://%s/api/v1/users/%s"%(domain,'{0}'.format(id)),headers=header,params=params) + if set_avatar_user.status_code == 200: + print(('success uploading user avatar for {0}'.format(id))) + account_count += 1 + ids_i_uploaded.append(id) + else: + print('some problem setting avatar') + else: + pass #print 'didnt get right display name?' + print("Uploaded these guys: " + json.dumps(ids_i_uploaded)) + + + + +########## +########## +########## EMAILING PEOPLE +########## +########## + + + +def test_email(): + send_z_email("Peter Howell", "Peter", "phowell@gavilan.edu", ['CSIS85','CSIS42']) + + +def create_ztc_list(): + course_combos = pd.read_csv('cache/teacher_course_oer_email_list.csv') + course_combos.fillna('',inplace=True) + + # read this file and make it a dict (in one line!) + dept_counts = { x[0]:x[1].strip() for x in [ y.split(',') for y in open('cache/teacher_course_oer_deptcount.csv','r').readlines() ][1:] } + + + course_template = "%s " + url_template = "https://docs.google.com/forms/d/e/1FAIpQLSfZLQp6wHFEdqsmpZ7jz2Y8HtKLo8XTAhrE2fyvTDOEgquBDQ/viewform?usp=pp_url&entry.783353363=%s&entry.1130271051=%s" # % (FULLNAME, COURSE1) + + + + # list depts + mydepts = sorted(list(set(course_combos['dept'] ))) + i = 0 + outp = open("output/oer_email_list.csv","w") + outp.write("fullname,firstname,email,link,courses\n") + + ones_i_did = [ int(x) for x in "40 38 31 21 7 12 24 25 1 13 18 22 44 55 56 51 20 16 2 3 4 5 6 8 9 10 11 14 15 17 23 53 52 50 30 48 39 37 54 49 47 46 45 43 42 41 33 32 29 28 27 26".split(" ") ] + + for D in mydepts: + i += 1 + extra = '' + if D in dept_counts: + extra = " (%s)" % dept_counts[D] + extra2 = '' + if i in ones_i_did: + extra2 = "xxxx " + print("%s %i. %s %s" % (extra2,i,D,extra)) + choice_list = input("Which department? 
(for multiple, separate with spaces) ").split(' ') + + all_people_df = [] + + for choice in choice_list: + is_cs = course_combos['dept']==mydepts[int(choice)-1] + filtered = pd.DataFrame(course_combos[is_cs]) + if len(all_people_df): all_people_df = pd.concat([filtered,all_people_df]) + else: all_people_df = filtered + print(mydepts[int(choice)-1]) + print(all_people_df) + print(' ') + all_people_df.sort_values(by=['name'],inplace=True) + print(all_people_df) + + b = all_people_df.groupby(['name']) + for name,group in b: + if name == 'no data': continue + nameparts = name.split(', ') + fullname = nameparts[1] + ' ' + nameparts[0] + firstname = nameparts[1] + + outp.write(fullname + ',' + firstname + ',') + email = '' + link = '' + courses = [] + flag = 1 + for i in group.iterrows(): + g = i[1] # wtf is this shi..... + this_course = g.dept + ' ' + str(g.codenum) + g.codeletter + courses.append( this_course ) #print(g) + email = g.email + if flag: + link = url_template % (fullname, this_course) + flag = 0 + + outp.write(email + ',' + link + "," + " ".join(courses) + "\n") + + outp.close() + + +########## +########## +########## FORENSICS TYPE STUFF +########## +########## + +# better name for this standard fetch. so they stay together in alpha order too.... + +def get_user_info(id): + u = fetch( '/api/v1/users/%i' % id ) + ff = codecs.open('cache/users/%i.txt' % id, 'w', 'utf-8') + ff.write( json.dumps(u, indent=2)) + return u + + +# these are any messages that get pushed out to their email +def comm_mssgs_for_user(uid=0): + if not uid: + uid = input('Canvas id of the user? ') + u = url + '/api/v1/comm_messages?user_id=%s&start_time=%s&end_time=%s' % (uid,'2021-01-01T01:01:01Z','2021-08-01T01:01:01Z') # &filter[]=user_%s' % uid + convos = fetch(u,1) + + oo = codecs.open('cache/comms_push_user_%s.txt' % str(uid), 'w') + oo.write('USER %s\n' % uid) + oo.write(json.dumps(convos, indent=2)) + + print(convos) + + +# +def convos_for_user(uid=0): + if not uid: + uid = input('Canvas id of the user? 
') + u = url + '/api/v1/conversations?include_all_conversation_ids=true&as_user_id=%s' % uid # &filter[]=user_%s' % uid + convos = fetch(u,1) + + oo = codecs.open('cache/convo_user_%s.txt' % str(uid), 'w') + oo.write('USER %s\n' % uid) + oo.write(json.dumps(convos, indent=2)) + + convo_ids_list = convos["conversation_ids"] + print(convo_ids_list) + + u2 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=archived&as_user_id=%s' % uid # &filter[]=user_%s' % uid + archived_convos = fetch(u2,1) + try: + aconvo_ids_list = archived_convos["conversations_ids"] + print(aconvo_ids_list) + except: + print("didnt seem to be any archived.") + aconvo_ids_list = [] + + u3 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=sent&as_user_id=%s' % uid # &filter[]=user_%s' % uid + sent_convos = fetch(u3,1) + try: + sconvo_ids_list = sent_convos["conversations_ids"] + print(sconvo_ids_list) + except: + print("didnt seem to be any sent.") + sconvo_ids_list = [] + + convo_ids_list.extend(aconvo_ids_list) + convo_ids_list.extend(sconvo_ids_list) + + + ## + ## Now get all the messages in each of these conversations + ## + + for cid in convo_ids_list: + print("Fetching conversation id: %s" % cid) + oo.write("\n\n----------------\nconversation id: %s\n\n" % cid) + + u4 = url + '/api/v1/conversations/%s?as_user_id=%s' % (cid,uid) # ' % (cid, uid + coverstn = fetch(u4,1) + oo.write("\n%s\n\n" % json.dumps(coverstn,indent=2)) + + + + + + """ + for c in convos: + c['participants'] = ", ".join([ x['name'] for x in c['participants'] ]) + includes = tuple("last_message subject last_message_at participants".split(" ")) + convos = list( \ + reversed([ funcy.project(x, includes) for x in convos ])) + """ + + # + + #print(json.dumps(convos, indent=2)) + + +# single q sub +def quiz_get_sub(courseid, quizid, subid=0): + u = url + "/api/v1/courses/%s/quizzes/%s/submissions/%s" % ( str(courseid), str(quizid), str(subid) ) + + u = url + "/api/v1/courses/%s/quizzes/%s/questions?quiz_submission_id=%s" % \ + ( str(courseid), str(quizid), str(subid) ) + + u = url + "/api/v1/courses/%s/assignments/%s/submissions/%s?include[]=submission_history" % \ + ( str(courseid), str(quizid), str(subid) ) + + u = url + "/api/v1/courses/%s/students/submissions?student_ids[]=all&include=submission_history&grouped=true&workflow_state=submitted" % str(courseid) + return fetch(u) + + #?quiz_submission_id=%s" + +# quiz submissions for quiz id x, in course id y +def quiz_submissions(courseid=9768, quizid=32580): + #subs = quiz_get_sub(courseid, quizid) + #print( json.dumps( subs, indent=2 ) ) + + if 1: + # POST + data = { "quiz_report[includes_all_versions]": "true", "quiz_report[report_type]": "student_analysis" } + + u = url + "/api/v1/courses/%s/quizzes/%s/reports?" 
% ( str(courseid), str(quizid) ) + res = requests.post(u, headers = header, data=data) + print(res.content) + + #u2 = url + "/api/v1/courses/%s/quizzes/%s/reports" % ( str(courseid), str(quizid) ) + #res2 = fetch(u2) + #print( json.dumps(res2.content, indent=2)) + + jres2 = json.loads( res.content ) + print(jres2) + if jres2['file'] and jres2['file']['url']: + u3 = jres2['file']['url'] + r = requests.get(u3, headers=header, allow_redirects=True) + open('cache/quizreport.txt', 'wb').write(r.content) + return + + for R in res2: + if R['id'] == 7124: + u3 = R['url'] + r = requests.get(u3, headers=header, allow_redirects=True) + open('cache/quizreport.txt', 'wb').write(r.content) + return + + u3 = url + "/api/v1/courses/%s/quizzes/%s/reports/%s" % ( str(courseid), str(quizid), res2[''] ) + + oo = codecs.open('cache/submissions.json','w', 'utf-8') + oo.write('[\n') + for s in subs: + if len(s['submissions']): + j = json.dumps(s, indent=2) + print(j) + oo.write(j) + oo.write('\n') + + oo.write('\n]\n') + return 0 + + + #u = url + "/api/v1/courses/%s/quizzes/%s/submissions?include[]=submission" % (str(courseid), str(quizid)) + u = url + "/api/v1/courses/%s/quizzes/%s/submissions" % (str(courseid), str(quizid)) + subs = fetch(u, 0) + print( json.dumps( subs, indent=1 ) ) + + for S in subs['quiz_submissions']: + print(json.dumps(S)) + submis = quiz_get_sub(courseid, quizid, S['id']) + print(json.dumps(submis, indent=2)) + + + +# return (timeblock, course, read=0,write=1) +def requests_line(line,i=0): + try: + L = line # strip? + if type(L) == type(b'abc'): L = line.decode('utf-8') + for pattern in unwanted_req_paths: + if pattern in L: + return 0 + i = 0 + line_parts = list(csv.reader( [L] ))[0] + #for p in line_parts: + # print("%i\t%s" % (i, p)) + # i += 1 + + d = parser.parse(line_parts[7]) + d = d.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) + d = timeblock_24hr_from_dt(d) + + #r = re.search('context\'\:\s(\d+)', line_parts[22]) + #c = 0 + #if r: + # c = r.groups(1) + str1 = line_parts[20] + str2 = str1.replace("'",'"') + str2 = str2.replace("None",'""') + #print(str2) + j = json.loads(str2 ) + c = j['context'] + a = line_parts[5] + #print( str( (d, c, a) )) + return (d, str(c), a) + except Exception as e: + #print("Exception: " + str(e)) + return 0 + + +# +def report_logs(id=0): + if not id: + L = ['10531', ] + else: + L = [ id, ] + report = [] + for id in L: + emt_by_id = course_enrollment(id) + for U in emt_by_id.values(): + user_d = defaultdict( int ) + print( "Lookin at user: %s" % U['user']['name'] ) + report.append( "User: %s\n" % U['user']['name'] ) + log_file_name = 'cache/users/logs/%i.csv' % U['user']['id'] + if path.exists(log_file_name): + print("Log file %s exists" % log_file_name) + temp = open(log_file_name, 'r').readlines() + for T in temp[1:]: + #print(T) + result = requests_line(T) + if result: + (d, c, a) = result + if c == id: + user_d[d] += 1 + print(json.dumps(user_d, indent=2)) + for V in sorted(user_d.keys()): + report.append( "\t%s: %i\n" % ( dt_from_24hr_timeblock(V), user_d[V]) ) + report.append("\n\n") + return report + + +def track_users_in_sem(): + L = users_this_semester_db() + sL = list(L) + sL.sort(reverse=True) + fetch_queue = queue.Queue() + + for i in range(num_threads): + worker = Thread(target=track_user_q, args=(i,fetch_queue)) + worker.setDaemon(True) + worker.start() + + for U in sL: + print( "adding %s to the queue" % U ) + fetch_queue.put( U ) + + fetch_queue.join() + print("Done.") + + +def track_users_in_class(L=[]): 
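+ # Both this function and track_users_in_sem use the same producer/consumer
+ # pattern: N daemon threads pull user ids off a Queue and call track_user.
+ # In miniature (run_pool is a hypothetical name; num_threads and the actual
+ # worker come from this module):
+ #
+ # def run_pool(items, work_fn, n_workers=num_threads):
+ #     q = queue.Queue()
+ #     def worker():
+ #         while True:
+ #             work_fn(q.get())
+ #             q.task_done()
+ #     for _ in range(n_workers):
+ #         Thread(target=worker, daemon=True).start()
+ #     for it in items:
+ #         q.put(it)
+ #     q.join()   # blocks until every queued item is marked done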
+ if len(L)==0: + #id = '10531' + ids = input("Course ids, separated with comma: ") + L = [x for x in ids.split(',')] + print("Getting users in: " + str(L)) + + fetch_queue = queue.Queue() + + for i in range(num_threads): + worker = Thread(target=track_user_q, args=(i,fetch_queue)) + worker.setDaemon(True) + worker.start() + + + users_set = set() + for id in L: + emt_by_id = course_enrollment(id) + print(emt_by_id) + for U in emt_by_id.values(): + if not U['user_id'] in users_set: + print(U) + print( "adding %s to the queue" % U['user']['name'] ) + fetch_queue.put( U['user_id'] ) + users_set.add(U['user_id']) + + all_reports = [] + fetch_queue.join() + print("Done with %i users in these courses." % len(users_set)) + for id in L: + rpt = report_logs(id) + all_reports.append(rpt) + outp = codecs.open('cache/courses/report_%s.txt' % id, 'w', 'utf-8') + outp.write(''.join(rpt)) + outp.close() + return all_reports + +def track_user_q(id, q): + while True: + user = q.get() + print("Thread %i: Going to download user %s" % (id, str(user))) + try: + track_user(user, id) + except FetchError as e: + pass + q.task_done() + + +# honestly it doesn't make much sense to get full histories this way if they're +# already in the canvas data tables.... + +# just the most recent hits or a short period +# +# Live data would be better. + +# Maintain local logs. Look to see if we have some, download logs since then for a user. +def track_user(id=0,qid=0): + global recvd_date + L = [id,] + if not id: + ids = input("User ids (1 or more separated by comma): ") + L = [int(x) for x in ids.split(',')] + print("Getting users: " + json.dumps(L)) + + + for id in L: + id = int(id) + # Open info file if it exists, check for last day retrived + try: + infofile = open("cache/users/%i.txt" % id, 'r') + info = json.loads( infofile.read() ) + + # TODO: set up this info file if it isn't there. check any changes too. it + # was written where?.... 
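+ # One way the TODO above could be handled, reusing get_user_info (which
+ # already fetches the profile and writes cache/users/<id>.txt). Illustrative
+ # sketch only; ensure_user_info is a hypothetical name:
+ #
+ # def ensure_user_info(uid):
+ #     info_path = "cache/users/%i.txt" % uid
+ #     if not path.exists(info_path):
+ #         return get_user_info(uid)      # fetch and write the cache file
+ #     with open(info_path) as f:
+ #         return json.loads(f.read())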
+ infofile.close() + except Exception as e: + print("failed to open info file for user id %i" % id) + + info = get_user_info(id) + + print("(%i) Student %i Info: " % (qid,id)) + #print( json.dumps(info, indent=2)) + + url_addition = "" + + if 1: # hard code dates + + url_addition = "?start_time=%s&end_time=%s" % ( '2022-06-15T00:00:00-07:00', '2022-12-31T00:00:00-07:00' ) + elif 'last_days_log' in info: + print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id'])) + print("Last day logged was: %s" % info['last_days_log']) + url_addition = "?start_time=%s" % info['last_days_log'] + the_stamp = parser.parse(info['last_days_log']) + the_stamp = the_stamp.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) + now = dt.now() + now = now.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) + dif = now - the_stamp + print("It was %s ago" % str(dif)) + if the_stamp < lds_stamp: + print("Too long, taking default") + url_addition = "?start_time=%s" % log_default_startdate + + #lds_stamp = parser.parse(log_default_startdate) + +########## + else: + url_addition = "?start_time=%s" % log_default_startdate + #if dif.days > 1: + + url = "/api/v1/users/%i/page_views%s" % (id, url_addition) + print(url) + + try: + + api_gen = fetch_stream(url,0) + + log_file_name = 'cache/users/logs/%i.csv' % id + if path.exists(log_file_name): + print("Log file %s exists" % log_file_name) + temp = open(log_file_name, 'a', newline='') + csv_writer = csv.writer(temp) + else: + print("Creating new log file: %s" % log_file_name) + temp = open(log_file_name, 'w', newline='') ### TODO + csv_writer = csv.writer(temp) + + + count = 0 + for result in api_gen: + if count == 0 and len(result): + header = result[0].keys() + csv_writer.writerow(header) + # results come in newest first.... + recvd_date = result[0]['updated_at'] + print("(%i) Most recent hit is %s" % (qid,recvd_date)) + + count += len(result) + indent = " " * qid + #print("(%i) Got %i records, %i so far" % (qid,len(result),count)) + print("(%s - %i) %s %i" % (qid, id, indent, count)) + if count > max_log_count: + print("Too many logs, bailing. 
sorry.") + break + + for R in result: + csv_writer.writerow(R.values()) + + latest = parser.parse(recvd_date) + #last_full_day = (latest - timedelta(days=1)).isoformat() + info['last_days_log'] = recvd_date #last_full_day + + infofile = open("cache/users/%i.txt" % id, 'w') + infofile.write(json.dumps( info, indent=2 )) + infofile.close() + + print("(%i) Output to 'cache/users/log/%i.csv'" % (qid,id)) + except FetchError as e: + print("Getting a 502 error.") + raise FetchError() + except Exception as e2: + print("Got an error receiving logs: %s" % str(e2)) + +# +def track_users_by_teacherclass(): + all_teachers = teachers_courses_semester() + + skip_to = "Punit Kamrah" + skipping = 1 + + grouped = funcy.group_by( lambda x: x[4], all_teachers ) + g2 = {} + for k,v in grouped.items(): + print(k) + if skipping and skip_to != k: + print("skipping") + continue + skipping = 0 + + g2[k] = list(funcy.distinct( v, 1 )) + print("\n\n\n\n\n") + print(k) + print("\n\n\n\n\n") + + teacherfile = codecs.open('cache/teacherdata/reports/%s.txt' % k.replace(" ","_"),'w','utf-8') + class_ids = funcy.lpluck(1,v) + class_names = funcy.lpluck(2,v) + print(class_ids) + print(class_names) + + rpts = track_users_in_class(class_ids) + + for i, R in enumerate(rpts): + teacherfile.write('\n\n\n---\n\n%s \n\n' % class_names[i]) + teacherfile.write(''.join(R)) + teacherfile.flush() + teacherfile.close() + + + + print(json.dumps(g2, indent=2)) + + +def nlp_sample(): + # Stream a training corpus directly from S3. + #corpus = corpora.MmCorpus("s3://path/to/corpus") + + stemmer = stem.porter.PorterStemmer() + + strings = [ + "Human machine interface for lab abc computer applications", + "A survey of user opinion of computer system response time", + "The EPS user interface management system", + "System and human system engineering testing of EPS", + "Relation of user perceived response time to error measurement", + "The generation of random binary unordered trees", + "The intersection graph of paths in trees", + "Graph minors IV Widths of trees and well quasi ordering", + "Graph minors A survey", +] + processed = [ [ stemmer.stem(y) for y in utils.simple_preprocess(x, min_len=4)] for x in strings] + print(processed) + dictionary = corpora.Dictionary( processed ) + dct = dictionary + print(dictionary) + + corpus = [dictionary.doc2bow(text) for text in processed] + + print(corpus) + + # Train Latent Semantic Indexing with 200D vectors. + lsi = models.LsiModel(corpus, num_topics=4) + print(lsi.print_topics(-1)) + + # Convert another corpus to the LSI space and index it. + #index = similarities.MatrixSimilarity(lsi[another_corpus]) + + tfidf = models.TfidfModel(corpus) + + #index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=12) + index = similarities.MatrixSimilarity(lsi[corpus]) + print(index) + + + # Compute similarity of a query vs indexed documents. 
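+ # The query path used below is: tokens -> doc2bow -> LSI vector -> cosine
+ # similarity against the indexed corpus. Condensed form (same objects as
+ # above; query_vec and ranked are just illustrative names):
+ #
+ # query_vec = lsi[dictionary.doc2bow("tree graph".split())]
+ # ranked = sorted(enumerate(index[query_vec]), key=lambda p: p[1], reverse=True)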
+ query = "tree graph".split() + query_bow = dictionary.doc2bow(query) + vec_lsi = lsi[query_bow] + + print(query_bow) + print(tfidf[query_bow]) + print(vec_lsi) + print("ok") + + # LdaMulticore + + lda_model = models.LdaModel(corpus=corpus, + id2word=dictionary, + random_state=100, + num_topics=4, + passes=40, + chunksize=1000, + #batch=False, + alpha='asymmetric', + decay=0.5, + offset=64, + eta=None, + eval_every=0, + iterations=100, + gamma_threshold=0.001, + per_word_topics=True) + lda_model.save('cache/lda_model.model') + print(lda_model.print_topics(-1)) + print(lda_model) + + for c in lda_model[corpus]: + print("Document Topics : ", c[0]) # [(Topics, Perc Contrib)] + print("Word id, Topics : ", c[1][:3]) # [(Word id, [Topics])] + print("Phi Values (word id) : ", c[2][:2]) # [(Word id, [(Topic, Phi Value)])] + print("Word, Topics : ", [(dct[wd], topic) for wd, topic in c[1][:2]]) # [(Word, [Topics])] + print("Phi Values (word) : ", [(dct[wd], topic) for wd, topic in c[2][:2]]) # [(Word, [(Topic, Phi Value)])] + print("------------------------------------------------------\n") + + + sims = index[vec_lsi] + print("ok2") + print(list(enumerate(sims))) + + for document_number, score in sorted(enumerate(sims), key=lambda x: x[1], reverse=True): + print(document_number, score) + + +def nlp_sample2(): + # load english language model + nlp = spacy.load('en_core_web_sm',disable=['ner','textcat']) + + text = "This is a sample sentence." + + # create spacy + doc = nlp(text) + + for token in doc: + print(token.text,'->',token.pos_) + + + + + +def one_course_enrol(): + + users = '96 18771 2693 5863 327'.split() + course = '11015' + the_type = 'TeacherEnrollment' # 'StudentEnrollment' + u = url + '/api/v1/courses/%s/enrollments' % course + + for user in users: + param = { + 'enrollment[user_id]':user, + 'enrollment[type]': the_type, + 'enrollment[enrollment_state]': 'active', + } + + res = requests.post(u, headers = header, data=param) + print(res.text) + + +def find_new_teachers(): + filename = "cache/fa22_sched.json" + jj = json.loads(codecs.open(filename,'r','utf-8').read()) + for J in jj: + print( J['teacher']) + + + +def user_db_sync(): + #fetch all personnel dir entries from dir_api.php. PERSL unique emails + persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1") + persl_emails = set([x.lower() for x in funcy.pluck('email',persl)]) + #persl_ids = set([x.lower() for x in funcy.pluck('email',persl)]) + # + #fetch all staff from ilearn ILRN unique emails + ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read()) + ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)]) + # + #fetch all conf_users from dir_api.php CONUSR unique emails + conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1") + conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)]) + + #fetch all gavi_personnel_ext from dir_api.php GPEREXT must have column 'personnel' or 'c_users' or both. 
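+ # The CSV written below is just a membership flag per data source; each row
+ # is roughly equivalent to the following (illustrative; 'row' is not a name
+ # used in this function):
+ #
+ # row = [e, int(e in persl_emails), int(e in ilrn_emails), int(e in conusr_emails)]
+ # fout.write(",".join(str(x) for x in row) + "\n")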
+ gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1") + + all_emails = set(persl_emails) + all_emails.update(ilrn_emails) + all_emails.update(conusr_emails) + + all_emails = list(all_emails) + all_emails.sort() + + fout = codecs.open('cache/db_staff_report.csv','w','utf-8') + fout.write('email,personnel_dir,ilearn,conf_user\n') + for e in all_emails: + + if e in ilrn_emails and not (e in conusr_emails) and e.endswith('@gavilan.edu'): + E = funcy.first(funcy.where(ilrn,email=e)) + goo = E['login_id'][3:] + #print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) ) + print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) ) + + # goo (minus G00) email, and name go into conf_users + + fout.write(e+',') + if e in persl_emails: + fout.write('1,') + else: + fout.write('0,') + if e in ilrn_emails: + fout.write('1,') + else: + fout.write('0,') + if e in conusr_emails: + fout.write('1,') + else: + fout.write('0,') + fout.write('\n') + fout.close() + # + + #print( json.dumps( [persl,ilrn,conusr,gperext], indent=2 ) ) + print('done') + +import traceback + + +def find_no_goo(): + + DO_DELETE_USERS = 0 + DO_DELETE_PORTFOLIOS = 0 + + output = codecs.open('cache/no_goo_numbers.json','w','utf-8') + output2 = codecs.open('cache/wrong_root_acct.json','w','utf-8') + output3 = codecs.open('cache/wrong_sis_import_id.json','w','utf-8') + output4 = codecs.open('cache/bad_portfolios.json','w','utf-8') + #output5 = codecs.open('cache/bad_portfolios_detail.html','w','utf-8') + all = [] + no_root = [] + no_sis = [] + port = [] + i = 0 + j = 0 + k = 0 + p = 0 + users = json.loads(codecs.open('cache/allusers.json','r','utf-8').read()) + for u in users: + if not 'login_id' in u: + print(u['name']) + i+=1 + all.append(u) + user_port = [] + pp = fetch(url + '/api/v1/users/%s/eportfolios' % str(u['id'])) + for p_user in pp: + try: + user_port.append( fetch(url+'/api/v1/eportfolios/%s' % str(p_user['id']) ) ) + if DO_DELETE_PORTFOLIOS: + output5.write("