From e7a80d888095488f3ab0a29af157b6c2bc0c0dd6 Mon Sep 17 00:00:00 2001
From: Coding with Peter
Date: Tue, 6 Feb 2024 07:45:07 -0800
Subject: [PATCH] req.txt and ezproxy script

---
 content.py       |  32 ++++++++
 localcache2.py   | 204 +++++++++++++++++++++++++++++++++++++++++++++++
 pipelines.py     |  70 +++++++++++++++-
 requirements.txt |  51 ++++++++++++
 4 files changed, 355 insertions(+), 2 deletions(-)
 create mode 100644 localcache2.py

diff --git a/content.py b/content.py
index 66d5c08..e7f96bb 100644
--- a/content.py
+++ b/content.py
@@ -1248,6 +1248,37 @@ def search_embeddings():
 
         print(f'Top {i+1}: {r}, {search_index[r]}')   #{file} - {sentence} - (Score: {score})')
+
+def repair_ezproxy_links():
+    from localcache2 import pages_in_term
+
+    # get all pages in term
+    all_pages = pages_in_term()
+
+    # row layout: c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name, wp.body
+    for p in all_pages:
+        course = p[1]
+        title = p[4]
+        url = p[5]
+        body = p[7]
+        # print(body)
+        try:
+            #s = re.search('''["']https:\/\/ezproxy\.gavilan\.edu\/login\?url=(.*)["']''',body)
+            a = re.search(r'Online Library Services', title)
+            if a:
+                continue
+            s = re.findall(r'\n.*ezproxy.*\n', body)
+            if s:
+                print(course, title, url)
+                print("   ", s, "\n")   # s.group())
+        except Exception as e:
+            #print(f"Skipped: {title}, {e}")
+            pass   # wiki page body can be NULL, which makes re.findall raise a TypeError
+
+
+
+
+
 if __name__ == "__main__":
 
     print ('')
@@ -1267,6 +1298,7 @@ if __name__ == "__main__":
         14: ['do a vector search', search_embeddings],
         15: ['test priority', test_priority],
         16: ['test embed', test_embed],
+        17: ['repair ezproxy links', repair_ezproxy_links],
     }
 
     if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
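
Note: repair_ezproxy_links() above only finds and prints pages whose bodies mention ezproxy; the rewrite step is not implemented in this patch. Below is a minimal sketch of one possible repair pass. It assumes the breakage being chased is un-proxied http:// targets inside login?url= links, and it assumes `url` and `header` (exported by canvas_secrets via pipelines.py) are the Canvas base URL and auth header. fix_body() and push_page() are hypothetical helpers; the PUT call is the standard Canvas wiki-pages update endpoint.

    import re, requests

    # assumption: the problem is http:// targets behind the ezproxy login redirect
    EZPROXY = re.compile(r'(https://ezproxy\.gavilan\.edu/login\?url=)http://')

    def fix_body(body):
        # upgrade the proxied target to https://
        return EZPROXY.sub(r'\1https://', body)

    def push_page(course_id, page_url, new_body):
        # Canvas REST: PUT /api/v1/courses/:course_id/pages/:url updates a wiki page
        r = requests.put(f'{url}/api/v1/courses/{course_id}/pages/{page_url}',
                         headers=header, data={'wiki_page[body]': new_body})
        r.raise_for_status()

Driven from pages_in_term() rows, that would look like push_page(p[0], p[5], fix_body(p[7])) for each flagged page.
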
diff --git a/localcache2.py b/localcache2.py
new file mode 100644
index 0000000..a1908b1
--- /dev/null
+++ b/localcache2.py
@@ -0,0 +1,204 @@
+# Local data, saving and manipulating
+
+import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
+import pandas as pd
+import numpy as np
+from collections import defaultdict
+from datetime import datetime as dt
+from datetime import timedelta
+from dateutil.parser import parse
+from os.path import exists, getmtime
+from pipelines import sync_non_interactive, url, header, gp, dean
+from tabulate import tabulate
+
+#########
+######### LOCAL DB
+#########
+
+CON = None      # module-level handles, set by db()
+CURSOR = None
+
+def db():
+    global CON, CURSOR
+    CON = psycopg2.connect(database="db",
+                           host="192.168.1.6",
+                           user="postgres",
+                           password="rolley34",
+                           port="5432")
+
+    CURSOR = CON.cursor()
+    return CON, CURSOR
+
+
+'''
+# Helper for the next function: upload one new user directly to the conf database on gavilan.
+def employees_refresh_flex(data):
+    try:
+        data['a'] = 'set/newuser'
+        data['sis_user_id'] = data['sis_user_id'][3:]
+        print("\nUploading this: \n")
+        print(json.dumps(data, indent=2))
+        print("\n")
+        a = input("Continue (y) or skip (n) ? ")
+        if a == 'y':
+            # This is what I was missing..........
+            # req.add_header("Content-type", "application/x-www-form-urlencoded")
+            r3 = requests.post('https://www.gavilan.edu/staff/flex/2020/api.php', params=data)
+            print(r3.text)
+            #print(r3.headers)
+    except Exception as ex:
+        print("Failed on: %s\nErr: %s" % (str(data), str(ex)))
+
+
+
+# Everyone in the iLearn DB with an xyz@gavilan.edu email address.
+def all_gav_employees():
+    (connection, cursor) = db()
+    connection.row_factory = dict_factory   # NB: sqlite-style; psycopg2 would need extras.RealDictCursor for E['address'] below
+    q = """SELECT u.canvasid, u.name, u.created, u.sortablename, h.address, h.type, h.workflow_state,
+           h.updated_at, p.last_request_at, p.last_login_at, p.current_login_at, p.last_login_ip,
+           p.current_login_ip, p.sis_user_id, p.unique_name FROM users AS u
+           JOIN comm_channel AS h ON u.id=h.user_id
+           JOIN pseudonym AS p ON p.user_id=u.id
+           WHERE h.address LIKE '%@gavilan.edu'
+           ORDER BY u.sortablename"""
+    cursor = connection.cursor()
+    cursor.execute(q)
+    everyone = cursor.fetchall()
+    everyone_set = set()
+    for E in everyone:
+        try:
+            everyone_set.add( E['address'].lower() )
+        except Exception as e:
+            print("Exception: %s\nwith: %s" % (str(e), str(E)))
+
+    oo = open('cache/temp1.txt','w')
+    oo.write(json.dumps(list(everyone_set), indent=2))
+    existing = requests.get('https://gavilan.edu/staff/flex/2020/api.php?a=get/users')
+    ex = json.loads( existing.text )
+    already_enrolled = set()
+    for usr in ex['users']:
+        try:
+            #already_enrolled.add( (usr['goo'], usr['email'].lower(), usr['name']) )
+            already_enrolled.add( usr['email'].lower() )
+        except Exception as e:
+            print("Exception: %s\nWith: %s" % (str(e), str(usr)))
+
+    oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
+    oo.write(json.dumps(list(already_enrolled), indent=2))
+
+    # conf_users wants: goo, email, name, active.
+    # Emails arrive with random capitalization, and name is "First Last",
+    # sometimes with a middle name in there.
+
+    # using sets: anyone already enrolled is filtered out of the employee list
+    new_emp = [ x for x in everyone_set if x not in already_enrolled ]
+
+    #new_emp = list(funcy.filter( lambda ae: funcy.where( existing, email=ae['email'] ), all_emp ))
+    #new_emp = list(funcy.where( existing, email=b'phowell@gavilan.edu'))
+    print(new_emp)
+    oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
+    oo.write(json.dumps(list(new_emp), indent=2))
+
+    # Now, in the iLearn rows (everyone), find the ones whose email addresses
+    # match the addresses we've decided to add (new_emp).
+    for j in new_emp:
+        #j = new_emp[0]
+        print(j)
+        to_add = list(funcy.where( everyone, address=j ))
+        if to_add:
+            employees_refresh_flex(to_add[0])
+        else:
+            print("Didn't find an entry for that account.")
+    print("done uploading")
+'''
+
+
+
+def teachers_by_term(TERM="202430"):
+    q = f"""SELECT c.id, c.name, c.course_code, c.sis_source_id, c.created_at, c.start_at, c.workflow_state, e.last_attended_at,
+u.id, u.sortable_name, u.created_at FROM canvas.courses AS c
+JOIN canvas.enrollments AS e ON e.course_id=c.id
+JOIN canvas.users AS u ON u.id=e.user_id
+WHERE c.sis_source_id LIKE '{TERM}%' AND e.type='TeacherEnrollment' ORDER BY u.sortable_name, c.course_code;"""
+    (connection, cursor) = db()
+    cursor.execute(q)
+    all_teachers = cursor.fetchall()
+
+    table = [ [t[9], t[1], t[3], t[6]] for t in all_teachers ]
+    print(tabulate(table))
+
+    #for t in all_teachers:
+    #    print("\t".join( [str(x) for x in [t[9], t[1], t[3], t[6]]] ))
+    return all_teachers
+
+
+
+def courses_in_term(TERM="202430"):
+    q = f"""SELECT c.id, c.name, c.course_code, c.sis_source_id, c.workflow_state FROM canvas.courses AS c
+WHERE c.sis_source_id LIKE '{TERM}%' ORDER BY c.course_code;"""
+    (connection, cursor) = db()
+    cursor.execute(q)
+    rows = cursor.fetchall()
+    print(tabulate(rows))
+
+
+
+def pages_in_term(TERM="202430"):
+    q = f"""SELECT c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name, wp.body
+FROM canvas.courses c
+JOIN canvas.wiki_pages wp ON wp.context_id=c.id
+WHERE c.sis_source_id LIKE '{TERM}%'
+ORDER BY c.sis_source_id, wp.title;"""
+    (connection, cursor) = db()
+    cursor.execute(q)
+    rows = cursor.fetchall()
+    #print(tabulate(rows))
+    return rows
+
+
+
+
+
+if __name__ == "__main__":
+
+    print ('')
+    options = {
+        1: ['all teachers', teachers_by_term],
+        2: ['courses in term', courses_in_term],
+        3: ['pages in term', pages_in_term]
+    }
+
+    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
+        resp = int(sys.argv[1])
+        print("\n\nPerforming: %s\n\n" % options[resp][0])
+
+    else:
+        print ('')
+        for key in options:
+            print(str(key) + '.\t' + options[key][0])
+
+        print('')
+        resp = input('Choose: ')
+
+    # Call the chosen function from the options dict
+    options[ int(resp) ][1]()
+
+
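
The term helpers above interpolate TERM directly into the SQL with an f-string, which is workable for a trusted local tool but easy to harden: psycopg2 can bind the LIKE pattern as a parameter instead. A minimal sketch under that assumption (the function name is hypothetical; the query is the same one pages_in_term() uses, and db() is the connection helper from this file):

    def pages_in_term_bound(term="202430"):
        # same query as pages_in_term(), but the pattern is bound via a %s placeholder
        q = """SELECT c.id, c.course_code, c.sis_source_id, wp.id AS wp_id,
                      wp.title, wp.url, c.name, wp.body
               FROM canvas.courses c
               JOIN canvas.wiki_pages wp ON wp.context_id=c.id
               WHERE c.sis_source_id LIKE %s
               ORDER BY c.sis_source_id, wp.title;"""
        (connection, cursor) = db()
        cursor.execute(q, (term + '%',))   # psycopg2 substitutes %s safely
        return cursor.fetchall()
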
diff --git a/pipelines.py b/pipelines.py
index c1b8876..7fadd2c 100644
--- a/pipelines.py
+++ b/pipelines.py
@@ -13,6 +13,12 @@ from deepdiff import DeepDiff
 from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret
 from canvas_secrets import instructure_url, instructure_username, instructure_private_key
 
+import os, asyncio
+from dap.api import DAPClient
+from dap.dap_types import Credentials
+from dap.integration.database import DatabaseConnection
+from dap.replicator.sql import SQLReplicator
+
 
 """
@@ -431,7 +437,65 @@ def get_enrlmts_for_user(user,enrollments):
 ################
 ################
 
+
+# Get Canvas Data, 2024 style: incremental DAP sync of the canvas schema.
+def canvas_data_2024_run():
+    print("Updating all tables.")
+    asyncio.run(canvas_data_2024())
+    print("Done with all tables.")
+
+
+async def canvas_data_2024():
+
+    base_url: str = os.environ["DAP_API_URL"]
+    client_id: str = os.environ["DAP_CLIENT_ID"]
+    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
+    connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"
+    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
+    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)
+
+    async with DatabaseConnection(connection_string).open() as db_connection:
+        async with DAPClient(base_url, credentials) as session:
+            #tables = await session.get_tables("canvas")
+            for table in desired_tables:
+                print(f"  trying to update {table}")
+                try:
+                    #await SQLReplicator(session, db_connection).initialize("canvas", table)
+                    await SQLReplicator(session, db_connection).synchronize("canvas", table)
+                except Exception as e:
+                    print(f"  - skipping {table} because {e}")
+
+
+
+# Set up Canvas Data, 2024 style: initial DAP load that creates and seeds each table.
+def setup_canvas_data_2024_run():
+    print("Setting up all tables.")
+    asyncio.run(setup_canvas_data_2024())
+    print("Done with all tables.")
+
+
+async def setup_canvas_data_2024():
+
+    base_url: str = os.environ["DAP_API_URL"]
+    client_id: str = os.environ["DAP_CLIENT_ID"]
+    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
+    connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"
+
+    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
+    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)
+
+    async with DatabaseConnection(connection_string).open() as db_connection:
+        async with DAPClient(base_url, credentials) as session:
+            #tables = await session.get_tables("canvas")
+            for table in desired_tables:
+                print(f"  {table}")
+                try:
+                    await SQLReplicator(session, db_connection).initialize("canvas", table)
+                except Exception as e:
+                    print(f"  - skipping {table} because {e}")
+
+
 # Get something from Canvas Data
 def do_request(path):
     #Set up the request pieces
@@ -2199,9 +2263,11 @@ if __name__ == "__main__":
         5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] ,
         6: ['Canvas data: interactive sync', interactive ],
         7: ['Canvas data: automated sync', sync_non_interactive ],
-        8: ['Scrape schedule from ssb', scrape_schedule_multi ],
+        8: ['Get canvas data 2024 style', canvas_data_2024_run ],
+        9: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],
+        16: ['Scrape schedule from ssb', scrape_schedule_multi ],
         14: ['Generate latestart schedule', list_latestarts ],
-        9: ['Test ssb calls with python', scrape_schedule_py ],
+        15: ['Test ssb calls with python', scrape_schedule_py ],
         10: ['schedule to db', scrape_for_db ],
         11: ['clean argos draft schedule file', argos_data_from_cvc],
         12: ['make expanded schedule json files of old semesters', expand_old_semesters ],
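
Both DAP entry points read their credentials from the environment, so a run looks roughly like the sketch below. The gateway URL shown is an assumption (the usual Instructure DAP endpoint); DAP_CLIENT_ID and DAP_CLIENT_SECRET must be real Identity Services credentials already set in the environment.

    import os

    os.environ.setdefault("DAP_API_URL", "https://api-gateway.instructure.com")  # assumed default
    # DAP_CLIENT_ID and DAP_CLIENT_SECRET are expected to be exported already.

    setup_canvas_data_2024_run()   # first run: initialize() creates and seeds each table
    canvas_data_2024_run()         # later runs: synchronize() applies incremental changes
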
diff --git a/requirements.txt b/requirements.txt
index c184b47..454b662 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,30 +1,41 @@
+absl-py==2.1.0
 aiofiles==23.2.1
 aiohttp==3.9.3
 aiohttp-retry==2.8.3
 aiomysql==0.2.0
 aiosignal==1.3.1
+annotated-types==0.6.0
 annoy==1.17.3
 arrow==1.3.0
 async-timeout==4.0.3
 asyncpg==0.29.0
 attrs==23.2.0
+Automat==22.10.0
 Babel==2.14.0
 bcrypt==4.1.2
 beautifulsoup4==4.12.3
 bidict==0.22.1
 blinker==1.7.0
+blis==0.7.11
 cachetools==5.3.2
+catalogue==2.0.10
 certifi==2024.2.2
 cffi==1.16.0
 charset-normalizer==3.3.2
 click==8.1.7
+cloudpathlib==0.16.0
 colorama==0.4.6
+confection==0.1.4
+constantly==23.10.4
 contourpy==1.2.0
 courlan==1.0.0
 cryptography==42.0.2
+cssselect==1.2.0
 cycler==0.12.1
+cymem==2.0.8
 dateparser==1.2.0
 deepdiff==6.7.1
+dm-tree==0.1.8
 docxcompose==1.4.0
 docxtpl==0.16.7
 durable-rules==2.0.28
@@ -46,22 +57,30 @@ google-auth-oauthlib==1.2.0
 googleapis-common-protos==1.62.0
 greenlet==3.0.3
 h11==0.14.0
+h5py==3.10.0
 html2markdown==0.1.7
 htmldate==1.7.0
 httplib2==0.22.0
 huggingface-hub==0.20.3
+hyperlink==21.0.0
 ics==0.7.2
 idna==3.6
+incremental==22.10.0
 instructure-dap-client==0.3.18
+itemadapter==0.8.0
+itemloaders==1.1.0
 itsdangerous==2.1.2
 Jinja2==3.1.3
+jmespath==1.0.1
 joblib==1.3.2
 json_strong_typing==0.3.2
 jsondiff==2.0.0
 jsonschema==4.21.1
 jsonschema-specifications==2023.12.1
 jusText==3.0.0
+keras==3.0.4
 kiwisolver==1.4.5
+kneed==0.8.5
 langcodes==3.3.0
 lark==1.1.9
 linkify-it-py==2.0.2
@@ -76,6 +95,8 @@ mdurl==0.1.2
 minizinc==0.9.0
 mpmath==1.3.0
 multidict==6.0.4
+murmurhash==1.0.10
+namex==0.0.7
 networkx==3.2.1
 nltk==3.8.1
 numpy==1.26.3
@@ -89,23 +110,33 @@ paho-mqtt==1.6.1
 pampy==0.3.0
 pandas==2.2.0
 paramiko==3.4.0
+parsel==1.8.1
 path-dict==4.0.0
 pathlib==1.0.1
+patsy==0.5.6
 pdfminer==20191125
 pdfminer.six==20231228
 piexif==1.1.3
 pillow==10.2.0
+plotly==5.18.0
+preshed==3.0.9
+Protego==0.3.0
 protobuf==4.25.2
+psycopg2==2.9.9
 pyarrow==15.0.0
 pyasn1==0.5.1
 pyasn1-modules==0.3.0
 pycparser==2.21
 pycryptodome==3.20.0
+pydantic==2.6.1
+pydantic_core==2.16.2
+PyDispatcher==2.0.7
 pygame==2.5.2
 Pygments==2.17.2
 PyJWT==2.8.0
 PyMySQL==1.1.0
 PyNaCl==1.5.0
+pyOpenSSL==24.0.0
 pypandoc==1.12
 pyparsing==3.1.1
 PyPDF2==3.0.1
@@ -118,10 +149,12 @@ python-socketio==5.11.0
 pytz==2024.1
 pywin32==306
 PyYAML==6.0.1
+queuelib==1.6.2
 redis==5.0.1
 referencing==0.33.0
 regex==2023.12.25
 requests==2.31.0
+requests-file==2.0.0
 requests-oauthlib==1.3.1
 rich==13.7.0
 rpds-py==0.17.1
@@ -130,9 +163,11 @@ safetensors==0.4.2
 schedule==1.2.1
 scikit-learn==1.4.0
 scipy==1.12.0
+Scrapy==2.11.0
 selenium==4.17.2
 sentence-transformers==2.3.1
 sentencepiece==0.1.99
+service-identity==24.1.0
 simple-websocket==1.0.0
 simpy==4.1.1
 six==1.16.0
@@ -140,14 +175,23 @@ smart-open==6.4.0
 sniffio==1.3.0
 sortedcontainers==2.4.0
 soupsieve==2.5
+spacy==3.7.2
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
 SQLAlchemy==2.0.25
+srsly==2.4.8
+statsmodels==0.14.1
 striprtf==0.0.26
 sympy==1.12
+tabulate==0.9.0
 TatSu==5.11.3
+tenacity==8.2.3
 textdistance==4.6.1
 textual==0.48.2
+thinc==8.2.2
 threadpoolctl==3.2.0
 tld==0.13
+tldextract==5.1.1
 tokenizers==0.15.1
 tomd==0.1.3
 toolz==0.12.1
@@ -157,6 +201,9 @@ trafilatura==1.7.0
 transformers==4.37.2
 trio==0.24.0
 trio-websocket==0.11.1
+Twisted==22.10.0
+twisted-iocpsupport==1.0.4
+typer==0.9.0
 types-aiofiles==23.2.0.20240106
 types-python-dateutil==2.8.19.20240106
 typing_extensions==4.9.0
@@ -165,9 +212,13 @@ tzlocal==5.2
 uc-micro-py==1.0.2
 uritemplate==4.1.1
 urllib3==2.2.0
+w3lib==2.1.2
+wasabi==1.1.2
+weasel==0.3.4
 Werkzeug==3.0.1
 Whoosh==2.7.4
 wsproto==1.2.0
 xlwt==1.3.0
 yarl==1.9.4
 yattag==1.15.2
+zope.interface==6.1
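
The new pins pull in several whole dependency stacks at once: Scrapy with its Twisted/parsel/w3lib family, the spaCy family (thinc, srsly, wasabi, and friends), keras/h5py, plotly and statsmodels, plus psycopg2 and tabulate for the new localcache2.py. A quick post-install sanity check might look like the following; the module list is illustrative, not exhaustive.

    # verify the freshly pinned stacks import cleanly and report their versions
    import importlib

    for mod in ("scrapy", "spacy", "keras", "psycopg2", "tabulate", "plotly"):
        print(mod, importlib.import_module(mod).__version__)
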