req.txt and ezproxy script

This commit is contained in:
Coding with Peter 2024-02-06 07:45:07 -08:00
parent 36008e461b
commit e7a80d8880
4 changed files with 355 additions and 2 deletions

View File

@ -1248,6 +1248,37 @@ def search_embeddings():
print(f'Top {i+1}: {r}, {search_index[r]}') #{file} - {sentence} - (Score: {score})')
def repairy_ezproxy_links():
    """Report wiki pages in the current term whose body mentions 'ezproxy'.

    Pulls every page via localcache2.pages_in_term() and prints the course
    code, page title, url and the matching line(s) so stale proxy links can
    be repaired by hand. Pages titled 'Online Library Services' are skipped,
    since those legitimately link through ezproxy.
    """
    from localcache2 import pages_in_term
    # Row layout: c.id, c.course_code, c.sis_source_id, wp.id as wp_id,
    #             wp.title, wp.url, c.name, wp.body
    all_pages = pages_in_term()
    for p in all_pages:
        course = p[1]
        title = p[4]
        url = p[5]
        body = p[7]
        # Pages can come back with NULL title/body; guard explicitly instead
        # of relying on the except-pass below to swallow the TypeError.
        if not title or not body:
            continue
        if re.search(r'Online Library Services', title):
            continue
        try:
            # Capture each whole line that mentions ezproxy, for context.
            s = re.findall(r'\n.*ezproxy.*\n', body)
            if s:
                print(course, title, url)
                print(" ", s, "\n")
        except Exception as e:
            # Best-effort scan: skip any page whose body can't be searched.
            #print(f"Skipped: {title}, {e}")
            pass
if __name__ == "__main__":
print ('')
@ -1267,6 +1298,7 @@ if __name__ == "__main__":
14: ['do a vector search', search_embeddings],
15: ['test priority', test_priority],
16: ['test embed', test_embed],
17: ['repair ezproxy links', repairy_ezproxy_links],
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):

204
localcache2.py Normal file
View File

@ -0,0 +1,204 @@
# Local data, saving and manipulating
import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
from tabulate import tabulate
#########
######### LOCAL DB
#########
# Module-level connection/cursor cache, populated by db(). Initialized to
# empty strings (not None) — callers obtain fresh handles via db() anyway.
CON = ''
CURSOR = ''
def db():
    """Open a connection to the local Postgres cache and return (conn, cursor).

    Also stores both handles in the module-level CON / CURSOR globals so
    other helpers can reuse them.
    """
    global CON, CURSOR
    # The password was hard-coded; it can now be overridden via the
    # LOCAL_DB_PASSWORD environment variable. The default preserves the
    # historical behavior, so existing setups keep working unchanged.
    CON = psycopg2.connect(database="db",
                           host="192.168.1.6",
                           user="postgres",
                           password=os.environ.get("LOCAL_DB_PASSWORD", "rolley34"),
                           port="5432")
    CURSOR = CON.cursor()
    return CON, CURSOR
# Disabled legacy code, kept inside a module-level string so it never runs:
# helpers that pushed new @gavilan.edu employees into the flex-conference
# database. TODO(review): delete once confirmed obsolete, or move to VCS history.
'''
# Help the next function to upload new users directly to conf database on gavilan.
def employees_refresh_flex(data):
    try:
        data['a'] = 'set/newuser'
        data['sis_user_id'] = data['sis_user_id'][3:]
        print("\nUploading this: \n")
        print(json.dumps(data, indent=2))
        print("\n")
        a = input("Continue (y) or skip (n) ? ")
        if a == 'y':
            # This is what I was missing..........
            # req.add_header("Content-type", "application/x-www-form-urlencoded")
            r3 = requests.post('https://www.gavilan.edu/staff/flex/2020/api.php', params=data)
            print(r3.text)
            #print(r3.headers)
    except Exception as ex:
        print("Failed on: %s\nErr: %s" % (str(data),str(ex)))

# Everyone in iLearn DB with an xyz@gavilan.edu email address.
def all_gav_employees():
    (connection,cursor) = db()
    connection.row_factory = dict_factory
    q = """SELECT u.canvasid, u.name, u.created, u.sortablename, h.address, h.type, h.workflow_state,
        h.updated_at, p.last_request_at, p.last_login_at, p.current_login_at, p.last_login_ip,
        p.current_login_ip, p.sis_user_id, p.unique_name FROM users AS u
        JOIN comm_channel AS h ON u.id=h.user_id
        JOIN pseudonym AS p ON p.user_id=u.id
        WHERE h.address LIKE "%@gavilan.edu"
        ORDER BY u.sortablename"""
    cursor = connection.cursor()
    cursor.execute(q)
    everyone = cursor.fetchall()
    everyone_set = set()
    for E in everyone:
        try:
            everyone_set.add( E['address'].lower() )
        except Exception as e:
            print("Exception: %s\nwith: %s" % (str(e), str(E)))
    oo = open('cache/temp1.txt','w')
    oo.write(json.dumps(list(everyone_set), indent=2))
    existing = requests.get('https://gavilan.edu/staff/flex/2020/api.php?a=get/users')
    ex = json.loads( existing.text )
    already_enrolled = set()
    for usr in ex['users']:
        try:
            #already_enrolled.add( (usr['goo'], usr['email'].lower(), usr['name']) )
            already_enrolled.add( usr['email'].lower() )
        except Exception as e:
            print("Exception: %s\nWith: %s" % (str(e),str(usr)))
    oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
    oo.write(json.dumps(list(already_enrolled), indent=2))
    # conf_users wants: goo, email, name, active
    # and emails have random capitalization
    # name is First Last, and sometimes with Middle in there.
    #
    # using sets: to_enroll = [ x for x in students if x not in already_enrolled ]
    new_emp = [ x for x in everyone_set if x not in already_enrolled ]
    # take the all_employee list, filter -> anyone who's in 'existing' is removed
    # funcy.where( lambda x: x['email'] == ae[4] , existing )
    #new_emp = list(funcy.filter( lambda ae: funcy.where( existing, email=ae['email'] ), all_emp ))
    #new_emp = list(funcy.where( existing, email=b'phowell@gavilan.edu')) #ae['email'] ))
    print(new_emp)
    oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
    oo.write(json.dumps(list(new_emp), indent=2))
    # Now, iLearn db (everyone)... find the rows that match the email addresses
    # that we've decided we need to add (new_emp)
    #print(everyone)
    #print( "searching for %s" % j )
    #print( "searched for %s, found: %s" % (j, str(to_add) ))
    #print("\nUploading...\n")
    for j in new_emp:
        #j = new_emp[0]
        print(j)
        to_add = list(funcy.where( everyone, address=j ))
        if to_add:
            employees_refresh_flex(to_add[0])
        else:
            print("Didn't find an entry for that account.")
    print("done uploading")
'''
def teachers_by_term(TERM = "202430"):
    """Print and return every teacher enrollment for a term.

    TERM is the leading SIS term code (e.g. "202430"); courses match on
    sis_source_id LIKE 'TERM%'. Prints a table of (teacher, course name,
    sis id, workflow state) and returns the raw fetchall() rows.
    """
    # The term prefix is bound as a query parameter instead of being
    # interpolated into the SQL text via f-string (avoids SQL injection).
    q = """SELECT c.id, c.name, c.course_code, c.sis_source_id, c.created_at, c.start_at, c.workflow_state, e.last_attended_at,
        u.id, u.sortable_name, u.created_at FROM canvas.courses AS c
        JOIN canvas.enrollments AS e ON e.course_id=c.id
        JOIN canvas.users AS u ON u.id=e.user_id
        WHERE c.sis_source_id LIKE %s AND e.type='TeacherEnrollment' ORDER BY u.sortable_name, c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q, (TERM + '%',))
    all_teachers = cursor.fetchall()
    # Columns 9, 1, 3, 6 = sortable_name, course name, sis id, workflow state.
    table = [[t[9], t[1], t[3], t[6]] for t in all_teachers]
    print(tabulate(table))
    #for t in all_teachers:
    #    print("\t".join( [str(x) for x in [t[9],t[1],t[3],t[6]]]))
    return all_teachers
def courses_in_term(TERM = "202430"):
    """Print every course whose sis_source_id starts with TERM."""
    # Bound parameter instead of f-string interpolation into the SQL text.
    q = """SELECT c.id, c.name, c.course_code, c.sis_source_id, c.workflow_state FROM canvas.courses AS c
        WHERE c.sis_source_id LIKE %s ORDER BY c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q, (TERM + '%',))
    courses = cursor.fetchall()  # renamed from 'all' (shadowed the builtin)
    #table = [ [t[9],t[1],t[3],t[6]] for t in all_teachers]
    print(tabulate(courses))
def pages_in_term(TERM="202430"):
    """Return all wiki pages in courses whose sis_source_id starts with TERM.

    Row layout: c.id, c.course_code, c.sis_source_id, wp.id as wp_id,
    wp.title, wp.url, c.name, wp.body
    """
    # Bound parameter instead of f-string interpolation into the SQL text.
    q = """SELECT c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name , wp.body
        FROM canvas.courses c
        JOIN canvas.wiki_pages wp ON wp.context_id=c.id
        WHERE c.sis_source_id LIKE %s
        ORDER BY c.sis_source_id, wp.title;"""
    (connection, cursor) = db()
    cursor.execute(q, (TERM + '%',))
    pages = cursor.fetchall()  # renamed from 'all' (shadowed the builtin)
    #print(tabulate(pages))
    return pages
# Simple CLI menu: pass a numeric argument to run an option directly,
# or run with no argument to choose interactively.
if __name__ == "__main__":
    print ('')
    # key -> [description, zero-argument function]
    options = {
        1: ['all teachers', teachers_by_term],
        2: ['courses in term', courses_in_term],
        3: ['pages in term', pages_in_term]
    }
    if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print ('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')
    # Call the function in the options dict
    options[ int(resp)][1]()

View File

@ -13,6 +13,12 @@ from deepdiff import DeepDiff
from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret
from canvas_secrets import instructure_url, instructure_username, instructure_private_key from canvas_secrets import instructure_url, instructure_username, instructure_private_key
import os, asyncio
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.integration.database import DatabaseConnection
from dap.replicator.sql import SQLReplicator
""" """
@ -432,6 +438,64 @@ def get_enrlmts_for_user(user,enrollments):
################ ################
# Get canvas data 2024 style
def canvas_data_2024_run():
    """Blocking entry point: run the async full-table Canvas DAP sync."""
    print("Updating all tables.")
    sync_job = canvas_data_2024()
    asyncio.run(sync_job)
    print("Done with all tables.")
async def canvas_data_2024():
    """Incrementally synchronize the Canvas DAP tables into local Postgres.

    Credentials come from the environment (DAP_API_URL, DAP_CLIENT_ID,
    DAP_CLIENT_SECRET). Tables that fail to synchronize (e.g. never
    initialized) are reported and skipped rather than aborting the run.
    """
    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    # The DSN (with its password) was hard-coded; allow an environment
    # override while defaulting to the historical value for compatibility.
    connection_string: str = os.environ.get(
        "DAP_CONNECTION_STRING",
        "postgresql://postgres:rolley34@192.168.1.6/db")
    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)
    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" trying to update {table} ")
                try:
                    #await SQLReplicator(session, db_connection).initialize("canvas", table)
                    await SQLReplicator(session, db_connection).synchronize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")
# Get canvas data 2024 style
def setup_canvas_data_2024_run():
    """Blocking entry point: run the async one-time DAP table initialization."""
    print("Setting up all tables.")
    setup_job = setup_canvas_data_2024()
    asyncio.run(setup_job)
    print("Done with all tables.")
async def setup_canvas_data_2024():
    """One-time initialization of the Canvas DAP tables in local Postgres.

    Credentials come from the environment (DAP_API_URL, DAP_CLIENT_ID,
    DAP_CLIENT_SECRET). Tables that fail to initialize (e.g. already
    created) are reported and skipped rather than aborting the run.
    """
    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    # The DSN (with its password) was hard-coded; allow an environment
    # override while defaulting to the historical value for compatibility.
    connection_string: str = os.environ.get(
        "DAP_CONNECTION_STRING",
        "postgresql://postgres:rolley34@192.168.1.6/db")
    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)
    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" {table}")
                try:
                    await SQLReplicator(session, db_connection).initialize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")
# Get something from Canvas Data # Get something from Canvas Data
def do_request(path): def do_request(path):
#Set up the request pieces #Set up the request pieces
@ -2199,9 +2263,11 @@ if __name__ == "__main__":
5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] , 5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] ,
6: ['Canvas data: interactive sync', interactive ], 6: ['Canvas data: interactive sync', interactive ],
7: ['Canvas data: automated sync', sync_non_interactive ], 7: ['Canvas data: automated sync', sync_non_interactive ],
8: ['Scrape schedule from ssb', scrape_schedule_multi ], 8: ['Get canvas data 2024 style', canvas_data_2024_run ],
9: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],
16: ['Scrape schedule from ssb', scrape_schedule_multi ],
14: ['Generate latestart schedule', list_latestarts ], 14: ['Generate latestart schedule', list_latestarts ],
9: ['Test ssb calls with python', scrape_schedule_py ], 15: ['Test ssb calls with python', scrape_schedule_py ],
10: ['schedule to db', scrape_for_db ], 10: ['schedule to db', scrape_for_db ],
11: ['clean argos draft schedule file', argos_data_from_cvc], 11: ['clean argos draft schedule file', argos_data_from_cvc],
12: ['make expanded schedule json files of old semesters', expand_old_semesters ], 12: ['make expanded schedule json files of old semesters', expand_old_semesters ],

View File

@ -1,30 +1,41 @@
absl-py==2.1.0
aiofiles==23.2.1 aiofiles==23.2.1
aiohttp==3.9.3 aiohttp==3.9.3
aiohttp-retry==2.8.3 aiohttp-retry==2.8.3
aiomysql==0.2.0 aiomysql==0.2.0
aiosignal==1.3.1 aiosignal==1.3.1
annotated-types==0.6.0
annoy==1.17.3 annoy==1.17.3
arrow==1.3.0 arrow==1.3.0
async-timeout==4.0.3 async-timeout==4.0.3
asyncpg==0.29.0 asyncpg==0.29.0
attrs==23.2.0 attrs==23.2.0
Automat==22.10.0
Babel==2.14.0 Babel==2.14.0
bcrypt==4.1.2 bcrypt==4.1.2
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
bidict==0.22.1 bidict==0.22.1
blinker==1.7.0 blinker==1.7.0
blis==0.7.11
cachetools==5.3.2 cachetools==5.3.2
catalogue==2.0.10
certifi==2024.2.2 certifi==2024.2.2
cffi==1.16.0 cffi==1.16.0
charset-normalizer==3.3.2 charset-normalizer==3.3.2
click==8.1.7 click==8.1.7
cloudpathlib==0.16.0
colorama==0.4.6 colorama==0.4.6
confection==0.1.4
constantly==23.10.4
contourpy==1.2.0 contourpy==1.2.0
courlan==1.0.0 courlan==1.0.0
cryptography==42.0.2 cryptography==42.0.2
cssselect==1.2.0
cycler==0.12.1 cycler==0.12.1
cymem==2.0.8
dateparser==1.2.0 dateparser==1.2.0
deepdiff==6.7.1 deepdiff==6.7.1
dm-tree==0.1.8
docxcompose==1.4.0 docxcompose==1.4.0
docxtpl==0.16.7 docxtpl==0.16.7
durable-rules==2.0.28 durable-rules==2.0.28
@ -46,22 +57,30 @@ google-auth-oauthlib==1.2.0
googleapis-common-protos==1.62.0 googleapis-common-protos==1.62.0
greenlet==3.0.3 greenlet==3.0.3
h11==0.14.0 h11==0.14.0
h5py==3.10.0
html2markdown==0.1.7 html2markdown==0.1.7
htmldate==1.7.0 htmldate==1.7.0
httplib2==0.22.0 httplib2==0.22.0
huggingface-hub==0.20.3 huggingface-hub==0.20.3
hyperlink==21.0.0
ics==0.7.2 ics==0.7.2
idna==3.6 idna==3.6
incremental==22.10.0
instructure-dap-client==0.3.18 instructure-dap-client==0.3.18
itemadapter==0.8.0
itemloaders==1.1.0
itsdangerous==2.1.2 itsdangerous==2.1.2
Jinja2==3.1.3 Jinja2==3.1.3
jmespath==1.0.1
joblib==1.3.2 joblib==1.3.2
json_strong_typing==0.3.2 json_strong_typing==0.3.2
jsondiff==2.0.0 jsondiff==2.0.0
jsonschema==4.21.1 jsonschema==4.21.1
jsonschema-specifications==2023.12.1 jsonschema-specifications==2023.12.1
jusText==3.0.0 jusText==3.0.0
keras==3.0.4
kiwisolver==1.4.5 kiwisolver==1.4.5
kneed==0.8.5
langcodes==3.3.0 langcodes==3.3.0
lark==1.1.9 lark==1.1.9
linkify-it-py==2.0.2 linkify-it-py==2.0.2
@ -76,6 +95,8 @@ mdurl==0.1.2
minizinc==0.9.0 minizinc==0.9.0
mpmath==1.3.0 mpmath==1.3.0
multidict==6.0.4 multidict==6.0.4
murmurhash==1.0.10
namex==0.0.7
networkx==3.2.1 networkx==3.2.1
nltk==3.8.1 nltk==3.8.1
numpy==1.26.3 numpy==1.26.3
@ -89,23 +110,33 @@ paho-mqtt==1.6.1
pampy==0.3.0 pampy==0.3.0
pandas==2.2.0 pandas==2.2.0
paramiko==3.4.0 paramiko==3.4.0
parsel==1.8.1
path-dict==4.0.0 path-dict==4.0.0
pathlib==1.0.1 pathlib==1.0.1
patsy==0.5.6
pdfminer==20191125 pdfminer==20191125
pdfminer.six==20231228 pdfminer.six==20231228
piexif==1.1.3 piexif==1.1.3
pillow==10.2.0 pillow==10.2.0
plotly==5.18.0
preshed==3.0.9
Protego==0.3.0
protobuf==4.25.2 protobuf==4.25.2
psycopg2==2.9.9
pyarrow==15.0.0 pyarrow==15.0.0
pyasn1==0.5.1 pyasn1==0.5.1
pyasn1-modules==0.3.0 pyasn1-modules==0.3.0
pycparser==2.21 pycparser==2.21
pycryptodome==3.20.0 pycryptodome==3.20.0
pydantic==2.6.1
pydantic_core==2.16.2
PyDispatcher==2.0.7
pygame==2.5.2 pygame==2.5.2
Pygments==2.17.2 Pygments==2.17.2
PyJWT==2.8.0 PyJWT==2.8.0
PyMySQL==1.1.0 PyMySQL==1.1.0
PyNaCl==1.5.0 PyNaCl==1.5.0
pyOpenSSL==24.0.0
pypandoc==1.12 pypandoc==1.12
pyparsing==3.1.1 pyparsing==3.1.1
PyPDF2==3.0.1 PyPDF2==3.0.1
@ -118,10 +149,12 @@ python-socketio==5.11.0
pytz==2024.1 pytz==2024.1
pywin32==306 pywin32==306
PyYAML==6.0.1 PyYAML==6.0.1
queuelib==1.6.2
redis==5.0.1 redis==5.0.1
referencing==0.33.0 referencing==0.33.0
regex==2023.12.25 regex==2023.12.25
requests==2.31.0 requests==2.31.0
requests-file==2.0.0
requests-oauthlib==1.3.1 requests-oauthlib==1.3.1
rich==13.7.0 rich==13.7.0
rpds-py==0.17.1 rpds-py==0.17.1
@ -130,9 +163,11 @@ safetensors==0.4.2
schedule==1.2.1 schedule==1.2.1
scikit-learn==1.4.0 scikit-learn==1.4.0
scipy==1.12.0 scipy==1.12.0
Scrapy==2.11.0
selenium==4.17.2 selenium==4.17.2
sentence-transformers==2.3.1 sentence-transformers==2.3.1
sentencepiece==0.1.99 sentencepiece==0.1.99
service-identity==24.1.0
simple-websocket==1.0.0 simple-websocket==1.0.0
simpy==4.1.1 simpy==4.1.1
six==1.16.0 six==1.16.0
@ -140,14 +175,23 @@ smart-open==6.4.0
sniffio==1.3.0 sniffio==1.3.0
sortedcontainers==2.4.0 sortedcontainers==2.4.0
soupsieve==2.5 soupsieve==2.5
spacy==3.7.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SQLAlchemy==2.0.25 SQLAlchemy==2.0.25
srsly==2.4.8
statsmodels==0.14.1
striprtf==0.0.26 striprtf==0.0.26
sympy==1.12 sympy==1.12
tabulate==0.9.0
TatSu==5.11.3 TatSu==5.11.3
tenacity==8.2.3
textdistance==4.6.1 textdistance==4.6.1
textual==0.48.2 textual==0.48.2
thinc==8.2.2
threadpoolctl==3.2.0 threadpoolctl==3.2.0
tld==0.13 tld==0.13
tldextract==5.1.1
tokenizers==0.15.1 tokenizers==0.15.1
tomd==0.1.3 tomd==0.1.3
toolz==0.12.1 toolz==0.12.1
@ -157,6 +201,9 @@ trafilatura==1.7.0
transformers==4.37.2 transformers==4.37.2
trio==0.24.0 trio==0.24.0
trio-websocket==0.11.1 trio-websocket==0.11.1
Twisted==22.10.0
twisted-iocpsupport==1.0.4
typer==0.9.0
types-aiofiles==23.2.0.20240106 types-aiofiles==23.2.0.20240106
types-python-dateutil==2.8.19.20240106 types-python-dateutil==2.8.19.20240106
typing_extensions==4.9.0 typing_extensions==4.9.0
@ -165,9 +212,13 @@ tzlocal==5.2
uc-micro-py==1.0.2 uc-micro-py==1.0.2
uritemplate==4.1.1 uritemplate==4.1.1
urllib3==2.2.0 urllib3==2.2.0
w3lib==2.1.2
wasabi==1.1.2
weasel==0.3.4
Werkzeug==3.0.1 Werkzeug==3.0.1
Whoosh==2.7.4 Whoosh==2.7.4
wsproto==1.2.0 wsproto==1.2.0
xlwt==1.3.0 xlwt==1.3.0
yarl==1.9.4 yarl==1.9.4
yattag==1.15.2 yattag==1.15.2
zope.interface==6.1