diff --git a/content.py b/content.py index 1eee614..848d476 100644 --- a/content.py +++ b/content.py @@ -883,7 +883,9 @@ def crawl(): avoid = ['ezproxy','community\.gavilan\.edu','archive\/tag','archive\/category', 'my\.gavilan\.edu', 'augusoft', - 'eis-prod', 'ilearn\.gavilan', 'mailto', 'cgi-bin', 'edu\/old\/schedule', ] + 'eis-prod', 'ilearn\.gavilan', 'mailto', 'cgi-bin', 'edu\/old\/schedule', + 'admit\/search\.php', 'GavilanTrusteeAreaMaps2022\.pdf', 'schedule\/2019', 'schedule\/2020', 'schedule\/2021', + 'schedule\/2022', 'schedule\/previous', ] class MySpider(scrapy.Spider): name = 'myspider' @@ -903,9 +905,10 @@ def crawl(): def parse(self, response): print('visited:', repr(response.url), 'status:', response.status) + done = 0 if re.search(r'\.pdf$', response.url): - m = re.search(r'\/([^\/]+\.'+ext+')$', response.url) + m = re.search(r'\/([^\/]+\.pdf)$', response.url) if m: print("saving to ", save_folder + '/' + clean_fn(response.url)) pdf_response = requests.get(response.url) @@ -913,8 +916,9 @@ def crawl(): f.write(pdf_response.content) text = extract_text(save_folder + '/' + clean_fn(response.url)) codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(text) + done = 1 - for ext in ['doc','docx','ppt','pptx']: + for ext in ['doc','docx','ppt','pptx','rtf','xls','xlsx']: if re.search(r'\.'+ext+'$', response.url): m = re.search(r'\/([^\/]+\.'+ext+')$', response.url) if m: @@ -923,55 +927,62 @@ def crawl(): with open(save_folder + '/' + clean_fn(response.url), 'wb') as f: f.write(pdf_response.content) #text = extract_text(save_folder + '/' + clean_fn(response.url) + '.txt') - output = pypandoc.convert_file(save_folder + '/' + clean_fn(response.url), 'html', extra_args=['--extract-media=%s' % hash ]) - txt_output = trafilatura.extract(response.text,include_links=True, deduplicate=True, include_images=True, include_formatting=True) + pandoc_infile = save_folder + '/' + clean_fn(response.url) + pandoc_outfile = save_folder + '/' + clean_fn(response.url) + '.html' + print("pandoc in file: %s" % pandoc_infile) + print("pandoc outfile: %s" % pandoc_outfile) + pypandoc.convert_file(pandoc_infile, 'html', outputfile=pandoc_outfile, extra_args=['--from=%s' % ext, '--extract-media=%s' % save_folder + '/img' ]) + pandoc_output = codecs.open(pandoc_outfile,'r','utf-8').read() + txt_output = trafilatura.extract(pandoc_output,include_links=True, deduplicate=True, include_images=True, include_formatting=True) if txt_output: codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(txt_output) + done = 1 - for ext in ['jpg','jpeg','gif','webp']: + for ext in ['jpg','jpeg','gif','webp','png','svg','bmp','tiff','tif','ico']: if re.search(r'\.'+ext+'$', response.url): m = re.search(r'\/([^\/]+\.'+ext+')$', response.url) if m: - print("saving to ", save_folder + '/' + clean_fn(response.url)) + print("saving to ", save_folder + '/img/' + clean_fn(response.url)) pdf_response = requests.get(response.url) - with open(save_folder + '/' + clean_fn(response.url), 'wb') as f: + with open(save_folder + '/img/' + clean_fn(response.url), 'wb') as f: f.write(pdf_response.content) + done = 1 - f_out = codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8') + if not done: + f_out = codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8') - this_output = trafilatura.extract(response.text,include_links=True, deduplicate=True, include_images=True, include_formatting=True) - if this_output: - f_out.write(this_output) - f_out.close() - links = response.css('a::attr(href)').getall() + this_output = trafilatura.extract(response.text,include_links=True, deduplicate=True, include_images=True, include_formatting=True) + if this_output: + f_out.write(this_output) + f_out.close() + links = response.css('a::attr(href)').getall() - # Follow each link and parse its contents - - for link in links: - go = 1 - full_link = response.urljoin(link) - print('++++++ trying ', full_link) + # Follow each link and parse its contents + for link in links: + go = 1 + full_link = response.urljoin(link) + print('++++++ trying ', full_link) - if not re.search(r'gavilan\.edu',full_link): - go = 0 - print('--- not gav edu') - else: - if re.search(r'hhh\.gavilan\.edu',full_link): - pass - elif not re.search(r'^https?:\/\/www\.gavilan\.edu',full_link): - # need to add www to gavilan.edu - m = re.search(r'^(https?:\/\/)gavilan\.edu(\/.*)$',full_link) - if m: - full_link = m.group(1) + 'www.' + m.group(2) - for a in avoid: - if re.search(a,full_link): + if not re.search(r'gavilan\.edu',full_link): go = 0 - print('--- avoid ', a) + print('--- not gav edu') + else: + if re.search(r'hhh\.gavilan\.edu',full_link): + pass + elif not re.search(r'^https?:\/\/www\.gavilan\.edu',full_link): + # need to add www to gavilan.edu + m = re.search(r'^(https?:\/\/)gavilan\.edu(\/.*)$',full_link) + if m: + full_link = m.group(1) + 'www.' + m.group(2) + for a in avoid: + if re.search(a,full_link): + go = 0 + print('--- avoid ', a) - if go: yield scrapy.Request(full_link, callback=self.parse, - headers={"User-Agent": "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"}) - else: - print("------ avoiding ", full_link) + if go: yield scrapy.Request(full_link, callback=self.parse, + headers={"User-Agent": "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"}) + else: + print("------ avoiding ", full_link) # Instantiate a CrawlerProcess object process = CrawlerProcess() @@ -1065,6 +1076,137 @@ def search_embeddings(): print(f'Top {i+1}: {file} - {sentence} - (Score: {score})') + +from whoosh import fields, columns +from whoosh.index import create_in, open_dir +from whoosh.fields import Schema, TEXT, ID, STORED, NUMERIC +from whoosh.qparser import QueryParser +from whoosh.analysis import StemmingAnalyzer + +def priority_from_url(url): + priority = 1 + # url is like this: https++www.gavilan.edu+news+Newsletters.php.txt + m = re.search(r'gavilan\.edu\+(.*)\.\w\w\w\w?$',url) + if m: + address = m.group(1) + parts = address.split('+') + if parts[0] in ['accreditation','curriculum','senate','research','old','committee','board','styleguide']: + priority += 20 + if parts[0] in ['news','IT','HOM','administration']: + priority += 10 + if parts[0] == 'admit' and parts[1] == 'schedule': + priority += 10 + if 'accreditation' in parts: + priority += 50 + if re.search(r'hhh\.gavilan\.edu',url): + priority += 100 + priority *= len(parts) + #print(priority, parts) + else: + priority *= 50 + #print(priority, url) + return priority + + +def test_priority(): + ff = os.listdir('cache/crawl') + for f in ff: + priority_from_url(f) + + + +def displayfile(f,aslist=0): + lines = codecs.open('cache/crawl/' + f,'r','utf-8').readlines() + lines = [L.strip() for L in lines] + lines = [L for L in lines if L and not re.search(r'^\|$',L)] + if aslist: + return lines + return "\n".join(lines) + +def any_match(line, words): + # true if any of the words are in line + for w in words: + if re.search(w, line, re.IGNORECASE): + return True + return False + + +def find_match_line(filename, query): + q_words = query.split(" ") + lines = codecs.open('cache/crawl/' + filename,'r','utf-8').readlines() + lines = [L.strip() for L in lines] + lines = [L for L in lines if L and not re.search(r'^\|$',L)] + lines = [L for L in lines if any_match(L, q_words)] + return "\n".join(lines) + + + +def search_index(): + s = '' + schema = Schema(url=STORED, title=TEXT(stored=True), content=TEXT, priority=fields.COLUMN(columns.NumericColumn("i"))) + ix = open_dir("cache/searchindex") + + + #with ix.reader() as reader: + #print(reader.doc_count()) # number of documents in the index + #print(reader.doc_frequency("content", "example")) # number of documents that contain the term "example" in the "content" field + #print(reader.field_length("content")) # total number of terms in the "content" field + #print(reader.term_info("content", "example")) # information about the term "example" in the "content" field + #print(reader.dump()) # overview of the entire index + + + while s != 'q': + s = input("search or 'q' to quit: ") + if s == 'q': + return + + # Define the query parser for the index + with ix.searcher() as searcher: + query_parser = QueryParser("content", schema=schema) + + # Parse the user's query + query = query_parser.parse(s) + print(query) + + # Search the index for documents matching the query + results = searcher.search(query, sortedby="priority") + + # Print the results + i = 1 + for result in results: + print(i, result) # result["url"], result["content"]) + print(find_match_line(result['url'], s)) + print() + i += 1 + + + +def create_search_index(): + # Define the schema for the index + + stem_ana = StemmingAnalyzer() + schema = Schema(url=STORED, title=TEXT(stored=True), content=TEXT, priority=fields.COLUMN(columns.NumericColumn("i"))) + + # Create a new index in the directory "myindex" + ix = create_in("cache/searchindex", schema) + + # Open an existing index + #ix = open_dir("cache/searchindex") + + # Define the writer for the index + writer = ix.writer() + + # Index some documents + files = os.listdir('cache/crawl') + files.sort() + for f in files: + m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f) + if m: + print(f) + writer.add_document(url=f, title=m.group(1), content=displayfile(f), priority=priority_from_url(f)) + writer.commit() + + def create_embeddings(): model = SentenceTransformer('all-MiniLM-L6-v2') files = os.listdir('cache/crawl') @@ -1074,9 +1216,7 @@ def create_embeddings(): for f in files: m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f) if m: - lines = codecs.open('cache/crawl/' + f,'r','utf-8').readlines() - lines = [L.strip() for L in lines] - lines = [L for L in lines if L] + lines = displayfile(f,1) embeddings = model.encode(lines) print("\n-----", f) @@ -1104,7 +1244,10 @@ if __name__ == "__main__": 9: ['clean text index', txt_clean_index], 10: ['make web dir struct', manual_index], 11: ['create search embeddings', create_embeddings], - 12: ['do a search', search_embeddings], + 12: ['create search index', create_search_index], + 13: ['do an index search', search_index], + 14: ['do a vector search', search_embeddings], + 15: ['test priority', test_priority], } if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]): diff --git a/depricated.py b/depricated.py index 0fb44e7..e8c4ffa 100644 --- a/depricated.py +++ b/depricated.py @@ -301,6 +301,614 @@ def serve(): """ + + + +### interactive.py + + + + +"""class HelloWorldExample(object): + def make_teacher_rel(self, tchr, clss): + with self._driver.session() as tx: + tx.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})", \ + tchr=tchr, clss=clss) + + def __init__(self, uri, user, password): + self._driver = GraphDatabase.driver(uri, auth=(user, password)) + + def close(self): + self._driver.close() + + + + def print_greeting(self, message): + with self._driver.session() as session: + greeting = session.write_transaction(self._create_and_return_greeting, message) + print(greeting) + + @staticmethod + def _create_and_return_greeting(tx, message): + result = tx.run("CREATE (a:Greeting) " + "SET a.message = $message " + "RETURN a.message + ', from node ' + id(a)", message=message) + return result.single()[0] +""" + + +def make_teacher_rel(g, tchr, clss): + g.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})", \ + tchr=tchr, clss=clss) + + +def testgraph(): + gg = Graph("bolt://localhost:7687", auth=("neo4j", "asdf")) + + #gg.run("DROP CONSTRAINT ON (tchr:Teacher) ASSERT tchr.name IS UNIQUE") + #gg.run("DROP CONSTRAINT ON (clss:Class) ASSERT clss.name IS UNIQUE") + + #gg.run("CREATE INDEX ON :Teacher(name)") + #gg.run("CREATE INDEX ON :Class(name)") + + stuff = json.loads( open('output/semesters/2020spring/sp20_sched.json','r').read()) + + # make lists of unique course code+name, teacher, locations + tch = {} + crs = {} + loc = {} + sem = Node("Semester", name="sp20") + for c in stuff: + if not c['teacher'] in tch: + tch[c['teacher']] = Node("Teacher", name=c['teacher']) + gg.create(tch[c['teacher']]) + if not c['code'] in crs: + crs[ c['code'] ] = Node("Course section", name=c['name'], code=c['code']) + gg.create(crs[ c['code'] ]) + if not c['loc'] in loc: + loc[ c['loc'] ] = Node("Location", loc=c['loc']) + gg.create(loc[ c['loc'] ]) + sect = Node("Section", crn=int(c['crn'])) + gg.create(Relationship(tch[c['teacher']], "TEACHES", sect )) + gg.create(Relationship(sect, "CLASS OF", crs[ c['code'] ] )) + gg.create(Relationship( sect, "LOCATED AT", loc[ c['loc'] ] )) + + """ + for c in stuff: + print(c['crn']) + q = "CREATE (section:Section { Name: "+c['name']+", Code: "+c['code']+", Crn: "+c['crn']+", Teacher: "+c['teacher']+" })" + q = 'CREATE (section:Section { Name: "%s", Code: "%s", Crn: "%s", Teacher: "%s" })' % \ + (c['name'], c['code'], c['crn'], c['teacher']) + gg.run(q) + """ + #gg = HelloWorldExample("bolt://localhost:7687", "neo4j", "asdf") + #gg.print_greeting("hi there world") + """ + make_teacher_rel(gg, "Peter Howell","CSIS 42") + make_teacher_rel(gg, "Alex Stoykov","CSIS 42") + make_teacher_rel(gg, "Sabrina Lawrence","CSIS 85") + make_teacher_rel(gg, "Peter Howell","CSIS 85") + """ + +screen = 0 + +def Memoize( func): + """ + Memoize decorator + """ + cache = {} + + @wraps(func) + def wrapper(*args): + if args not in cache: + cache[args] = func(*args) + return cache[args] + return wrapper + + + + +class MyRepl: + description = { + "switch ": "Switch stream. You can use either 'switch public' or 'switch mine'", + "home " : "Show your timeline. 'home 7' will show 7 tweet.", + "harry " : "a guys name.", + "homo " : "means the same.", + "view " : "'view @mdo' will show @mdo's home.", + "h " : "Show help.", + "t " : "'t opps' will tweet 'opps' immediately.", + "s " : "'s #AKB48' will search for '#AKB48' and return 5 newest tweets." + } + + + def startup(self, outfile): + global screen # make it self + self.g = {} + self.buf = {} + screen = None + self.enter_ary = [curses.KEY_ENTER,10] + self.delete_ary = [curses.KEY_BACKSPACE,curses.KEY_DC,8,127,263] + self.tab_ary = [9] + self.up_ary = [curses.KEY_UP] + self.down_ary = [curses.KEY_DOWN] + + # Init curses screen + screen = curses.initscr() + screen.keypad(1) + curses.noecho() + try: + curses.start_color() + curses.use_default_colors() + for i in range(0, curses.COLORS): + curses.init_pair(i + 1, i, -1) + except curses.error: + pass + curses.cbreak() + self.g['height'] , self.g['width'] = screen.getmaxyx() + #print("Width: %i" % self.g['width']) + + # Init color function + s = self + self.white = lambda x:curses_print_word(x,7) #0) + self.grey = lambda x:curses_print_word(x, 3) #3)1) + self.red = lambda x:curses_print_word(x,7) #2) + self.green = lambda x:curses_print_word(x, 3) #3) + self.yellow = lambda x:curses_print_word(x,7) #4) + self.blue = lambda x:curses_print_word(x,3) + self.magenta = lambda x:curses_print_word(x,7) #6) + self.cyan = lambda x:curses_print_word(x,7) #7) + self.colors_shuffle = [s.grey, s.red, s.green, s.yellow, s.blue, s.magenta, s.cyan] + self.cyc = itertools.cycle(s.colors_shuffle[1:]) + self.index_cyc = itertools.cycle(range(1,8)) + self.setup_command(outfile) + + + def set_my_dict(self,d): + self.description = d + + @Memoize + def cycle_color(self, s): + """ + Cycle the colors_shuffle + """ + return next(self.cyc) + + + def ascii_art(self, text): + """ + Draw the Ascii Art + """ + fi = figlet_format(text, font='doom') + for i in fi.split('\n'): + self.curses_print_line(i,next(self.index_cyc)) + + + def close_window(self, ): + """ + Close screen + """ + global screen + screen.keypad(0); + curses.nocbreak(); + curses.echo() + curses.endwin() + + + def suggest(self, word): + """ + Find suggestion + """ + rel = [] + if not word: return rel + word = word.lower() + + for candidate in self.description: + + ca = candidate.lower() + #if ca.startswith(word): rel.append(candidate) + + for eachword in ca.split(" "): + if eachword.startswith(word): + rel.append(candidate) + + return rel + + + def curses_print_word(self, word,color_pair_code): + """ + Print a word + """ + global screen + word = word.encode('utf8') + screen.addstr(word,curses.color_pair(color_pair_code)) + + + def curses_print_line(self, line,color_pair_code): + """ + Print a line, scroll down if need + """ + global screen + line = line.encode('utf8') + y,x = screen.getyx() + if y - self.g['height'] == -3: + self.scroll_down(2,y,x) + screen.addstr(y,0,line,curses.color_pair(color_pair_code)) + self.buf[y] = line, color_pair_code + elif y - self.g['height'] == -2: + self.scroll_down(3,y,x) + screen.addstr(y-1,0,line,curses.color_pair(color_pair_code)) + self.buf[y-1] = line ,color_pair_code + else: + screen.addstr(y+1,0,line,curses.color_pair(color_pair_code)) + self.buf[y+1] = line, color_pair_code + + + def redraw(self, start_y,end_y,fallback_y,fallback_x): + """ + Redraw lines from buf + """ + global screen + for cursor in range(start_y,end_y): + screen.move(cursor,0) + screen.clrtoeol() + try: + line, color_pair_code = self.buf[cursor] + screen.addstr(cursor,0,line,curses.color_pair(color_pair_code)) + except: + pass + screen.move(fallback_y,fallback_x) + + + def scroll_down(self, noredraw,fallback_y,fallback_x): + """ + Scroll down 1 line + """ + global screen + # Recreate buf + # noredraw = n means that screen will scroll down n-1 line + trip_list = heapq.nlargest(noredraw-1,buf) + for i in buf: + if i not in trip_list: + self.buf[i] = self.buf[i+noredraw-1] + for j in trip_list: + buf.pop(j) + + # Clear and redraw + screen.clear() + self.redraw(1,g['height']-noredraw,fallback_y,fallback_x) + + + def clear_upside(self, n,y,x): + """ + Clear n lines upside + """ + global screen + for i in range(1,n+1): + screen.move(y-i,0) + screen.clrtoeol() + screen.refresh() + screen.move(y,x) + + + def display_suggest(self, y,x,word): + """ + Display box of suggestion + """ + global screen + g = self.g + side = 2 + + # Check if need to print upside + upside = y+6 > int(g['height']) + + # Redraw if suggestion is not the same as previous display + sug = self.suggest(word) + if sug != self.g['prev']: + # 0-line means there is no suggetions (height = 0) + # 3-line means there are many suggetions (height = 3) + # 5-line means there is only one suggetions (height = 5) + # Clear upside section + if upside: + # Clear upside is a bit difficult. Here it's seperate to 4 case. + # now: 3-lines / previous : 0 line + if len(sug) > 1 and not self.g['prev']: + self.clear_upside(3,y,x) + # now: 0-lines / previous :3 lines + elif not sug and len(g['prev'])>1: + self.redraw(y-3,y,y,x) + # now: 3-lines / previous :5 lines + elif len(sug) > 1 == len(g['prev']): + self.redraw(y-5,y-3,y,x) + self.clear_upside(3,y,x) + # now: 5-lines / previous :3 lines + elif len(sug) == 1 < len(g['prev']): + self.clear_upside(3,y,x) + # now: 0-lines / previous :5 lines + elif not sug and len(g['prev'])==1: + self.redraw(y-5,y,y,x) + # now: 3-lines / previous :3 lines + elif len(sug) == len(g['prev']) > 1: + self.clear_upside(3,y,x) + # now: 5-lines / previous :5 lines + elif len(sug) == len(g['prev']) == 1: + self.clear_upside(5,y,x) + screen.refresh() + else: + # Clear downside + screen.clrtobot() + screen.refresh() + self.g['prev'] = sug + + if sug: + # More than 1 suggestion + if len(sug) > 1: + if len(sug) > 5: sug = sug[:5] + + #needed_lenth = sum([len(i)+side for i in sug]) + side + needed_lenth = max( self.g['width']-5, sum([len(i)+side for i in sug]) + side) + print(self.g['width']) + print(word) + print(sug) + print(needed_lenth) + if upside: + win = curses.newwin(3,needed_lenth,y-3,0) + win.erase() + win.box() + win.refresh() + cur_width = side + for i in range(len(sug)): + if cur_width+len(sug[i]) > self.g['width']: break + screen.addstr(y-2,cur_width,sug[i],curses.color_pair(4)) + cur_width += len(sug[i]) + side + if cur_width > self.g['width']: + break + else: + win = curses.newwin(3,needed_lenth,y+1,0) + win.erase() + win.box() + win.refresh() + cur_width = side + for i in range(len(sug)): + screen.addstr(y+2,cur_width,sug[i],curses.color_pair(4)) + cur_width += len(sug[i]) + side + if cur_width > self.g['width']: + break + # Only 1 suggestion + else: + can = sug[0] + if upside: + win = curses.newwin(5,len(self.description[can])+2*side,y-5,0) + win.box() + win.refresh() + screen.addstr(y-4,side,can,curses.color_pair(4)) + screen.addstr(y-2,side,self.description[can],curses.color_pair(3)) + else: + win = curses.newwin(5,len(self.description[can])+2*side,y+1,0) + win.box() + win.refresh() + screen.addstr(y+2,side,can,curses.color_pair(4)) + screen.addstr(y+4,side,self.description[can],curses.color_pair(3)) + + + def inputloop(self, ): + """ + Main loop input + """ + global screen + word = '' + screen.addstr("\n" + self.g['prefix'],curses.color_pair(7)) + + while True: + # Current position + y,x = screen.getyx() + # Get char + event = screen.getch() + try : + char = chr(event) + except: + char = '' + + # Test curses_print_line + if char == '?': + self.buf[y] = self.g['prefix'] + '?', 0 + self.ascii_art('dtvd88') + + # TAB to complete + elif event in self.tab_ary: + # First tab + try: + if not self.g['tab_cycle']: + self.g['tab_cycle'] = itertools.cycle(self.suggest(word)) + + suggestion = next(self.g['tab_cycle']) + # Clear current line + screen.move(y,len(self.g['prefix'])) + screen.clrtoeol() + # Print out suggestion + word = suggestion + screen.addstr(y,len(self.g['prefix']),word) + self.display_suggest(y,x,word) + screen.move(y,len(word)+len(self.g['prefix'])) + except: + pass + + # UP key + elif event in self.up_ary: + if self.g['hist']: + # Clear current line + screen.move(y,len(self.g['prefix'])) + screen.clrtoeol() + # Print out previous history + if self.g['hist_index'] > 0 - len(self.g['hist']): + self.g['hist_index'] -= 1 + word = self.g['hist'][self.g['hist_index']] + screen.addstr(y,len(self.g['prefix']),word) + self.display_suggest(y,x,word) + screen.move(y,len(word)+len(self.g['prefix'])) + + # DOWN key + elif event in self.down_ary: + if self.g['hist']: + # clear current line + screen.move(y,len(self.g['prefix'])) + screen.clrtoeol() + # print out previous history + if not self.g['hist_index']: + self.g['hist_index'] = -1 + if self.g['hist_index'] < -1: + self.g['hist_index'] += 1 + word = self.g['hist'][self.g['hist_index']] + screen.addstr(y,len(self.g['prefix']),word) + self.display_suggest(y,x,word) + screen.move(y,len(word)+len(self.g['prefix'])) + + # Enter key #### I should get the command out of there? + # #### Can I register a callback function? + + elif event in self.enter_ary: + self.g['tab_cycle'] = None + self.g['hist_index'] = 0 + self.g['hist'].append(word) + if word== 'q': + self.cleanup_command() + break; + self.display_suggest(y,x,'') + screen.clrtobot() + self.handle_command(word) + + self.buf[y] = self.g['prefix'] + word, 0 + # Touch the screen's end + if y - self.g['height'] > -3: + self.scroll_down(2,y,x) + screen.addstr(y,0,self.g['prefix'],curses.color_pair(7)) ## SHOW NEW PROMPT + else: + screen.addstr(y+1,0,self.g['prefix'],curses.color_pair(7)) + word = '' + + # Delete / Backspace + elif event in self.delete_ary: + self.g['tab_cycle'] = None + # Touch to line start + if x < len(self.g['prefix']) + 1: + screen.move(y,x) + word = '' + # Midle of line + else: + word = word[:-1] + screen.move(y,x-1) + screen.clrtoeol() + self.display_suggest(y,x,word) + screen.move(y,x-1) + + # Another keys + else: + self.g['tab_cycle'] = None + # Explicitly print char + try: + screen.addstr(char) + word += char + self.display_suggest(y,x,word) + screen.move(y,x+1) + except ValueError as e: # got errors here when i adjusted the volume.... + pass + + # Reset + self.close_window() + + def setup_command(self,outfile): + self.data = open(outfile,'a') + + self.g['prev'] = None + self.g['tab_cycle'] = None + self.g['prefix'] = '[gav]: ' + self.g['hist_index'] = 0 + # Load history from previous session + try: + o = open('completer.hist') + self.g['hist'] = [i.strip() for i in o.readlines()] + except: + self.g['hist'] = [] + + def cleanup_command(self): + o = open('completer.hist','a') + o.write("\n".join(self.g['hist'])) + o.close() + self.data.close() + + def handle_command(self, cmd): + r1 = re.search( r'^n\s(.*)$',cmd) + if r1: + # new data collection mode + mode = r1.group(1) + self.g['prefix'] = "[" + mode + "]" + + self.data.write("\n\n# %s\n" % mode) + else: + #winsound.Beep(440,300) + self.data.write(cmd + "\n") + self.data.flush() + + + +def repl_staff(): + + tch = json.loads( open('cache/teacherdata/teachers.json','r').read() ) + newdict = {} + for T in tch: + newdict[T['name']] = 'teacher with id ' + T['login_id'] + c = MyRepl() + + c.set_my_dict(newdict) + c.startup('cache/people_logs.txt') + c.inputloop() + + +def repl_degs(): + + tch = csv.reader( open('cache/attainment_masterlist.csv','r'),delimiter=",") + + newdict = {} + num = 0 + for row in tch: + if num==0: + pass + else: + d = ' ' + if row[0]: d = row[0] + newdict[row[4]] = d + num += 1 + + #print(newdict) + #input('ready') + c = MyRepl() + + c.set_my_dict(newdict) + +#c.startup('cache/g_path_cluster2020_.txt') +# c.inputloop() + +def repl(): + repl_degs() + + + + #input('ready') + c = MyRepl() + + c.set_my_dict(newdict) + +#c.startup('cache/g_path_cluster2020_.txt') +# c.inputloop() + +def repl(): + repl_degs() + + + + + + + ### courses.py diff --git a/interactive.py b/interactive.py index 7fb76f4..e74efd0 100644 --- a/interactive.py +++ b/interactive.py @@ -31,8 +31,6 @@ else: q = Queue() -HOST_NAME = '127.0.0.1' # -HOST_NAME = '192.168.1.6' # HOST_NAME = '192.168.1.6' # PORT_NUMBER = 8080 # Maybe set this to 9000. @@ -342,600 +340,3 @@ if __name__ == '__main__': - -"""class HelloWorldExample(object): - def make_teacher_rel(self, tchr, clss): - with self._driver.session() as tx: - tx.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})", \ - tchr=tchr, clss=clss) - - def __init__(self, uri, user, password): - self._driver = GraphDatabase.driver(uri, auth=(user, password)) - - def close(self): - self._driver.close() - - - - def print_greeting(self, message): - with self._driver.session() as session: - greeting = session.write_transaction(self._create_and_return_greeting, message) - print(greeting) - - @staticmethod - def _create_and_return_greeting(tx, message): - result = tx.run("CREATE (a:Greeting) " - "SET a.message = $message " - "RETURN a.message + ', from node ' + id(a)", message=message) - return result.single()[0] -""" - - -def make_teacher_rel(g, tchr, clss): - g.run("MERGE (tchr:Teacher {name: $tchr}) MERGE (tchr)-[:TEACHES]->(clss:Class {name: $clss})", \ - tchr=tchr, clss=clss) - - -def testgraph(): - gg = Graph("bolt://localhost:7687", auth=("neo4j", "asdf")) - - #gg.run("DROP CONSTRAINT ON (tchr:Teacher) ASSERT tchr.name IS UNIQUE") - #gg.run("DROP CONSTRAINT ON (clss:Class) ASSERT clss.name IS UNIQUE") - - #gg.run("CREATE INDEX ON :Teacher(name)") - #gg.run("CREATE INDEX ON :Class(name)") - - stuff = json.loads( open('output/semesters/2020spring/sp20_sched.json','r').read()) - - # make lists of unique course code+name, teacher, locations - tch = {} - crs = {} - loc = {} - sem = Node("Semester", name="sp20") - for c in stuff: - if not c['teacher'] in tch: - tch[c['teacher']] = Node("Teacher", name=c['teacher']) - gg.create(tch[c['teacher']]) - if not c['code'] in crs: - crs[ c['code'] ] = Node("Course section", name=c['name'], code=c['code']) - gg.create(crs[ c['code'] ]) - if not c['loc'] in loc: - loc[ c['loc'] ] = Node("Location", loc=c['loc']) - gg.create(loc[ c['loc'] ]) - sect = Node("Section", crn=int(c['crn'])) - gg.create(Relationship(tch[c['teacher']], "TEACHES", sect )) - gg.create(Relationship(sect, "CLASS OF", crs[ c['code'] ] )) - gg.create(Relationship( sect, "LOCATED AT", loc[ c['loc'] ] )) - - """ - for c in stuff: - print(c['crn']) - q = "CREATE (section:Section { Name: "+c['name']+", Code: "+c['code']+", Crn: "+c['crn']+", Teacher: "+c['teacher']+" })" - q = 'CREATE (section:Section { Name: "%s", Code: "%s", Crn: "%s", Teacher: "%s" })' % \ - (c['name'], c['code'], c['crn'], c['teacher']) - gg.run(q) - """ - #gg = HelloWorldExample("bolt://localhost:7687", "neo4j", "asdf") - #gg.print_greeting("hi there world") - """ - make_teacher_rel(gg, "Peter Howell","CSIS 42") - make_teacher_rel(gg, "Alex Stoykov","CSIS 42") - make_teacher_rel(gg, "Sabrina Lawrence","CSIS 85") - make_teacher_rel(gg, "Peter Howell","CSIS 85") - """ - -screen = 0 - -def Memoize( func): - """ - Memoize decorator - """ - cache = {} - - @wraps(func) - def wrapper(*args): - if args not in cache: - cache[args] = func(*args) - return cache[args] - return wrapper - - - - -class MyRepl: - description = { - "switch ": "Switch stream. You can use either 'switch public' or 'switch mine'", - "home " : "Show your timeline. 'home 7' will show 7 tweet.", - "harry " : "a guys name.", - "homo " : "means the same.", - "view " : "'view @mdo' will show @mdo's home.", - "h " : "Show help.", - "t " : "'t opps' will tweet 'opps' immediately.", - "s " : "'s #AKB48' will search for '#AKB48' and return 5 newest tweets." - } - - - def startup(self, outfile): - global screen # make it self - self.g = {} - self.buf = {} - screen = None - self.enter_ary = [curses.KEY_ENTER,10] - self.delete_ary = [curses.KEY_BACKSPACE,curses.KEY_DC,8,127,263] - self.tab_ary = [9] - self.up_ary = [curses.KEY_UP] - self.down_ary = [curses.KEY_DOWN] - - # Init curses screen - screen = curses.initscr() - screen.keypad(1) - curses.noecho() - try: - curses.start_color() - curses.use_default_colors() - for i in range(0, curses.COLORS): - curses.init_pair(i + 1, i, -1) - except curses.error: - pass - curses.cbreak() - self.g['height'] , self.g['width'] = screen.getmaxyx() - #print("Width: %i" % self.g['width']) - - # Init color function - s = self - self.white = lambda x:curses_print_word(x,7) #0) - self.grey = lambda x:curses_print_word(x, 3) #3)1) - self.red = lambda x:curses_print_word(x,7) #2) - self.green = lambda x:curses_print_word(x, 3) #3) - self.yellow = lambda x:curses_print_word(x,7) #4) - self.blue = lambda x:curses_print_word(x,3) - self.magenta = lambda x:curses_print_word(x,7) #6) - self.cyan = lambda x:curses_print_word(x,7) #7) - self.colors_shuffle = [s.grey, s.red, s.green, s.yellow, s.blue, s.magenta, s.cyan] - self.cyc = itertools.cycle(s.colors_shuffle[1:]) - self.index_cyc = itertools.cycle(range(1,8)) - self.setup_command(outfile) - - - def set_my_dict(self,d): - self.description = d - - @Memoize - def cycle_color(self, s): - """ - Cycle the colors_shuffle - """ - return next(self.cyc) - - - def ascii_art(self, text): - """ - Draw the Ascii Art - """ - fi = figlet_format(text, font='doom') - for i in fi.split('\n'): - self.curses_print_line(i,next(self.index_cyc)) - - - def close_window(self, ): - """ - Close screen - """ - global screen - screen.keypad(0); - curses.nocbreak(); - curses.echo() - curses.endwin() - - - def suggest(self, word): - """ - Find suggestion - """ - rel = [] - if not word: return rel - word = word.lower() - - for candidate in self.description: - - ca = candidate.lower() - #if ca.startswith(word): rel.append(candidate) - - for eachword in ca.split(" "): - if eachword.startswith(word): - rel.append(candidate) - - return rel - - - def curses_print_word(self, word,color_pair_code): - """ - Print a word - """ - global screen - word = word.encode('utf8') - screen.addstr(word,curses.color_pair(color_pair_code)) - - - def curses_print_line(self, line,color_pair_code): - """ - Print a line, scroll down if need - """ - global screen - line = line.encode('utf8') - y,x = screen.getyx() - if y - self.g['height'] == -3: - self.scroll_down(2,y,x) - screen.addstr(y,0,line,curses.color_pair(color_pair_code)) - self.buf[y] = line, color_pair_code - elif y - self.g['height'] == -2: - self.scroll_down(3,y,x) - screen.addstr(y-1,0,line,curses.color_pair(color_pair_code)) - self.buf[y-1] = line ,color_pair_code - else: - screen.addstr(y+1,0,line,curses.color_pair(color_pair_code)) - self.buf[y+1] = line, color_pair_code - - - def redraw(self, start_y,end_y,fallback_y,fallback_x): - """ - Redraw lines from buf - """ - global screen - for cursor in range(start_y,end_y): - screen.move(cursor,0) - screen.clrtoeol() - try: - line, color_pair_code = self.buf[cursor] - screen.addstr(cursor,0,line,curses.color_pair(color_pair_code)) - except: - pass - screen.move(fallback_y,fallback_x) - - - def scroll_down(self, noredraw,fallback_y,fallback_x): - """ - Scroll down 1 line - """ - global screen - # Recreate buf - # noredraw = n means that screen will scroll down n-1 line - trip_list = heapq.nlargest(noredraw-1,buf) - for i in buf: - if i not in trip_list: - self.buf[i] = self.buf[i+noredraw-1] - for j in trip_list: - buf.pop(j) - - # Clear and redraw - screen.clear() - self.redraw(1,g['height']-noredraw,fallback_y,fallback_x) - - - def clear_upside(self, n,y,x): - """ - Clear n lines upside - """ - global screen - for i in range(1,n+1): - screen.move(y-i,0) - screen.clrtoeol() - screen.refresh() - screen.move(y,x) - - - def display_suggest(self, y,x,word): - """ - Display box of suggestion - """ - global screen - g = self.g - side = 2 - - # Check if need to print upside - upside = y+6 > int(g['height']) - - # Redraw if suggestion is not the same as previous display - sug = self.suggest(word) - if sug != self.g['prev']: - # 0-line means there is no suggetions (height = 0) - # 3-line means there are many suggetions (height = 3) - # 5-line means there is only one suggetions (height = 5) - # Clear upside section - if upside: - # Clear upside is a bit difficult. Here it's seperate to 4 case. - # now: 3-lines / previous : 0 line - if len(sug) > 1 and not self.g['prev']: - self.clear_upside(3,y,x) - # now: 0-lines / previous :3 lines - elif not sug and len(g['prev'])>1: - self.redraw(y-3,y,y,x) - # now: 3-lines / previous :5 lines - elif len(sug) > 1 == len(g['prev']): - self.redraw(y-5,y-3,y,x) - self.clear_upside(3,y,x) - # now: 5-lines / previous :3 lines - elif len(sug) == 1 < len(g['prev']): - self.clear_upside(3,y,x) - # now: 0-lines / previous :5 lines - elif not sug and len(g['prev'])==1: - self.redraw(y-5,y,y,x) - # now: 3-lines / previous :3 lines - elif len(sug) == len(g['prev']) > 1: - self.clear_upside(3,y,x) - # now: 5-lines / previous :5 lines - elif len(sug) == len(g['prev']) == 1: - self.clear_upside(5,y,x) - screen.refresh() - else: - # Clear downside - screen.clrtobot() - screen.refresh() - self.g['prev'] = sug - - if sug: - # More than 1 suggestion - if len(sug) > 1: - if len(sug) > 5: sug = sug[:5] - - #needed_lenth = sum([len(i)+side for i in sug]) + side - needed_lenth = max( self.g['width']-5, sum([len(i)+side for i in sug]) + side) - print(self.g['width']) - print(word) - print(sug) - print(needed_lenth) - if upside: - win = curses.newwin(3,needed_lenth,y-3,0) - win.erase() - win.box() - win.refresh() - cur_width = side - for i in range(len(sug)): - if cur_width+len(sug[i]) > self.g['width']: break - screen.addstr(y-2,cur_width,sug[i],curses.color_pair(4)) - cur_width += len(sug[i]) + side - if cur_width > self.g['width']: - break - else: - win = curses.newwin(3,needed_lenth,y+1,0) - win.erase() - win.box() - win.refresh() - cur_width = side - for i in range(len(sug)): - screen.addstr(y+2,cur_width,sug[i],curses.color_pair(4)) - cur_width += len(sug[i]) + side - if cur_width > self.g['width']: - break - # Only 1 suggestion - else: - can = sug[0] - if upside: - win = curses.newwin(5,len(self.description[can])+2*side,y-5,0) - win.box() - win.refresh() - screen.addstr(y-4,side,can,curses.color_pair(4)) - screen.addstr(y-2,side,self.description[can],curses.color_pair(3)) - else: - win = curses.newwin(5,len(self.description[can])+2*side,y+1,0) - win.box() - win.refresh() - screen.addstr(y+2,side,can,curses.color_pair(4)) - screen.addstr(y+4,side,self.description[can],curses.color_pair(3)) - - - def inputloop(self, ): - """ - Main loop input - """ - global screen - word = '' - screen.addstr("\n" + self.g['prefix'],curses.color_pair(7)) - - while True: - # Current position - y,x = screen.getyx() - # Get char - event = screen.getch() - try : - char = chr(event) - except: - char = '' - - # Test curses_print_line - if char == '?': - self.buf[y] = self.g['prefix'] + '?', 0 - self.ascii_art('dtvd88') - - # TAB to complete - elif event in self.tab_ary: - # First tab - try: - if not self.g['tab_cycle']: - self.g['tab_cycle'] = itertools.cycle(self.suggest(word)) - - suggestion = next(self.g['tab_cycle']) - # Clear current line - screen.move(y,len(self.g['prefix'])) - screen.clrtoeol() - # Print out suggestion - word = suggestion - screen.addstr(y,len(self.g['prefix']),word) - self.display_suggest(y,x,word) - screen.move(y,len(word)+len(self.g['prefix'])) - except: - pass - - # UP key - elif event in self.up_ary: - if self.g['hist']: - # Clear current line - screen.move(y,len(self.g['prefix'])) - screen.clrtoeol() - # Print out previous history - if self.g['hist_index'] > 0 - len(self.g['hist']): - self.g['hist_index'] -= 1 - word = self.g['hist'][self.g['hist_index']] - screen.addstr(y,len(self.g['prefix']),word) - self.display_suggest(y,x,word) - screen.move(y,len(word)+len(self.g['prefix'])) - - # DOWN key - elif event in self.down_ary: - if self.g['hist']: - # clear current line - screen.move(y,len(self.g['prefix'])) - screen.clrtoeol() - # print out previous history - if not self.g['hist_index']: - self.g['hist_index'] = -1 - if self.g['hist_index'] < -1: - self.g['hist_index'] += 1 - word = self.g['hist'][self.g['hist_index']] - screen.addstr(y,len(self.g['prefix']),word) - self.display_suggest(y,x,word) - screen.move(y,len(word)+len(self.g['prefix'])) - - # Enter key #### I should get the command out of there? - # #### Can I register a callback function? - - elif event in self.enter_ary: - self.g['tab_cycle'] = None - self.g['hist_index'] = 0 - self.g['hist'].append(word) - if word== 'q': - self.cleanup_command() - break; - self.display_suggest(y,x,'') - screen.clrtobot() - self.handle_command(word) - - self.buf[y] = self.g['prefix'] + word, 0 - # Touch the screen's end - if y - self.g['height'] > -3: - self.scroll_down(2,y,x) - screen.addstr(y,0,self.g['prefix'],curses.color_pair(7)) ## SHOW NEW PROMPT - else: - screen.addstr(y+1,0,self.g['prefix'],curses.color_pair(7)) - word = '' - - # Delete / Backspace - elif event in self.delete_ary: - self.g['tab_cycle'] = None - # Touch to line start - if x < len(self.g['prefix']) + 1: - screen.move(y,x) - word = '' - # Midle of line - else: - word = word[:-1] - screen.move(y,x-1) - screen.clrtoeol() - self.display_suggest(y,x,word) - screen.move(y,x-1) - - # Another keys - else: - self.g['tab_cycle'] = None - # Explicitly print char - try: - screen.addstr(char) - word += char - self.display_suggest(y,x,word) - screen.move(y,x+1) - except ValueError as e: # got errors here when i adjusted the volume.... - pass - - # Reset - self.close_window() - - def setup_command(self,outfile): - self.data = open(outfile,'a') - - self.g['prev'] = None - self.g['tab_cycle'] = None - self.g['prefix'] = '[gav]: ' - self.g['hist_index'] = 0 - # Load history from previous session - try: - o = open('completer.hist') - self.g['hist'] = [i.strip() for i in o.readlines()] - except: - self.g['hist'] = [] - - def cleanup_command(self): - o = open('completer.hist','a') - o.write("\n".join(self.g['hist'])) - o.close() - self.data.close() - - def handle_command(self, cmd): - r1 = re.search( r'^n\s(.*)$',cmd) - if r1: - # new data collection mode - mode = r1.group(1) - self.g['prefix'] = "[" + mode + "]" - - self.data.write("\n\n# %s\n" % mode) - else: - #winsound.Beep(440,300) - self.data.write(cmd + "\n") - self.data.flush() - - - -def repl_staff(): - - tch = json.loads( open('cache/teacherdata/teachers.json','r').read() ) - newdict = {} - for T in tch: - newdict[T['name']] = 'teacher with id ' + T['login_id'] - c = MyRepl() - - c.set_my_dict(newdict) - c.startup('cache/people_logs.txt') - c.inputloop() - - -def repl_degs(): - - tch = csv.reader( open('cache/attainment_masterlist.csv','r'),delimiter=",") - - newdict = {} - num = 0 - for row in tch: - if num==0: - pass - else: - d = ' ' - if row[0]: d = row[0] - newdict[row[4]] = d - num += 1 - - #print(newdict) - #input('ready') - c = MyRepl() - - c.set_my_dict(newdict) - -#c.startup('cache/g_path_cluster2020_.txt') -# c.inputloop() - -def repl(): - repl_degs() - - - - #input('ready') - c = MyRepl() - - c.set_my_dict(newdict) - -#c.startup('cache/g_path_cluster2020_.txt') -# c.inputloop() - -def repl(): - repl_degs() - - - diff --git a/server.py b/server.py index 502e352..c65f3d3 100644 --- a/server.py +++ b/server.py @@ -20,6 +20,9 @@ import socket this_host = socket.gethostname() print('\n\n server host: ' + this_host, '\n\n') +datafile2 = "cache/datafile.txt" + + LECPATH = "/media/hd2/peter_home_offload/lecture/" host = 'http://192.168.1.6:5000' news_path = '/media/hd2/peter_home/Documents/scripts/browser/'