safe folders
This commit is contained in:
parent
f4c82c237c
commit
1ddf9be13a
|
|
@ -1,16 +1,58 @@
|
|||
import os
|
||||
import builtins as _builtins
|
||||
import codecs as _codecs
|
||||
|
||||
# --- Safe file I/O monkey patches ---
|
||||
# Ensure parent folders exist for write/append modes when using open/codecs.open
|
||||
# Keep a reference to the current built-in open so the wrapper can delegate to it.
_orig_open = _builtins.open
|
||||
# Keep a reference to the current codecs.open so the wrapper can delegate to it.
_orig_codecs_open = _codecs.open
|
||||
|
||||
def _ensure_parent_dir(path):
    """Best-effort creation of the folder that will contain *path*.

    Only ``str``, ``bytes`` and ``os.PathLike`` values are examined; any
    other value is ignored. Nothing is created when the path has no
    directory component or when that directory already exists. Any
    ``Exception`` raised along the way is swallowed so that a failed
    directory check never blocks the caller's open call.

    Always returns ``None``.
    """
    try:
        if not isinstance(path, (str, bytes, os.PathLike)):
            return
        parent = os.path.dirname(os.fspath(path))
        missing = bool(parent) and not os.path.exists(parent)
        if missing:
            os.makedirs(parent, exist_ok=True)
    except Exception:
        # Never block the open call due to directory check errors
        return
|
||||
|
||||
def _open_with_dirs(file, mode='r', *args, **kwargs):
    """Drop-in replacement for ``open`` that creates missing parent folders.

    When *file* is a path (``str``, ``bytes`` or ``os.PathLike``) and *mode*
    can create a file (it contains ``w``, ``a`` or ``x``), the parent
    directory is created first via ``_ensure_parent_dir``. All arguments are
    then forwarded unchanged to the original ``open``; its return value and
    any exception it raises are passed through to the caller.

    Read-only and ``r+`` modes never create directories: they require the
    file to exist already, so creating folders for them would only leave
    stray empty directories behind.
    """
    is_path = isinstance(file, (str, bytes, os.PathLike))
    # A non-string mode is left for the real open() to reject.
    may_create = isinstance(mode, str) and any(flag in mode for flag in 'wax')
    if is_path and may_create:
        _ensure_parent_dir(file)
    # Plain return instead of ``finally: return``, which would silently
    # discard in-flight exceptions such as KeyboardInterrupt.
    return _orig_open(file, mode, *args, **kwargs)
|
||||
|
||||
def _codecs_open_with_dirs(filename, mode='r', encoding=None, errors='strict', buffering=-1):
    """Drop-in replacement for ``codecs.open`` that creates parent folders.

    When *filename* is a path (``str``, ``bytes`` or ``os.PathLike``) and
    *mode* can create a file (it contains ``w``, ``a`` or ``x``), the parent
    directory is created first via ``_ensure_parent_dir``. The call is then
    delegated to the original ``codecs.open`` with the same arguments; its
    return value and exceptions are passed through.

    *buffering* defaults to ``-1``, matching the standard library's
    ``codecs.open``, so callers that rely on the default get the same
    buffering they would get without the patch.
    """
    is_path = isinstance(filename, (str, bytes, os.PathLike))
    # A non-string mode is left for the real codecs.open() to reject.
    may_create = isinstance(mode, str) and any(flag in mode for flag in 'wax')
    if is_path and may_create:
        _ensure_parent_dir(filename)
    # Plain return instead of ``finally: return``, which would silently
    # discard in-flight exceptions such as KeyboardInterrupt.
    return _orig_codecs_open(filename, mode, encoding, errors, buffering)
|
||||
|
||||
# Apply patches once
|
||||
# From here on, code that looks up the built-in open() gets the directory-creating wrapper.
_builtins.open = _open_with_dirs
|
||||
# From here on, codecs.open() resolves to the directory-creating wrapper.
_codecs.open = _codecs_open_with_dirs
|
||||
|
||||
# Patch pandas to_csv to auto-create parent folder if available
|
||||
try:
    import functools as _functools
    import pandas as _pd  # noqa: F401

    _orig_to_csv = _pd.DataFrame.to_csv

    # wraps() keeps the original method's name, docstring and signature
    # visible on the patched attribute (help(), inspect.signature, ...).
    @_functools.wraps(_orig_to_csv)
    def _to_csv_with_dirs(self, path_or_buf=None, *args, **kwargs):
        # Only real paths get a parent folder; buffers and None (which
        # makes to_csv return the CSV text) are passed straight through.
        if isinstance(path_or_buf, (str, bytes, os.PathLike)):
            _ensure_parent_dir(path_or_buf)
        return _orig_to_csv(self, path_or_buf, *args, **kwargs)

    _pd.DataFrame.to_csv = _to_csv_with_dirs
except Exception:
    # Best effort: if pandas is missing or cannot be patched, leave it
    # untouched and carry on without the to_csv convenience.
    pass
|
||||
|
||||
import re, csv
|
||||
from collections import defaultdict
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import pytz, datetime, dateutil, json
|
||||
from datetime import timedelta
|
||||
from dateutil import tz
|
||||
|
||||
import functools
|
||||
|
||||
from functools import reduce
|
||||
|
||||
# Teacher name format changed. Remove commas and switch first to last
|
||||
|
|
@ -84,6 +126,8 @@ def extract_key_values(lst, x):
|
|||
return reduce(lambda acc, item: acc + [item[x]] if isinstance(item, dict) and x in item else acc, lst, [])
|
||||
|
||||
def stripper(s):
|
||||
from bs4 import BeautifulSoup as bs
|
||||
|
||||
REMOVE_ATTRIBUTES = [
|
||||
'lang','language','onmouseover','onmouseout','script','style','font',
|
||||
'dir','face','size','color','style','class','width','height','hspace',
|
||||
|
|
@ -281,6 +325,8 @@ def clean_fn(s):
|
|||
return s
|
||||
|
||||
def format_html(html):
    """Parse *html* with BeautifulSoup's ``html.parser`` and return its ``prettify()`` output."""
    from bs4 import BeautifulSoup as bs

    return bs(html, 'html.parser').prettify()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue