import os
import re
#from weasyprint import HTML # https://weasyprint.readthedocs.io/en/stable/tutorial.html
import glob
# Attribution text. The original literal was split across two physical lines
# inside single quotes, which Python rejects as an unterminated string; the
# same visible characters are kept here with the line break written as "\n".
# NOTE(review): the text after "Powered by" looks incomplete (possibly markup
# or a link was lost) -- confirm the intended value.
myAdLink = 'Powered by\n'
# Symbol used (lower-cased) as the name of the per-symbol download directory.
p_symbol = "pg"
#fn = symbol+"-"+"10k-q4-2019-it.htm"
#fn_pdf = symbol+"-"+"10k-q4-2019-it.pdf"

# Directory holding the downloaded files for this symbol.
myDownloadDir = "/home/samystocks/data_installer/temporar/{}".format(p_symbol.lower())
# Glob pattern matching every .htm file directly inside that directory.
myDownloadDirL = myDownloadDir + "/*.htm"
# List of paths matching the pattern (empty when nothing matches).
myDir = glob.glob(myDownloadDirL)
for f in myDir:
print(f)
#e = f.split("/")
filename = os.path.basename(f)
print(filename)
f = open(f, 'r')
text = f.read() # python will convert \n to os.linesep
f.close() # you can omit in most cases as the destructor will call it
text = re.sub("(.*?)", "", text, flags=re.DOTALL)
text = re.sub("", "",text, flags=re.DOTALL)
text = re.sub("