import os
import re
#from weasyprint import HTML # https://weasyprint.readthedocs.io/en/stable/tutorial.html
import glob
# Advertisement text, presumably appended to generated output.
# NOTE(review): the original single-quoted literal was split across two lines
# (an unterminated-string syntax error). The line break is kept here as an
# explicit "\n". Any link markup that used to follow "Powered by" is not
# visible in this file -- confirm and restore it if required.
myAdLink = 'Powered by\n'
# Ticker symbol used (lower-cased) as the name of the download sub-directory.
p_symbol = "jnj"
#fn = symbol+"-"+"10k-q4-2019-it.htm"
#fn_pdf = symbol+"-"+"10k-q4-2019-it.pdf"
# Per-symbol download directory, and the glob pattern for the .htm files in it.
myDownloadDir = "/home/samystocks/data_installer/temporar/"+p_symbol.lower()
myDownloadDirL = myDownloadDir+"/*.htm"
# Paths matching the pattern; an empty list when nothing matches.
myDir = glob.glob(myDownloadDirL)
# Print every matched path followed by its last "/"-separated component.
for f in myDir:
    print(f)
    e = f.split("/")
    # NOTE(review): e[-1:] is a one-element list (e.g. ['x.htm']), not a
    # string. Kept unchanged because later code may rely on the list; use
    # e[-1] if a plain file name is what is wanted.
    filename = e[-1:]
    print(filename)
"""
#fn = p_symbol.lower()+"-"+qu+"-"+str(myYear)+"-"+"en.htm" #msft-10q-q3-2019-en.htm
#fileW = "/home/samystocks/data_installer/"+symbol+"/"+fn
#fileW = "/home/samystocks/data_installer/"+fn
#filePDF = "/home/samystocks/data_installer/"+fn_pdf
#fileR = "/home/samystocks/data_installer/temporar/MICROSOFT CORPORATION it.htm"
f = open(f, 'r')
text = f.read() # python will convert \n to os.linesep
f.close() # you can omit in most cases as the destructor will call it
text = re.sub("(.*?)", "", text, flags=re.DOTALL)
text = re.sub("", "",text, flags=re.DOTALL)
text = re.sub("