import os import re #from weasyprint import HTML # https://weasyprint.readthedocs.io/en/stable/tutorial.html import glob myAdLink ='Powered by

www.samystocks.com








' p_symbol = "pg" #fn = symbol+"-"+"10k-q4-2019-it.htm" #fn_pdf = symbol+"-"+"10k-q4-2019-it.pdf" myDownloadDir = "/home/samystocks/data_installer/temporar/"+p_symbol.lower() myDownloadDirL = "/home/samystocks/data_installer/temporar/"+p_symbol.lower()+"/*.htm" myDir = glob.glob(myDownloadDirL) for f in myDir: print(f) #e = f.split("/") filename = os.path.basename(f) print(filename) f = open(f, 'r') text = f.read() # python will convert \n to os.linesep f.close() # you can omit in most cases as the destructor will call it text = re.sub("(.*?)", "", text, flags=re.DOTALL) text = re.sub("", "",text, flags=re.DOTALL) text = re.sub("", "",text, flags=re.DOTALL) text = re.sub("", "",text, flags=re.DOTALL) text = re.sub("", "",text, flags=re.DOTALL) text = re.sub("", "", text, flags=re.DOTALL) text = re.sub("
", "", text, flags=re.DOTALL) text = re.sub("
", "", text, flags=re.DOTALL) text = re.sub("
(.*?)
", "", text, flags=re.DOTALL) text = re.sub("https://translate.google.com/translate", "#", text, flags=re.DOTALL) text = re.sub("http://", "", text, flags=re.DOTALL) text = re.sub("https://", "", text, flags=re.DOTALL) text = re.sub("#0000ff", "", text, flags=re.DOTALL) text = re.sub("", "", text, flags=re.DOTALL) myPDFText = myAdLink + text fileW = "/home/samystocks/data_installer/final"+"/"+filename print("Schreibe HTML Datei:",fileW) f = open(fileW, 'w') f.write(myPDFText) # python will convert \n to os.linesep f.close() # you can omit in most cases as the destructor will call it print("Schreibe PDF Datei:") myNewFile = fileW.replace("htm","pdf") os.system("wkhtmltopdf "+fileW+" "+myNewFile) exit(1)