"""Fetch the samystocks dividend page with headless Chrome and save its embedded PNG.

The script renders the dividend page for one symbol, looks for anchors whose
``href`` is a base64 PNG data URI, decodes them and writes the image to disk.
``getEntryToMysql`` is a separate helper that lists the symbols stored in MySQL.
"""
import base64
import datetime
import os
import re
import subprocess
import sys
from contextlib import suppress
from io import BytesIO

import MySQLdb as mdb
from MySQLdb import escape_string
from bs4 import BeautifulSoup, SoupStrainer
from PIL import Image  # pip3 install -U Pillow

# install google chrome in ubuntu: https://tecadmin.net/setup-selenium-chromedriver-on-ubuntu/

DATA_DIR = "/home/samystocks/data_installer"
# d.txt was the target of a charts.php dump that is disabled in this script;
# it is still removed during cleanup in case a stale copy is lying around.
CHARTS_DUMP = DATA_DIR + "/d.txt"
DIVIDEND_DUMP = DATA_DIR + "/c.txt"
IMAGE_PATH = DATA_DIR + "/imageToSave.png"

p_symbol = "MSFT"


def getEntryToMysql():
    """Return the ``p_symbol`` rows of ``samystocks_webpage`` as strings.

    Each element is ``str(row)`` of the fetched row tuple (kept as-is so that
    existing callers see the same shape).  On a database error the error is
    printed and the rows collected so far (normally none) are returned.

    Returns:
        list[str]: one entry per row returned by the SELECT.

    Raises:
        MySQLdb.Error: if the connection itself cannot be established.
    """
    started = datetime.datetime.now()
    rows_as_text = []

    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or the environment.
    db = mdb.connect(host="localhost", user="root", passwd="rouTer99",
                     db="SAMYSTOCKS", use_unicode=True, charset="utf8mb4")
    try:
        cursor = db.cursor()
        try:
            cursor.execute("SET NAMES utf8mb4")
            cursor.execute("SET CHARACTER SET utf8mb4")
            # Run the SELECT directly; it takes no parameters.
            cursor.execute("SELECT p_symbol FROM `samystocks_webpage`")
            # Rows must be fetched before the cursor is closed.
            rows_as_text = [str(row) for row in cursor.fetchall()]
        finally:
            cursor.close()
    except mdb.Error as exc:
        print("Error %s: %s" % (type(exc).__name__, exc))
    finally:
        db.close()

    delta = datetime.datetime.now() - started
    print("DBify do getEntryToMysql(): Processing finished after:", delta)
    return rows_as_text


def _dump_dom(url, out_path):
    """Render *url* in headless Chrome and write the dumped DOM to *out_path*."""
    command = [
        "google-chrome",
        "--headless",
        "--disable-gpu",
        "--dump-dom",
        "--no-sandbox",
        url,
    ]
    # An argument list avoids shell parsing of the URL (it contains '?').
    with open(out_path, "wb") as out:
        try:
            subprocess.run(command, stdout=out, check=False)
        except OSError as exc:
            # Best effort, like the original os.system call: report and go on
            # with whatever (possibly empty) dump file exists.
            print("Could not run google-chrome: %s" % (exc,), file=sys.stderr)


def _save_embedded_png(html, image_path):
    """Save base64 PNG data URIs found in anchor hrefs of *html* to *image_path*.

    Every matching anchor is written to the same path, so the last decodable
    image wins.  Returns the number of images written.
    """
    soup = BeautifulSoup(html, features="lxml")
    saved = 0
    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href, or with an ordinary URL, are not images.
        if not href or "data:image/png;base64" not in href:
            continue
        payload = href.replace('data:image/png;base64,', '')
        try:
            image = Image.open(BytesIO(base64.b64decode(payload)))
            image.save(image_path, 'PNG')
        except Exception as exc:
            # Deliberately broad: one undecodable link must not stop the
            # others, but the failure is reported instead of being hidden.
            print("Skipping undecodable image link: %s" % (exc,), file=sys.stderr)
            continue
        saved += 1
    return saved


def main():
    """Dump the dividend page for ``p_symbol``, extract its PNG, then clean up."""
    url = "https://www.samystocks.com/intern/dividend.php?a=" + p_symbol
    _dump_dom(url, DIVIDEND_DUMP)
    try:
        # Read the file that was just written (the dividend dump).
        with open(DIVIDEND_DUMP, 'r', encoding='utf-8') as fp:
            dividend = fp.read()
        _save_embedded_png(dividend, IMAGE_PATH)
    finally:
        # Remove both temp files even if parsing failed; a missing file is fine.
        for path in (CHARTS_DUMP, DIVIDEND_DUMP):
            with suppress(FileNotFoundError):
                os.unlink(path)
    # NOTE(review): the script has always finished with exit status 1, even on
    # success; kept unchanged for callers that may check it -- confirm intent.
    sys.exit(1)


if __name__ == "__main__":
    main()