from datetime import datetime
import errno    
import os
import requests
import re
import json
import MySQLdb as mdb
from MySQLdb import escape_string
import urllib.request
import requests	# pip3 install --upgrade requests
from urllib.parse import urlparse# pip3 install --upgrade urlparse
import urllib3 # url=urllib.unquote(url).decode('utf8')
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from weasyprint import HTML	# https://weasyprint.readthedocs.io/en/stable/tutorial.html
import sys, unicodedata, re
import datetime
import langid	# https://github.com/saffsd/langid.py
from langdetect import detect	# also https://github.com/saffsd/langid.py
#import deepl	# https://github.com/freundTech/deepl-cli
import six
import uuid

#from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
from sumy.summarizers.reduction import ReductionSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

import json
#pip3 install -U pydeepl
import spacy  # See "Installing spaCy"
import justext	# pip3 install -U justext
import pydeepl	# pip3 install -U pydeepl
#import textwrap	# pip3 install -U textwrap
#from googletrans import Translator #pip3 install -U googletrans
# pip3 install -U google-cloud-translate
# pip3 install -U google-cloud-storage

#translator 			= Translator()
nlp_de				= spacy.load('de_core_news_sm')
nlp_en 				= spacy.load('en_core_web_sm')
nlp_de.max_length 	= 1000000
nlp_en.max_length 	= 1000000

nlp_allowed 		= [u"NN",u"NNP",u"NNPS",u"PROPN",u"NOUN",u"NE", u"NNE"]
verb_allowed 		= [u"VMFIN", u"VMINF", u"VMPP", u"VVFIN", u"VVIMP", u"VVINF", u"VVIZU", u"VVPP", u"VERB"]

# https://www.tutorialspoint.com/How-to-trim-down-non-printable-characters-from-a-string-in-Python
# Get all unicode characters
all_chars 				= (chr(i) for i in range(sys.maxunicode))
# Get all non printable characters
control_chars 			= ''.join(c for c in all_chars if unicodedata.category(c) == 'Cc')
# Create regex of above characters
control_char_re 		= re.compile('[%s]' % re.escape(control_chars))
re_pattern 				= re.compile(u'[^\u0000-\uD7FF\uE000-\uFFFF]', re.UNICODE)

UserAgent				= "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
UserAgentMobile			= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
Headers 				= {'user-agent': UserAgent, 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate'}
HeadersSimple			= {'user-agent': UserAgentMobile, 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate'}
HeadersSimpleADV		= {'user-agent': UserAgentMobile, 'Authorization': 'ce78143f444846d14d338f0da26a2434', 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate'}
# curl -X GET -H "Authorization: ce78143f444846d14d338f0da26a2434" "https://free.donreach.com/shares?providers=facebook,google,twitter,linkedin,xing&url=http://9gag.com/"
## csvsql --db mysql://root:###########99@localhost:3306/SAMYSTOCKS --tables shareprices_daily --insert /home/samystocks/simfin/regular_download/2019-10-14/us-shareprices-daily.csv

bonusList 				= ["dollar","euro","$","€","revenue","money","growth","company","Earning","Price","Rating","Debt","Equity","Return","margin","Gross margin","Operating margin","Net margin","EBITDA","Cash flow margin","Return on assets","Return on equity","Return on invested capital","cashflow","assets","capital","turnover","Interest","income","Payout","ROE","investor","stock","invest","investment","dividend","cash","flow"]

stigmaList				= ["XBRL"]

#print("todo: mysql aktivieren und json ins sql schreiben")
#exit(1)

def _is_wordlike(tok):
	return tok.orth_ and tok.orth_[0].isalpha()

def sentence_division_suppresor(doc):
	"""Spacy pipeline component that prohibits sentence segmentation between two tokens that start with a letter.
	Useful for taming overzealous sentence segmentation in German model, possibly others as well."""
	for i, tok in enumerate(doc[:-1]):
		if _is_wordlike(tok) and _is_wordlike(doc[i + 1]):
			doc[i + 1].is_sent_start = False
	return doc

nlp_de.add_pipe(sentence_division_suppresor, name='sent_fix', before='parser')
nlp_en.add_pipe(sentence_division_suppresor, name='sent_fix', before='parser')

def translateText(txttranslate1, target):
	
	textList = wrap(txttranslate1, 4500)
	myFinalReturnText = str("")
	
	for txttranslate in textList:
	
		#def translateText(text, target, model=translate.NMT):
		translation=""
		#translate_client = translate.Client()

		if isinstance(txttranslate, six.binary_type):
			txttranslate = txttranslate.decode('utf-8')

		#try:
		# Text can also be a sequence of strings, in which case this method
		# will return a sequence of results for each text.
		#result = translate_client.translate(text, target_language=target, model=model)
		subscriptionKey = 'eaac938f51ab405998eab07017b0bb8f'
		subscriptionKey = 'ab4e516e288146f88d6b6cb001171d12'

		# If you encounter any issues with the base_url or path, make sure
		# that you are using the latest endpoint: https://docs.microsoft.com/azure/cognitive-services/translator/reference/v3-0-translate
		base_url = 'https://api.cognitive.microsofttranslator.com'
		path = '/translate?api-version=3.0'
		#params = '&to=de&to=it'
		params = '&to='+target.lower()
		constructed_url = base_url + path + params

		headers = {
			'Ocp-Apim-Subscription-Key': subscriptionKey,
			'Content-type': 'application/json',
			'X-ClientTraceId': str(uuid.uuid4())
		}

		# You can pass more than one object in body.
		body = [{
			'text' : txttranslate
		}]
		request 	= requests.post(constructed_url, headers=headers, json=body)
		resp 		= request.json()
		response 	= resp[0]
		#print(type(response))
		#print(response['translatedText']['text'])
		#print(request)
			
		#json1_data = json.loads(response[0])
		#print(json1_data)

		for key,val in response.items():
			if key == "translations":
				for key2,val2 in response[key][0].items():
					#print(key2, " --- >", val2)
					if key2 == "text":
						#return val2 #response[key][0][key2]
						myFinalReturnText = myFinalReturnText +" "+ val2
	
	return myFinalReturnText
	
def translateTextDeepL(targetLang, text):
	translation=""
	try:
		translation, extra_data = deepl.translate(text, target=targetLang)
	except Exception as e:
		print("translateTextDeepL(): DeepL Translation failed: ", e)
		print("Unexpected error:", sys.exc_info()[0])
		exc_type, exc_obj, exc_tb = sys.exc_info()
		fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
		print(exc_type, fname, exc_tb.tb_lineno)
	return translation

def google_translate(myText,toLanguage):
	# https://github.com/GoogleCloudPlatform/python-docs-samples/tree/master/translate/cloud-client
	# [START translate_quickstart]
	# Imports the Google Cloud client library
	from google.cloud import translate

	# Instantiates a client
	translate_client = translate.Client()

	# The text to translate
	text = myText
	# The target language
	target = toLanguage

	# Translates some text into Russian
	translation = translate_client.translate(
		text,
		target_language=target)

	# print(u'Text: {}'.format(text))
	# print(u'Translation: {}'.format(translation['translatedText']))
	# [END translate_quickstart]
	return translation['translatedText']
	
def wrap(s, w):
	"""
	:param s: str; source string
	:param w: int; width to split on
	"""
	return [s[i:i + w] for i in range(0, len(s), w)]

def split_sentences(text):
	"""
	rList 	= list()
	lFlag	= detectTextLanguage(text)
	if lFlag == "de":
		nlp_de.max_length = len(text) + 1
		doc = nlp_de(text)
	else:
		nlp_en.max_length = len(text) + 1
		doc = nlp_en(text)
	for sent in doc.sents:
		rList.append(str(sent))
	
	#return TAG_RE.sub('', text)
	#return re.split(r'(?<=[^A-Z].[.!?]) +(?=[A-Z])', text)#, re.MULTILINE)
	#####DER WAR DER BESTE: return re.split(r'(?<=[^A-Z\{\}].[.!?]) +(?=[A-Z])', text)#, re.MULTILINE)
	#return [s.strip() for s in re.split('[\.\?!]' , text) if s]
	return rList
	"""
	rList 				= list()
	nlp_en.max_length 	= len(text) + 1
	doc 				= nlp_en(text)
	for sent in doc.sents:
		rList.append(str(sent))
	
	return rList
	
def doLsaSummarizer(text):
	"""
	LANGUAGE 		= "german"
	if "en" in Language.lower():
		#LANGUAGE_DE = "german"
		LANGUAGE 	=	 "english"
	"""
	LANGUAGE 		= "english"
	SENTENCES_COUNT	= 85
	parser 			= PlaintextParser.from_string(text,Tokenizer(LANGUAGE))
		
	# or for plain text files
	# parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
	stemmer 		= Stemmer(LANGUAGE)
	
	summarizer 		= Summarizer(stemmer)
	summarizer.stop_words = get_stop_words(LANGUAGE)
	summarizer.null_words = get_stop_words(LANGUAGE)
	summarizer.bonus_words = bonusList
	summarizer.stigma_words = stigmaList
	
	contentText		= str("")
	s_count			= 0
	for sentence in summarizer(parser.document, SENTENCES_COUNT):
		if s_count <= SENTENCES_COUNT:
			s_sent = str(sentence)
			contentText=contentText+s_sent+" "
			s_count+=1
	
	return contentText

def doReductionSummarizer(text, Language):
	text = beautifyUpperLowercase(text)
	
	LANGUAGE = "german"
	if "en" in Language.lower():
		#LANGUAGE_DE = "german"
		LANGUAGE =	 "english"
	
	SENTENCES_COUNT	= 3
	parser 			= PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
		
	# or for plain text files
	# parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
	#stemmer = Stemmer(LANGUAGE)
	summarizer = ReductionSummarizer(Stemmer(LANGUAGE))
	
	summarizer.stop_words = get_stop_words(LANGUAGE)
	#summarizer.null_words = get_stop_words(LANGUAGE)
	#summarizer.bonus_words = [MainKeyword,SubKeywords]
	#summarizer.stigma_words = ["und", "der", "die", "das", "oder", "wie", "aber"]
	
	contentText=""
	s_count=0
	for sentence in summarizer(parser.document, SENTENCES_COUNT):
		if s_count < SENTENCES_COUNT:
			s_sent = str(sentence)
			contentText=contentText+s_sent+" "
			s_count+=1
	
	return contentText

def unique(iterable):
	""" Returns a list copy in which each item occurs only once (in-order).
	"""
	seen = set()
	return [x for x in iterable if x not in seen and not seen.add(x)]

def remove_control_chars(s):
	return control_char_re.sub('', s)

def detectTextLanguage(text):
	text	= str(text)
	languz1	= detectTextLanguage1(text)
	languz2	= detectTextLanguage2(text)
	if languz1 == languz2:
		return languz1.lower()
	else:
		return languz2.lower()
	return 'de'

def detectTextLanguage2(text):	# https://github.com/saffsd/langid.py
	lang	="de"
	text	=str(text)
	langid.set_languages(['de','en','es','it','fr'])
	try:
		langList=langid.classify(text)
		#print(langList[0])
		if lang in ['de','en','es','it','fr']:
			return langList[0].lower()
	except Exception as e:
		1#print("Language Detection failed: ",e)
	return 'de'
	
def detectTextLanguage1(text):
	lang	="de"
	text	=str(text)
	try:
		lang=detect(text)
	except Exception as e:
		1#print("Language Detection failed: ",e)
	if lang in ['de','en','es','it','fr']:
		return lang.lower()
	return 'de'

"""
Quelle: https://github.com/chiphuyen/lazynlp/blob/master/lazynlp/cleaner.py
"""
def replace_unprintable(txt):
	"""Replace non-printable characters with printable characters
	""" 
	unprint_file	= "/home/unaique/library/blacklists/unprintable_chars.txt"
	printable 		= set(string1.printable)
	lines 			= open(os.path.join(dir_path, unprint_file), 'r').readlines()
	chars 			= {line.strip().split(':')[0]: line.strip().split(':')[1] for line in lines}
	return ''.join([c if c in printable else chars[c] for c in txt])

def remove_html_tags(text):
	"""Remove html tags from a string"""
	clean = re.compile('<.*?>')
	return re.sub(clean, '', text)
	
def preprocessorInternal(dom):
    "Removes unwanted parts of DOM."
    options = {
        "processing_instructions": False,
        "remove_unknown_tags": True,
        "safe_attrs_only": False,
        "page_structure": False,
        "annoying_tags": False,
        "frames": False,
        "meta": False,
        "links": False,
        "javascript": False,
        "scripts": True,
        "comments": True,
        "style": True,
        "embedded": True,
        "forms": True,
        "kill_tags": ("head",),
    }
    cleaner = Cleaner(**options)
    return cleaner.clean_html(dom)
	
def parse_html(page):
	""" 
	Clean HTML tags for webpages that aren't Gutenberg books
	# https://github.com/miso-belica/jusText/tree/dev/justext/stoplists
	"""
	#page 		= preprocessorInternal(page1)
	tmpPage 	= remove_html_tags(page)
	lang		= detectTextLanguage(tmpPage)
	try:
		lang 	= lang.lower()
		#parts = justext.justext(page, justext.get_stoplist('English'))
		if lang == "de" or lang in "de":
			parts = justext.justext(page, justext.get_stoplist('German'))
		elif lang == "en" or lang == "us" or lang in "en" or lang in "us":
			parts = justext.justext(page, justext.get_stoplist('English'))
		else:
			parts = justext.justext(page, justext.get_stoplist('German'))
		
	except lxml.etree.ParserError as e:
		print('library.Htmlify() Page empty')
		return ''
	except UnicodeDecodeError as e:
		print("library.Htmlify() Can't decode utf-8")
		return ''
	paragraphs = []
	for part in parts:
		if not part.is_boilerplate:
			paragraphs.append(part.text)
	return '\n\n'.join(paragraphs)

def count_words(text):
	return len(text.split())

def removeDumpSentences(iList):
	rList = list()
	for e in iList:
		c = count_words(e)
		if len(e) > 30 and c > 5:
			rList.append(e)
	return "".join(rList)

def encodeToLatin1(text):
	text 		= text.replace('ß','ss')
	encResults  = text.encode('utf-8', "ignore")
	#encResults = text.encode('utf-8', "ignore")
	s_string	= str(encResults.decode('latin-1', "ignore"))
	#textv1 		= re_pattern.sub(u'\uFFFD', s_string)
	return s_string

def encodeToUTF8Adv(text):
	encResults 	= text.encode('utf-8', "ignore")
	#return str(encResults.decode('latin-1', "ignore"))
	s_string	= str(encResults.decode('utf-8', "remove"))
	#textv1 		= re_pattern.sub(u'\uFFFD', s_string)
	return s_string
	
def encodeToUTF8(text):
	return text.encode('utf-8', "ignore")

def insertEntryToMysql(p_company_name, p_company_description,p_investor_relations,p_company_link,p_symbol,p_isin,p_dividend_payer,p_dividend_history,p_trafficlight,p_smileytype,p_qualityscore_details,p_qualityscore,p_kurs,p_summary):
	a 				= datetime.datetime.now()
	"""
	myWordCloudJson = str(myWordCloudJson)
	p_simpletext	= remove_control_chars(p_simpletext)
	MainKeyword 	= re_pattern.sub(u'\uFFFD', MainKeyword)
	p_simpletext	= p_simpletext.encode('unicode_escape').decode('unicode_escape')
	p_timestamp 	= time.time()
	"""
	
	db 				= mdb.connect(host="localhost",user="root", passwd="###########99", db="SAMYSTOCKS", use_unicode=True, charset="utf8mb4")
	cursor 			= db.cursor()
	cursor.execute("SET NAMES utf8mb4");
	cursor.execute("SET CHARACTER SET utf8mb4");
	
	# Execute the SQL command
	sql = "INSERT INTO samystocks_webpage (p_company_name, p_company_description,p_investor_relations,p_company_link,p_symbol,p_isin,p_dividend_payer,p_dividend_history,p_trafficlight,p_smileytype,p_qualityscore_details,p_qualityscore,p_lastmodified,p_kurs,p_summary) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
	p_lastmodified 	= datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
	try:
		cursor.execute(sql, (p_company_name, p_company_description,p_investor_relations,p_company_link,p_symbol,p_isin,p_dividend_payer,p_dividend_history,p_trafficlight,p_smileytype,p_qualityscore_details,p_qualityscore,p_lastmodified,p_kurs,p_summary))
		db.commit()
		# disconnect from server
		cursor.close()
	except Exception as e:
		print("Error %d: %s" % (e.args[0],e.args[1]))
		exc_type, exc_obj, exc_tb = sys.exc_info()
		fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
		print(exc_type, fname, exc_tb.tb_lineno)
	##	return -1
	##except:
	##   # Rollback in case there is any error
	##   cursor.rollback()
	
	b 			= datetime.datetime.now()
	delta 		= b - a
	print("DBify do dbify.insertCacheToMysql(): Processing finished after:", delta)
	return 1

def getWebpagesSimple(link):
	if link.lower().startswith(("http", "https", "ftp", "ftps")):
		#print("getWebpagesSimple():", link)
		try:
			r1 		= requests.get(link, headers=HeadersSimple, timeout=5, verify=False)
			myText 	= r1.text
			myText	= myText.replace('\n', ' ')
			myText	= myText.replace("\n", ' ')
			if len(myText) >= 100:
				return myText.strip()
		except Exception as er:
			#print("Unexpected error: getWebpagesSimple(link)", sys.exc_info()[0])
			exc_type, exc_obj, exc_tb = sys.exc_info()
			fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
			#print(exc_type, fname, exc_tb.tb_lineno)
			
		try:
			request 	= urllib3.Request(link, headers=HeadersSimple, timeout=5, context=ssl._create_unverified_context())
			contents 	= urllib3.urlopen(request).read()
			contents	= contents.replace('\n', ' ')
			contents	= contents.replace("\n", ' ')
			if len(contents) >= 100:
				return contents.strip()
		except Exception as er:
			#print("Unexpected error: getWebpagesSimple(link)", sys.exc_info()[0])
			exc_type, exc_obj, exc_tb = sys.exc_info()
			fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
			#print(exc_type, fname, exc_tb.tb_lineno)
		
	#print("htmlify.getWebpagesSimple(link): Empty HTML Document!")
	return str("")

def getCompanyName(input):
	db 				= mdb.connect("localhost","root","###########99","SAMYSTOCKS")
	# prepare a cursor object using cursor() method
	cursor 			= db.cursor()
	try:
		sql 		= "SELECT Company_Name, IndustryId FROM `companies` WHERE `Ticker` = '"+input+"' Limit 1;"
		# Execute the SQL command
		cursor.execute(sql)
		data		= cursor.fetchall()
		returnData 	= [x[0] for x in data]
		for r in data:
			if r:
				s=str(r[0])
				s=s.replace('"','')
				return s
	except:
		1
	db.close()
	return str("")
	
def getCompanyIndustry(input):
	db 				= mdb.connect("localhost","root","###########99","SAMYSTOCKS")
	# prepare a cursor object using cursor() method
	cursor 			= db.cursor()
	try:
		sql 		= "SELECT Company_Name, IndustryId FROM `companies` WHERE `Ticker` = '"+input+"' Limit 1;"
		# Execute the SQL command
		cursor.execute(sql)
		data		= cursor.fetchall()
		for r in data:
			if r:
				s=str(r[1])
				#s=s.replace('"','')
				return s
	except:
		1
	db.close()
	return str("")

def getSektor(input):
	db 				= mdb.connect("localhost","root","###########99","SAMYSTOCKS")
	# prepare a cursor object using cursor() method
	cursor 			= db.cursor()
	try:
		#SELECT Industry FROM `industries` WHERE IndustryId=IndustryId Limit 1
		sql 		= "SELECT Industry FROM `industries` WHERE IndustryId='"+input+"' Limit 1;"
		# Execute the SQL command
		cursor.execute(sql)
		data		= cursor.fetchall()
		returnData 	= [x[0] for x in data]
		for r in data:
			if r:
				s=str(r[0])
				s=s.replace('"','')
				#s=s.replace('"','')
				return s
	except:
		1
	db.close()
	return str("")

def getKurs(input):
	db 				= mdb.connect("localhost","root","###########99","SAMYSTOCKS")
	# prepare a cursor object using cursor() method
	cursor 			= db.cursor()
	resultsList		= list()
	try:
		#SELECT Industry FROM `industries` WHERE IndustryId=IndustryId Limit 1
		sql 		= "SELECT Date,Close FROM `shareprices_daily` WHERE `Ticker` ='"+input+"' Limit 20000;"
		# Execute the SQL command
		cursor.execute(sql)
		data		= cursor.fetchall()
		#returnData 	= [x[0] for x in data]
		for r in data:
			if r:
				d=str(r[0])
				k=str(r[1])
				e = {d:k}
				resultsList.append(e)
				#s=s.replace('"','')
				#return r
	except:
		1
	db.close()
	j=json.dumps(resultsList)
	return j

def getDividende(input):
	db 				= mdb.connect("localhost","root","###########99","SAMYSTOCKS")
	# prepare a cursor object using cursor() method
	cursor 			= db.cursor()
	resultsList		= list()
	try:
		#SELECT Industry FROM `industries` WHERE IndustryId=IndustryId Limit 1
		sql 		= "SELECT Date,Dividend FROM `shareprices_daily` WHERE `Ticker` ='"+input+"' AND Dividend > 0 Limit 20000;"
		# Execute the SQL command
		cursor.execute(sql)
		data		= cursor.fetchall()
		#returnData 	= [x[0] for x in data]
		for r in data:
			if r:
				d=str(r[0])
				k=str(r[1])
				e = {d:k}
				resultsList.append(e)
				#s=s.replace('"','')
				#return r
	except:
		1
	db.close()
	j=json.dumps(resultsList)
	return j
	
def getSource(input, year):
	db 				= mdb.connect("localhost","root","###########99","SAMYSTOCKS")
	# prepare a cursor object using cursor() method
	cursor 			= db.cursor()
	resultsList		= list()
	try:
		#SELECT Industry FROM `industries` WHERE IndustryId=IndustryId Limit 1
		sql 		= "SELECT `Source`,`Fiscal Period` FROM `balance_full_quarterly` WHERE `Ticker` = '"+input+"' AND `Fiscal Year` ='"+year+"' Limit 8;"
		# Execute the SQL command
		cursor.execute(sql)
		data		= cursor.fetchall()
		#print(data)
		for r in data:
			if r:
				s=str(r[0])
				d=str(r[1])
				s=s.replace('"','')
				resultsList.append({d:s})
				#s=s.replace('"','')
				#return s
	except:
		1
	db.close()
	return resultsList


#t=translateText("I am a good guy walking through the woods.", "DE")
#print(t)
#exit(1)

# https://dividendenfluss.de/dividenden-koenige-liste-2018-26-aktien-mit-ueber-50-jahre-dividenden-wachstum/
# Beschreibungstext: https://simfin.com/data/companies/89661
# ISIN: https://www.google.com/search?q=KO+isin
# Investor Relations: https://www.google.com/search?q=KO+investor+relations
# Übersetzer: https://www.deepl.com/translator#en/de/
p_symbol 					="KO"	# Großschreibung
myYear 						="2019"
p_isin						="US1912161007"
#myCompany_description = "Microsoft Corp is a technology company. It develops, licenses, and supports a wide range of software products and services. Its business is organized into three segments: Productivity and Business Processes, Intelligent Cloud, and More Personal Computing."
p_company_description 		="Coca-Cola Co ist ein Unternehmen für alkoholfreie Getränke, das eine Vielzahl von kohlensäurehaltigen und kohlensäurefreien Marken herstellt, darunter Coca-Cola, Diet Coke, Fanta, Sprite, Minute Maid, Powerade und Dasani."
p_investor_relations 		="https://www.coca-colacompany.com/investors"
p_company_link				="https://www.cocacola.de/de/home/"
p_dividend_payer			="1"
p_trafficlight				="2"	# 0:rot, 1:gelb, 2:grün
p_smileytype				="1"	# 1: yes, 0: no
p_qualityscore				= "-1";
p_qualityscore_details		= "-1"

#Company name: SELECT Company_Name, IndustryId FROM `companies` WHERE `Ticker` = symbol Limit 1
p_company_name				=getCompanyName(p_symbol)
print("Company Name:"+p_company_name)
print()
industryID=getCompanyIndustry(p_symbol)
#print("Industry ID:"+industryID)
#print()
s=getSektor(industryID)
print("Sektor:"+s)
print()
p_kurs=getKurs(p_symbol)
#print("Kurs:"+len(p_kurs))
#print("Kurs:"+p_kurs)
print()
p_dividend_history=getDividende(p_symbol)
#print("Dividende:"+p_dividend_history)
print()
ms=getSource(p_symbol, myYear)
#ms1=getSource(p_symbol, "2018")

myDownloadDir 	= "/home/samystocks/data_installer/temporar/"+p_symbol.lower()
if not os.path.exists(myDownloadDir):
    os.makedirs(myDownloadDir)

resultsList2 	= list()
for myListkey in ms:
	for key in myListkey:
		#print("KEY:",key)
		#print("URL",myListkey[key])
		qu	=key
		url	=myListkey[key]
		h	=getWebpagesSimple(url)
		l	=h.split(">")
		c	=0
		for ele in l:
			c	= c+1
			
			if ele.find("10-Q") != -1:
				hit = l[c+1]
				if hit.find(".htm") != -1:
					s	= hit.replace('<a href=','')
					s	= s.replace('"','')
					s	= s.replace('ix?doc=/','')
					#print("HIT FORM 10-Q:"+"https://www.sec.gov"+s)
					item =str("10Q-"+qu).lower()
					resultsList2.append({item:"https://www.sec.gov"+s})
			elif ele.find("10-K") != -1:
				hit = l[c+1]
				if hit.find(".htm") != -1:
					s	= hit.replace('<a href=','')
					s	= s.replace('"','')
					s	= s.replace('ix?doc=/','')
					#print("HIT FORM 10-K:"+"https://www.sec.gov"+s)
					item =str("10K-"+qu).lower()
					resultsList2.append({item:"https://www.sec.gov"+s})
			"""
			if ele.find("10-K") != -1:
				hit = l[c+1]
				if hit.find(".htm") != -1:
					s	= hit.replace('<a href=','')
					s	= s.replace('"','')
					#print("HIT FORM 10-K:"+"https://www.sec.gov"+s)
					item =str("10K-"+qu).lower()
					resultsList2.append({item:"https://www.sec.gov"+s})
			"""		
	
for myListkey2 in resultsList2:
	for key2 in myListkey2:
		qu	=str(key2)
		url	=str(myListkey2[key2])
		print("Quartal:",qu)
		print("URL:",url)
		
		h	=getWebpagesSimple(url)
		de 	=qu.split(':')
		fn	=p_symbol.lower()+"-"+qu+"-"+str(myYear)+"-"+"en.htm" #msft-10q-q3-2019-en.htm
		fnp	=p_symbol.lower()+"-"+qu+"-"+str(myYear)+"-"+"en.pdf" #msft-10q-q3-2019-en.htm
		print("Schreibe HTML Datei:",fn)
		f 	=open(myDownloadDir+"/"+fn, 'w')
		f.write(h)  # python will convert \n to os.linesep
		f.close()  # you can omit in most cases as the destructor will call it
		print("Schreibe PDF Datei:",fnp)
		#html = HTML(string=h)
		#html.write_pdf(myDownloadDir+"/"+fnp)
		myHTM =myDownloadDir+"/"+fn
		myPDF =myDownloadDir+"/"+fnp
		os.system("wkhtmltopdf "+myHTM+" "+myPDF)
		plainText =parse_html(h)
		#mySentences =split_sentences(plainText)
		#myFinalSentenceString =removeDumpSentences(mySentences)
		#myFinalSentenceString = " ".join()
		#myLSASummary =doLsaSummarizer(myFinalSentenceString)
		myLSASummary =doLsaSummarizer(plainText)
		print("#########################")
		print("#########################")
		print("#########################")
		#print(encodeToLatin1(myLSASummary))
		print()
		print("Laenge bei 43 Saetzen:",len(myLSASummary))
		#textListEN = wrap(myLSASummary, 4500)
		#myLSASummary = myLSASummary[0:4990]
		
		print("#########################")
		print("#########################")
		print("#########################")
		
		#isE=encodeToLatin1(translationDE)
		#print(translationDE)
		#exit(1)
		translationDE=translateText(myLSASummary,'DE')
		translationFR=translateText(myLSASummary,'FR')
		translationES=translateText(myLSASummary,'ES')
		translationIT=translateText(myLSASummary,'IT')
		translationRU=translateText(myLSASummary,'RU')
		#translationDE = google_translate(myLSASummary,'DE')
		#translationFR = google_translate(myLSASummary,'FR')
		#translationES = google_translate(myLSASummary,'ES')
		#translationIT = google_translate(myLSASummary,'IT')
		#translationRU = google_translate(myLSASummary,'RU')
		#print(encodeToLatin1(translationsDE))
		
		wTranslate 	= {"en":myLSASummary, "de" : translationDE, "fr":translationFR, "es":translationES, "it":translationIT, "ru":translationRU}
		with open('/home/samystocks/data_installer/json_result.json', 'w', encoding='utf-8') as fp:
			json.dump(wTranslate, fp, indent=4, sort_keys=True, ensure_ascii=False)#.encode('utf8') # encoding='utf8'
			#fp.write(jk)
		fp.close()
				

with open('/home/samystocks/data_installer/json_result.json', 'r', encoding='utf-8') as fp:
	p_summary = fp.read()
fp.close()

print("Eintrag ins SQL schreiben")
insertEntryToMysql(p_company_name, p_company_description,p_investor_relations,p_company_link,p_symbol,p_isin,p_dividend_payer,p_dividend_history,p_trafficlight,p_smileytype,p_qualityscore_details,p_qualityscore,p_kurs,p_summary)

print()
print("Dann Firefox Öffnen mit Google Translate https://translate.google.com/?hl=de")
print("Dann die umgewandelte PDF Datei hochladen und in verschiedene Sprachen transformieren lassen")
print("Dann die umgewandelte Datei im Firefox mit Speichern unter als Vollständige Webseite speichern")
print("Dann den python3 link_remover.py drüber laufen lassen")
exit(1)