#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
python3 DowserySummaryPreprepareADV.py

python3 textsum_data_convert.py --command text_to_binary --in_directories train_data --out_files binary_data2/dowsery-train.bin,binary_data2/dowsery-validation.bin,binary_data2/dowsery-test.bin --split 1.0,0,0

python3 textsum_data_convert.py --command text_to_vocabulary --in_directories train_data --out_files binary_data2/vocab.bin

python3 pointer-generator/run_summarization.py --mode=train --data_path=/home/Framework/Prototyp/tensorflow-text-summary/binary_data2/dowsery-* --vocab_path=/home/Framework/Prototyp/tensorflow-text-summary/binary_data2/vocab.bin --log_root=/home/Framework/Prototyp/tensorflow-text-summary/binary_data2 --exp_name=myexperiment2

-> Single Pass
python3 pointer-generator/run_summarization.py --mode=decode --data_path=/home/Framework/Prototyp/tensorflow-text-summary/binary_data/dowsery-train.bin --vocab_path=/home/Framework/Prototyp/tensorflow-text-summary/binary_data/vocab.bin --log_root=/home/Framework/Prototyp/tensorflow-text-summary/binary_data --exp_name=myexperiment


https://github.com/pltrdy/pointer-generator

"""
from colored import fg, bg, attr
import translitcodec
import unicodedata
import os
import sys, time
import MySQLdb as mdb
import codecs
import os
import re
import sys
import codecs
import string
import time
import glob
import getopt
import argparse
from unidecode import unidecode
from datetime import datetime as dTime
from pprint import PrettyPrinter

# Output locations. file_path receives the per-row text files written further
# down in this script; file_path1 is only created here and is not referenced
# again in the visible part of the script.
file_path = "/home/Framework/Prototyp/dowsery-demo/train_data/"
file_path1 = "/home/Framework/Prototyp/dowsery-demo/binary_data/"

# Create both directories up front; an already existing directory is not an error.
os.makedirs(name=file_path, exist_ok=True)
os.makedirs(name=file_path1, exist_ok=True)


# Alternative query that draws a random sample of 25 rows:
#SqlQuery = "SELECT DISTINCT * FROM openjurv4 WHERE 1=1 AND p_hasleitsatz=1 ORDER BY RAND() LIMIT 25;"; # db:dowery_prototype
# Active query (db: dowery_prototype): up to 25000 rows flagged with p_hasleitsatz=1.
SqlQuery = "SELECT DISTINCT * FROM openjurv4 WHERE 1=1 AND p_hasleitsatz=1 LIMIT 25000;"

# https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-17.06.txt.gz

def encodeToUTF8Adv(text):
	"""Return *text* without the characters that cannot be encoded as UTF-8.

	The string is encoded to UTF-8 with the "ignore" error handler (which
	drops unencodable characters such as lone surrogates) and decoded back.

	Args:
		text: The string to sanitise.

	Returns:
		A ``str`` containing only characters representable in UTF-8.
	"""
	utf8_bytes = text.encode('utf-8', "ignore")
	# The original passed "remove" here, which is not a registered codec error
	# handler: it raises LookupError as soon as the handler is looked up and is
	# rejected up front in Python Development Mode / debug builds. "ignore" is
	# the standard handler with the intended "drop bad data" meaning.
	return utf8_bytes.decode('utf-8', "ignore")

def encodeToLatin1(text):
	"""Re-interpret the UTF-8 bytes of *text* as Latin-1 text.

	The string is encoded to UTF-8 (unencodable characters are dropped) and
	the resulting bytes are decoded as Latin-1, so every byte of a multi-byte
	UTF-8 sequence becomes one character of the result. ASCII text passes
	through unchanged.

	Args:
		text: The string to convert.

	Returns:
		The ``str`` obtained by reading the UTF-8 bytes as Latin-1.
	"""
	utf8_bytes = text.encode(encoding='utf-8', errors="ignore")
	return str(utf8_bytes, 'latin-1', "ignore")

def all_same(items):
	"""Tell whether every element of *items* equals its first element.

	Args:
		items: An indexable, iterable collection.

	Returns:
		True if all elements compare equal to ``items[0]``; also True for an
		empty collection, because there is nothing to compare. False as soon
		as one element differs.
	"""
	for element in items:
		if not (element == items[0]):
			return False
	return True

print()
print(SqlQuery)
print()

# Transliteration of German umlauts and sharp s to ASCII digraphs. The table is
# the same for every row, so it is built once instead of once per row.
umlaut_table = {
	ord('ä'): 'ae',
	ord('ö'): 'oe',
	ord('ü'): 'ue',
	ord('Ä'): 'Ae',
	ord('Ö'): 'Oe',
	ord('Ü'): 'Ue',
	ord('ß'): 'ss',
}


def _prepare_text(value):
	"""Return a database column value as cleaned text for a training file.

	Semicolons are replaced by " - ", surrounding whitespace is stripped and
	German umlauts / sharp s are transliterated to ASCII digraphs.

	Args:
		value: The raw column value from the result row; may be ``None``.

	Returns:
		The cleaned ``str``. A ``None`` value (SQL NULL) yields an empty
		string rather than the literal text "None".
	"""
	if value is None:
		return ""
	cleaned = str(value).replace(";", " - ").strip()
	return cleaned.translate(umlaut_table)


# Open a database connection.
# Be sure to change the host IP address, username, password and database name
# to match your own.
# NOTE(review): the credentials of the root user are hard-coded here; consider
# reading them from the environment or an option file instead.
connection = mdb.connect(unix_socket='/var/run/mysqld/mysqld.sock', host="localhost", user="root", passwd="###########99", db="dowery_prototype")
try:
	# DictCursor returns each row as a mapping keyed by column name.
	cursor = connection.cursor(mdb.cursors.DictCursor)
	try:
		cursor.execute(SqlQuery)
		# Fetch all rows of the query at once.
		result_set = cursor.fetchall()
	finally:
		# Close the cursor even when the query fails.
		cursor.close()
finally:
	# Close the connection even when the query fails; all rows are already in
	# memory, so the connection is not needed while the files are written.
	connection.close()

# Write one file per row, named "file-<index>" starting at 0: the cleaned
# p_leitsatz text, a newline, the cleaned p_gruendePlain text, a newline.
for count, row in enumerate(result_set):
	p_leitsatz = _prepare_text(row["p_leitsatz"])
	p_gruendePlain = _prepare_text(row["p_gruendePlain"])

	# The with-statement closes the file even if the write raises.
	with open(os.path.join(file_path, "file-" + str(count)), "w", encoding="utf-8") as fo:
		fo.write(p_leitsatz + "\n" + p_gruendePlain + "\n")

print("####################################")
print("###### BIN FERTIG ##################")
print("####################################")
sys.exit()