# -*- coding: utf-8 -*-
#!/usr/bin/env python
"""Export openjurv4 rows that have a Leitsatz to a semicolon-separated CSV.

The script runs ``SqlQuery`` against the ``dowery_prototype`` MySQL database
and appends one line per result row to ``_CSV_PATH``. The line layout mimics
a ten-column review file: the first eight columns are filled with fixed
placeholder text, the ninth ("Summary") receives ``p_leitsatz`` and the tenth
("Text") receives ``p_gruendePlain``.
"""
# https://developers.google.com/custom-search/docs/xml_results#countryCodes
# https://www.linkedin.com/countserv/count/share?format=jsonp&url=https://www.buzzerstar.com
# pip install --upgrade spacy tensorflow gensim sumy keras markovify google-api-python-client beautifulsoup4

import argparse
import codecs
import getopt
import glob
import os
import re
import string
import sys
import time
from datetime import datetime as dTime
from pprint import PrettyPrinter

import MySQLdb as mdb
from colored import attr, bg, fg
from unidecode import unidecode

# Alternative query (random sample of 25 rows):
# SqlQuery = "SELECT DISTINCT * FROM openjurv4 WHERE 1=1 AND p_hasleitsatz=1 ORDER BY RAND() LIMIT 25;"  # db:dowery_prototype
SqlQuery = "SELECT DISTINCT * FROM openjurv4 WHERE 1=1 AND p_hasleitsatz=1 LIMIT 5000;"  # db:dowery_prototype

# https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-17.06.txt.gz

# Destination file; rows are appended so several runs can accumulate.
_CSV_PATH = "/home/Framework/Prototyp/amazon-reviews/reviews1.csv"
# The first eight columns carry no real data: every data row repeats the
# column names as placeholder values, exactly as the header does.
_CSV_ROW_PREFIX = "Id;ProductId;UserId;ProfileName;HelpfulnessNumerator;HelpfulnessDenominator;Score;Time;"
_CSV_HEADER = _CSV_ROW_PREFIX + "Summary;Text"


def encodeToUTF8Adv(text):
    """Return *text* after a UTF-8 encode/decode round trip.

    Characters that cannot be encoded as UTF-8 (e.g. lone surrogates) are
    dropped during the encode step.
    """
    encResults = text.encode('utf-8', "ignore")
    # The original passed "remove", which is not a registered codec error
    # handler and would raise LookupError if it were ever consulted.
    return str(encResults.decode('utf-8', "ignore"))


def encodeToLatin1(text):
    """Return the UTF-8 bytes of *text* re-interpreted as Latin-1.

    ASCII characters are unchanged; every non-ASCII character turns into
    one Latin-1 character per UTF-8 byte.
    """
    encResults = text.encode('utf-8', "ignore")
    return str(encResults.decode('latin-1', "ignore"))


def all_same(items):
    """Return True when every element of *items* equals the first one.

    An empty sequence yields True.
    """
    return all(x == items[0] for x in items)


print()
print(SqlQuery)
print()

# Open a database connection.
# Be sure to change the host IP address, username, password and database
# name to match your own.
# NOTE(review): credentials are hard-coded here; consider moving them to
# configuration or the environment.
connection = mdb.connect(
    unix_socket='/var/run/mysqld/mysqld.sock',
    host="localhost",
    user="root",
    passwd="###########99",
    db="dowery_prototype",
)
try:
    # DictCursor returns each row as a dict keyed by column name.
    cursor = connection.cursor(mdb.cursors.DictCursor)
    try:
        cursor.execute(SqlQuery)
        # All rows are fetched up front, so the database handles can be
        # released before the file is written.
        result_set = cursor.fetchall()
    finally:
        cursor.close()
finally:
    connection.close()

###
###### Fetch the results of the real-time search
###

# Write the header only when the file is new or empty; appending it on every
# run would leave header lines in the middle of the data.
needs_header = not os.path.exists(_CSV_PATH) or os.path.getsize(_CSV_PATH) == 0

# Open the file once for the whole export instead of once per row.
with open(_CSV_PATH, "a", encoding="utf-8") as fo:
    if needs_header:
        fo.write(_CSV_HEADER + "\n")

    for row in result_set:
        # ';' is the column separator, so it must not occur inside a field.
        # NOTE(review): embedded line breaks are not escaped, so a record
        # may span several physical lines - confirm the consumer copes.
        p_gruendePlain = str(row["p_gruendePlain"]).replace(";", " - ").strip()
        p_leitsatz = str(row["p_leitsatz"]).replace(";", " - ").strip()
        fo.write(_CSV_ROW_PREFIX + p_leitsatz + ";" + p_gruendePlain + "\n")

print("####################################")
print("###### BIN FERTIG ##################")
print("####################################")

sys.exit()