# -*- coding: utf-8 -*-
#!/usr/bin/python2.7 -S
"""Print the nearest neighbours of one word from a saved gensim Word2Vec model.

The script loads the model stored at ``path``, queries the ten most similar
entries for ``word`` with three different gensim query methods
(``most_similar``, ``most_similar_cosmul`` and ``similar_by_word``),
pretty-prints each result, reports the total runtime and exits with status 0.
"""
# NOTE(review): the interpreter line above is not the first line of the file,
# so it is presumably not honoured when the file is executed directly --
# confirm how this script is launched before moving it.
#
# Setup notes:
#   python -m spacy.en.download
#   python -m spacy.de.download
#   apt-get install python3-pip
#   pip install --upgrade pip
#   pip3 install --upgrade pip3
#   pip3 install --upgrade spacy
#   pip install --upgrade thinc
#   pip3 install suds-jurko
#   pip install --upgrade mysql-python
#   pip install -U textblob
#   pip install -U textblob-de
#   python -m textblob.download_corpora
#   pip install --upgrade langdetect
#   pip install --upgrade libleipzig
#   pip3 install --upgrade libleipzig
#   pip install --upgrade pattern
#
# References:
#   https://spacy.io/docs/#tutorials
#   CSS: http://codepen.io/explosion/pen/xEpgKz
#   CSS 2: https://explosion.ai/blog/displacy-ent-named-entity-visualizer
#   http://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec

import time

# Taken before the remaining imports so the runtime printed at the end also
# covers the time spent importing them.
start_time = time.time()

import spacy
import json
import codecs
import locale
import shelve
import gensim
import logging
import pprint
import os.path
from sphinxapi import *
import MySQLdb

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

from langdetect import detect
from libleipzig import *

# NOTE(review): `time` is already imported above; it is bound again here, after
# the wildcard imports, exactly as the original did, in case one of them
# rebinds the name -- confirm before removing.
import sys
import time

# Python 2 only: `reload` is a builtin there, and reloading `sys` makes
# `setdefaultencoding` callable again so the process-wide default encoding can
# be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Alternative that was tried for stdout:
#   sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)

# Alternative model: "/home/100biere/100BIERE_model.bin"
path = "/dev/shm/bigmodel/Zusammenhalt_model.bin"
word = "Zusammenhalt"

model = gensim.models.Word2Vec.load(path)

# Run all three queries first and print afterwards, so the results are not
# interleaved with log output produced while querying.
sim1 = model.most_similar(positive=[word], negative=[], topn=10, restrict_vocab=None, indexer=None)
sim2 = model.most_similar_cosmul(positive=[word], negative=[], topn=10)
sim3 = model.similar_by_word(word, topn=10, restrict_vocab=None)

# Per the gensim documentation each result is a list of (word, similarity)
# tuples, so e.g. `neighbour, score = sim1[0]` unpacks the best match.
for label, result in (("SIM 1", sim1), ("SIM 2", sim2), ("SIM 3", sim3)):
    print(label)
    pprint.pprint(result)

print("Script Runtime: --- %s seconds ---" % (time.time() - start_time))
sys.exit(0)