#!/usr/bin/env python 

import os
import sqlite3
import json
import gensim
from hypernym import HypernymManager

# Nouns that never get a hypernym entry: they are filtered out of the noun
# list before hypernyms are picked and deleted again by cleanup().
blacklist_input = [
    'sky', 'net', 'kerne', 'rod', 'scenarie', 'genstand',
    'stift', 'balle', 'vest', 'fremgang', 'stormester', 'lyde',
    'presse', 'stigning', 'ordination', 'klipning', 'periode', 'bark',
    'matador', 'konfirmand', 'perser', 'junke', 'islænder', 'impromptu',
    'forsikring', 'systematik', 'del', 'bøsse', 'teske', 'fruentimmer',
    'damemenneske', 'hoved', 'rom',
]

# Words that are stripped from every picked hypernym list.
blacklist_output = [
    "hr.",
    "m/k'er",
    'TOP',
    'mandsperson',
    'fruentimmer',
    'hoved',
    'damemenneske',
]

# Hand-picked hypernyms, keyed by noun. pick_hypernyms() uses these instead of
# looking the noun up, and cleanup() writes them over whatever was picked.
overrides = {
    'hals': ['legemsdel'],
    'sult': ['følelse'],
    'hest': ['hovdyr'],
    'hoved': ['legemsdel'],
    'seng': ['møbel'],
}

class Dumper(object):
    """Dump the noun list and pick one hypernym set for each noun.

    The class ties together three data sources: a SQLite database with a
    ``words`` table, a gensim ``KeyedVectors`` model used to rank candidate
    hypernym sets, and a ``HypernymManager`` used to look the candidates up.
    """

    def __init__(self, db=None):
        """Load the vector model and create the hypernym manager.

        Args:
            db: Optional open ``sqlite3`` connection holding the ``words``
                table. When omitted, ``data/dannet.db`` next to this file is
                opened lazily the first time a query needs it.
        """
        # Always define the connection attributes. They used to be left
        # unset, which made get_all_nouns() fail with AttributeError.
        self.conn = db
        self.cursor = db.cursor() if db is not None else None
        fname = './data/da.gensim'
        self.model = gensim.models.KeyedVectors.load(fname, mmap='r')
        self.manager = HypernymManager()
        print('READY!')

    def _get_cursor(self):
        """Return a database cursor, opening the default database on demand."""
        cursor = getattr(self, 'cursor', None)
        if cursor is None:
            conn = getattr(self, 'conn', None)
            if conn is None:
                fname = os.path.join(
                    os.path.dirname(os.path.abspath(__file__)),
                    './data/dannet.db')
                conn = sqlite3.connect(fname)
                self.conn = conn
            cursor = conn.cursor()
            self.cursor = cursor
        return cursor

    def get_all_nouns(self):
        """Return the ``form`` of every row in ``words`` whose pos is 'Noun'.

        Returns:
            list: One entry per matching row, in the order the database
            returns them.
        """
        cursor = self._get_cursor()
        # Bind the value instead of writing "Noun" in double quotes: a
        # double-quoted token is an identifier in SQL and is only treated as
        # a string by SQLite's legacy fallback.
        cursor.execute('SELECT form FROM words WHERE pos = ?', ('Noun',))
        return [row[0] for row in cursor.fetchall()]

    def dump_nouns(self):
        """Write the result of get_all_nouns() to ``nouns.json``."""
        # Query first so a failing query does not leave a truncated file.
        nouns = self.get_all_nouns()
        with open('nouns.json', 'w') as f:
            json.dump(nouns, f)

    def get_hypernym_synsets_from_word(self, word):
        """Return one hypernym lookup result per synset id of ``word``.

        Args:
            word: The word whose synset ids are looked up in the manager.

        Returns:
            list: The manager's ``get_hypernyms_from_synset_id`` result for
            each synset id, in the order the ids were returned.
        """
        synset_ids = self.manager.get_synset_ids_from_word(word)
        return [self.manager.get_hypernyms_from_synset_id(synset_id)
                for synset_id in synset_ids]

    def pick_hypernyms(self):
        """Pick one hypernym list per noun and write ``hypernyms.json``.

        Nouns are read from ``nouns.json``. Blacklisted nouns are skipped,
        nouns with an entry in ``overrides`` get that entry verbatim, and
        nouns without any hypernym candidates are left out. When a noun has
        several candidate sets, the one most similar to the noun according to
        the vector model is chosen; if the similarity cannot be computed the
        first set is used.
        """
        with open('nouns.json', 'r') as f:
            nouns = json.load(f)

        # Sets give constant-time membership tests inside the loops below.
        skipped_nouns = set(blacklist_input)
        banned_hypernyms = set(blacklist_output)

        out = {}
        for word in nouns:
            if word in skipped_nouns:
                continue

            if word in overrides:
                out[word] = overrides[word]
                continue

            hyper_sets = self.get_hypernym_synsets_from_word(word)
            if not hyper_sets:
                continue

            idx = 0
            if len(hyper_sets) > 1:
                try:
                    ranks = [self.model.n_similarity([word], hypers)
                             for hypers in hyper_sets]
                    idx = ranks.index(max(ranks))
                except Exception:
                    # Best effort: fall back to the first candidate set when
                    # the model cannot score the word. Unlike a bare except,
                    # this lets KeyboardInterrupt and SystemExit through.
                    idx = 0
                else:
                    # Progress output sits outside the try so that a printing
                    # problem can no longer silently reset the choice.
                    print(word)
                    print(hyper_sets)
                    print(hyper_sets[idx])
                    print('=============')

            # dict.fromkeys removes duplicates while keeping first-seen order,
            # so the written file is stable from run to run.
            out[word] = list(dict.fromkeys(
                h for h in hyper_sets[idx]
                if h not in banned_hypernyms and h != word))

        with open('hypernyms.json', 'w') as f:
            json.dump(out, f, indent=2)



def cleanup():
    """Filter ``hypernyms.json`` and save it as ``hypernyms_filtered.json``.

    Every key listed in ``blacklist_input`` is dropped, then each entry of
    ``overrides`` is written over the table. The number of entries is printed
    before and after the pass.
    """
    with open('hypernyms.json', 'r') as source:
        table = json.load(source)
    print(len(table))

    # Drop blacklisted nouns; a missing key is simply ignored.
    for banned in blacklist_input:
        table.pop(banned, None)

    # Overrides win over whatever was picked, and are added if absent.
    table.update(overrides)

    with open('hypernyms_filtered.json', 'w') as target:
        json.dump(table, target, indent=2)
    print(len(table))


def main(word):
    """Print what ``HypernymManager.get_hypernyms`` returns for ``word``.

    Usage:
        hypernyms.py word
    """
    manager = HypernymManager()
    print(manager.get_hypernyms(word))

if __name__ == '__main__':
    # Only the cleanup pass runs when the file is executed as a script.
    # The two commented-out lines below build a Dumper and regenerate
    # hypernyms.json; re-enable them to rerun that step before cleanup().
#    dumper = Dumper()
#    dumper.pick_hypernyms()
    cleanup()

