
import os
import gensim

# sentences = [['Tom', 'loves', 'pizza'], ['Peter', 'loves', 'fries']]

# model = gensim.models.Word2Vec(sentences, min_count=1)

class MySentences(object):
    """Restartable, streaming iterator over the tokenized lines of a directory.

    Every call to ``iter()`` lists ``dirname`` again and reads its files one
    line at a time, so the corpus is never held in memory as a whole and the
    object can be iterated repeatedly (e.g. once to build a vocabulary and
    again for each training pass).
    """

    def __init__(self, dirname):
        """Remember the corpus directory; nothing is read until iteration.

        Args:
            dirname: Path of the directory whose files make up the corpus.
        """
        self.dirname = dirname

    def __iter__(self):
        """Yield each line of each regular file in ``dirname`` as a token list.

        Files are visited in sorted filename order. Entries that are not
        regular files (such as sub-directories) are skipped.

        Yields:
            list[str]: The whitespace-separated tokens of one line; a blank
            line yields an empty list.
        """
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # sentence order identical on every pass and on every machine.
        for fname in sorted(os.listdir(self.dirname)):
            path = os.path.join(self.dirname, fname)
            # A sub-directory cannot be opened as a text file, so skip it
            # instead of aborting the whole pass.
            if not os.path.isfile(path):
                continue
            # The context manager closes the handle as soon as the file is
            # exhausted (or the generator is discarded), so repeated passes
            # do not accumulate open file descriptors.
            with open(path) as handle:
                for line in handle:
                    yield line.split()
 
# Stream every file in the 'examples' folder (e.g. Gensim_example_1.txt)
# through MySentences, which reads the corpus line by line rather than
# loading it into memory.
sentences = MySentences('examples')


# Step 1: Initialize model
# vector_size is the embedding dimensionality and window the context size;
# min_count=1 keeps even words that occur only once in the vocabulary.
model = gensim.models.Word2Vec(vector_size=100, window=5, min_count=1)

# Step 2: Build vocabulary
# This is the first full pass over the corpus; it also sets
# model.corpus_count, which the training step needs.
model.build_vocab(sentences)

# Step 3: Train the model
# The corpus is iterated again here, so `sentences` must be restartable.
model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)

# Check vocabulary
print(model.wv.key_to_index.keys())


# Query the word closest to 'soup' + 'cook' - 'Google'. The result is printed
# because a bare expression shows nothing when this file runs as a script.
# NOTE: raises KeyError if any query word is missing from the corpus.
print(model.wv.most_similar(positive=['soup', 'cook'], negative=['Google'], topn=1))
# Further query examples (these live on model.wv in gensim 4):
# model.wv.doesnt_match("breakfast cereal dinner lunch".split())
# model.wv.similarity('soup', 'Google')

# Show the learned vector for a single word.
print(model.wv['lives'])