Skip to content

Commit

Permalink
Update models for Ixalan; reorganize files
Browse files Browse the repository at this point in the history
  • Loading branch information
clayadavis committed Oct 11, 2017
1 parent 756f9a0 commit c6a0436
Show file tree
Hide file tree
Showing 17 changed files with 378,525 additions and 13 deletions.
15 changes: 12 additions & 3 deletions webapp/build_library.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
import gzip
import json

import os
import requests


DIR_NAME = 'library'

def _get(d, fields):
return {k:d[k] for k in fields if k in d}

if __name__ == '__main__':
try:
os.chdir(DIR_NAME)
except FileNotFoundError:
os.mkdir(DIR_NAME)
os.chdir(DIR_NAME)

url = 'https://mtgjson.com/json/AllSetsArray.json.gz'
resp = requests.get(url)
sets = json.loads(gzip.decompress(resp.content).decode('utf8'))
Expand All @@ -28,9 +37,9 @@ def _get(d, fields):
card_info['set'] = serie_info
cards[card['name']] = card_info

fname = 'card_commander_library.json.gz'
fname = 'card_codex_library.json.gz'
# Close the gzip stream explicitly: the compressed data and trailer are only
# guaranteed to be flushed to disk once the file object is closed.
with gzip.open(fname, 'wt') as library_file:
    json.dump(list(cards.values()), library_file)

fname = 'card_commander_cardlist.txt'
fname = 'card_codex_cardlist.txt'
with open(fname, 'wt') as f:
f.write('\n'.join(sorted(cards.keys())))
21 changes: 14 additions & 7 deletions webapp/build_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import gzip
import itertools
import json
import os
import re
from operator import itemgetter

Expand All @@ -10,20 +11,23 @@
from gensim import corpora, models, similarities


LIBRARY_DIR = 'library'
MODEL_DIR = 'models'

LIBRARY = os.path.join(LIBRARY_DIR, 'card_codex_library.json.gz')
DICTIONARY = os.path.join(MODEL_DIR, 'card_text_dictionary.dict')
CORPUS = os.path.join(MODEL_DIR, 'card_text_corpus.mm')
INDEX = os.path.join(MODEL_DIR, 'card_text_lsi.index')
TFIDF = os.path.join(MODEL_DIR, 'card_text_tfidf.model')
LSI = os.path.join(MODEL_DIR, 'card_text_lsi.model')

#try:
# stopwords = set(nltk.corpus.stopwords.words('english'))
#except LookupError:
# nltk.download('stopwords')
stopwords = set(nltk.corpus.stopwords.words('english'))
stemmer = nltk.stem.snowball.SnowballStemmer('english')

LIBRARY = 'card_commander_library.json.gz'
DICTIONARY = 'card_text_dictionary.dict'
CORPUS = 'card_text_corpus.mm'
INDEX = 'card_text_lsi.index'
TFIDF = 'card_text_tfidf.model'
LSI = 'card_text_lsi.model'


class Similaritron(object):
# TODO: num_topics is a magic number?
Expand Down Expand Up @@ -153,6 +157,9 @@ def tokenize(card):


if __name__ == '__main__':
if not os.path.isdir(MODEL_DIR):
os.mkdir(MODEL_DIR)

cards = json.load(gzip.open(LIBRARY, 'rt'))

card_names = [c['name'] for c in cards]
Expand Down
Loading

0 comments on commit c6a0436

Please sign in to comment.