From 86736beb056d12a0c27172949eeef94781d24640 Mon Sep 17 00:00:00 2001
From: bjascob
Date: Wed, 10 Mar 2021 10:12:13 -0700
Subject: [PATCH] minor updates for spacy 3.x

---
 amrlib/graph_processing/annotator.py | 4 ++--
 req_tested_versions.txt              | 9 ++++-----
 requirements.txt                     | 2 +-
 setup.py                             | 2 +-
 tests/auto/ModelGenericTypes.py      | 2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/amrlib/graph_processing/annotator.py b/amrlib/graph_processing/annotator.py
index 80a4cd1..5c5d640 100644
--- a/amrlib/graph_processing/annotator.py
+++ b/amrlib/graph_processing/annotator.py
@@ -69,11 +69,11 @@ def _process_penman(pen, tokens=None):
     pen.metadata['ner_iob'] = json.dumps([t.ent_iob_ for t in tokens])
     pen.metadata['pos_tags'] = json.dumps([t.tag_ for t in tokens])
     # Create lemmas
-    # SpaCy's lemmatizer returns -PRON- for pronouns so strip these
+    # The spaCy 2.0 lemmatizer returns -PRON- for pronouns so strip these (spaCy 3.x does not do this)
     # Don't try to lemmatize any named-entities or proper nouns. Lower-case any other words.
     lemmas = []
     for t in tokens:
-        if t.lemma_ == '-PRON-':
+        if t.lemma_ == '-PRON-':    # spaCy 2.x only
             lemma = t.text.lower()
         elif t.tag_.startswith('NNP') or t.ent_type_ not in ('', 'O'):
             lemma = t.text
diff --git a/req_tested_versions.txt b/req_tested_versions.txt
index 0aa5da9..b6b5c29 100644
--- a/req_tested_versions.txt
+++ b/req_tested_versions.txt
@@ -2,14 +2,13 @@
 PyQt5 version: 5.15.0
 graphviz version: 0.14.1
 nltk version: 3.5
-numpy version: 1.19.1
+numpy version: 1.20.1
 penman version: 1.1.0
 requests version: 2.22.0
 smatch version: 1.0.4
-spacy version: 2.3.2
-torch version: 1.7.0
+spacy version: 3.0.5
+torch version: 1.8.0
 tqdm version: 4.48.2
-transformers version: 4.0.0
+transformers version: 4.2.2
 unidecode version: 1.1.1
 word2number version: 1.1
-
diff --git a/requirements.txt b/requirements.txt
index 65ea167..1dcc727 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@
 penman>=1.1.0
 torch>=1.6
 numpy
-spacy>=2.0,<3.0 # also requires model download `python -m spacy download en_core_web_sm`
+spacy>=2.0 # also requires model download `python -m spacy download en_core_web_sm`
 tqdm
 transformers>=3.0 # Note that original models trained with v3.4.0
 smatch
diff --git a/setup.py b/setup.py
index 4d6d8ea..1cb7c76 100755
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@
                     'alignments/isi_hand_alignments/*.txt']},
     packages=setuptools.find_packages(),
     # Minimal requirements here. More extensive list in requirements.txt
-    install_requires=['penman>=1.1.0', 'torch>=1.6', 'numpy', 'spacy>=2.0,<3.0', 'tqdm', 'transformers>=3.0', 'smatch'],
+    install_requires=['penman>=1.1.0', 'torch>=1.6', 'numpy', 'spacy>=2.0', 'tqdm', 'transformers>=3.0', 'smatch'],
     classifiers=[
         'Programming Language :: Python :: 3',
         'License :: OSI Approved :: MIT License',
diff --git a/tests/auto/ModelGenericTypes.py b/tests/auto/ModelGenericTypes.py
index bc3eb70..4076f6c 100644
--- a/tests/auto/ModelGenericTypes.py
+++ b/tests/auto/ModelGenericTypes.py
@@ -28,7 +28,7 @@
 # The init time doesn't seem to get counted towards the total testing time.
 # To avoid loading things multiple times, load in globally and reference it in __init__
 # as needed.
-SPACY_NLP = spacy.load('en')
+SPACY_NLP = spacy.load('en_core_web_sm')
 class ModelGenericTypes(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
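
Reviewer note (not part of the patch): a minimal sketch of the version-agnostic lemma fallback described in the annotator.py hunk, run against en_core_web_sm. The else branch (lower-casing t.lemma_) and the helper name lemmas_for are assumptions based on the surrounding comments, not code taken from amrlib.

    # Sketch only: mirrors the -PRON- fallback so the same logic works on spaCy 2.x and 3.x.
    # Assumes the model was installed with: python -m spacy download en_core_web_sm
    import spacy

    nlp = spacy.load('en_core_web_sm')   # spaCy 3.x no longer accepts the bare 'en' shortcut

    def lemmas_for(text):                # hypothetical helper, not an amrlib function
        lemmas = []
        for t in nlp(text):
            if t.lemma_ == '-PRON-':     # only ever true under spaCy 2.x
                lemma = t.text.lower()
            elif t.tag_.startswith('NNP') or t.ent_type_ not in ('', 'O'):
                lemma = t.text           # keep proper nouns and named entities as-is
            else:
                lemma = t.lemma_.lower() # assumed default: lower-case the lemma
            lemmas.append(lemma)
        return lemmas

    print(lemmas_for('She gave John the books.'))
    # with en_core_web_sm this should print something like ['she', 'give', 'John', 'the', 'book', '.']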