From 86736beb056d12a0c27172949eeef94781d24640 Mon Sep 17 00:00:00 2001
From: bjascob
Date: Wed, 10 Mar 2021 10:12:13 -0700
Subject: [PATCH] minor updates for spacy 3.x

---
 amrlib/graph_processing/annotator.py | 4 ++--
 req_tested_versions.txt              | 9 ++++-----
 requirements.txt                     | 2 +-
 setup.py                             | 2 +-
 tests/auto/ModelGenericTypes.py      | 2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/amrlib/graph_processing/annotator.py b/amrlib/graph_processing/annotator.py
index 80a4cd1..5c5d640 100644
--- a/amrlib/graph_processing/annotator.py
+++ b/amrlib/graph_processing/annotator.py
@@ -69,11 +69,11 @@ def _process_penman(pen, tokens=None):
     pen.metadata['ner_iob'] = json.dumps([t.ent_iob_ for t in tokens])
     pen.metadata['pos_tags'] = json.dumps([t.tag_ for t in tokens])
     # Create lemmas
-    # SpaCy's lemmatizer returns -PRON- for pronouns so strip these
+    # The spaCy 2.0 lemmatizer returns -PRON- for pronouns so strip these (spaCy 3.x does not do this)
     # Don't try to lemmatize any named-entities or proper nouns. Lower-case any other words.
     lemmas = []
     for t in tokens:
-        if t.lemma_ == '-PRON-':
+        if t.lemma_ == '-PRON-':    # spaCy 2.x only
             lemma = t.text.lower()
         elif t.tag_.startswith('NNP') or t.ent_type_ not in ('', 'O'):
             lemma = t.text
diff --git a/req_tested_versions.txt b/req_tested_versions.txt
index 0aa5da9..b6b5c29 100644
--- a/req_tested_versions.txt
+++ b/req_tested_versions.txt
@@ -2,14 +2,13 @@
 PyQt5 version: 5.15.0
 graphviz version: 0.14.1
 nltk version: 3.5
-numpy version: 1.19.1
+numpy version: 1.20.1
 penman version: 1.1.0
 requests version: 2.22.0
 smatch version: 1.0.4
-spacy version: 2.3.2
-torch version: 1.7.0
+spacy version: 3.0.5
+torch version: 1.8.0
 tqdm version: 4.48.2
-transformers version: 4.0.0
+transformers version: 4.2.2
 unidecode version: 1.1.1
 word2number version: 1.1
-
diff --git a/requirements.txt b/requirements.txt
index 65ea167..1dcc727 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@
 penman>=1.1.0
 torch>=1.6
 numpy
-spacy>=2.0,<3.0 # also requires model download `python -m spacy download en_core_web_sm`
+spacy>=2.0 # also requires model download `python -m spacy download en_core_web_sm`
 tqdm
 transformers>=3.0 # Note that original models trained with v3.4.0
 smatch
diff --git a/setup.py b/setup.py
index 4d6d8ea..1cb7c76 100755
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@
                     'alignments/isi_hand_alignments/*.txt']},
     packages=setuptools.find_packages(),
     # Minimal requirements here. More extensive list in requirements.txt
-    install_requires=['penman>=1.1.0', 'torch>=1.6', 'numpy', 'spacy>=2.0,<3.0', 'tqdm', 'transformers>=3.0', 'smatch'],
+    install_requires=['penman>=1.1.0', 'torch>=1.6', 'numpy', 'spacy>=2.0', 'tqdm', 'transformers>=3.0', 'smatch'],
     classifiers=[
         'Programming Language :: Python :: 3',
         'License :: OSI Approved :: MIT License',
diff --git a/tests/auto/ModelGenericTypes.py b/tests/auto/ModelGenericTypes.py
index bc3eb70..4076f6c 100644
--- a/tests/auto/ModelGenericTypes.py
+++ b/tests/auto/ModelGenericTypes.py
@@ -28,7 +28,7 @@
 # The init time doesn't seem to get counted towards the total testing time.
 # To avoid loading things multiple times, load in globally and reference it in __init__
 # as needed.
-SPACY_NLP = spacy.load('en')
+SPACY_NLP = spacy.load('en_core_web_sm')
 class ModelGenericTypes(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
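
Reviewer note (not part of the patch): a minimal sketch of the version-agnostic lemma fallback described in the annotator.py hunk, run against en_core_web_sm. The else branch (lower-casing t.lemma_) and the helper name lemmas_for are assumptions based on the surrounding comments, not code taken from amrlib.

    # Sketch only: mirrors the -PRON- fallback so the same logic works on spaCy 2.x and 3.x.
    # Assumes the model was installed with: python -m spacy download en_core_web_sm
    import spacy

    nlp = spacy.load('en_core_web_sm')   # spaCy 3.x no longer accepts the bare 'en' shortcut

    def lemmas_for(text):                # hypothetical helper, not an amrlib function
        lemmas = []
        for t in nlp(text):
            if t.lemma_ == '-PRON-':     # only ever true under spaCy 2.x
                lemma = t.text.lower()
            elif t.tag_.startswith('NNP') or t.ent_type_ not in ('', 'O'):
                lemma = t.text           # keep proper nouns and named entities as-is
            else:
                lemma = t.lemma_.lower() # assumed default: lower-case the lemma
            lemmas.append(lemma)
        return lemmas

    print(lemmas_for('She gave John the books.'))
    # with en_core_web_sm this should print something like ['she', 'give', 'John', 'the', 'book', '.']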