Testing bug fixes and action
Fixes bugs, including one that made ontology caching ineffective. Also adds a GitHub Actions workflow that lints and tests the code automatically on push and pull request.
paynejason committed Oct 27, 2023
1 parent ec71b2b commit b659a3e
Showing 7 changed files with 94 additions and 38 deletions.
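
For context, a minimal sketch of how the repaired caching path is exercised from the public API. This assumes text2term's documented cache_ontology, map_terms and clear_cache helpers; the ontology URL, acronym and source terms are illustrative only.

import text2term

# Illustrative ontology URL and acronym; any OWL ontology reachable by URL works.
text2term.cache_ontology("http://www.ebi.ac.uk/efo/efo.owl", "EFO")

# With the fix in _load_ontology, mapping with use_cache=True loads the pickled
# term details instead of rebuilding an OntologyTermCollector from scratch.
mappings = text2term.map_terms(["asthma", "lung disease"], "EFO", use_cache=True)
print(mappings.head())

# Clean up, mirroring the new tearDownClass hook in test/simple_tests.py.
text2term.clear_cache("EFO")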
39 changes: 39 additions & 0 deletions .github/workflows/python-app.yml
@@ -0,0 +1,39 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python application
+
+on:
+  push:
+    branches: [ "development" ]
+  pull_request:
+    branches: [ "main" ]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v3
+      with:
+        python-version: "3.10"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        python -m unittest test/simple_tests
10 changes: 7 additions & 3 deletions test/simple_tests.py
@@ -2,13 +2,12 @@
import unittest
import pandas as pd
import text2term
-from term import OntologyTermType
-from mapper import Mapper
+from text2term import OntologyTermType
+from text2term import Mapper
from text2term import OntologyTermCollector

pd.set_option('display.max_columns', None)


class Text2TermTestSuite(unittest.TestCase):

    @classmethod
@@ -21,6 +20,11 @@ def setUpClass(cls):
        cls.MAPPING_SCORE_COLUMN = "Mapping Score"
        cls.TAGS_COLUMN = "Tags"

+    @classmethod
+    def tearDownClass(cls):
+        super(Text2TermTestSuite, cls).tearDownClass()
+        text2term.clear_cache()
+
    def test_caching_ontology_from_url(self):
        # Test caching an ontology loaded from a URL
        print("Test caching an ontology loaded from a URL...")
3 changes: 3 additions & 0 deletions text2term/__init__.py
@@ -8,3 +8,6 @@
from .preprocess import preprocess_tagged_terms
from .tagged_term import TaggedTerm
from .term_collector import OntologyTermCollector
+from .term_collector import filter_terms
+from .term import OntologyTermType
+from .term import OntologyTerm
2 changes: 1 addition & 1 deletion text2term/config.py
@@ -1 +1 @@
VERSION = "4.0.0"
VERSION = "4.1.0"
12 changes: 8 additions & 4 deletions text2term/t2t.py
@@ -10,6 +10,7 @@
from text2term.mapper import Mapper
from text2term.term import OntologyTermType
from text2term.term_collector import OntologyTermCollector
+from text2term.term_collector import filter_terms
from text2term.term_graph_generator import TermGraphGenerator
from text2term.bioportal_mapper import BioPortalAnnotatorMapper
from text2term.syntactic_mapper import SyntacticMapper
@@ -21,6 +22,8 @@

IGNORE_TAGS = ["ignore", "Ignore", "ignore ", "Ignore "]
UNMAPPED_TAG = "unmapped"
+OUTPUT_COLUMNS = ["Source Term", "Source Term ID", "Mapped Term Label",
+                  "Mapped Term CURIE", "Mapped Term IRI", "Mapping Score", "Tags"]

LOGGER = onto_utils.get_logger(__name__, level=logging.INFO)

@@ -174,16 +177,16 @@ def _load_data(input_file_path, csv_column_names, separator):


def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_type=OntologyTermType.CLASS):
-    term_collector = OntologyTermCollector(ontology_iri=ontology)
    if use_cache:
        pickle_file = os.path.join("cache", ontology, ontology + "-term-details.pickle")
        LOGGER.info(f"Loading cached ontology from: {pickle_file}")
        onto_terms_unfiltered = pickle.load(open(pickle_file, "rb"))
-        onto_terms = term_collector.filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type)
+        onto_terms = filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type)
    else:
+        term_collector = OntologyTermCollector(ontology_iri=ontology)
        onto_terms = term_collector.get_ontology_terms(base_iris=iris, exclude_deprecated=exclude_deprecated,
                                                       term_type=term_type)
-    term_collector.close()
+        term_collector.close()
    LOGGER.info(f"Filtered ontology terms to those of type: {term_type}")
    if len(onto_terms) == 0:
        raise RuntimeError("Could not find any terms in the given ontology.")
@@ -270,7 +273,8 @@ def _filter_mappings(mappings_df, min_score):

def _add_unmapped_terms(mappings_df, tags, source_terms, source_terms_ids):
    if mappings_df.size == 0:
-        mapped = ()
+        mapped = []
+        mappings_df = pd.DataFrame(columns=OUTPUT_COLUMNS)
    else:
        mapped = pd.unique(mappings_df["Source Term"])
    for (term, term_id) in zip(source_terms, source_terms_ids):
63 changes: 33 additions & 30 deletions text2term/term_collector.py
@@ -48,17 +48,7 @@ def get_ontology_terms(self, base_iris=(), exclude_deprecated=False, term_type=O
        return ontology_terms

    def filter_terms(self, onto_terms, iris=(), excl_deprecated=False, term_type=OntologyTermType.ANY):
-        filtered_onto_terms = {}
-        for base_iri, term in onto_terms.items():
-            if type(iris) == str:
-                begins_with_iri = (iris == ()) or base_iri.startswith(iris)
-            else:
-                begins_with_iri = (iris == ()) or any(base_iri.startswith(iri) for iri in iris)
-            is_not_deprecated = (not excl_deprecated) or (not term.deprecated)
-            include = self._filter_term_type(term, term_type, True)
-            if begins_with_iri and is_not_deprecated and include:
-                filtered_onto_terms.update({base_iri: term})
-        return filtered_onto_terms
+        return filter_terms(onto_terms, iris, excl_deprecated, term_type)

    def _get_ontology_signature(self, ontology):
        signature = list(ontology.classes())
@@ -73,7 +63,7 @@ def _get_ontology_terms(self, term_list, ontology, exclude_deprecated, term_type
        ontology_terms = dict()
        for ontology_term in term_list:
            # Parse if should include ontology classes, properties, or both
-            include = self._filter_term_type(ontology_term, term_type, False)
+            include = _filter_term_type(ontology_term, term_type, False)
            if include and ontology_term is not Thing and ontology_term is not Nothing:
                if (exclude_deprecated and not deprecated[ontology_term]) or (not exclude_deprecated):
                    iri = ontology_term.iri
@@ -84,9 +74,9 @@
                    instances = self._get_instances(ontology_term, ontology)
                    definitions = self._get_definitions(ontology_term)
                    is_deprecated = deprecated[ontology_term] == [True]
-                    if self._filter_term_type(ontology_term, OntologyTermType.CLASS, False):
+                    if _filter_term_type(ontology_term, OntologyTermType.CLASS, False):
                        owl_term_type = OntologyTermType.CLASS
-                    elif self._filter_term_type(ontology_term, OntologyTermType.PROPERTY, False):
+                    elif _filter_term_type(ontology_term, OntologyTermType.PROPERTY, False):
                        owl_term_type = OntologyTermType.PROPERTY
                    else:
                        owl_term_type = "undetermined"
@@ -100,22 +90,6 @@
                    self.logger.debug("Excluding deprecated ontology term: %s", ontology_term.iri)
        return ontology_terms

-    def _filter_term_type(self, ontology_term, term_type, cached):
-        if term_type == OntologyTermType.CLASS:
-            if cached:
-                return ontology_term.term_type == OntologyTermType.CLASS
-            else:
-                return isinstance(ontology_term, ThingClass)
-        elif term_type == OntologyTermType.PROPERTY:
-            if cached:
-                return ontology_term.term_type == OntologyTermType.PROPERTY
-            else:
-                return isinstance(ontology_term, PropertyClass)
-        elif term_type == OntologyTermType.ANY:
-            return True
-        else:
-            raise ValueError("Invalid term-type option. Acceptable term types are: 'class' or 'property' or 'any'")
-
    def _get_parents(self, ontology_term):
        parents = dict()  # named/atomic superclasses except owl:Thing
        restrictions = dict()  # restrictions are class expressions such as 'pancreatitis disease_has_location pancreas'
@@ -401,3 +375,32 @@ def _log_ontology_metrics(self, ontology):
        self.logger.debug(" Object property count: %i", len(list(ontology.object_properties())))
        self.logger.debug(" Data property count: %i", len(list(ontology.data_properties())))
        self.logger.debug(" Annotation property count: %i", len(list(ontology.annotation_properties())))
+
+def filter_terms(onto_terms, iris=(), excl_deprecated=False, term_type=OntologyTermType.ANY):
+    filtered_onto_terms = {}
+    for base_iri, term in onto_terms.items():
+        if type(iris) == str:
+            begins_with_iri = (iris == ()) or base_iri.startswith(iris)
+        else:
+            begins_with_iri = (iris == ()) or any(base_iri.startswith(iri) for iri in iris)
+        is_not_deprecated = (not excl_deprecated) or (not term.deprecated)
+        include = _filter_term_type(term, term_type, True)
+        if begins_with_iri and is_not_deprecated and include:
+            filtered_onto_terms.update({base_iri: term})
+    return filtered_onto_terms
+
+def _filter_term_type(ontology_term, term_type, cached):
+    if term_type == OntologyTermType.CLASS:
+        if cached:
+            return ontology_term.term_type == OntologyTermType.CLASS
+        else:
+            return isinstance(ontology_term, ThingClass)
+    elif term_type == OntologyTermType.PROPERTY:
+        if cached:
+            return ontology_term.term_type == OntologyTermType.PROPERTY
+        else:
+            return isinstance(ontology_term, PropertyClass)
+    elif term_type == OntologyTermType.ANY:
+        return True
+    else:
+        raise ValueError("Invalid term-type option. Acceptable term types are: 'class' or 'property' or 'any'")
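
Since __init__.py now re-exports the relocated helpers, they can be reached without going through a collector instance. A rough sketch under that assumption; the EFO IRI is illustrative only.

from text2term import OntologyTermCollector, OntologyTermType, filter_terms

# Collect terms once (illustrative ontology IRI), then filter them afterwards;
# filter_terms is now a module-level function, so the collector is not needed again.
collector = OntologyTermCollector(ontology_iri="http://www.ebi.ac.uk/efo/efo.owl")
terms = collector.get_ontology_terms()
collector.close()

# Keep only non-deprecated classes among the collected terms.
classes_only = filter_terms(terms, excl_deprecated=True, term_type=OntologyTermType.CLASS)
print(len(classes_only))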
3 changes: 3 additions & 0 deletions text2term/term_mapping.py
@@ -55,6 +55,9 @@ def to_dict(self):
            self.MAPPING_SCORE: self.mapping_score
        }

+    def get_col_names(self):
+        return [SRC_TERM, SRC_TERM_ID, TGT_TERM_LBL, TGT_TERM_CURIE, TGT_TERM_IRI, MAPPING_SCORE]
+
    def __eq__(self, other):
        if isinstance(other, TermMapping):
            return self.source_term == other.source_term and self.mapped_term_iri == other.mapped_term_iri
