Testing bug fixes and action
Fixes bugs, including one that made ontology caching ineffective. Also adds a GitHub Actions workflow that lints and tests the code automatically on push and pull request.
paynejason committed Oct 27, 2023
1 parent ec71b2b commit b659a3e
Showing 7 changed files with 94 additions and 38 deletions.
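
For context, a minimal sketch of how the repaired caching path is exercised from the public API. This assumes text2term's documented cache_ontology, map_terms and clear_cache helpers; the ontology URL, acronym and source terms are illustrative only.

import text2term

# Illustrative ontology URL and acronym; any OWL ontology reachable by URL works.
text2term.cache_ontology("http://www.ebi.ac.uk/efo/efo.owl", "EFO")

# With the fix in _load_ontology, mapping with use_cache=True loads the pickled
# term details instead of rebuilding an OntologyTermCollector from scratch.
mappings = text2term.map_terms(["asthma", "lung disease"], "EFO", use_cache=True)
print(mappings.head())

# Clean up, mirroring the new tearDownClass hook in test/simple_tests.py.
text2term.clear_cache("EFO")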
39 changes: 39 additions & 0 deletions .github/workflows/python-app.yml
@@ -0,0 +1,39 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python application
+
+on:
+  push:
+    branches: [ "development" ]
+  pull_request:
+    branches: [ "main" ]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v3
+      with:
+        python-version: "3.10"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        python -m unittest test/simple_tests
10 changes: 7 additions & 3 deletions test/simple_tests.py
@@ -2,13 +2,12 @@
import unittest
import pandas as pd
import text2term
-from term import OntologyTermType
-from mapper import Mapper
+from text2term import OntologyTermType
+from text2term import Mapper
from text2term import OntologyTermCollector

pd.set_option('display.max_columns', None)


class Text2TermTestSuite(unittest.TestCase):

    @classmethod
@@ -21,6 +20,11 @@ def setUpClass(cls):
        cls.MAPPING_SCORE_COLUMN = "Mapping Score"
        cls.TAGS_COLUMN = "Tags"

+    @classmethod
+    def tearDownClass(cls):
+        super(Text2TermTestSuite, cls).tearDownClass()
+        text2term.clear_cache()
+
    def test_caching_ontology_from_url(self):
        # Test caching an ontology loaded from a URL
        print("Test caching an ontology loaded from a URL...")
3 changes: 3 additions & 0 deletions text2term/__init__.py
@@ -8,3 +8,6 @@
from .preprocess import preprocess_tagged_terms
from .tagged_term import TaggedTerm
from .term_collector import OntologyTermCollector
+from .term_collector import filter_terms
+from .term import OntologyTermType
+from .term import OntologyTerm
2 changes: 1 addition & 1 deletion text2term/config.py
@@ -1 +1 @@
VERSION = "4.0.0"
VERSION = "4.1.0"
12 changes: 8 additions & 4 deletions text2term/t2t.py
@@ -10,6 +10,7 @@
from text2term.mapper import Mapper
from text2term.term import OntologyTermType
from text2term.term_collector import OntologyTermCollector
+from text2term.term_collector import filter_terms
from text2term.term_graph_generator import TermGraphGenerator
from text2term.bioportal_mapper import BioPortalAnnotatorMapper
from text2term.syntactic_mapper import SyntacticMapper
@@ -21,6 +22,8 @@

IGNORE_TAGS = ["ignore", "Ignore", "ignore ", "Ignore "]
UNMAPPED_TAG = "unmapped"
+OUTPUT_COLUMNS = ["Source Term", "Source Term ID", "Mapped Term Label",
+                  "Mapped Term CURIE", "Mapped Term IRI", "Mapping Score", "Tags"]

LOGGER = onto_utils.get_logger(__name__, level=logging.INFO)

@@ -174,16 +177,16 @@ def _load_data(input_file_path, csv_column_names, separator):


def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_type=OntologyTermType.CLASS):
-    term_collector = OntologyTermCollector(ontology_iri=ontology)
    if use_cache:
        pickle_file = os.path.join("cache", ontology, ontology + "-term-details.pickle")
        LOGGER.info(f"Loading cached ontology from: {pickle_file}")
        onto_terms_unfiltered = pickle.load(open(pickle_file, "rb"))
-        onto_terms = term_collector.filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type)
+        onto_terms = filter_terms(onto_terms_unfiltered, iris, exclude_deprecated, term_type)
    else:
+        term_collector = OntologyTermCollector(ontology_iri=ontology)
        onto_terms = term_collector.get_ontology_terms(base_iris=iris, exclude_deprecated=exclude_deprecated,
                                                       term_type=term_type)
-    term_collector.close()
+        term_collector.close()
    LOGGER.info(f"Filtered ontology terms to those of type: {term_type}")
    if len(onto_terms) == 0:
        raise RuntimeError("Could not find any terms in the given ontology.")
@@ -270,7 +273,8 @@ def _filter_mappings(mappings_df, min_score):

def _add_unmapped_terms(mappings_df, tags, source_terms, source_terms_ids):
    if mappings_df.size == 0:
-        mapped = ()
+        mapped = []
+        mappings_df = pd.DataFrame(columns=OUTPUT_COLUMNS)
    else:
        mapped = pd.unique(mappings_df["Source Term"])
    for (term, term_id) in zip(source_terms, source_terms_ids):
63 changes: 33 additions & 30 deletions text2term/term_collector.py
@@ -48,17 +48,7 @@ def get_ontology_terms(self, base_iris=(), exclude_deprecated=False, term_type=O
        return ontology_terms

    def filter_terms(self, onto_terms, iris=(), excl_deprecated=False, term_type=OntologyTermType.ANY):
-        filtered_onto_terms = {}
-        for base_iri, term in onto_terms.items():
-            if type(iris) == str:
-                begins_with_iri = (iris == ()) or base_iri.startswith(iris)
-            else:
-                begins_with_iri = (iris == ()) or any(base_iri.startswith(iri) for iri in iris)
-            is_not_deprecated = (not excl_deprecated) or (not term.deprecated)
-            include = self._filter_term_type(term, term_type, True)
-            if begins_with_iri and is_not_deprecated and include:
-                filtered_onto_terms.update({base_iri: term})
-        return filtered_onto_terms
+        return filter_terms(onto_terms, iris, excl_deprecated, term_type)

    def _get_ontology_signature(self, ontology):
        signature = list(ontology.classes())
@@ -73,7 +63,7 @@ def _get_ontology_terms(self, term_list, ontology, exclude_deprecated, term_type
        ontology_terms = dict()
        for ontology_term in term_list:
            # Parse if should include ontology classes, properties, or both
-            include = self._filter_term_type(ontology_term, term_type, False)
+            include = _filter_term_type(ontology_term, term_type, False)
            if include and ontology_term is not Thing and ontology_term is not Nothing:
                if (exclude_deprecated and not deprecated[ontology_term]) or (not exclude_deprecated):
                    iri = ontology_term.iri
@@ -84,9 +74,9 @@
                    instances = self._get_instances(ontology_term, ontology)
                    definitions = self._get_definitions(ontology_term)
                    is_deprecated = deprecated[ontology_term] == [True]
-                    if self._filter_term_type(ontology_term, OntologyTermType.CLASS, False):
+                    if _filter_term_type(ontology_term, OntologyTermType.CLASS, False):
                        owl_term_type = OntologyTermType.CLASS
-                    elif self._filter_term_type(ontology_term, OntologyTermType.PROPERTY, False):
+                    elif _filter_term_type(ontology_term, OntologyTermType.PROPERTY, False):
                        owl_term_type = OntologyTermType.PROPERTY
                    else:
                        owl_term_type = "undetermined"
@@ -100,22 +90,6 @@
                    self.logger.debug("Excluding deprecated ontology term: %s", ontology_term.iri)
        return ontology_terms

-    def _filter_term_type(self, ontology_term, term_type, cached):
-        if term_type == OntologyTermType.CLASS:
-            if cached:
-                return ontology_term.term_type == OntologyTermType.CLASS
-            else:
-                return isinstance(ontology_term, ThingClass)
-        elif term_type == OntologyTermType.PROPERTY:
-            if cached:
-                return ontology_term.term_type == OntologyTermType.PROPERTY
-            else:
-                return isinstance(ontology_term, PropertyClass)
-        elif term_type == OntologyTermType.ANY:
-            return True
-        else:
-            raise ValueError("Invalid term-type option. Acceptable term types are: 'class' or 'property' or 'any'")
-
    def _get_parents(self, ontology_term):
        parents = dict()  # named/atomic superclasses except owl:Thing
        restrictions = dict()  # restrictions are class expressions such as 'pancreatitis disease_has_location pancreas'
@@ -401,3 +375,32 @@ def _log_ontology_metrics(self, ontology):
        self.logger.debug(" Object property count: %i", len(list(ontology.object_properties())))
        self.logger.debug(" Data property count: %i", len(list(ontology.data_properties())))
        self.logger.debug(" Annotation property count: %i", len(list(ontology.annotation_properties())))
+
+def filter_terms(onto_terms, iris=(), excl_deprecated=False, term_type=OntologyTermType.ANY):
+    filtered_onto_terms = {}
+    for base_iri, term in onto_terms.items():
+        if type(iris) == str:
+            begins_with_iri = (iris == ()) or base_iri.startswith(iris)
+        else:
+            begins_with_iri = (iris == ()) or any(base_iri.startswith(iri) for iri in iris)
+        is_not_deprecated = (not excl_deprecated) or (not term.deprecated)
+        include = _filter_term_type(term, term_type, True)
+        if begins_with_iri and is_not_deprecated and include:
+            filtered_onto_terms.update({base_iri: term})
+    return filtered_onto_terms
+
+def _filter_term_type(ontology_term, term_type, cached):
+    if term_type == OntologyTermType.CLASS:
+        if cached:
+            return ontology_term.term_type == OntologyTermType.CLASS
+        else:
+            return isinstance(ontology_term, ThingClass)
+    elif term_type == OntologyTermType.PROPERTY:
+        if cached:
+            return ontology_term.term_type == OntologyTermType.PROPERTY
+        else:
+            return isinstance(ontology_term, PropertyClass)
+    elif term_type == OntologyTermType.ANY:
+        return True
+    else:
+        raise ValueError("Invalid term-type option. Acceptable term types are: 'class' or 'property' or 'any'")
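
Since __init__.py now re-exports the relocated helpers, they can be reached without going through a collector instance. A rough sketch under that assumption; the EFO IRI is illustrative only.

from text2term import OntologyTermCollector, OntologyTermType, filter_terms

# Collect terms once (illustrative ontology IRI), then filter them afterwards;
# filter_terms is now a module-level function, so the collector is not needed again.
collector = OntologyTermCollector(ontology_iri="http://www.ebi.ac.uk/efo/efo.owl")
terms = collector.get_ontology_terms()
collector.close()

# Keep only non-deprecated classes among the collected terms.
classes_only = filter_terms(terms, excl_deprecated=True, term_type=OntologyTermType.CLASS)
print(len(classes_only))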
3 changes: 3 additions & 0 deletions text2term/term_mapping.py
@@ -55,6 +55,9 @@ def to_dict(self):
            self.MAPPING_SCORE: self.mapping_score
        }

+    def get_col_names(self):
+        return [SRC_TERM, SRC_TERM_ID, TGT_TERM_LBL, TGT_TERM_CURIE, TGT_TERM_IRI, MAPPING_SCORE]
+
    def __eq__(self, other):
        if isinstance(other, TermMapping):
            return self.source_term == other.source_term and self.mapped_term_iri == other.mapped_term_iri
