Merge pull request #136 from callahantiff/maintenance
Maintenance
callahantiff authored Mar 12, 2023
2 parents 5312dc0 + 9b42ca3 commit 9eab505
Showing 15 changed files with 477 additions and 168 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build-qa.yml
@@ -4,9 +4,9 @@ jobs:

build:
name: Quality Check
-runs-on: ubuntu-latest
+runs-on: Ubuntu-20.04
 steps:
-- uses: actions/checkout@v2
+- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Setup Python
@@ -60,17 +60,17 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Set-up QEMU
-uses: docker/setup-qemu-action@v1
+uses: docker/setup-qemu-action@v2
 - name: Set-up Docker Buildx
-uses: docker/setup-buildx-action@v1
+uses: docker/setup-buildx-action@v2
 - name: Login to DockerHub
-uses: docker/login-action@v1
+uses: docker/login-action@v2
 with:
 username: ${{ secrets.DOCKER_USERNAME }}
 password: ${{ secrets.DOCKER_PASSWORD }}
 - name: Build and Push Container to DockerHub
 id: docker_build
-uses: docker/build-push-action@v2
+uses: docker/build-push-action@v4
with:
push: true
tags: callahantiff/pheknowlator:latest
1 change: 1 addition & 0 deletions .gitignore
@@ -12,6 +12,7 @@ __pycache__/
pkt_kg.egg*
build/*
dist/*
+venv/*

#### Testing
.single_run
22 changes: 11 additions & 11 deletions notebooks/OWLNETS_Example_Application.ipynb
@@ -94,9 +94,9 @@
"metadata": {},
"outputs": [],
"source": [
"# # uncomment and run to install any required modules from notebooks/requirements.txt\n",
"# # if running a local version of pkt_kg (i.e., not running the PyPI version), uncomment the code below\n",
"# import sys\n",
"# !{sys.executable} -m pip install -r requirements.txt"
"# sys.path.append('../')"
]
},
{
@@ -105,9 +105,9 @@
"metadata": {},
"outputs": [],
"source": [
"# # if running a local version of pkt_kg (i.e., not running the PyPI version), uncomment the code below\n",
"# import sys\n",
"# sys.path.append('../')"
"%%collapse\n",
"# uncomment and run to install any required modules from notebooks/requirements.txt\n",
"# !python -m pip install -r requirements.txt"
]
},
{
@@ -613,8 +613,8 @@
"metadata": {},
"outputs": [],
"source": [
"str1 = 'Decoded {} owl-encoded classes and axioms. Note the following:\\nPartially processed {} cardinality ' \\\n",
" 'elements\\nRemoved {} owl:disjointWith axioms\\n\\nIgnored:\\n -{} misc classes;\\n -{} classes constructed with ' \\\n",
"str1 = 'Decoded {} owl-encoded classes and axioms. Note the following:\\n -Partially processed {} cardinality ' \\\n",
" 'elements\\n -Removed {} owl:disjointWith axioms\\n\\nIgnored:\\n -{} misc classes;\\n -{} classes constructed with ' \\\n",
" 'owl:complementOf;\\n -{} classes containing negation (e.g. pr#lacks_part, cl#has_not_completed)\\n' \\\n",
" '\\nFiltering removed {} semantic support triples'\n",
"stats_str = str1.format(\n",
@@ -648,7 +648,7 @@
"metadata": {},
"outputs": [],
"source": [
"# run line below if you want to ensure resulting graph contains \n",
"# run line below if you want to ensure resulting graph contains only a single connected component\n",
"common_ancestor = 'http://purl.obolibrary.org/obo/BFO_0000001'\n",
"owlnets.graph = owlnets.makes_graph_connected(owlnets.graph, common_ancestor)"
]
@@ -822,9 +822,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "venv",
"language": "python",
"name": "python3"
"name": "venv"
},
"language_info": {
"codemirror_mode": {
@@ -836,7 +836,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
"version": "3.6.8"
}
},
"nbformat": 4,
10 changes: 6 additions & 4 deletions notebooks/tutorials/entity_search/Entity_Search.ipynb
@@ -245,7 +245,9 @@
"outputs": [],
"source": [
"# load the knowledge graph\n",
"kg = nx.read_gpickle(write_location + data_urls[0].split('/')[-1])\n",
"with open(write_location + data_urls[0].split('/')[-1], 'rb') as f:\n",
" kg = pickle.load(f)\n",
"\n",
"undirected_kg = nx.to_undirected(kg)\n",
"print('The knowledge graph contains {} nodes and {} edges'.format(len(kg.nodes()), len(kg.edges())))"
]
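
(The change above swaps nx.read_gpickle, which was removed in networkx 3.0, for a plain pickle.load. A version-tolerant loader, sketched under the assumption that the pickled object is a networkx graph, could fall back like this.)

import pickle

def load_graph(path):
    try:
        import networkx as nx
        return nx.read_gpickle(path)  # available only in networkx < 3.0
    except AttributeError:
        with open(path, 'rb') as f:
            return pickle.load(f)
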
@@ -2131,9 +2133,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "venv",
"language": "python",
"name": "python3"
"name": "venv"
},
"language_info": {
"codemirror_mode": {
@@ -2145,7 +2147,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
"version": "3.6.8"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion pkt_kg/__version__.py
@@ -1,2 +1,2 @@
"""Current version of package pkt_kg"""
__version__ = "3.0.2"
__version__ = "3.1.0"
12 changes: 6 additions & 6 deletions pkt_kg/construction_approaches.py
@@ -44,9 +44,9 @@ class KGConstructionApproach(object):
write_location: A string pointing to the 'resources' directory.
Raises:
-TypeError: If graph is not an rdflib.graph object.
+TypeError: If graph is not a rdflib.graph object.
 TypeError: If edge_info and edge_dict are not dictionary objects.
-ValueError: If graph, edge_info, edge_dict, or subclass_dict files are empty.
+ValueError: If a graph, edge_info, edge_dict, or subclass_dict files are empty.
OSError: If there is no subclass_dict file in the resources/construction_approach directory.
"""

@@ -73,7 +73,7 @@ def __init__(self, write_location: str) -> None:
self.subclass_dict = pickle.load(filepath, encoding='bytes')

def maps_node_to_class(self, edge_type: str, entity: str) -> Optional[List]:
"""Takes an entity and checks whether or not it exists in a dictionary of subclass content, such that keys
"""Takes an entity and checks whether it exists in a dictionary of subclass content, such that keys
are non-class entity identifiers (e.g. Reactome identifiers) and values are sets of ontology class identifiers
mapped to that non-class entity. For example:
{'R-HSA-5601843': {'PW_0000001'}, 'R-HSA-77584': {'PW_0000001', 'GO_0008334'}}
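
(A minimal sketch of the lookup this docstring describes, not the package's exact code; error logging and edge_type handling are omitted.)

def maps_node_to_class(subclass_dict: dict, entity: str):
    # return the ontology classes mapped to a non-class entity,
    # or None when the entity has no entry in the subclass dictionary
    if entity not in subclass_dict:
        return None
    return list(subclass_dict[entity])

# maps_node_to_class({'R-HSA-5601843': {'PW_0000001'}}, 'R-HSA-5601843')  ->  ['PW_0000001']
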
@@ -113,7 +113,7 @@ def subclass_core_constructor(node1: URIRef, node2: URIRef, relation: URIRef, in
Args:
node1: A URIRef or BNode object containing a subject node.
-node2: A URIRef or BNode object containing a object node.
+node2: A URIRef or BNode object containing an object node.
relation: A URIRef object containing an owl:ObjectProperty.
inv_relation: A string containing an inverse relation identifier (i.e. RO_0002200) or None (i.e.
indicating no inverse relation).
@@ -158,7 +158,7 @@ def subclass_constructor(self, edge_info: Dict, edge_type: str) -> List:
Assumption: All ontology class nodes use the obo namespace.
-Note. We explicitly type each node as a owl:Class and each relation/inverse relation as a owl:ObjectProperty.
+Note. We explicitly type each node as an owl:Class and each relation/inverse relation as an owl:ObjectProperty.
This may seem redundant, but it is needed in order to ensure consistency between the data after applying the
OWL API to reformat the data.
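
(What that explicit typing looks like with rdflib, as an illustration only; the identifiers are hypothetical, and pkt_kg's constructors add these triples alongside the edge itself.)

from rdflib import Graph, Namespace, RDF, OWL

obo = Namespace('http://purl.obolibrary.org/obo/')
g = Graph()
node1, node2, relation = obo.CHEBI_2504, obo.PR_000003035, obo.RO_0002436
g.add((node1, RDF.type, OWL.Class))              # explicitly type each node as owl:Class
g.add((node2, RDF.type, OWL.Class))
g.add((relation, RDF.type, OWL.ObjectProperty))  # and each relation as owl:ObjectProperty
g.add((node1, relation, node2))
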
@@ -209,7 +209,7 @@ def instance_core_constructor(node1: URIRef, node2: URIRef, relation: URIRef, in
Args:
node1: A URIRef or BNode object containing a subject node.
-node2: A URIRef or BNode object containing a object node.
+node2: A URIRef or BNode object containing an object node.
relation: A URIRef object containing an owl:ObjectProperty.
inv_relation: A string containing the identifier for an inverse relation (i.e. RO_0002200) or None
(i.e. indicator of no inverse relation).
14 changes: 7 additions & 7 deletions pkt_kg/downloads.py
@@ -98,7 +98,7 @@ def parses_resource_file(self) -> None:
Raises:
ValueError: If the file does not contain data.
-ValueError: If there some of the input URLs were improperly formatted.
+ValueError: If some input URLs were improperly formatted.
"""

pass
@@ -112,7 +112,7 @@ def downloads_data_from_url(self) -> None:
'phenotype': 'resources/ontologies/hp_with_imports.owl'}
Raises:
-ValueError: If not all of the URLs returned valid data.
+ValueError: If not all the URLs returned valid data.
"""

pass
@@ -220,15 +220,15 @@ def generates_source_metadata(self) -> None:

@abstractmethod
def gets_data_type(self) -> str:
""""A string representing the type of data being processed."""
"""A string representing the type of data being processed."""

pass


class OntData(DataSource):

def gets_data_type(self) -> str:
""""A string representing the type of data being processed."""
"""A string representing the type of data being processed."""

return 'Ontology Data'

@@ -242,7 +242,7 @@ def parses_resource_file(self) -> None:
Raises:
TypeError: If the file does not contain data.
-ValueError: If there some of the input URLs were improperly formatted.
+ValueError: If some input URLs were improperly formatted.
"""

if os.stat(self.data_path).st_size == 0:
@@ -257,7 +257,7 @@ def parses_resource_file(self) -> None:

def downloads_data_from_url(self, owltools_location: str = os.path.abspath('./pkt_kg/libs/owltools')) -> None:
"""Takes a string representing a file path/name to a text file as an argument. The function assumes
-that each item in the input file list is an URL to an OWL/OBO ontology.
+that each item in the input file list is a URL to an OWL/OBO ontology.
For each URL, the referenced ontology is downloaded, and used as input to an OWLTools command line argument (
https://github.com/owlcollab/owltools/wiki/Extract-Properties-Command), which facilitates the downloading of
@@ -306,7 +306,7 @@ def downloads_data_from_url(self, owltools_location: str = os.path.abspath('./pk
class LinkedData(DataSource):

def gets_data_type(self) -> str:
""""A string representing the type of data being processed."""
"""A string representing the type of data being processed."""

return 'Edge Data'

10 changes: 5 additions & 5 deletions pkt_kg/edge_list.py
@@ -74,7 +74,7 @@ def gets_source_info(self):
@staticmethod
def identify_header(file_path: str, delimiter: str, skip_rows: List[int]) -> Optional[int]:
"""Compares the similarity of the first line of a Pandas DataFrame to the column headers when read in with and
-without a header to determine whether or not the data frame should be built with a header or not. This
+without a header to determine whether the data frame should be built with a header or not. This
function was modified from a Stack Overflow post: https://stackoverflow.com/a/40193509
Args:
@@ -209,7 +209,7 @@ def data_reducer(cols: str, edge_data: pd.DataFrame) -> pd.DataFrame:

edge_data = edge_data[[list(edge_data)[int(cols.split(';')[0])], list(edge_data)[int(cols.split(';')[1])]]]
edge_data = edge_data.drop_duplicates(subset=None, keep='first', inplace=False)
-# make sure neither column is float
+# make sure neither column is a float
for x in list(edge_data):
if 'float' in str(edge_data[x].dtype): edge_data[x] = edge_data[x].astype(int)
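
(The float guard above matters because pandas silently promotes integer columns to float when they contain missing values; a small illustration with assumed data, not from the repository.)

import pandas as pd

df = pd.DataFrame({'gene_id': [1148, 2023, None]})
print(df['gene_id'].dtype)     # float64 -- the missing value forces int -> float promotion
clean = df.dropna().astype({'gene_id': int})
print(clean['gene_id'].dtype)  # int64
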

@@ -246,7 +246,7 @@ def label_formatter(edge_data: pd.DataFrame, label_criteria: str) -> pd.DataFram

def data_merger(self, node: int, mapping_data: str, edge_data: pd.DataFrame) -> List[Union[str, pd.DataFrame]]:
"""Processes a string that contains instructions for mapping a column in the edge_data Pandas DataFrame. This
-function assumes that the mapping data pointed to contains two columns: (1) identifier in edge_data to be
+function assumes that the mapping data contains two columns: (1) identifier in edge_data to be
mapped and (2) the desired identifier to map to. If one of the columns does not need to be mapped to an
identifier then the original node's column is used for the final merge.
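
(A sketch of the two-column mapping merge the docstring describes; the column names and values here are hypothetical.)

import pandas as pd

edge_data = pd.DataFrame({'node': ['R-HSA-77584', 'R-HSA-5601843']})
mapping = pd.DataFrame({'source_id': ['R-HSA-77584', 'R-HSA-5601843'],  # identifier as it appears in edge_data
                        'mapped_id': ['GO_0008334', 'PW_0000001']})     # desired identifier to map to
merged = edge_data.merge(mapping, left_on='node', right_on='source_id', how='inner')
print(merged[['node', 'mapped_id']])
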
@@ -288,7 +288,7 @@ def data_merger(self, node: int, mapping_data: str, edge_data: pd.DataFrame) ->

def process_mapping_data(self, mapping_data: str, edge_data: pd.DataFrame) -> Tuple[Tuple[Any, Any], ...]:
"""Merges two mapped Pandas DataFrames into a single DataFrame. After merging the DataFrames, the function
-removes all columns except the the mapped columns and removes any duplicate rows.
+removes all columns except the mapped columns and removes any duplicate rows.
Args:
mapping_data: A ';' delimited string containing information on identifier mapping data. Each item
@@ -354,7 +354,7 @@ def creates_knowledge_graph_edges(self, x: str) -> None:
x: A string containing an edge type (e.g. "gene-gene").
Returns:
-source_info: A dictionary that contains all of the master information for each edge type resource. For
+source_info: A dictionary that contains all the master information for each edge type resource. For
example: {'chemical-complex': {'source_labels': ';;', 'data_type': 'class-entity',
'edge_relation': 'RO_0002436', 'uri': ['https://ex/', 'https://ex/'],
'delimiter': 't', 'column_idx': '0;1', 'identifier_maps': 'None',