Skip to content

Commit

Permalink
addressing issue #140
Browse files Browse the repository at this point in the history
  • Loading branch information
callahantiff committed Nov 18, 2023
1 parent 36fd2d1 commit e749409
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 40 deletions.
1 change: 1 addition & 0 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def main():
start = time.time()
combined_edges = dict(ent.data_files, **ont.data_files)
# master_edges = CreatesEdgeList(data_files=combined_edges, source_file='resources/resource_info.txt')
# master_edges.runs_creates_knowledge_graph_edges(source_file='resources/resource_info.txt', data_files=combined_edges, cpus=cpus)
master_edges = CreatesEdgeList(data_files=combined_edges, source_file=args.res)
master_edges.runs_creates_knowledge_graph_edges(source_file=args.res, data_files=combined_edges, cpus=cpus)
end = time.time(); timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
Expand Down
17 changes: 1 addition & 16 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -308,22 +308,7 @@ This project is licensed under Apache License 2.0 - see the `LICENSE.md`_ file f
Citing this Work
=================
**ISMB Conference Pre-print:**
Callahan TJ, Tripodi IJ, Hunter LE, Baumgartner WA. `A Framework for Automated Construction of Heterogeneous Large-Scale Biomedical Knowledge Graphs <https://www.biorxiv.org/content/10.1101/2020.04.30.071407v1.abstract>`_. bioRxiv. 2020 Jan 1.
**Zenodo**
.. code:: bash
@misc{callahan_tj_2019_3401437,
author = {Callahan, TJ},
title = {PheKnowLator},
year = 2019,
doi = {10.5281/zenodo.3401437},
url = {https://doi.org/10.5281/zenodo.3401437}}
Please see our preprint: https://arxiv.org/abs/2307.05727
.. |logo| image:: https://user-images.githubusercontent.com/8030363/195494933-d0faba60-5643-4cc6-8a48-41b4a94a7afe.png
:target: https://github.com/callahantiff/PheKnowLator
Expand Down
52 changes: 35 additions & 17 deletions main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,22 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import needed libraries\n",
"import glob\n",
"import json\n",
"import pandas\n",
"import psutil\n",
"import ray\n",
"import time\n",
"\n",
"# import module\n",
"from pkt_kg import downloads, edge_list, knowledge_graph"
"from pkt_kg.downloads import OntData, LinkedData\n",
"from pkt_kg.edge_list import CreatesEdgeList\n",
"from pkt_kg.knowledge_graph import FullBuild, PartialBuild, PostClosureBuild"
]
},
{
Expand Down Expand Up @@ -143,9 +146,8 @@
"metadata": {},
"outputs": [],
"source": [
"ont = pkt.OntData('resources/ontology_source_list.txt', 'resources/resource_info.txt')\n",
"ont.downloads_data_from_url()\n",
"ont.writes_source_metadata_locally()"
"ont = OntData('resources/ontology_source_list.txt', 'resources/resource_info.txt')\n",
"ont.downloads_data_from_url()"
]
},
{
Expand All @@ -164,9 +166,8 @@
"metadata": {},
"outputs": [],
"source": [
"edges = pkt.LinkedData('resources/edge_source_list.txt', 'resources/resource_info.txt')\n",
"edges.downloads_data_from_url()\n",
"edges.writes_source_metadata_locally()"
"edges = LinkedData('resources/edge_source_list.txt', 'resources/resource_info.txt')\n",
"edges.downloads_data_from_url()"
]
},
{
Expand Down Expand Up @@ -229,6 +230,15 @@
"ray.init()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined_edges"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -259,18 +269,17 @@
"outputs": [],
"source": [
"# # read in master edge dictionary\n",
"# master_edges = json.load(open('resources/Master_Edge_List_Dict.json', 'r'))\n",
"master_edges = json.load(open('resources/Master_Edge_List_Dict.json', 'r'))\n",
"\n",
"# read in relation data\n",
"relation_data = open('./resources/relations_data/RELATIONS_LABELS.txt').readlines()\n",
"relation_dict = {x.split('\\t')[0]: x.split('\\t')[1].strip('\\n') for x in relation_data}\n",
"\n",
"# print basic stats on each resource\n",
"edge_data = [[key,\n",
" relation_dict[master_edges.source_info[key]['edge_relation']],\n",
" ', '.join(master_edges.source_info[key]['edge_list'][0]),\n",
" len(master_edges.source_info[key]['edge_list'])]\n",
" for key in master_edges.source_info.keys()]\n",
"edge_data = [[key, master_edges[key]['edge_relation'],\n",
" ', '.join(master_edges[key]['edge_list'][0]),\n",
" len(master_edges[key]['edge_list'])]\n",
" for key in master_edges.keys()]\n",
"\n",
"# convert dict to pandas df for nice printing\n",
"df = pandas.DataFrame(edge_data, columns = ['Edge Type', 'Relation', 'Example Edge', 'Unique Edges']) \n",
Expand Down Expand Up @@ -357,9 +366,9 @@
"source": [
"# specify input arguments\n",
"build = 'full'\n",
"construction_approach = 'subclass'\n",
"add_node_data_to_kg = 'yes'\n",
"add_inverse_relations_to_kg = 'yes'\n",
"construction_approach = 'instance'\n",
"add_node_data_to_kg = 'no'\n",
"add_inverse_relations_to_kg = 'no'\n",
"decode_owl_semantics = 'yes'\n",
"kg_directory_location = './resources/knowledge_graphs'\n"
]
Expand Down Expand Up @@ -397,6 +406,15 @@
"ray.shutdown()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"kg.ontologies"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
2 changes: 1 addition & 1 deletion pkt_kg/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Current version of package pkt_kg"""
__version__ = "3.1.1"
__version__ = "3.1.2"
4 changes: 2 additions & 2 deletions pkt_kg/knowledge_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def construct_knowledge_graph(self) -> None:
self.graph = Graph().parse(self.merged_ont_kg, format='xml')
else:
log_str = '*** Merging Ontology Data ***'; print(log_str); logger.info(log_str)
merges_ontologies(self.ontologies, self.merged_ont_kg.split('/')[-1], self.owl_tools)
merges_ontologies(self.ontologies, self.write_location + '/', self.merged_ont_kg.split('/')[-1], self.owl_tools)
self.graph.parse(self.merged_ont_kg, format='xml')
stats = 'Merged Ontologies {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)

Expand Down Expand Up @@ -561,7 +561,7 @@ def construct_knowledge_graph(self) -> None:
self.graph = Graph().parse(self.merged_ont_kg, format='xml')
else:
log_str = '*** Merging Ontology Data ***'; print(log_str); logger.info(log_str)
merges_ontologies(self.ontologies, self.merged_ont_kg.split('/')[-1], self.owl_tools)
merges_ontologies(self.ontologies, self.write_location + '/', self.merged_ont_kg.split('/')[-1], self.owl_tools)
self.graph.parse(self.merged_ont_kg, format='xml')
stats = 'Merged Ontologies {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)

Expand Down
9 changes: 6 additions & 3 deletions pkt_kg/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,10 +471,13 @@ def sublist_creator(actors: Union[Dict, List], chunk_size: int) -> List:

# when actors is a dict, replace each value in the sublists with the string identifier (dict key) that maps to it
if isinstance(actors, Dict):
updated_lists = []; used_ids = set()
updated_lists = []
for sub in lists:
sub_list = [[k for k, v in actors.items() if v == x and k not in used_ids][0] for x in sub]
updated_lists += [sub_list]; used_ids |= set(x for y in sub_list for x in y)
sub_list = []
for x in sub:
temp_list = [k for k, v in actors.items() if v == x][0]
sub_list.append(temp_list); del actors[temp_list]
updated_lists += [sub_list]
else: updated_lists = lists

return updated_lists
2 changes: 1 addition & 1 deletion pkt_kg/utils/kg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def merges_ontologies(onts: List[str], loc: str, merged: str,
None.
"""

if not onts: return None
if not onts or len(onts) == 0: return None
else:
if loc + merged in glob.glob(loc + '/*.owl'): o1, o2 = onts.pop(), loc + merged
else: o1, o2 = onts.pop(), onts.pop()
Expand Down

0 comments on commit e749409

Please sign in to comment.