Skip to content

Commit

Permalink
Fix CDS coordinate errors and frame error on the reverse strand
Browse files: browse the repository at this point in the history
  • Loading branch information
Kuanhao-Chao committed May 6, 2024
1 parent 0d51ae2 commit ebe56a9
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 20 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ Given a reference <strong>Genome</strong> <span class="math notranslate nohighli
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="https://ccb.jhu.edu/lifton/content/changelog.html">Changelog</a><ul>
<li class="toctree-l2"><a class="reference internal" href="https://ccb.jhu.edu/lifton/content/changelog.html#v1-0-0">v1.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="https://ccb.jhu.edu/lifton/content/changelog.html#v1-0-1">v1.0.1</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="https://ccb.jhu.edu/lifton/content/license.html">License</a></li>
Expand Down
2 changes: 1 addition & 1 deletion lifton/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = 'v1.0.0'
__version__ = 'v1.0.1'
2 changes: 1 addition & 1 deletion lifton/lifton.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def run_all_lifton_steps(args):
# structure 2: transcript -> exon
################################
for feature in features:#CP132235.1:34100723-34103135
for locus in l_feature_db.features_of_type(feature):#, limit=("CP132235.1", 34100723, 34303135)):
for locus in l_feature_db.features_of_type(feature):#, limit=("NC_051336.1", 29333544, 29357000)):
lifton_gene = run_liftoff.process_liftoff(None, locus, ref_db.db_connection, l_feature_db, ref_id_2_m_id_trans_dict, m_feature_db, tree_dict, tgt_fai, ref_proteins, ref_trans, ref_features_dict, fw_score, fw_chain, args, ENTRY_FEATURE=True)
if lifton_gene is None or lifton_gene.ref_gene_id is None:
continue
Expand Down
28 changes: 14 additions & 14 deletions lifton/lifton_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,24 +471,22 @@ def get_coding_trans_seq(self, fai):
trans_seq = ""
coding_seq = ""
accum_cds_length = 0
for exon in self.exons:
lcl_exons = []
lcl_exons = self.exons
if len(self.exons) > 0 and self.exons[0].entry.strand == '-':
lcl_exons = self.exons[::-1]
for exon in lcl_exons:
# Chaining the exon features
p_trans_seq = exon.entry.sequence(fai)
p_trans_seq = Seq(p_trans_seq).upper()
if exon.entry.strand == '-':
trans_seq = p_trans_seq + trans_seq
elif exon.entry.strand == '+':
trans_seq = trans_seq + p_trans_seq
trans_seq = trans_seq + p_trans_seq
if exon.cds is not None:
# Updating CDS frame
exon.cds.entry.frame = str(self.__get_cds_frame(accum_cds_length))
accum_cds_length = exon.cds.entry.end - exon.cds.entry.start + 1
accum_cds_length += (exon.cds.entry.end - exon.cds.entry.start + 1)
# Chaining the CDS features
p_seq = exon.cds.entry.sequence(fai)
if exon.cds.entry.strand == '-':
coding_seq = p_seq + coding_seq
elif exon.cds.entry.strand == '+':
coding_seq = coding_seq + p_seq
coding_seq = coding_seq + p_seq
if trans_seq != None:
trans_seq = str(trans_seq).upper()
if coding_seq != None:
Expand Down Expand Up @@ -615,8 +613,10 @@ def __iterate_exons_update_cds(self, final_orf, exons, strand):
# Create first partial CDS
if exon.cds is not None:
if strand == "+":
exon.cds.entry.end = exon.entry.end
exon.cds.entry.start = exon.entry.start + (final_orf.start - accum_exon_length)
elif strand == "-":
exon.cds.entry.start = exon.entry.start
exon.cds.entry.end = exon.entry.end - (final_orf.start - accum_exon_length)
else:
if strand == "+":
Expand All @@ -633,24 +633,24 @@ def __iterate_exons_update_cds(self, final_orf, exons, strand):
# Create the last partial CDS
if exon.cds is not None:
if strand == "+":
exon.cds.entry.start = exon.entry.start
exon.cds.entry.end = exon.entry.start + (final_orf.end - accum_exon_length)-1
elif strand == "-":
exon.cds.entry.end = exon.entry.end
exon.cds.entry.start = exon.entry.end - (final_orf.end - accum_exon_length)+1
else:
if strand == "+":
exon.add_novel_lifton_cds(exon.entry, exon.entry.start, exon.entry.start + (final_orf.end - accum_exon_length)-1)
elif strand == "-":
exon.add_novel_lifton_cds(exon.entry, exon.entry.end - (final_orf.end - accum_exon_length)+1, exon.entry.end)
exon.cds.entry.frame = str(self.__get_cds_frame(accum_cds_length))
accum_cds_length += (exon.cds.entry.end - exon.cds.entry.start + 1)
else:
# Keep the original full CDS / extend the CDS to full exon length
if exon.cds is None:
exon.add_novel_lifton_cds(exon.entry, exon.entry.start, exon.entry.end)
else:
exon.cds.update_CDS_info(exon.entry.start, exon.entry.end)
exon.cds.entry.frame = str(self.__get_cds_frame(accum_cds_length))
accum_cds_length += (exon.cds.entry.end - exon.cds.entry.start + 1)
exon.cds.entry.frame = str(self.__get_cds_frame(accum_cds_length))
accum_cds_length += (exon.cds.entry.end - exon.cds.entry.start + 1)
elif final_orf.end <= accum_exon_length:
# No CDS should be created
exon.cds = None
Expand Down
1 change: 0 additions & 1 deletion lifton/run_miniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def lifton_miniprot_with_ref_protein(m_feature, m_feature_db, ref_db, ref_gene_i
m_lifton_aln = align.lifton_parasail_align(Lifton_trans, m_entry, tgt_fai, ref_proteins, ref_trans_id)
lifton_status.annotation = "miniprot"
lifton_status.lifton_aa = m_lifton_aln.identity
# lifton_trans_aln, lifton_aa_aln = lifton_gene.orf_search_protein(Lifton_trans.entry.id, ref_trans_id, tgt_fai, ref_proteins, ref_trans, lifton_status)
return lifton_gene, Lifton_trans, Lifton_trans.entry.id, lifton_status


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
long_description = (this_directory / "./README.md").read_text()
setuptools.setup(
name="lifton",
version="1.0.0",
version="1.0.1",
author="Kuan-Hao Chao",
author_email="kh.chao@cs.jhu.edu",
description="Combining DNA and protein alignments to improve genome annotation with LiftOn",
Expand Down
2 changes: 1 addition & 1 deletion test/lifton_chr22_example.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
lifton -g GRCh38_chr22.gff3 -dir GRCh38_2_CHM13 -o GRCh38_2_CHM13_lifton.gff3 -copies chm13_chr22.fa GRCh38_chr22.fa
lifton -g GRCh38_chr22.gff3 -o GRCh38_2_CHM13/GRCh38_2_CHM13_lifton.gff3 -copies -sc 0.95 chm13_chr22.fa GRCh38_chr22.fa

0 comments on commit ebe56a9

Please sign in to comment.