Prepare usage of pre-tokenized index #50
mam10eks committed Sep 16, 2024
1 parent ad36da6 commit 3644225
Showing 4 changed files with 28 additions and 17 deletions.
4 changes: 2 additions & 2 deletions ir_axioms/axiom/preconditions.py
@@ -12,8 +12,8 @@ def approximately_same_length(
         margin_fraction: float,
 ) -> bool:
     return approximately_equal(
-        len(context.terms(document1)),
-        len(context.terms(document2)),
+        context.document_length(document1),
+        context.document_length(document2),
         margin_fraction=margin_fraction
     )
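The precondition no longer tokenises both documents just to compare their lengths; it delegates to the new `document_length` helper on the context (added in `ir_axioms/model/context.py` below). A minimal sketch of why that indirection matters: a context backed by a pre-tokenized index can serve lengths from stored statistics. `PreTokenizedContext` and its fields are hypothetical illustrations, not part of this commit.

```python
from typing import Dict, Sequence


class PreTokenizedContext:
    """Hypothetical context over a pre-tokenized index."""

    def __init__(self, tokens: Dict[str, Sequence[str]], lengths: Dict[str, int]):
        self._tokens = tokens    # document id -> stored token sequence
        self._lengths = lengths  # document id -> token count from index statistics

    def terms(self, document_id: str) -> Sequence[str]:
        # Served from the index; no tokeniser needed at query time.
        return self._tokens[document_id]

    def document_length(self, document_id: str) -> int:
        # O(1) lookup instead of len(self.terms(...)).
        return self._lengths[document_id]
```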
12 changes: 8 additions & 4 deletions ir_axioms/axiom/query_aspects.py
@@ -197,10 +197,14 @@ def preference(
         document2: RankedDocument
     ):
         query_terms = context.term_set(query)
-        document1_terms = context.term_set(document1)
-        document2_terms = context.term_set(document2)
-        s1 = query_terms.issubset(document1_terms)
-        s2 = query_terms.issubset(document2_terms)
+        s1, s2 = set(), set()
+
+        for query_term in query_terms:
+            if context.term_frequency(document1, query_term) > 0:
+                s1.add(query_term)
+            if context.term_frequency(document2, query_term) > 0:
+                s2.add(query_term)
+
         return strictly_greater(s1, s2)
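Rather than materialising each document's full term set and testing `issubset`, the rewritten preference collects the query terms that actually occur in each document via `term_frequency` lookups, which a pre-tokenized or inverted index can answer term by term. A self-contained sketch of the collection step with made-up term frequencies; `covered_query_terms` is a hypothetical helper, not part of the commit.

```python
from collections import Counter
from typing import Set


def covered_query_terms(term_frequencies: Counter, query_terms: Set[str]) -> Set[str]:
    # Mirrors the loop above: keep the query terms with positive frequency.
    return {term for term in query_terms if term_frequencies[term] > 0}


query_terms = {"neural", "ranking"}
doc1_tf = Counter({"neural": 3, "retrieval": 2})
doc2_tf = Counter({"neural": 1, "ranking": 4})

s1 = covered_query_terms(doc1_tf, query_terms)  # {"neural"}
s2 = covered_query_terms(doc2_tf, query_terms)  # {"neural", "ranking"}
assert s2 > s1  # document 2 matches a strict superset of the query terms
```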
26 changes: 15 additions & 11 deletions ir_axioms/backend/pyterrier/transformers.py
@@ -68,9 +68,10 @@ def transform(self, topics_or_res: DataFrame) -> DataFrame:
 
 
 class AxiomTransformer(PerGroupTransformer, ABC):
-    index: Union[Index, IndexRef, Path, str]
+    index: Optional[Union[Index, IndexRef, Path, str]] = None
     dataset: Optional[Union[Dataset, str, IRDSDataset]] = None
     contents_accessor: Optional[ContentsAccessor] = "text"
+    context: Optional[IndexContext] = None
     tokeniser: Optional[Tokeniser] = None
     cache_dir: Optional[Path] = None
     verbose: bool = False
@@ -80,15 +81,17 @@ class AxiomTransformer(PerGroupTransformer, ABC):
     optional_group_columns = {"qid", "name"}
     unit = "query"
 
-    @cached_property
+    @property
     def _context(self) -> IndexContext:
-        return TerrierIndexContext(
-            index_location=self.index,
-            dataset=self.dataset,
-            contents_accessor=self.contents_accessor,
-            tokeniser=self.tokeniser,
-            cache_dir=self.cache_dir,
-        )
+        if not self.context:
+            self.context = TerrierIndexContext(
+                index_location=self.index,
+                dataset=self.dataset,
+                contents_accessor=self.contents_accessor,
+                tokeniser=self.tokeniser,
+                cache_dir=self.cache_dir,
+            )
+        return self.context
 
     @final
     def transform_group(self, topics_or_res: DataFrame) -> DataFrame:
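Swapping `@cached_property` for a plain `@property` that memoises into the new, user-settable `context` field lets callers inject a ready-made `IndexContext` (for instance one over a pre-tokenized index); the transformer only builds a `TerrierIndexContext` when nothing was supplied. A stripped-down sketch of that pattern, with `LazyContextHolder` and `build_default_context` as hypothetical stand-ins:

```python
from dataclasses import dataclass
from typing import Optional


def build_default_context() -> object:
    # Stand-in for TerrierIndexContext(...): constructed lazily, on first use.
    return object()


@dataclass
class LazyContextHolder:
    # User-settable field: pass a pre-built context to skip the default.
    context: Optional[object] = None

    @property
    def _context(self) -> object:
        if not self.context:
            self.context = build_default_context()
        return self.context
```

Unlike `@cached_property`, the memoised value lives in an ordinary field, so it can be assigned before first access.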
@@ -124,8 +127,9 @@ class KwikSortReranker(AxiomTransformer):
     description = "Reranking query axiomatically"
 
     axiom: AxiomLike
-    index: Union[Index, IndexRef, Path, str]
+    index: Optional[Union[Index, IndexRef, Path, str]] = None
     dataset: Optional[Union[Dataset, str, IRDSDataset]] = None
+    context: Optional[IndexContext] = None
     contents_accessor: Optional[ContentsAccessor] = "text"
     pivot_selection: PivotSelection = RandomPivotSelection()
     tokeniser: Optional[Tokeniser] = None
@@ -170,8 +174,8 @@ class AggregatedAxiomaticPreferences(AxiomTransformer):
     description = "Aggregating query axiom preferences"
 
     axioms: Sequence[AxiomLike]
-    index: Union[Index, IndexRef, Path, str]
     aggregations: Sequence[Callable[[Sequence[float]], float]]
+    index: Optional[Union[Index, IndexRef, Path, str]] = None
     dataset: Optional[Union[Dataset, str, IRDSDataset]] = None
     contents_accessor: Optional[ContentsAccessor] = "text"
     filter_pairs: Optional[Callable[
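Since `index` is now optional on these transformers and `context` is exposed as a field, a reranker can be constructed around a pre-built context. A hedged usage sketch: `my_axiom` and `my_context` are placeholders, and this assumes the dataclass-style fields above are accepted as constructor keywords.

```python
from ir_axioms.backend.pyterrier.transformers import KwikSortReranker

my_axiom = ...    # placeholder: any AxiomLike
my_context = ...  # placeholder: any IndexContext, e.g. over a pre-tokenized index

reranker = KwikSortReranker(axiom=my_axiom, context=my_context)
```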
3 changes: 3 additions & 0 deletions ir_axioms/model/context.py
@@ -64,6 +64,9 @@ def term_set(
     ) -> FrozenSet[str]:
         return frozenset(self.terms(query_or_document))
 
+    def document_length(self, document: Document):
+        return len(self.terms(document))
+
     @lru_cache(None)
     def term_frequency(
         self,
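The new `document_length` default simply counts the tokens returned by `terms`, so existing backends keep their behaviour unchanged, while index-backed contexts can override it as sketched under `preconditions.py` above. A minimal demonstration of the fallback with a hypothetical whitespace-tokenising context:

```python
from typing import Dict, List


class WhitespaceContext:
    """Hypothetical context that tokenises raw text on demand."""

    def __init__(self, contents: Dict[str, str]):
        self._contents = contents  # document id -> raw text

    def terms(self, document_id: str) -> List[str]:
        return self._contents[document_id].split()

    def document_length(self, document_id: str) -> int:
        # Same fallback as the new base-class method: count the terms.
        return len(self.terms(document_id))


assert WhitespaceContext({"d1": "a b c"}).document_length("d1") == 3
```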
