Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-62230 - Avoiding unnecessary computations for un-filtered kNN. #2076

Merged
merged 5 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/bits-and-blooms/bitset v1.12.0
github.com/blevesearch/bleve_index_api v1.1.12
github.com/blevesearch/geo v0.1.20
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92
github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
Expand All @@ -24,7 +24,7 @@ require (
github.com/blevesearch/zapx/v13 v13.3.10
github.com/blevesearch/zapx/v14 v14.3.10
github.com/blevesearch/zapx/v15 v15.3.13
github.com/blevesearch/zapx/v16 v16.1.6-0.20240909182401-e148470cefbe
github.com/blevesearch/zapx/v16 v16.1.6-0.20240919163431-f2ee7670abd9
github.com/couchbase/moss v0.2.0
github.com/golang/protobuf v1.3.2
github.com/spf13/cobra v1.7.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+
github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92 h1:pDbDTN8dgycpdp9eCzrNp9e6Z4C+UQhCUAZbaarQ6Bs=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a h1:mSUfDoOPOLt0OABjiyQq/kQxOzAJmsgIjlAWUPfUDfc=
github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
Expand Down Expand Up @@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ=
github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240909182401-e148470cefbe h1:S1rCvhrU2HqDrRtogYgM52rT5px7o2zFIB3Yo+JPFOU=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240909182401-e148470cefbe/go.mod h1:x9Kg015zbkSXxmE7F+0qeGxpeHJBwkDuxosrrDxYltU=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240919163431-f2ee7670abd9 h1:pSaAZuB/gu5cNhSXrpI6s6xyN3ysVdG+RMqEbHEDx+o=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240919163431-f2ee7670abd9/go.mod h1:R6fi71sVKI+HnzchzfkomFQ5HvMvn3CWTmLBuuUqoTQ=
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
Expand Down
43 changes: 25 additions & 18 deletions index/scorch/optimize_knn.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"sync"
"sync/atomic"

"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
segment_api "github.com/blevesearch/scorch_segment_api/v2"
Expand Down Expand Up @@ -64,7 +65,10 @@ func (o *OptimizeVR) Finish() error {
var errorsM sync.Mutex
var errors []error

snapshotGlobalDocNums := o.snapshot.globalDocNums()
var snapshotGlobalDocNums map[int]*roaring.Bitmap
if o.requiresFiltering {
snapshotGlobalDocNums = o.snapshot.globalDocNums()
}

defer o.invokeSearcherEndCallback()

Expand Down Expand Up @@ -94,28 +98,31 @@ func (o *OptimizeVR) Finish() error {
vectorIndexSize := vecIndex.Size()
origSeg.cachedMeta.updateMeta(field, vectorIndexSize)
for _, vr := range vrs {
eligibleVectorInternalIDs := vr.getEligibleDocIDs()
if snapshotGlobalDocNums != nil {
// Only the eligible documents belonging to this segment
// will get filtered out.
// There is no way to determine which doc belongs to which segment
eligibleVectorInternalIDs.And(snapshotGlobalDocNums[index])
}

eligibleLocalDocNums := make([]uint64,
eligibleVectorInternalIDs.Stats().Cardinality)
// get the (segment-)local document numbers
for i, docNum := range eligibleVectorInternalIDs.ToArray() {
localDocNum := o.snapshot.localDocNumFromGlobal(index,
uint64(docNum))
eligibleLocalDocNums[i] = localDocNum
}

var pl segment_api.VecPostingsList
var err error

// For each VR, populate its postings list and iterators by searching
// the obtained vector index for similar vectors.

// Only applies to filtered kNN.
if vr.eligibleDocIDs != nil && len(vr.eligibleDocIDs) > 0 {
eligibleVectorInternalIDs := vr.getEligibleDocIDs()
if snapshotGlobalDocNums != nil {
// Retain only the eligible documents that belong to this segment
// by intersecting with the segment's global doc numbers.
// The eligible IDs alone do not indicate which segment a doc belongs to.
eligibleVectorInternalIDs.And(snapshotGlobalDocNums[index])
}

eligibleLocalDocNums := make([]uint64,
eligibleVectorInternalIDs.Stats().Cardinality)
// get the (segment-)local document numbers
for i, docNum := range eligibleVectorInternalIDs.ToArray() {
localDocNum := o.snapshot.localDocNumFromGlobal(index,
uint64(docNum))
eligibleLocalDocNums[i] = localDocNum
}

pl, err = vecIndex.SearchWithFilter(vr.vector, vr.k,
eligibleLocalDocNums, vr.searchParams)
} else {
Expand Down
2 changes: 1 addition & 1 deletion index/scorch/snapshot_index_vr.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ type IndexSnapshotVectorReader struct {
searchParams json.RawMessage

// The following fields are only applicable for vector readers which will
// process kNN queries.
// process pre-filtered kNN queries.
eligibleDocIDs []index.IndexInternalID
}

Expand Down
1 change: 1 addition & 0 deletions search_knn.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
}

if _, ok := filterQ.(*query.MatchAllQuery); ok {
// Equivalent to not having a filter query.
requiresFiltering[idx] = false
continue
}
Expand Down
Loading