Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] Support sharding migration #48795

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions apps/dav/lib/Connector/Sabre/FilesPlugin.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
namespace OCA\DAV\Connector\Sabre;

use OC\AppFramework\Http\Request;
use OC\FilesMetadata\Model\FilesMetadata;
use OCA\DAV\Connector\Sabre\Exception\InvalidPath;
use OCP\Constants;
use OCP\Files\ForbiddenException;
Expand Down Expand Up @@ -575,7 +576,9 @@ private function handleUpdatePropertiesMetadata(PropPatch $propPatch, Node $node
$propPatch->handle(
$mutation,
function (mixed $value) use ($accessRight, $knownMetadata, $node, $mutation, $filesMetadataManager): bool {
/** @var FilesMetadata $metadata */
$metadata = $filesMetadataManager->getMetadata((int)$node->getFileId(), true);
$metadata->setStorageId($node->getNode()->getStorage()->getCache()->getNumericStorageId());
$metadataKey = substr($mutation, strlen(self::FILE_METADATA_PREFIX));

// confirm metadata key is editable via PROPPATCH
Expand Down
5 changes: 5 additions & 0 deletions apps/files/lib/Listener/SyncLivePhotosListener.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

namespace OCA\Files\Listener;

use OC\FilesMetadata\Model\FilesMetadata;
use OCA\Files\Service\LivePhotosService;
use OCP\EventDispatcher\Event;
use OCP\EventDispatcher\IEventListener;
Expand Down Expand Up @@ -154,10 +155,14 @@ private function handleCopy(NodeCopiedEvent $event, Node $peerFile): void {
* We have everything to update metadata and keep the link between the 2 copies.
*/
$newPeerFile = $peerFile->copy($targetParent->getPath() . '/' . $peerTargetName);
/** @var FilesMetadata $targetMetadata */
$targetMetadata = $this->filesMetadataManager->getMetadata($targetFile->getId(), true);
$targetMetadata->setStorageId($targetFile->getStorage()->getCache()->getNumericStorageId());
$targetMetadata->setString('files-live-photo', (string)$newPeerFile->getId());
$this->filesMetadataManager->saveMetadata($targetMetadata);
/** @var FilesMetadata $peerMetadata */
$peerMetadata = $this->filesMetadataManager->getMetadata($newPeerFile->getId(), true);
$peerMetadata->setStorageId($newPeerFile->getStorage()->getCache()->getNumericStorageId());
$peerMetadata->setString('files-live-photo', (string)$targetFile->getId());
$this->filesMetadataManager->saveMetadata($peerMetadata);
}
Expand Down
10 changes: 7 additions & 3 deletions lib/private/DB/Connection.php
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,9 @@
self::SHARD_PRESETS[$name]['shard_key'],
$shardMapper,
self::SHARD_PRESETS[$name]['companion_tables'],
$config['shards']
$config['shards'],
$config['from_primary_key'] ?? 0,

Check failure on line 184 in lib/private/DB/Connection.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

InvalidArrayOffset

lib/private/DB/Connection.php:184:5: InvalidArrayOffset: Cannot access value on variable $config using offset value of 'from_primary_key', expecting 'shards' or 'mapper' (see https://psalm.dev/115)
$config['from_shard_key'] ?? 0,

Check failure on line 185 in lib/private/DB/Connection.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

InvalidArrayOffset

lib/private/DB/Connection.php:185:5: InvalidArrayOffset: Cannot access value on variable $config using offset value of 'from_shard_key', expecting 'shards', 'mapper' or 'from_primary_key' (see https://psalm.dev/115)
);
}, $shardConfig, $shardNames);
$this->shards = array_combine($shardNames, $this->shards);
Expand All @@ -198,9 +200,11 @@
$connections = [];
if ($this->isShardingEnabled) {
foreach ($this->shards as $shardDefinition) {
foreach ($shardDefinition->getAllShards() as $shard) {

Check failure on line 203 in lib/private/DB/Connection.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

UndefinedClass

lib/private/DB/Connection.php:203:14: UndefinedClass: Class, interface or enum named OC\DB\QueryBuilder\Sharded\List does not exist (see https://psalm.dev/019)
/** @var ConnectionAdapter $connection */
$connections[] = $this->shardConnectionManager->getConnection($shardDefinition, $shard);
if ($shard !== ShardDefinition::MIGRATION_SHARD) {
/** @var ConnectionAdapter $connection */
$connections[] = $this->shardConnectionManager->getConnection($shardDefinition, $shard);
}
}
}
}
Expand Down
24 changes: 14 additions & 10 deletions lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

namespace OC\DB\QueryBuilder\Sharded;

use OC\DB\QueryBuilder\QueryBuilder;
use OCP\ICacheFactory;
use OCP\IDBConnection;
use OCP\IMemcache;
use OCP\IMemcacheTTL;

Expand Down Expand Up @@ -134,17 +136,19 @@
* Get the maximum primary key value from the shards
*/
private function getMaxFromDb(ShardDefinition $shardDefinition): int {
$max = 0;
$max = $shardDefinition->fromFileId;
$query = $this->shardConnectionManager->getConnection($shardDefinition, 0)->getQueryBuilder();
$query->select($shardDefinition->primaryKey)
->from($shardDefinition->table)
->orderBy($shardDefinition->primaryKey, 'DESC')
->setMaxResults(1);
foreach ($shardDefinition->getAllShards() as $shard) {

Check failure on line 145 in lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

UndefinedClass

lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php:145:12: UndefinedClass: Class, interface or enum named OC\DB\QueryBuilder\Sharded\List does not exist (see https://psalm.dev/019)
$connection = $this->shardConnectionManager->getConnection($shardDefinition, $shard);
$query = $connection->getQueryBuilder();
$query->select($shardDefinition->primaryKey)
->from($shardDefinition->table)
->orderBy($shardDefinition->primaryKey, 'DESC')
->setMaxResults(1);
$result = $query->executeQuery()->fetchOne();
if ($result) {
$max = max($max, $result);
if ($shard !== ShardDefinition::MIGRATION_SHARD) {
$connection = $this->shardConnectionManager->getConnection($shardDefinition, $shard);
$result = $query->executeQuery($connection)->fetchOne();
if ($result) {
$max = max($max, $result);
}
}
}
return $max;
Expand Down
12 changes: 11 additions & 1 deletion lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use OC\DB\ConnectionFactory;
use OC\SystemConfig;
use OCP\IDBConnection;
use OCP\Server;

/**
* Keeps track of the db connections to the various shards
Expand All @@ -28,8 +29,17 @@ public function __construct(

public function getConnection(ShardDefinition $shardDefinition, int $shard): IDBConnection {
$connectionKey = $shardDefinition->table . '_' . $shard;
if (!isset($this->connections[$connectionKey])) {

if (isset($this->connections[$connectionKey])) {
return $this->connections[$connectionKey];
}

if ($shard === ShardDefinition::MIGRATION_SHARD) {
$this->connections[$connectionKey] = \OC::$server->get(IDBConnection::class);
} elseif (isset($shardDefinition->shards[$shard])) {
$this->connections[$connectionKey] = $this->createConnection($shardDefinition->shards[$shard]);
} else {
throw new \InvalidArgumentException("invalid shard key $shard only " . count($shardDefinition->shards) . ' configured');
}

return $this->connections[$connectionKey];
Expand Down
23 changes: 19 additions & 4 deletions lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@

namespace OC\DB\QueryBuilder\Sharded;

use lib\DB\QueryBuilder\Partitioned\JoinConditionTest;
use OCP\DB\QueryBuilder\Sharded\IShardMapper;

/**
* Configuration for a shard setup
*/
class ShardDefinition {
// we reserve the bottom byte of the primary key for the initial shard, so the total shard count is limited to what we can fit there
public const MAX_SHARDS = 256;
// additionally, shard id 255 is reserved for migration purposes
public const MAX_SHARDS = 255;
public const MIGRATION_SHARD = 255;

public const PRIMARY_KEY_MASK = 0x7F_FF_FF_FF_FF_FF_FF_00;
public const PRIMARY_KEY_SHARD_MASK = 0x00_00_00_00_00_00_00_FF;
Expand All @@ -37,8 +40,10 @@
public array $companionKeys,
public string $shardKey,
public IShardMapper $shardMapper,
public array $companionTables = [],
public array $shards = [],
public array $companionTables,
public array $shards,
public int $fromFileId,
public int $fromStorageId,
) {
if (count($this->shards) >= self::MAX_SHARDS) {
throw new \Exception('Only allowed maximum of ' . self::MAX_SHARDS . ' shards allowed');
Expand All @@ -53,11 +58,21 @@
}

public function getShardForKey(int $key): int {
if ($key < $this->fromStorageId) {
return self::MIGRATION_SHARD;
}
return $this->shardMapper->getShardForKey($key, count($this->shards));
}

/**
* @return List<int>

Check failure on line 68 in lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

UndefinedDocblockClass

lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php:68:13: UndefinedDocblockClass: Docblock-defined class, interface or enum named OC\DB\QueryBuilder\Sharded\List does not exist (see https://psalm.dev/200)
*/
public function getAllShards(): array {
return array_keys($this->shards);
if ($this->fromStorageId !== 0) {
return array_merge(array_keys($this->shards), [self::MIGRATION_SHARD]);
} else {
return array_keys($this->shards);
}
}

public function isKey(string $column): bool {
Expand Down
3 changes: 3 additions & 0 deletions lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@
*
* @param bool $allShards
* @param int[] $shardKeys
* @return null|int[]

Check failure on line 30 in lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

InvalidReturnType

lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php:30:13: InvalidReturnType: The declared return type 'array<array-key, int>|null' for OC\DB\QueryBuilder\Sharded\ShardQueryRunner::getShards is incorrect, got 'OC\DB\QueryBuilder\Sharded\List<int>|non-empty-list<int>|null' (see https://psalm.dev/011)
*/
public function getShards(bool $allShards, array $shardKeys): ?array {
if ($allShards) {
return $this->shardDefinition->getAllShards();

Check failure on line 34 in lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

InvalidReturnStatement

lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php:34:11: InvalidReturnStatement: The inferred type 'OC\DB\QueryBuilder\Sharded\List<int>' does not match the declared return type 'array<array-key, int>|null' for OC\DB\QueryBuilder\Sharded\ShardQueryRunner::getShards (see https://psalm.dev/128)
}
$allConfiguredShards = $this->shardDefinition->getAllShards();
if (count($allConfiguredShards) === 1) {

Check failure on line 37 in lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

InvalidArgument

lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php:37:13: InvalidArgument: Argument 1 of count expects Countable|array<array-key, mixed>, but OC\DB\QueryBuilder\Sharded\List<int> provided (see https://psalm.dev/004)
return $allConfiguredShards;

Check failure on line 38 in lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

InvalidReturnStatement

lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php:38:11: InvalidReturnStatement: The inferred type 'OC\DB\QueryBuilder\Sharded\List<int>' does not match the declared return type 'array<array-key, int>|null' for OC\DB\QueryBuilder\Sharded\ShardQueryRunner::getShards (see https://psalm.dev/128)
}
if (empty($shardKeys)) {
return null;
Expand All @@ -55,6 +55,9 @@
private function getLikelyShards(array $primaryKeys): array {
$shards = [];
foreach ($primaryKeys as $primaryKey) {
if ($primaryKey < $this->shardDefinition->fromFileId&& !in_array(ShardDefinition::MIGRATION_SHARD, $shards)) {
$shards[] = ShardDefinition::MIGRATION_SHARD;
}
$encodedShard = $primaryKey & ShardDefinition::PRIMARY_KEY_SHARD_MASK;
if ($encodedShard < count($this->shardDefinition->shards) && !in_array($encodedShard, $shards)) {
$shards[] = $encodedShard;
Expand Down Expand Up @@ -109,7 +112,7 @@
// we first try the shards that are "likely" to have the rows we want, based on the shard that the row was
// originally created in. If we then still haven't found all rows we try the rest of the shards
$likelyShards = $this->getLikelyShards($primaryKeys);
$unlikelyShards = array_diff($this->shardDefinition->getAllShards(), $likelyShards);

Check failure on line 115 in lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php

View workflow job for this annotation

GitHub Actions / static-code-analysis

InvalidArgument

lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php:115:33: InvalidArgument: Argument 1 of array_diff expects array<array-key, mixed>, but OC\DB\QueryBuilder\Sharded\List<int> provided (see https://psalm.dev/004)
$shards = array_merge($likelyShards, $unlikelyShards);

foreach ($shards as $shard) {
Expand Down
3 changes: 3 additions & 0 deletions lib/private/FilesMetadata/FilesMetadataManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,14 @@ public function refreshMetadata(
int $process = self::PROCESS_LIVE,
string $namedEvent = '',
): IFilesMetadata {
$storageId = $node->getStorage()->getCache()->getNumericStorageId();
try {
/** @var FilesMetadata $metadata */
$metadata = $this->metadataRequestService->getMetadataFromFileId($node->getId());
} catch (FilesMetadataNotFoundException) {
$metadata = new FilesMetadata($node->getId());
}
$metadata->setStorageId($storageId);

// if $process is LIVE, we enforce LIVE
// if $process is NAMED, we go NAMED
Expand Down
17 changes: 17 additions & 0 deletions lib/private/FilesMetadata/Model/FilesMetadata.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class FilesMetadata implements IFilesMetadata {
private bool $updated = false;
private int $lastUpdate = 0;
private string $syncToken = '';
private ?int $storageId = null;

public function __construct(
private int $fileId = 0,
Expand All @@ -42,6 +43,22 @@ public function getFileId(): int {
return $this->fileId;
}

public function getStorageId(): ?int {
return $this->storageId;
}

/**
* Set which storage the file this metadata belongs to.
*
* This helps with sharded filecache setups to know where to store the metadata
*
* @param int $storageId
* @return void
*/
public function setStorageId(int $storageId): void {
$this->storageId = $storageId;
}

/**
* @inheritDoc
* @return int timestamp
Expand Down
20 changes: 20 additions & 0 deletions lib/private/FilesMetadata/Service/MetadataRequestService.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@ public function __construct(
) {
}

private function getStorageId(IFilesMetadata $filesMetadata): int {
if ($filesMetadata instanceof FilesMetadata) {
$storage = $filesMetadata->getStorageId();
if ($storage) {
return $storage;
}
}
// all code paths that lead to saving metadata *should* have the storage id set
// this fallback is there just in case
$query = $this->dbConnection->getQueryBuilder();
$query->select('storage')
->from('filecache')
->where($query->expr()->eq('fileid', $query->createNamedParameter($filesMetadata->getFileId(), IQueryBuilder::PARAM_INT)));
$storageId = $query->executeQuery()->fetchColumn();
$filesMetadata->setStorageId($storageId);
return $storageId;
}

/**
* store metadata into database
*
Expand All @@ -38,6 +56,7 @@ public function __construct(
public function store(IFilesMetadata $filesMetadata): void {
$qb = $this->dbConnection->getQueryBuilder();
$qb->insert(self::TABLE_METADATA)
->hintShardKey('storage', $this->getStorageId($filesMetadata))
->setValue('file_id', $qb->createNamedParameter($filesMetadata->getFileId(), IQueryBuilder::PARAM_INT))
->setValue('json', $qb->createNamedParameter(json_encode($filesMetadata->jsonSerialize())))
->setValue('sync_token', $qb->createNamedParameter($this->generateSyncToken()))
Expand Down Expand Up @@ -134,6 +153,7 @@ public function updateMetadata(IFilesMetadata $filesMetadata): int {
$expr = $qb->expr();

$qb->update(self::TABLE_METADATA)
->hintShardKey('files_metadata', $this->getStorageId($filesMetadata))
->set('json', $qb->createNamedParameter(json_encode($filesMetadata->jsonSerialize())))
->set('sync_token', $qb->createNamedParameter($this->generateSyncToken()))
->set('last_update', $qb->createFunction('NOW()'))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ private function getQueryBuilder(string $table, string $shardColumn, string $pri
return new ShardedQueryBuilder(
$this->connection->getQueryBuilder(),
[
new ShardDefinition($table, $primaryColumn, [], $shardColumn, new RoundRobinShardMapper(), $companionTables, []),
new ShardDefinition($table, $primaryColumn, [], $shardColumn, new RoundRobinShardMapper(), $companionTables, [], 0),
],
$this->createMock(ShardConnectionManager::class),
$this->autoIncrementHandler,
Expand Down
98 changes: 98 additions & 0 deletions tests/lib/FilesMetadata/FilesMetadataManagerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
<?php

declare(strict_types=1);
/**
* SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace Test\FilesMetadata;

use OC\BackgroundJob\JobList;
use OC\Files\Storage\Temporary;
use OC\FilesMetadata\FilesMetadataManager;
use OC\FilesMetadata\Service\IndexRequestService;
use OC\FilesMetadata\Service\MetadataRequestService;
use OCP\EventDispatcher\Event;
use OCP\EventDispatcher\IEventDispatcher;
use OCP\Files\Folder;
use OCP\Files\IRootFolder;
use OCP\FilesMetadata\AMetadataEvent;
use OCP\IAppConfig;
use OCP\IDBConnection;
use OCP\Server;
use Psr\Log\LoggerInterface;
use Test\TestCase;
use Test\Traits\MountProviderTrait;
use Test\Traits\UserTrait;

/**
* @group DB
*/
class FilesMetadataManagerTest extends TestCase {
use UserTrait;
use MountProviderTrait;

private IEventDispatcher $eventDispatcher;
private JobList $jobList;
private IAppConfig $appConfig;
private LoggerInterface $logger;
private MetadataRequestService $metadataRequestService;
private IndexRequestService $indexRequestService;
private FilesMetadataManager $manager;
private IDBConnection $connection;
private Folder $userFolder;
private array $metadata = [];

protected function setUp(): void {
parent::setUp();

$this->jobList = $this->createMock(JobList::class);
$this->eventDispatcher = $this->createMock(IEventDispatcher::class);
$this->eventDispatcher->method('dispatchTyped')->willReturnCallback(function (Event $event) {
if ($event instanceof AMetadataEvent) {
$name = $event->getNode()->getName();
if (isset($this->metadata[$name])) {
$meta = $event->getMetadata();
foreach ($this->metadata[$name] as $key => $value) {
$meta->setString($key, $value);
}
}
}
});
$this->appConfig = $this->createMock(IAppConfig::class);
$this->logger = $this->createMock(LoggerInterface::class);

$this->connection = Server::get(IDBConnection::class);
$this->metadataRequestService = new MetadataRequestService($this->connection, $this->logger);
$this->indexRequestService = new IndexRequestService($this->connection, $this->logger);
$this->manager = new FilesMetadataManager(
$this->eventDispatcher,
$this->jobList,
$this->appConfig,
$this->logger,
$this->metadataRequestService,
$this->indexRequestService,
);

$this->createUser('metatest', '');
$this->registerMount('metatest', new Temporary([]), '/metatest');

$rootFolder = Server::get(IRootFolder::class);
$this->userFolder = $rootFolder->getUserFolder('metatest');
}

public function testRefreshMetadata(): void {
$this->metadata['test.txt'] = [
'istest' => 'yes'
];
$file = $this->userFolder->newFile('test.txt', 'test');
$stored = $this->manager->refreshMetadata($file);
$this->assertEquals($file->getId(), $stored->getFileId());
$this->assertEquals('yes', $stored->getString('istest'));

$retrieved = $this->manager->getMetadata($file->getId());
$this->assertEquals($file->getId(), $retrieved->getFileId());
$this->assertEquals('yes', $retrieved->getString('istest'));
}
}
Loading