From d81f5f2cb26eced82b1d07b6b9345efa91e5569e Mon Sep 17 00:00:00 2001 From: otsch Date: Thu, 8 Aug 2024 00:55:19 +0200 Subject: [PATCH 1/2] Remove addToResult() and multiple loaders Remove the methods addToResult(), addLaterToResult() and everything connected to that. Also, it is no longer possible to provide multiple loaders via the crawler. Instead you can now manually provide customized loaders directly to steps via the new withLoader() method. Further, also remove the Microseconds util class, which is now part of the crwlr/utils package. --- CHANGELOG.md | 6 + composer.json | 4 +- src/Crawler.php | 126 +--- src/Exceptions/UnknownLoaderKeyException.php | 7 - src/HttpCrawler.php | 4 +- src/Io.php | 8 +- src/Loader/AddLoadersToStepAction.php | 63 -- src/Loader/Http/Messages/RespondedRequest.php | 5 +- .../Politeness/TimingUnits/Microseconds.php | 8 - src/Loader/Loader.php | 10 - src/Result.php | 7 +- src/Steps/BaseStep.php | 316 +-------- src/Steps/Group.php | 120 +--- .../Loading/GetSitemapsFromRobotsTxt.php | 14 +- src/Steps/Loading/HttpBase.php | 9 +- src/Steps/Loading/HttpCrawl.php | 2 +- src/Steps/Loading/LoadingStep.php | 17 +- src/Steps/Loading/LoadingStepInterface.php | 21 - src/Steps/Step.php | 10 +- src/Steps/StepInterface.php | 16 - src/Utils/OutputTypeHelper.php | 2 - tests/CrawlerTest.php | 450 +++---------- tests/IoTest.php | 102 +-- tests/Loader/AddLoadersToStepActionTest.php | 96 --- .../Http/Messages/RespondedRequestTest.php | 38 +- tests/Pest.php | 27 +- tests/Steps/GroupTest.php | 602 +++++------------- .../Loading/GetSitemapsFromRobotsTxtTest.php | 2 +- tests/Steps/Loading/HttpTest.php | 83 ++- tests/Steps/Loading/LoadingStepTest.php | 50 +- tests/Steps/StepTest.php | 361 ++--------- tests/Utils/OutputTypeHelperTest.php | 14 - tests/_Integration/GroupTest.php | 5 +- tests/_Integration/Http/CrawlingTest.php | 21 +- .../_Integration/Http/ErrorResponsesTest.php | 17 +- tests/_Integration/Http/GzipTest.php | 10 +- 
.../_Integration/Http/HeadlessBrowserTest.php | 48 +- .../Http/Html/PaginatedListingTest.php | 28 +- .../Http/Html/SimpleListingTest.php | 37 +- tests/_Integration/Http/ProxyingTest.php | 43 +- .../Http/PublisherExampleTest.php | 37 +- .../Http/QueryParamPaginationTest.php | 8 +- tests/_Integration/Http/RedirectTest.php | 20 +- tests/_Stubs/LoaderCollectingStep.php | 29 - tests/_Stubs/MultiLoaderCrawler.php | 27 - 45 files changed, 637 insertions(+), 2293 deletions(-) delete mode 100644 src/Exceptions/UnknownLoaderKeyException.php delete mode 100644 src/Loader/AddLoadersToStepAction.php delete mode 100644 src/Loader/Http/Politeness/TimingUnits/Microseconds.php delete mode 100644 src/Steps/Loading/LoadingStepInterface.php delete mode 100644 tests/Loader/AddLoadersToStepActionTest.php delete mode 100644 tests/_Stubs/LoaderCollectingStep.php delete mode 100644 tests/_Stubs/MultiLoaderCrawler.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c9b057..ba94781 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [2.0.0] - 2024-x-x ### Changed +* __BREAKING__: Removed methods `BaseStep::addToResult()`, `BaseStep::addLaterToResult()`, `BaseStep::addsToOrCreatesResult()`, `BaseStep::createsResult()` and `BaseStep::keepInputData()`. They have already been deprecated in v1.8.0 and shall be replaced with `Step::keep()` and `Step::keepAs()`, `Step::keepFromInput()` and `Step::keepInputAs()`. +* __BREAKING__: As the `addToResult()` method was removed, the library does not use `toArrayForAddToResult()` methods on output objects any longer. Instead, please use `toArrayForResult()`. Therefore, also the `RespondedRequest::toArrayForAddToResult()` is renamed to `RespondedRequest::toArrayForResult()`. +* __BREAKING__: Removed the `result` and `addLaterToResult` properties from `Io` objects (so `Input` and `Output`). They were part of the whole `addToResult` feature and are therefore removed. 
Instead, there is the `keep` property where kept data is added. +* __BREAKING__: The return type of the `Crawler::loader()` method was changed to no longer allow `array`. This means it's no longer possible to provide multiple loaders from the crawler. Instead, use the functionality described below to directly provide a custom loader to a step. +* __BREAKING__: Refactored the abstract `LoadingStep` class to a trait and removed the `LoadingStepInterface`. Loading steps should now just extend the `Step` class and use the trait. As it is no longer possible to have multiple loaders, the `addLoader` method was renamed to `setLoader`. For the same reason, the methods `useLoader()` and `usesLoader()`, to choose one of multiple loaders from the crawler by key, are removed. Instead, you can now directly provide a different loader to a single step (instead of to the crawler), using the trait's new `withLoader()` method (e.g. `Http::get()->withLoader($loader)`). * __BREAKING__: The `HttpLoader::retryCachedErrorResponses()` method now returns an instance of the new `Crwlr\Crawler\Loader\Http\Cache\RetryManager` class, providing the methods `only()` and `except()` that can be used to restrict retries to certain HTTP response status codes. Previously the method returned the `HttpLoader` itself (`$this`), so if you're using it in a chain and call other loader methods after it, you need to refactor this. +* __BREAKING__: Removed the `Microseconds` class from this package. It was moved to the `crwlr/utils` package that you can use instead. 
## [1.10.0] - 2024-08-05 ### Added diff --git a/composer.json b/composer.json index 15669e7..013c9a0 100644 --- a/composer.json +++ b/composer.json @@ -68,8 +68,8 @@ } }, "scripts": { - "test": "pest --exclude-group integration --display-warnings", - "test-integration": "pest --group integration --display-warnings", + "test": "pest --exclude-group integration --display-warnings --bail", + "test-integration": "pest --group integration --display-warnings --bail", "stan": "@php -d memory_limit=4G vendor/bin/phpstan analyse", "cs": "php-cs-fixer fix -v --dry-run", "cs-fix": "php-cs-fixer fix -v", diff --git a/src/Crawler.php b/src/Crawler.php index 6fe12ea..b8b856b 100644 --- a/src/Crawler.php +++ b/src/Crawler.php @@ -3,14 +3,11 @@ namespace Crwlr\Crawler; use Closure; -use Crwlr\Crawler\Exceptions\UnknownLoaderKeyException; -use Crwlr\Crawler\Loader\AddLoadersToStepAction; use Crwlr\Crawler\Loader\LoaderInterface; use Crwlr\Crawler\Logger\CliLogger; use Crwlr\Crawler\Steps\BaseStep; use Crwlr\Crawler\Steps\Exceptions\PreRunValidationException; use Crwlr\Crawler\Steps\Group; -use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\Steps\StepInterface; use Crwlr\Crawler\Stores\StoreInterface; use Crwlr\Crawler\UserAgents\UserAgentInterface; @@ -24,9 +21,9 @@ abstract class Crawler protected UserAgentInterface $userAgent; /** - * @var LoaderInterface|array + * @var LoaderInterface */ - protected LoaderInterface|array $loader; + protected LoaderInterface $loader; protected LoggerInterface $logger; @@ -68,9 +65,9 @@ abstract protected function userAgent(): UserAgentInterface; /** * @param UserAgentInterface $userAgent * @param LoggerInterface $logger - * @return LoaderInterface|array + * @return LoaderInterface */ - abstract protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface|array; + abstract protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface; public static function group(): Group { @@ 
-146,24 +143,17 @@ public function inputs(array $inputs): static } /** - * @param string|StepInterface $stepOrResultKey - * @param StepInterface|null $step + * @param StepInterface $step * @return $this - * @throws InvalidArgumentException|UnknownLoaderKeyException + * @throws InvalidArgumentException */ - public function addStep(string|StepInterface $stepOrResultKey, ?StepInterface $step = null): static + public function addStep(StepInterface $step): static { - if (is_string($stepOrResultKey) && $step === null) { - throw new InvalidArgumentException('No StepInterface object provided'); - } elseif (is_string($stepOrResultKey)) { - $step->addToResult($stepOrResultKey); - } else { - $step = $stepOrResultKey; - } - $step->addLogger($this->logger); - (new AddLoadersToStepAction($this->loader, $step))->invoke(); + if (method_exists($step, 'setLoader')) { + $step->setLoader($this->loader); + } if ($step instanceof BaseStep) { $step->setParentCrawler($this); @@ -266,8 +256,8 @@ protected function invokeStepsRecursive(Input $input, StepInterface $step, int $ $nextStep = $this->nextStep($stepIndex); - if (!$nextStep && $input->result === null) { - yield from $this->storeAndReturnResults($outputs, $step->createsResult() === true, true); + if (!$nextStep) { + yield from $this->storeAndReturnOutputsAsResults($outputs); return; } @@ -279,72 +269,11 @@ protected function invokeStepsRecursive(Input $input, StepInterface $step, int $ $this->outputHook?->call($this, $output, $stepIndex, $step); - if ($nextStep) { - if ($input->result === null && $step->createsResult()) { - $childOutputs = $this->invokeStepsRecursive( - new Input($output), - $nextStep, - $stepIndex + 1, - ); - - /** @var Generator $childOutputs */ - - yield from $this->storeAndReturnResults($childOutputs, true); - } else { - yield from $this->invokeStepsRecursive( - new Input($output), - $nextStep, - $stepIndex + 1, - ); - } - } else { - yield $output; - } - } - } - - /** - * @param Generator $outputs - * @return 
Generator - */ - protected function storeAndReturnResults( - Generator $outputs, - bool $manuallyDefinedResults = false, - bool $callOutputHook = false, - ): Generator { - if ($manuallyDefinedResults || $this->anyResultKeysDefinedInSteps()) { - yield from $this->storeAndReturnDefinedResults($outputs, $callOutputHook); - } else { - yield from $this->storeAndReturnOutputsAsResults($outputs, $callOutputHook); - } - } - - /** - * @param Generator $outputs - * @return Generator - */ - protected function storeAndReturnDefinedResults(Generator $outputs, bool $callOutputHook = false): Generator - { - $results = []; - - foreach ($outputs as $output) { - if ($callOutputHook) { - $this->outputHook?->call($this, $output, count($this->steps) - 1, end($this->steps)); - } - - if ($output->result !== null && !in_array($output->result, $results, true)) { - $results[] = $output->result; - } elseif ($output->addLaterToResult !== null && !in_array($output->addLaterToResult, $results, true)) { - $results[] = new Result($output->addLaterToResult); - } - } - - // yield results only after iterating over final outputs, because that could still add properties to result - // resources. 
- foreach ($results as $result) { - $this->store?->store($result); - - yield $result; + yield from $this->invokeStepsRecursive( + new Input($output), + $nextStep, + $stepIndex + 1, + ); } } @@ -352,12 +281,10 @@ protected function storeAndReturnDefinedResults(Generator $outputs, bool $callOu * @param Generator $outputs * @return Generator */ - protected function storeAndReturnOutputsAsResults(Generator $outputs, bool $callOutputHook = false): Generator + protected function storeAndReturnOutputsAsResults(Generator $outputs): Generator { foreach ($outputs as $output) { - if ($callOutputHook) { - $this->outputHook?->call($this, $output, count($this->steps) - 1, end($this->steps)); - } + $this->outputHook?->call($this, $output, count($this->steps) - 1, end($this->steps)); $result = new Result(); @@ -420,17 +347,6 @@ protected function prepareInput(): array }, $this->inputs); } - protected function anyResultKeysDefinedInSteps(): bool - { - foreach ($this->steps as $step) { - if ($step->addsToOrCreatesResult()) { - return true; - } - } - - return false; - } - protected function logMemoryUsage(): void { $memoryUsage = memory_get_usage(); @@ -445,11 +361,11 @@ protected function firstStep(): ?StepInterface return $this->steps[0] ?? 
null; } - protected function lastStep(): ?Step + protected function lastStep(): ?BaseStep { $lastStep = end($this->steps); - if (!$lastStep instanceof Step) { + if (!$lastStep instanceof BaseStep) { return null; } diff --git a/src/Exceptions/UnknownLoaderKeyException.php b/src/Exceptions/UnknownLoaderKeyException.php deleted file mode 100644 index 3041f7b..0000000 --- a/src/Exceptions/UnknownLoaderKeyException.php +++ /dev/null @@ -1,7 +0,0 @@ - + * @return LoaderInterface */ - protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface|array + protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface { return new HttpLoader($userAgent, logger: $logger); } diff --git a/src/Io.php b/src/Io.php index 4234c17..9700bc4 100644 --- a/src/Io.php +++ b/src/Io.php @@ -13,24 +13,18 @@ class Io */ final public function __construct( protected mixed $value, - public ?Result $result = null, - public ?Result $addLaterToResult = null, public array $keep = [], ) { if ($value instanceof self) { $this->value = $value->value; - $this->result ??= $value->result; - - $this->addLaterToResult ??= $value->addLaterToResult; - $this->keep = $value->keep; } } public function withValue(mixed $value): static { - return new static($value, $this->result, $this->addLaterToResult, $this->keep); + return new static($value, $this->keep); } public function withPropertyValue(string $key, mixed $value): static diff --git a/src/Loader/AddLoadersToStepAction.php b/src/Loader/AddLoadersToStepAction.php deleted file mode 100644 index 95d1a87..0000000 --- a/src/Loader/AddLoadersToStepAction.php +++ /dev/null @@ -1,63 +0,0 @@ - $loaders - * @param StepInterface $step - */ - public function __construct(protected LoaderInterface|array $loaders, protected StepInterface $step) {} - - /** - * @return void - * @throws UnknownLoaderKeyException - */ - public function invoke(): void - { - if (!method_exists($this->step, 'addLoader')) { - 
return; - } - - if (is_array($this->loaders)) { - $this->addLoadersToStep(); - } else { - $this->step->addLoader($this->loaders); - } - } - - /** - * Add either all or one of multiple defined loaders to the step - * - * A group step has a method addLoaders() to add all loaders at once and delegate them to child steps. - * If user chose a specific loader for a step, add only that loader to the step. - * Otherwise, call the steps addLoader() method with each loader one by one. In this case the step could implement - * logic to decide which loader to accept. - * - * @throws UnknownLoaderKeyException - */ - protected function addLoadersToStep(): void - { - if (!is_array($this->loaders) || !method_exists($this->step, 'addLoader')) { - return; - } - - if (method_exists($this->step, 'addLoaders')) { - $this->step->addLoaders($this->loaders); - } elseif (method_exists($this->step, 'usesLoader') && $this->step->usesLoader() !== null) { - if (!isset($this->loaders[$this->step->usesLoader()])) { - throw new UnknownLoaderKeyException(); - } - - $this->step->addLoader($this->loaders[$this->step->usesLoader()]); - } else { - foreach ($this->loaders as $loader) { - $this->step->addLoader($loader); - } - } - } -} diff --git a/src/Loader/Http/Messages/RespondedRequest.php b/src/Loader/Http/Messages/RespondedRequest.php index 5d1c03f..242b38c 100644 --- a/src/Loader/Http/Messages/RespondedRequest.php +++ b/src/Loader/Http/Messages/RespondedRequest.php @@ -2,6 +2,7 @@ namespace Crwlr\Crawler\Loader\Http\Messages; +use Crwlr\Crawler\Cache\Exceptions\MissingZlibExtensionException; use Crwlr\Crawler\Steps\Loading\Http; use Crwlr\Crawler\Utils\RequestKey; use Crwlr\Url\Url; @@ -57,6 +58,7 @@ public static function cacheKeyFromRequest(RequestInterface $request): string /** * @return mixed[] + * @throws MissingZlibExtensionException */ public function __serialize(): array { @@ -74,8 +76,9 @@ public function __serialize(): array /** * @return mixed[] + * @throws 
MissingZlibExtensionException */ - public function toArrayForAddToResult(): array + public function toArrayForResult(): array { $serialized = $this->__serialize(); diff --git a/src/Loader/Http/Politeness/TimingUnits/Microseconds.php b/src/Loader/Http/Politeness/TimingUnits/Microseconds.php deleted file mode 100644 index 5b3f701..0000000 --- a/src/Loader/Http/Politeness/TimingUnits/Microseconds.php +++ /dev/null @@ -1,8 +0,0 @@ -hooks)) { diff --git a/src/Result.php b/src/Result.php index d287ad7..6e9aa22 100644 --- a/src/Result.php +++ b/src/Result.php @@ -35,9 +35,14 @@ public function set(string $key, mixed $value): self return $this; } + public function has(string $key): bool + { + return array_key_exists($key, $this->data); + } + public function get(string $key, mixed $default = null): mixed { - if (array_key_exists($key, $this->data)) { + if ($this->has($key)) { return $this->data[$key]; } diff --git a/src/Steps/BaseStep.php b/src/Steps/BaseStep.php index 6997216..14fe41f 100644 --- a/src/Steps/BaseStep.php +++ b/src/Steps/BaseStep.php @@ -51,23 +51,6 @@ abstract class BaseStep implements StepInterface */ protected array $subCrawlers = []; - /** - * True means add all elements of the output array. - * String means use that key for a non array output value. - * Array of strings means, add just those keys. - * - * @var bool|string|string[] - */ - protected bool|string|array $addToResult = false; - - /** - * Same as $addToResult, but doesn't create a Result object now. Instead, it appends the data to the Output object, - * so it'll add the data to all the Result objects that are later created from the output. 
- * - * @var bool|string|string[] - */ - protected bool|string|array $addLaterToResult = false; - protected ?LoggerInterface $logger = null; protected ?string $useInputKey = null; @@ -96,10 +79,6 @@ abstract class BaseStep implements StepInterface */ protected array $refiners = []; - protected bool $keepInputData = false; - - protected ?string $keepInputDataKey = null; - protected ?string $outputKey = null; /** @@ -179,52 +158,6 @@ public function keepsAnythingFromOutputData(): bool return $this->keep !== false || $this->keepAs !== null; } - /** - * @deprecated This method will be removed in v2 of the library. Please use the new keep() and keepAs() - * methods instead. - */ - public function addToResult(array|string|null $keys = null): static - { - if (is_string($keys) || is_array($keys)) { - $this->addToResult = $keys; - } else { - $this->addToResult = true; - } - - return $this; - } - - /** - * @deprecated This method will be removed in v2 of the library. Please use the new keep() and keepAs() - * methods instead. - */ - public function addLaterToResult(array|string|null $keys = null): static - { - if (is_string($keys) || is_array($keys)) { - $this->addLaterToResult = $keys; - } else { - $this->addLaterToResult = true; - } - - return $this; - } - - /** - * @deprecated Along with addToResult() and addLaterToResult() this method is also deprecated. - */ - public function addsToOrCreatesResult(): bool - { - return $this->createsResult() || $this->addLaterToResult !== false; - } - - /** - * @deprecated Along with addToResult() this method is also deprecated. - */ - public function createsResult(): bool - { - return $this->addToResult !== false; - } - public function useInputKey(string $key): static { $this->useInputKey = $key; @@ -317,19 +250,6 @@ public function outputKey(string $key): static return $this; } - /** - * @deprecated This method will be removed in v2 of the library. Please use the new keepFromInput() or - * keepInputAs() methods instead. 
- */ - public function keepInputData(?string $inputKey = null): static - { - $this->keepInputData = true; - - $this->keepInputDataKey = $inputKey; - - return $this; - } - public function resetAfterRun(): void { $this->uniqueOutputKeys = $this->uniqueInputKeys = []; @@ -471,6 +391,17 @@ protected function runSubCrawlersFor(Output $output): Output return $output; } + /** + * If you want to define aliases for certain output keys that can be used with keep(), + * define this method in the child class and return the mappings. + * + * @return array alias => output key + */ + protected function outputKeyAliases(): array + { + return []; + } + /** * @param mixed[] $initialInputs * @throws PreRunValidationException @@ -607,43 +538,10 @@ protected function applyRefiners(mixed $outputValue, mixed $inputValue): mixed return $outputValue; } - /** - * @return array - * @throws Exception - * @deprecated because the keepInputData() feature is deprecated. - */ - protected function addInputDataToOutputData(mixed $inputValue, mixed $outputValue): array - { - if (!is_array($outputValue)) { - throw new Exception( - 'Can\'t add input data to non array output data! You can use the outputKey() method ' . 
- 'to make the step\'s output an array.', - ); - } - - if (!is_array($inputValue)) { - if (!is_string($this->keepInputDataKey)) { - throw new Exception('No key defined for scalar input value.'); - } - - $inputValue = [$this->keepInputDataKey => $inputValue]; - } - - foreach ($inputValue as $key => $value) { - if (!isset($outputValue[$key])) { - $outputValue[$key] = $value; - } - } - - return $outputValue; - } - protected function makeOutput(mixed $outputData, Input $input): Output { $output = new Output( $outputData, - $this->addOutputDataToResult($outputData, $input), - $this->addOutputDataToAddLaterResult($outputData, $input), $input->keep, ); @@ -729,7 +627,7 @@ protected function getInputOrOutputDataToKeep(Io $io, array $alreadyKept): ?arra if ($keepProperty === true) { return $data; } elseif (is_string($keepProperty)) { - return [$keepProperty => $data[$keepProperty] ?? null]; + return [$keepProperty => $this->getOutputPropertyFromArray($keepProperty, $data)]; } return $this->mapKeepProperties($data, $keepProperty); @@ -764,178 +662,48 @@ protected function mapKeepProperties(array $data, array $keep): array foreach ($keep as $key => $value) { if (is_int($key)) { - $keepData[$value] = $data[$value] ?? null; + $keepData[$value] = $this->getOutputPropertyFromArray($value, $data); } elseif (is_string($key)) { - $keepData[$key] = $data[$value] ?? null; + $keepData[$key] = $this->getOutputPropertyFromArray($value, $data); } } return $keepData; } - protected function addOutputDataToResult( - mixed $output, - Input $input, - ): ?Result { - if ($this->addToResult !== false) { - $result = $input->result ?? 
new Result($input->addLaterToResult); - - return $this->addOutputDataToResultObject($output, $result); - } - - return $input->result; - } - - protected function addOutputDataToAddLaterResult(mixed $output, Input $input): ?Result - { - if ($this->addToResult !== false) { - return null; - } - - if ($this->addLaterToResult !== false) { - $addLaterResult = $input->addLaterToResult ?? new Result(); - - return $this->addOutputDataToResultObject($output, $addLaterResult); - } - - return $input->addLaterToResult; - } - - protected function addOutputDataToResultObject(mixed $output, Result $result): Result - { - $addToResultObject = $this->addToResult !== false ? $this->addToResult : $this->addLaterToResult; - - if (is_string($addToResultObject)) { - $result->set($addToResultObject, $output); - } - - if ( - ($addToResultObject === true || is_array($addToResultObject)) && - (is_array($output) || (is_object($output) && method_exists($output, '__serialize'))) - ) { - if (!is_array($output)) { - $output = $output->__serialize(); - } - - $output = $this->serializeElementsOfOutputArray($output); - - foreach ($output as $key => $value) { - if ($addToResultObject === true) { - $result->set(is_string($key) ? 
$key : '', $value); - } elseif ($this->shouldBeAdded($key, $addToResultObject)) { - $result->set($this->choseResultKey($key), $value); - } - } - - if (is_array($addToResultObject)) { - $this->tryToAddMissingKeysUsingDotNotation($output, $addToResultObject, $result); - } - } - - return $result; - } - /** - * @param mixed[] $output - * @return mixed[] + * @param mixed[] $data */ - protected function serializeElementsOfOutputArray(array $output): array + protected function getOutputPropertyFromArray(string $key, array $data): mixed { - foreach ($output as $key => $value) { - if (is_object($value)) { - if (method_exists($value, 'toArrayForAddToResult')) { - $output[$key] = $value->toArrayForAddToResult(); - } elseif (method_exists($value, '__serialize')) { - $output[$key] = $value->__serialize(); - } - } - - if (is_array($value)) { - $output[$key] = $this->serializeElementsOfOutputArray($value); - } + if (array_key_exists($key, $data)) { + return $data[$key]; + } elseif ($this->isOutputKeyAlias($key)) { + return $data[$this->getOutputKeyAliasRealKey($key)]; } - return $output; - } - - /** - * When user defines an array of keys that shall be added to the result it can also contain a mapping. - * If it does, use the key that it should be mapped to, instead of the key it has in the output array. - */ - protected function choseResultKey(int|string $keyInOutput): string - { - if (is_string($keyInOutput)) { - if (is_array($this->addToResult)) { - if (in_array($keyInOutput, $this->addToResult, true)) { - $mapTo = array_search($keyInOutput, $this->addToResult, true); - - return is_string($mapTo) ? $mapTo : $keyInOutput; - } + $data = $this->recursiveChildObjectsToArray($data); - foreach ($this->getAliasesForOutputKey($keyInOutput) as $alias) { - if (in_array($alias, $this->addToResult, true)) { - $mapTo = array_search($alias, $this->addToResult, true); + $dot = new Dot($data); - return is_string($mapTo) ? 
$mapTo : $alias; - } - } - } elseif (is_bool($this->addToResult)) { - return $keyInOutput; - } - } elseif (is_string($this->addToResult)) { - return $this->addToResult; - } - - return ''; - } - - /** - * @param mixed[] $output - * @param array $addToResult - */ - protected function tryToAddMissingKeysUsingDotNotation(array $output, array $addToResult, Result $result): void - { - $outputDot = new Dot($output); - - foreach ($addToResult as $resultKeyOrInt => $potentialDotNotationKey) { - $resultKey = is_int($resultKeyOrInt) ? $potentialDotNotationKey : $resultKeyOrInt; - - if ($result->get($resultKey) === null) { - $valueUsingDotNotation = $outputDot->get($potentialDotNotationKey); - - if ($valueUsingDotNotation !== null) { - $result->set($resultKey, $valueUsingDotNotation); - } - } - } + return $dot->get($key); } /** - * If you want to define aliases for certain output keys that can be used with addToResult, define this method in - * the child class and return the mappings. - * - * @return array alias => output key - */ - protected function outputKeyAliases(): array - { - return []; - } - - /** - * @param string $key - * @return string[] + * @param mixed[] $data + * @return mixed[] */ - protected function getAliasesForOutputKey(string $key): array + protected function recursiveChildObjectsToArray(array $data): array { - $aliases = []; - - foreach ($this->outputKeyAliases() as $alias => $outputKey) { - if ($outputKey === $key) { - $aliases[] = $alias; + foreach ($data as $key => $value) { + if (is_object($value)) { + $data[$key] = $this->recursiveChildObjectsToArray(OutputTypeHelper::objectToArray($value)); + } elseif (is_array($value)) { + $data[$key] = $this->recursiveChildObjectsToArray($value); } } - return $aliases; + return $data; } protected function isOutputKeyAlias(string $key): bool @@ -949,24 +717,4 @@ protected function getOutputKeyAliasRealKey(string $key): string return $mapping[$key]; } - - /** - * @param string $key - * @param string[] 
$addToResultKeys - * @return bool - */ - protected function shouldBeAdded(string $key, array $addToResultKeys): bool - { - if (in_array($key, $addToResultKeys, true)) { - return true; - } - - foreach ($this->getAliasesForOutputKey($key) as $alias) { - if (in_array($alias, $addToResultKeys, true)) { - return true; - } - } - - return false; - } } diff --git a/src/Steps/Group.php b/src/Steps/Group.php index f84ecf0..3a3f514 100644 --- a/src/Steps/Group.php +++ b/src/Steps/Group.php @@ -2,15 +2,11 @@ namespace Crwlr\Crawler\Steps; -use Crwlr\Crawler\Exceptions\UnknownLoaderKeyException; use Crwlr\Crawler\Input; -use Crwlr\Crawler\Loader\AddLoadersToStepAction; use Crwlr\Crawler\Loader\LoaderInterface; use Crwlr\Crawler\Output; -use Crwlr\Crawler\Result; use Exception; use Generator; -use InvalidArgumentException; use Psr\Log\LoggerInterface; final class Group extends BaseStep @@ -21,9 +17,9 @@ final class Group extends BaseStep private array $steps = []; /** - * @var LoaderInterface|array|null + * @var LoaderInterface|null */ - private null|LoaderInterface|array $loader = null; + private ?LoaderInterface $loader = null; /** * @param Input $input @@ -34,10 +30,8 @@ public function invokeStep(Input $input): Generator { $combinedOutput = []; - if (!$this->uniqueInput || $this->inputOrOutputIsUnique($input)) { - $input = $this->addResultToInputIfAnyResultKeysDefined($input); - } else { - return; // Input is not unique. 
+ if ($this->uniqueInput && !$this->inputOrOutputIsUnique($input)) { + return; } // When input is array and useInputKey() was used, invoke the steps only with that input array element, @@ -45,7 +39,7 @@ public function invokeStep(Input $input): Generator $inputForStepInvocation = $this->getInputKeyToUse($input); if ($inputForStepInvocation) { - foreach ($this->steps as $key => $step) { + foreach ($this->steps as $step) { foreach ($step->invokeStep($inputForStepInvocation) as $nthOutput => $output) { if (method_exists($step, 'callUpdateInputUsingOutput')) { $inputForStepInvocation = $step->callUpdateInputUsingOutput($inputForStepInvocation, $output); @@ -55,7 +49,6 @@ public function invokeStep(Input $input): Generator $combinedOutput = $this->addOutputToCombinedOutputs( $output->get(), $combinedOutput, - $key, $nthOutput, ); } @@ -66,57 +59,17 @@ public function invokeStep(Input $input): Generator } } - public function addsToOrCreatesResult(): bool - { - if (parent::addsToOrCreatesResult()) { - return true; - } - - foreach ($this->steps as $step) { - if ($step->addsToOrCreatesResult()) { - return true; - } - } - - return false; - } - - public function createsResult(): bool - { - if (parent::createsResult()) { - return true; - } - - foreach ($this->steps as $step) { - if ($step->createsResult()) { - return true; - } - } - - return false; - } - - public function addStep(string|StepInterface $stepOrResultKey, ?StepInterface $step = null): self + public function addStep(StepInterface $step): self { - if (is_string($stepOrResultKey) && $step === null) { - throw new InvalidArgumentException('No StepInterface object provided'); - } elseif ($stepOrResultKey instanceof StepInterface) { - $step = $stepOrResultKey; - } - if ($this->logger instanceof LoggerInterface) { $step->addLogger($this->logger); } - if (method_exists($step, 'addLoader') && $this->loader instanceof LoaderInterface) { - $step->addLoader($this->loader); + if (method_exists($step, 'setLoader') && 
$this->loader instanceof LoaderInterface) { + $step->setLoader($this->loader); } - if (is_string($stepOrResultKey) && !isset($this->steps[$stepOrResultKey])) { - $this->steps[$stepOrResultKey] = $step; - } else { - $this->steps[] = $step; - } + $this->steps[] = $step; return $this; } @@ -132,37 +85,19 @@ public function addLogger(LoggerInterface $logger): static return $this; } - /** - * @throws UnknownLoaderKeyException - */ - public function addLoader(LoaderInterface $loader): self + public function setLoader(LoaderInterface $loader): self { $this->loader = $loader; foreach ($this->steps as $step) { - if (method_exists($step, 'addLoader')) { - (new AddLoadersToStepAction($loader, $step))->invoke(); + if (method_exists($step, 'setLoader')) { + $step->setLoader($loader); } } return $this; } - /** - * @param LoaderInterface[] $loaders - * @throws UnknownLoaderKeyException - */ - public function addLoaders(array $loaders): self - { - $this->loader = $loaders; - - foreach ($this->steps as $step) { - (new AddLoadersToStepAction($this->loader, $step))->invoke(); - } - - return $this; - } - public function outputType(): StepOutputType { return StepOutputType::AssociativeArrayOrObject; @@ -180,24 +115,6 @@ protected function includeOutput(StepInterface $step): bool return false; } - /** - * If this group combines the output, there are result keys and there is no Result object created before invoking - * the steps, add one. Because otherwise multiple Result objects will be created. 
- * - * @param Input $input - * @return Input - */ - private function addResultToInputIfAnyResultKeysDefined(Input $input): Input - { - if ($this->createsResult() && !$input->result) { - $input = new Input($input->get(), new Result(), $input->addLaterToResult, $input->keep); - } elseif ($this->addsToOrCreatesResult() && !$input->addLaterToResult) { - $input = new Input($input->get(), $input->result, new Result(), $input->keep); - } - - return $input; - } - /** * @param mixed[] $combined * @return mixed[] @@ -205,19 +122,14 @@ private function addResultToInputIfAnyResultKeysDefined(Input $input): Input private function addOutputToCombinedOutputs( mixed $output, array $combined, - int|string $stepKey, int $nthOutput, ): array { if (is_array($output)) { foreach ($output as $key => $value) { - if (is_int($stepKey) && is_string($key)) { - $combined[$nthOutput][$key][] = $value; - } else { - $combined[$nthOutput][$stepKey][$key][] = $value; - } + $combined[$nthOutput][$key][] = $value; } } else { - $combined[$nthOutput][$stepKey][] = $output; + $combined[$nthOutput][][] = $output; } return $combined; @@ -237,10 +149,6 @@ private function prepareCombinedOutputs(array $combinedOutputs, Input $input): G $outputData = $this->applyRefiners($outputData, $input->get()); if ($this->passesAllFilters($outputData)) { - if ($this->keepInputData === true) { - $outputData = $this->addInputDataToOutputData($input->get(), $outputData); - } - $output = $this->makeOutput($outputData, $input); if ($this->uniqueOutput !== false && !$this->inputOrOutputIsUnique($output)) { diff --git a/src/Steps/Loading/GetSitemapsFromRobotsTxt.php b/src/Steps/Loading/GetSitemapsFromRobotsTxt.php index 3476813..960dc11 100644 --- a/src/Steps/Loading/GetSitemapsFromRobotsTxt.php +++ b/src/Steps/Loading/GetSitemapsFromRobotsTxt.php @@ -2,6 +2,8 @@ namespace Crwlr\Crawler\Steps\Loading; +use Crwlr\Crawler\Loader\Http\HttpLoader; +use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\Steps\StepOutputType; use 
Crwlr\RobotsTxt\Exceptions\InvalidRobotsTxtFileException; use Exception; @@ -9,8 +11,10 @@ use InvalidArgumentException; use Psr\Http\Message\UriInterface; -class GetSitemapsFromRobotsTxt extends LoadingStep +class GetSitemapsFromRobotsTxt extends Step { + use LoadingStep; + public function outputType(): StepOutputType { return StepOutputType::Scalar; @@ -25,7 +29,13 @@ protected function invoke(mixed $input): Generator throw new Exception('The Loader doesn\'t expose the RobotsTxtHandler.'); } - $robotsTxtHandler = $this->loader->robotsTxt(); + $loader = $this->getLoader(); + + if (!$loader instanceof HttpLoader) { + throw new Exception('The GetSitemapsFromRobotsTxt step needs an HttpLoader as loader instance.'); + } + + $robotsTxtHandler = $loader->robotsTxt(); foreach ($robotsTxtHandler->getSitemaps($input) as $sitemapUrl) { yield $sitemapUrl; diff --git a/src/Steps/Loading/HttpBase.php b/src/Steps/Loading/HttpBase.php index 510a044..b718a03 100644 --- a/src/Steps/Loading/HttpBase.php +++ b/src/Steps/Loading/HttpBase.php @@ -4,6 +4,7 @@ use Crwlr\Crawler\Loader\Http\Exceptions\LoadingException; use Crwlr\Crawler\Loader\Http\Messages\RespondedRequest; +use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\Utils\HttpHeaders; use GuzzleHttp\Psr7\Request; use InvalidArgumentException; @@ -11,8 +12,10 @@ use Psr\Http\Message\StreamInterface; use Psr\Http\Message\UriInterface; -abstract class HttpBase extends LoadingStep +abstract class HttpBase extends Step { + use LoadingStep; + protected bool $stopOnErrorResponse = false; protected bool $yieldErrorResponses = false; @@ -179,9 +182,9 @@ protected function getRequestFromInputUri(UriInterface $uri): RequestInterface protected function getResponseFromRequest(RequestInterface $request): ?RespondedRequest { if ($this->stopOnErrorResponse) { - $response = $this->loader->loadOrFail($request); + $response = $this->getLoader()->loadOrFail($request); } else { - $response = $this->loader->load($request); + $response = 
$this->getLoader()->load($request); } if ($response !== null && ($response->response->getStatusCode() < 400 || $this->yieldErrorResponses)) { diff --git a/src/Steps/Loading/HttpCrawl.php b/src/Steps/Loading/HttpCrawl.php index fee4a5f..bfd9280 100644 --- a/src/Steps/Loading/HttpCrawl.php +++ b/src/Steps/Loading/HttpCrawl.php @@ -144,7 +144,7 @@ protected function invoke(mixed $input): Generator { $this->setHostOrDomain($input); - $response = $this->loader->load($this->getRequestFromInputUri($input)); + $response = $this->getLoader()->load($this->getRequestFromInputUri($input)); if (!$response) { return; diff --git a/src/Steps/Loading/LoadingStep.php b/src/Steps/Loading/LoadingStep.php index b8732e9..299f277 100644 --- a/src/Steps/Loading/LoadingStep.php +++ b/src/Steps/Loading/LoadingStep.php @@ -3,30 +3,29 @@ namespace Crwlr\Crawler\Steps\Loading; use Crwlr\Crawler\Loader\LoaderInterface; -use Crwlr\Crawler\Steps\Step; -abstract class LoadingStep extends Step implements LoadingStepInterface +trait LoadingStep { - protected LoaderInterface $loader; + private LoaderInterface $loader; - protected ?string $useLoaderKey = null; + private ?LoaderInterface $customLoader = null; - public function addLoader(LoaderInterface $loader): static + public function setLoader(LoaderInterface $loader): static { $this->loader = $loader; return $this; } - public function useLoader(string $key): static + public function withLoader(LoaderInterface $loader): static { - $this->useLoaderKey = $key; + $this->customLoader = $loader; return $this; } - public function usesLoader(): ?string + protected function getLoader(): LoaderInterface { - return $this->useLoaderKey; + return $this->customLoader ?? 
$this->loader; } } diff --git a/src/Steps/Loading/LoadingStepInterface.php b/src/Steps/Loading/LoadingStepInterface.php deleted file mode 100644 index a272398..0000000 --- a/src/Steps/Loading/LoadingStepInterface.php +++ /dev/null @@ -1,21 +0,0 @@ -outputKey => $outputData]; } - if ($this->keepInputData === true) { - $outputData = $this->addInputDataToOutputData($input->get(), $outputData); - } - $output = $this->makeOutput($outputData, $input); if ($this->uniqueOutput && !$this->inputOrOutputIsUnique($output)) { diff --git a/src/Steps/StepInterface.php b/src/Steps/StepInterface.php index bfae4dc..6fbbd97 100644 --- a/src/Steps/StepInterface.php +++ b/src/Steps/StepInterface.php @@ -20,20 +20,6 @@ public function invokeStep(Input $input): Generator; public function useInputKey(string $key): static; - /** - * @param string|string[]|null $keys - */ - public function addToResult(null|string|array $keys = null): static; - - /** - * @param string|string[]|null $keys - */ - public function addLaterToResult(null|string|array $keys = null): static; - - public function addsToOrCreatesResult(): bool; - - public function createsResult(): bool; - public function uniqueInputs(?string $key = null): static; public function uniqueOutputs(?string $key = null): static; @@ -44,7 +30,5 @@ public function orWhere(string|FilterInterface $keyOrFilter, ?FilterInterface $f public function outputKey(string $key): static; - public function keepInputData(?string $inputKey = null): static; - public function resetAfterRun(): void; } diff --git a/src/Utils/OutputTypeHelper.php b/src/Utils/OutputTypeHelper.php index b88fbfa..57710ad 100644 --- a/src/Utils/OutputTypeHelper.php +++ b/src/Utils/OutputTypeHelper.php @@ -13,8 +13,6 @@ public static function objectToArray(object $output): array return $output->toArrayForResult(); } elseif (method_exists($output, 'toArray')) { return $output->toArray(); - } elseif (method_exists($output, 'toArrayForAddToResult')) { // legacy, please consider one of the 
other options - return $output->toArrayForAddToResult(); } elseif (method_exists($output, '__serialize')) { return $output->__serialize(); } diff --git a/tests/CrawlerTest.php b/tests/CrawlerTest.php index 82aafa8..ec3ce78 100644 --- a/tests/CrawlerTest.php +++ b/tests/CrawlerTest.php @@ -5,16 +5,10 @@ use Crwlr\Crawler\Steps\StepOutputType; use tests\_Stubs\Crawlers\DummyOne; use tests\_Stubs\Crawlers\DummyTwo; -use tests\_Stubs\LoaderCollectingStep; -use tests\_Stubs\MultiLoaderCrawler; -use tests\_Stubs\PhantasyLoader; use Crwlr\Crawler\Crawler; -use Crwlr\Crawler\Exceptions\UnknownLoaderKeyException; -use Crwlr\Crawler\Loader\Http\HttpLoader; use Crwlr\Crawler\Output; use Crwlr\Crawler\Result; use Crwlr\Crawler\Steps\Loading\Http; -use Crwlr\Crawler\Steps\Loading\LoadingStepInterface; use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\Steps\StepInterface; use Crwlr\Crawler\Stores\Store; @@ -154,111 +148,25 @@ function () { $crawler->addStep($step); - $step = Mockery::mock(LoadingStepInterface::class); + $step = helper_getLoadingStep(); - $step->shouldReceive('addLogger')->once(); - - $step->shouldReceive('addLoader')->once(); - - $crawler->addStep($step); -}); - -test('you can define multiple loaders', function () { - $crawler = new MultiLoaderCrawler(); - - expect($crawler->getLoader())->toBeArray(); - - expect($crawler->getLoader())->toHaveCount(3); - - expect($crawler->getLoader())->toHaveKey('http'); - - expect($crawler->getLoader()['http'])->toBeInstanceOf(HttpLoader::class); // @phpstan-ignore-line - - expect($crawler->getLoader())->toHaveKey('phantasy'); - - expect($crawler->getLoader()['phantasy'])->toBeInstanceOf(PhantasyLoader::class); // @phpstan-ignore-line - - expect($crawler->getLoader())->toHaveKey('phantasy2'); + $step = Mockery::mock($step)->makePartial(); - expect($crawler->getLoader()['phantasy2'])->toBeInstanceOf(PhantasyLoader::class); // @phpstan-ignore-line -}); - -it('passes each of its loaders one by one to its steps', function () { - 
$step = new LoaderCollectingStep(); - - (new MultiLoaderCrawler())->addStep($step); - - expect($step->loaders)->toHaveCount(3); - - expect($step->loaders[0])->toBeInstanceOf(HttpLoader::class); - - expect($step->loaders[1])->toBeInstanceOf(PhantasyLoader::class); - - expect($step->loaders[2])->toBeInstanceOf(PhantasyLoader::class); -}); - -it('passes on all the loaders to a group step which by default passes all of them to child loading steps', function () { - $crawler = new MultiLoaderCrawler(); - - $step = new LoaderCollectingStep(); - - $crawler - ->addStep( - Crawler::group() - ->addStep(Http::get()) - ->addStep($step), - ); - - expect($step->loaders)->toHaveCount(3); - - expect($step->loaders[0])->toBeInstanceOf(HttpLoader::class); - - expect($step->loaders[1])->toBeInstanceOf(PhantasyLoader::class); - - expect($step->loaders[2])->toBeInstanceOf(PhantasyLoader::class); -}); - -it('passes only a certain loader when user chooses one by calling useLoader() on a step', function () { - $step = new LoaderCollectingStep(); - - (new MultiLoaderCrawler())->addStep($step->useLoader('http')); - - expect($step->loaders)->toHaveCount(1); + $step->shouldReceive('addLogger')->once(); - expect($step->loaders[0])->toBeInstanceOf(HttpLoader::class); -}); + $step->shouldReceive('setLoader')->once(); -it('passes only a certain loader when user chooses one by calling useLoader() on a step inside a group', function () { - $crawler = new MultiLoaderCrawler(); + $step->shouldReceive('setParentCrawler')->once()->andReturnSelf(); - $step = new LoaderCollectingStep(); + /** @var Step $step */ - $crawler - ->addStep( - Crawler::group() - ->addStep(Http::get()) - ->addStep($step->useLoader('http')), - ); - - expect($step->loaders)->toHaveCount(1); - - expect($step->loaders[0])->toBeInstanceOf(HttpLoader::class); + $crawler->addStep($step); }); -it( - 'throws an UnknownLoaderKeyException when user wants to chose a loader that was not defined in the crawlers ' . 
- 'loader() method', - function () { - $step = new LoaderCollectingStep(); - - (new MultiLoaderCrawler())->addStep($step->useLoader('https')); - }, -)->throws(UnknownLoaderKeyException::class); - test('You can add steps and they are invoked when the Crawler is run', function () { - $step1 = helper_getValueReturningStep('step1 output')->addToResult('step1'); + $step1 = helper_getValueReturningStep('step1 output')->keepAs('step1'); - $step2 = helper_getValueReturningStep('step2 output')->addToResult('step2'); + $step2 = helper_getValueReturningStep('step2 output')->keepAs('step2'); $crawler = helper_getDummyCrawler() ->addStep($step1) @@ -268,36 +176,32 @@ function () { $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(1); + expect($results)->toHaveCount(1) + ->and($results[0]->toArray())->toBe(['step1' => 'step1 output', 'step2' => 'step2 output']); - expect($results[0]->toArray())->toBe(['step1' => 'step1 output', 'step2' => 'step2 output']); }); it('resets the initial inputs and calls the resetAfterRun method of all its steps', function () { - $step = helper_getInputReturningStep() - ->uniqueOutputs(); + $step = helper_getInputReturningStep()->uniqueOutputs(); $crawler = helper_getDummyCrawler() - ->addStep('foo', $step) - ->inputs(['input1', 'input1', 'input2']); + ->inputs(['input1', 'input1', 'input2']) + ->addStep($step->keepAs('foo')); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(2); - - expect($results[0]->toArray())->toBe(['foo' => 'input1']); - - expect($results[1]->toArray())->toBe(['foo' => 'input2']); + expect($results)->toHaveCount(2) + ->and($results[0]->toArray())->toBe(['foo' => 'input1']) + ->and($results[1]->toArray())->toBe(['foo' => 'input2']); $crawler->inputs(['input1', 'input3']); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(2); - - expect($results[0]->toArray())->toBe(['foo' => 'input1']); + expect($results)->toHaveCount(2) + 
->and($results[0]->toArray())->toBe(['foo' => 'input1']) + ->and($results[1]->toArray())->toBe(['foo' => 'input3']); - expect($results[1]->toArray())->toBe(['foo' => 'input3']); }); test('You can add a step group as a step and all it\'s steps are invoked when the Crawler is run', function () { @@ -331,183 +235,6 @@ function () { expect(true)->toBeTrue(); // So pest doesn't complain that there is no assertion. }); -test('Result objects are created when addToResult() is called and passed on through all the steps', function () { - $crawler = helper_getDummyCrawler(); - - $step = helper_getValueReturningStep('yo'); - - $crawler->addStep($step->addToResult('prop1')); - - $step2 = helper_getValueReturningStep('lo'); - - $crawler->addStep($step2->addToResult('prop2')); - - $step3 = helper_getValueReturningStep('foo'); - - $crawler->addStep($step3); - - $step4 = helper_getValueReturningStep('bar'); - - $crawler->addStep($step4); - - $crawler->input('randomInput'); - - $results = helper_generatorToArray($crawler->run()); - - expect($results[0])->toBeInstanceOf(Result::class); - - expect($results[0]->toArray())->toBe([ - 'prop1' => 'yo', - 'prop2' => 'lo', - ]); -}); - -test( - 'when calling addToResult() it creates a Result object. When the next step also adds to the result and it yields ' . 
- 'multiple outputs for one input, the data is added as array to the previously created Result object.', - function () { - $crawler = helper_getDummyCrawler(); - - $step = helper_getValueReturningStep(['some' => 'thing'])->addToResult(); - - $crawler->addStep($step); - - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - foreach (['one', 'two', 'three'] as $number) { - yield $number; - } - } - }; - - $step2->addToResult('number'); - - $crawler->addStep($step2); - - $crawler->input('some input'); - - $results = helper_generatorToArray($crawler->run()); - - expect($results)->toHaveCount(1); - - expect($results[0])->toBeInstanceOf(Result::class); - - expect($results[0]->toArray())->toBe([ - 'some' => 'thing', - 'number' => ['one', 'two', 'three'], - ]); - }, -); - -test( - 'calling addLaterToResult() doesn\'t immediately create a Result object, but adds the data to the output and ' . - 'later adds it to each Result object that is created from that output object.', - function () { - $crawler = helper_getDummyCrawler(); - - $step = helper_getValueReturningStep(['some' => 'thing'])->addLaterToResult(); - - $crawler->addStep($step); - - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - foreach (['one', 'two', 'three'] as $number) { - yield $number; - } - } - }; - - $step2->addToResult('number'); - - $crawler->addStep($step2); - - $crawler->input('test input'); - - $results = helper_generatorToArray($crawler->run()); - - expect($results)->toHaveCount(3); - - expect($results[0])->toBeInstanceOf(Result::class); - - expect($results[0]->toArray())->toBe([ - 'some' => 'thing', - 'number' => 'one', - ]); - - expect($results[1]->toArray())->toBe([ - 'some' => 'thing', - 'number' => 'two', - ]); - - expect($results[2]->toArray())->toBe([ - 'some' => 'thing', - 'number' => 'three', - ]); - }, -); - -test( - 'when addLaterToResult() is called, but addToResult() is not, you get the results from 
the step that ' . - 'addLaterToResult() was called on in the quantity of the last steps outputs.', - function () { - $crawler = helper_getDummyCrawler(); - - $step = helper_getValueReturningStep(['some' => 'thing'])->addLaterToResult(); - - $crawler->addStep($step); - - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - foreach (['one', 'two', 'three'] as $number) { - yield $number; - } - } - }; - - $crawler->addStep($step2); - - $crawler->input('test input'); - - $results = helper_generatorToArray($crawler->run()); - - expect($results)->toHaveCount(3); - - expect($results[0]->toArray())->toBe(['some' => 'thing']); - - expect($results[1]->toArray())->toBe(['some' => 'thing']); - - expect($results[2]->toArray())->toBe(['some' => 'thing']); - }, -); - -test('When final steps return an array you get all values in the defined Result resource', function () { - $crawler = helper_getDummyCrawler(); - - $step1 = helper_getValueReturningStep('Donald'); - - $crawler->addStep($step1->addToResult('parent')); - - $step2 = helper_getValueReturningStep(['Tick', 'Trick', 'Track']); - - $crawler->addStep($step2->addToResult('children')); - - $crawler->input('randomInput'); - - $results = $crawler->run(); - - expect($results->current()->toArray())->toBe([ - 'parent' => 'Donald', - 'children' => ['Tick', 'Trick', 'Track'], - ]); - - $results->next(); - - expect($results->current())->toBeNull(); -}); - /* ----------------------------- keep() and keepAs() ----------------------------- */ test('when you call keep() or keepAs() on a step, it keeps its output data until the end', function () { @@ -615,11 +342,14 @@ protected function invoke(mixed $input): Generator } }; - $crawler->addStep('number', $step); + $crawler->addStep($step->keepAs('number')); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(3); + expect($results)->toHaveCount(3) + ->and($results[0]->toArray())->toBe(['number' => 'one']) + 
->and($results[1]->toArray())->toBe(['number' => 'two']) + ->and($results[2]->toArray())->toBe(['number' => 'three']); }); it( @@ -697,8 +427,8 @@ public function store(Result $result): void $crawler = helper_getDummyCrawler() ->inputs(['input1', 'input2']) - ->addStep('foo', $step1) - ->addStep('bar', $step2) + ->addStep($step1->keepAs('foo')) + ->addStep($step2->keepAs('bar')) ->setStore($store); $crawler->runAndTraverse(); @@ -707,30 +437,21 @@ public function store(Result $result): void $outputLines = explode("\n", $output); - expect($outputLines[0])->toContain('step1 called'); - - expect($outputLines[1])->toContain('step2 called'); - - expect($outputLines[2])->toContain('Stored a result'); - - expect($outputLines[3])->toContain('step2 called'); - - expect($outputLines[4])->toContain('Stored a result'); - - expect($outputLines[5])->toContain('step1 called'); - - expect($outputLines[6])->toContain('step2 called'); - - expect($outputLines[7])->toContain('Stored a result'); - - expect($outputLines[8])->toContain('step2 called'); - - expect($outputLines[9])->toContain('Stored a result'); + expect($outputLines[0])->toContain('step1 called') + ->and($outputLines[1])->toContain('step2 called') + ->and($outputLines[2])->toContain('Stored a result') + ->and($outputLines[3])->toContain('step2 called') + ->and($outputLines[4])->toContain('Stored a result') + ->and($outputLines[5])->toContain('step1 called') + ->and($outputLines[6])->toContain('step2 called') + ->and($outputLines[7])->toContain('Stored a result') + ->and($outputLines[8])->toContain('step2 called') + ->and($outputLines[9])->toContain('Stored a result'); }, ); it( - 'immediately calls the store for each final output when addToResult() was not called', + 'immediately calls the store for each final output', function () { $step1 = new class () extends Step { protected function invoke(mixed $input): Generator @@ -833,8 +554,8 @@ public function store(Result $result): void ); it( - 'waits for all child outputs 
originating from an output of a step where addToResult() was called before calling ' . - 'the store', + 'does not wait for all child outputs originating from an output of a step where keepAs() was called before ' . + 'calling the store', function () { $step1 = new class () extends Step { protected function invoke(mixed $input): Generator @@ -858,7 +579,7 @@ protected function invoke(mixed $input): Generator } }; - $step2->addToResult('foo'); + $step2->keepAs('foo'); $step3 = new class () extends Step { protected function invoke(mixed $input): Generator @@ -882,10 +603,12 @@ protected function invoke(mixed $input): Generator } }; + $step4->keepAs('bar'); + $store = new class () extends Store { public function store(Result $result): void { - $this->logger?->info('Stored a result: ' . $result->get('foo')); + $this->logger?->info('Stored a result: ' . $result->get('bar')); } }; @@ -903,26 +626,37 @@ public function store(Result $result): void $outputLines = explode("\n", $output); - expect($outputLines[0]) - ->toContain('step1 called') + expect($outputLines[0])->toContain('step1 called') ->and($outputLines[1])->toContain('step2 called: 1-1') ->and($outputLines[2])->toContain('step3 called: 1-1 2-1') ->and($outputLines[3])->toContain('step4 called: 1-1 2-1 3-1') - ->and($outputLines[4])->toContain('step4 called: 1-1 2-1 3-2') - ->and($outputLines[5])->toContain('Stored a result: 1-1 2-1') - ->and($outputLines[6])->toContain('step3 called: 1-1 2-2') - ->and($outputLines[7])->toContain('step4 called: 1-1 2-2 3-1') - ->and($outputLines[8])->toContain('step4 called: 1-1 2-2 3-2') - ->and($outputLines[9])->toContain('Stored a result: 1-1 2-2') - ->and($outputLines[10])->toContain('step2 called: 1-2') - ->and($outputLines[11])->toContain('step3 called: 1-2 2-1') - ->and($outputLines[12])->toContain('step4 called: 1-2 2-1 3-1') - ->and($outputLines[13])->toContain('step4 called: 1-2 2-1 3-2') - ->and($outputLines[14])->toContain('Stored a result: 1-2 2-1') - 
->and($outputLines[15])->toContain('step3 called: 1-2 2-2') - ->and($outputLines[16])->toContain('step4 called: 1-2 2-2 3-1') - ->and($outputLines[17])->toContain('step4 called: 1-2 2-2 3-2') - ->and($outputLines[18])->toContain('Stored a result: 1-2 2-2'); + ->and($outputLines[4])->toContain('Stored a result: 1-1 2-1 3-1 4-1') + ->and($outputLines[5])->toContain('Stored a result: 1-1 2-1 3-1 4-2') + ->and($outputLines[6])->toContain('step4 called: 1-1 2-1 3-2') + ->and($outputLines[7])->toContain('Stored a result: 1-1 2-1 3-2 4-1') + ->and($outputLines[8])->toContain('Stored a result: 1-1 2-1 3-2 4-2') + ->and($outputLines[9])->toContain('step3 called: 1-1 2-2') + ->and($outputLines[10])->toContain('step4 called: 1-1 2-2 3-1') + ->and($outputLines[11])->toContain('Stored a result: 1-1 2-2 3-1 4-1') + ->and($outputLines[12])->toContain('Stored a result: 1-1 2-2 3-1 4-2') + ->and($outputLines[13])->toContain('step4 called: 1-1 2-2 3-2') + ->and($outputLines[14])->toContain('Stored a result: 1-1 2-2 3-2 4-1') + ->and($outputLines[15])->toContain('Stored a result: 1-1 2-2 3-2 4-2') + ->and($outputLines[16])->toContain('step2 called: 1-2') + ->and($outputLines[17])->toContain('step3 called: 1-2 2-1') + ->and($outputLines[18])->toContain('step4 called: 1-2 2-1 3-1') + ->and($outputLines[19])->toContain('Stored a result: 1-2 2-1 3-1 4-1') + ->and($outputLines[20])->toContain('Stored a result: 1-2 2-1 3-1 4-2') + ->and($outputLines[21])->toContain('step4 called: 1-2 2-1 3-2') + ->and($outputLines[22])->toContain('Stored a result: 1-2 2-1 3-2 4-1') + ->and($outputLines[23])->toContain('Stored a result: 1-2 2-1 3-2 4-2') + ->and($outputLines[24])->toContain('step3 called: 1-2 2-2') + ->and($outputLines[25])->toContain('step4 called: 1-2 2-2 3-1') + ->and($outputLines[26])->toContain('Stored a result: 1-2 2-2 3-1 4-1') + ->and($outputLines[27])->toContain('Stored a result: 1-2 2-2 3-1 4-2') + ->and($outputLines[28])->toContain('step4 called: 1-2 2-2 3-2') + 
->and($outputLines[29])->toContain('Stored a result: 1-2 2-2 3-2 4-1') + ->and($outputLines[30])->toContain('Stored a result: 1-2 2-2 3-2 4-2'); }, ); @@ -957,15 +691,11 @@ public function store(Result $result): void $crawler->runAndTraverse(); - expect($outputs)->toHaveCount(2); - - expect($outputs[0])->toHaveCount(1); - - expect($outputs[0][0])->toBe(2); - - expect($outputs[1])->toHaveCount(1); - - expect($outputs[1][0])->toBe(3); + expect($outputs)->toHaveCount(2) + ->and($outputs[0])->toHaveCount(1) + ->and($outputs[0][0])->toBe(2) + ->and($outputs[1])->toHaveCount(1) + ->and($outputs[1][0])->toBe(3); }); test( @@ -992,21 +722,13 @@ function () { $actualOutput = $this->getActualOutputForAssertion(); - expect(explode('array(2)', $actualOutput))->toHaveCount(3); - - expect($actualOutput)->toContain('["foo"]=>'); - - expect($actualOutput)->toContain('string(3) "one"'); - - expect($actualOutput)->toContain('["bar"]=>'); - - expect($actualOutput)->toContain('string(3) "two"'); - - expect($actualOutput)->toContain('["baz"]=>'); - - expect($actualOutput)->toContain('string(5) "three"'); - - expect($actualOutput)->toContain('["quz"]=>'); - - expect($actualOutput)->toContain('string(4) "four"'); + expect(explode('array(2)', $actualOutput))->toHaveCount(3) + ->and($actualOutput)->toContain('["foo"]=>') + ->and($actualOutput)->toContain('string(3) "one"') + ->and($actualOutput)->toContain('["bar"]=>') + ->and($actualOutput)->toContain('string(3) "two"') + ->and($actualOutput)->toContain('["baz"]=>') + ->and($actualOutput)->toContain('string(5) "three"') + ->and($actualOutput)->toContain('["quz"]=>') + ->and($actualOutput)->toContain('string(4) "four"'); }); diff --git a/tests/IoTest.php b/tests/IoTest.php index 7d983de..f1cbc6e 100644 --- a/tests/IoTest.php +++ b/tests/IoTest.php @@ -3,18 +3,15 @@ namespace tests; use Crwlr\Crawler\Io; -use Crwlr\Crawler\Result; /** * @param mixed[] $keep */ function helper_getIoInstance( mixed $value, - ?Result $result = null, - ?Result 
$addLaterToResult = null, array $keep = [], ): Io { - return new class ($value, $result, $addLaterToResult, $keep) extends Io {}; + return new class ($value, $keep) extends Io {}; } it('can be created with only a value.', function () { @@ -23,22 +20,6 @@ function helper_getIoInstance( expect($io)->toBeInstanceOf(Io::class); }); -test('you can add a Result object.', function () { - $result = new Result(); - - $io = helper_getIoInstance('test', $result); - - expect($io->result)->toBe($result); -}); - -test('you can add a secondary Result object that should be added to the main Result object later.', function () { - $addLaterToResult = new Result(); - - $io = helper_getIoInstance('test', addLaterToResult: $addLaterToResult); - - expect($io->addLaterToResult)->toBe($addLaterToResult); -}); - test('you can add an array with data that should be kept (see Step::keep() functionality)', function () { $keep = ['foo' => 'bar', 'baz' => 'quz']; @@ -55,26 +36,6 @@ function helper_getIoInstance( expect($io2->get())->toBe('test'); }); -test('when created from another Io instance it passes on the Result object.', function () { - $result = new Result(); - - $io1 = helper_getIoInstance('test', $result); - - $io2 = helper_getIoInstance($io1); - - expect($io2->result)->toBe($result); -}); - -test('when created from another Io instance it passes on the secondary Result object.', function () { - $addLaterToResult = new Result(); - - $io1 = helper_getIoInstance('test', addLaterToResult: $addLaterToResult); - - $io2 = helper_getIoInstance($io1); - - expect($io2->addLaterToResult)->toBe($addLaterToResult); -}); - test('when created from another Io instance it passes on the data to keep', function () { $io1 = helper_getIoInstance('test', keep: ['co' => 'derotsch']); @@ -83,43 +44,23 @@ function helper_getIoInstance( expect($io2->keep)->toBe(['co' => 'derotsch']); }); -test('the withValue() method creates a new instance with that value bot keeps the result and keep data', function () { - 
$result = new Result(); - - $result->set('foo', 'one'); - - $addLaterResult = new Result(); - - $result->set('bar', 'two'); - - $io1 = helper_getIoInstance('hey', $result, $addLaterResult, ['baz' => 'three']); +test('the withValue() method creates a new instance with that value but keeps the keep data', function () { + $io1 = helper_getIoInstance('hey', ['baz' => 'three']); $io2 = $io1->withValue('ho'); expect($io2->get())->toBe('ho') - ->and($io2->result)->toBe($result) - ->and($io2->addLaterToResult)->toBe($addLaterResult) ->and($io2->keep)->toBe(['baz' => 'three']); }); test( 'the withPropertyValue() method creates a new instance and replaces a certain property in its array value', function () { - $result = new Result(); - - $result->set('foo', 'one'); - - $addLaterResult = new Result(); - - $result->set('bar', 'two'); - - $io1 = helper_getIoInstance(['a' => '1', 'b' => '2', 'c' => '3'], $result, $addLaterResult, ['baz' => 'three']); + $io1 = helper_getIoInstance(['a' => '1', 'b' => '2', 'c' => '3'], ['baz' => 'three']); $io2 = $io1->withPropertyValue('c', '4'); expect($io2->get())->toBe(['a' => '1', 'b' => '2', 'c' => '4']) - ->and($io2->result)->toBe($result) - ->and($io2->addLaterToResult)->toBe($addLaterResult) ->and($io2->keep)->toBe(['baz' => 'three']); }, ); @@ -148,9 +89,8 @@ function () { it('sets a simple value key', function ($value, $key) { $io = helper_getIoInstance($value); - expect($io->setKey())->toBe($key); - - expect($io->getKey())->toBe($key); + expect($io->setKey())->toBe($key) + ->and($io->getKey())->toBe($key); })->with([ ['foo', 'foo'], [123, '123'], @@ -163,9 +103,8 @@ function () { it('sets a key from array output', function () { $io = helper_getIoInstance(['foo' => 'bar', 'yo' => 123.45]); - expect($io->setKey('yo'))->toBe('123.45'); - - expect($io->getKey())->toBe('123.45'); + expect($io->setKey('yo'))->toBe('123.45') + ->and($io->getKey())->toBe('123.45'); }); it('sets a key from object output', function () { @@ -173,17 +112,15 @@ 
function () { $io = helper_getIoInstance($value); - expect($io->setKey('yo'))->toBe('123.45'); - - expect($io->getKey())->toBe('123.45'); + expect($io->setKey('yo'))->toBe('123.45') + ->and($io->getKey())->toBe('123.45'); }); it('creates a string key for array output when not providing a key name', function () { $io = helper_getIoInstance(['one', 'two', 'three']); - expect($io->setKey())->toBe('6975f1fd65cae4b21e32f4f47bf153a8'); - - expect($io->getKey())->toBe('6975f1fd65cae4b21e32f4f47bf153a8'); + expect($io->setKey())->toBe('6975f1fd65cae4b21e32f4f47bf153a8') + ->and($io->getKey())->toBe('6975f1fd65cae4b21e32f4f47bf153a8'); }); it('creates a string key for object output when not providing a key name', function () { @@ -191,17 +128,15 @@ function () { $io = helper_getIoInstance($object); - expect($io->setKey())->toBe('bb8dd69ea029ca1379df3994721f5fa9'); - - expect($io->getKey())->toBe('bb8dd69ea029ca1379df3994721f5fa9'); + expect($io->setKey())->toBe('bb8dd69ea029ca1379df3994721f5fa9') + ->and($io->getKey())->toBe('bb8dd69ea029ca1379df3994721f5fa9'); }); it('creates a string key for array output when provided key name doesn\'t exist in output array', function () { $io = helper_getIoInstance(['one', 'two', 'three']); - expect($io->setKey('four'))->toBe('6975f1fd65cae4b21e32f4f47bf153a8'); - - expect($io->getKey())->toBe('6975f1fd65cae4b21e32f4f47bf153a8'); + expect($io->setKey('four'))->toBe('6975f1fd65cae4b21e32f4f47bf153a8') + ->and($io->getKey())->toBe('6975f1fd65cae4b21e32f4f47bf153a8'); }); it('creates a string key for array output when provided key name doesn\'t exist in output object', function () { @@ -209,9 +144,8 @@ function () { $io = helper_getIoInstance($object); - expect($io->setKey('four'))->toBe('bb8dd69ea029ca1379df3994721f5fa9'); - - expect($io->getKey())->toBe('bb8dd69ea029ca1379df3994721f5fa9'); + expect($io->setKey('four'))->toBe('bb8dd69ea029ca1379df3994721f5fa9') + ->and($io->getKey())->toBe('bb8dd69ea029ca1379df3994721f5fa9'); }); 
test('getKey returns a key when setKey was not called yet', function () { diff --git a/tests/Loader/AddLoadersToStepActionTest.php b/tests/Loader/AddLoadersToStepActionTest.php deleted file mode 100644 index c410265..0000000 --- a/tests/Loader/AddLoadersToStepActionTest.php +++ /dev/null @@ -1,96 +0,0 @@ - - */ -function helper_getLoaders(): array -{ - $userAgent = new UserAgent('SomeUserAgent'); - - $logger = new CliLogger(); - - return [ - 'http' => new HttpLoader($userAgent, logger: $logger), - 'phantasy' => new PhantasyLoader($userAgent, logger: $logger), - 'phantasy2' => new PhantasyLoader($userAgent, logger: $logger), - ]; -} - -it('does not cause an error when called with a non loading step', function () { - (new AddLoadersToStepAction(helper_getLoaders(), Html::root()->extract([]))); -})->throwsNoExceptions(); - -it('adds the loader to the step when invoked with a single loader', function () { - $loader = new HttpLoader(new UserAgent('Foo'), logger: new CliLogger()); - - $step = new LoaderCollectingStep(); - - (new AddLoadersToStepAction($loader, $step))->invoke(); - - expect($step->loaders)->toHaveCount(1); - - expect($step->loaders[0])->toBe($loader); -}); - -it('adds all loaders one by one to the step when called with multiple loaders', function () { - $step = new LoaderCollectingStep(); - - (new AddLoadersToStepAction(helper_getLoaders(), $step))->invoke(); - - expect($step->loaders)->toHaveCount(3); - - expect($step->loaders[0])->toBeInstanceOf(HttpLoader::class); - - expect($step->loaders[1])->toBeInstanceOf(PhantasyLoader::class); - - expect($step->loaders[2])->toBeInstanceOf(PhantasyLoader::class); -}); - -it('adds only the chosen loader when useLoader() was called on a step', function () { - $step = new LoaderCollectingStep(); - - $step->useLoader('http'); - - (new AddLoadersToStepAction(helper_getLoaders(), $step))->invoke(); - - expect($step->loaders)->toHaveCount(1); - - expect($step->loaders[0])->toBeInstanceOf(HttpLoader::class); -}); - 
-it('throws an UnknownLoaderKeyException when useLoader() is called with an undefined loader key', function () { - $step = new LoaderCollectingStep(); - - $step->useLoader('https'); - - (new AddLoadersToStepAction(helper_getLoaders(), $step))->invoke(); -})->throws(UnknownLoaderKeyException::class); - -it('adds all loaders to a group step, and the group step adds it to its children', function () { - $step1 = (new LoaderCollectingStep())->useLoader('http'); - - $step2 = new LoaderCollectingStep(); - - $groupStep = Crawler::group() - ->addStep($step1) - ->addStep($step2); - - (new AddLoadersToStepAction(helper_getLoaders(), $groupStep))->invoke(); - - expect($step1->loaders)->toHaveCount(1); - - expect($step2->loaders)->toHaveCount(3); -}); diff --git a/tests/Loader/Http/Messages/RespondedRequestTest.php b/tests/Loader/Http/Messages/RespondedRequestTest.php index 5efa202..d1bdc7d 100644 --- a/tests/Loader/Http/Messages/RespondedRequestTest.php +++ b/tests/Loader/Http/Messages/RespondedRequestTest.php @@ -162,23 +162,15 @@ /** @var RespondedRequest $respondedRequest */ - expect($respondedRequest)->toBeInstanceOf(RespondedRequest::class); - - expect($respondedRequest->request->getMethod())->toBe('POST'); - - expect($respondedRequest->request->getUri()->__toString())->toBe('/home'); - - expect($respondedRequest->request->getHeaders())->toBe(['key' => ['val']]); - - expect($respondedRequest->request->getBody()->getContents())->toBe('bod'); - - expect($respondedRequest->effectiveUri())->toBe('/index'); - - expect($respondedRequest->response->getStatusCode())->toBe(201); - - expect($respondedRequest->response->getHeaders())->toBe(['k' => ['v']]); - - expect($respondedRequest->response->getBody()->getContents())->toBe('res'); + expect($respondedRequest)->toBeInstanceOf(RespondedRequest::class) + ->and($respondedRequest->request->getMethod())->toBe('POST') + ->and($respondedRequest->request->getUri()->__toString())->toBe('/home') + 
->and($respondedRequest->request->getHeaders())->toBe(['key' => ['val']]) + ->and($respondedRequest->request->getBody()->getContents())->toBe('bod') + ->and($respondedRequest->effectiveUri())->toBe('/index') + ->and($respondedRequest->response->getStatusCode())->toBe(201) + ->and($respondedRequest->response->getHeaders())->toBe(['k' => ['v']]) + ->and($respondedRequest->response->getBody()->getContents())->toBe('res'); }); it('can be created from a serialized array', function () { @@ -188,20 +180,18 @@ $respondedRequest = RespondedRequest::fromArray(unserialize($serialized)); - expect($respondedRequest)->toBeInstanceOf(RespondedRequest::class); - - expect($respondedRequest->request->getUri()->__toString())->toBe('/foo'); - - expect($respondedRequest->effectiveUri())->toBe('/bar'); + expect($respondedRequest)->toBeInstanceOf(RespondedRequest::class) + ->and($respondedRequest->request->getUri()->__toString())->toBe('/foo') + ->and($respondedRequest->effectiveUri())->toBe('/bar'); }); -it('has a serializeForAddToResult() method', function () { +it('has a toArrayForResult() method', function () { $respondedRequest = new RespondedRequest( new Request('POST', '/home', ['key' => 'val'], 'bod'), new Response(201, ['k' => 'v'], 'res'), ); - expect($respondedRequest->toArrayForAddToResult())->toBe([ + expect($respondedRequest->toArrayForResult())->toBe([ 'requestMethod' => 'POST', 'requestUri' => '/home', 'requestHeaders' => ['key' => ['val']], diff --git a/tests/Pest.php b/tests/Pest.php index 9090a71..1c9e8ee 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -10,6 +10,7 @@ use Crwlr\Crawler\Loader\Http\Politeness\TimingUnits\MultipleOf; use Crwlr\Crawler\Loader\LoaderInterface; use Crwlr\Crawler\Output; +use Crwlr\Crawler\Steps\Loading\LoadingStep; use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\Steps\StepInterface; use Crwlr\Crawler\Steps\StepOutputType; @@ -114,18 +115,6 @@ protected function invoke(mixed $input): Generator }; } -function 
helper_getStepYieldingArrayWithNumber(int $number): Step -{ - return new class ($number) extends Step { - public function __construct(private int $number) {} - - protected function invoke(mixed $input): Generator - { - yield ['number' => $this->number, 'foo' => 'bar' . (is_int($input) ? ' ' . $input : '')]; - } - }; -} - function helper_getStepYieldingMultipleArraysWithNumber(): Step { return new class () extends Step { @@ -166,6 +155,18 @@ protected function invoke(mixed $input): Generator }; } +function helper_getLoadingStep(): Step +{ + return new class () extends Step { + use LoadingStep; + + protected function invoke(mixed $input): Generator + { + yield 'yo'; + } + }; +} + function helper_getDummyRobotsTxtResponse(?string $forDomain = null): Response { return new Response( @@ -280,7 +281,7 @@ protected function userAgent(): UserAgentInterface return new UserAgent('TestBot'); } - protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface|array + protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface { return helper_getFastLoader($userAgent, $logger); } diff --git a/tests/Steps/GroupTest.php b/tests/Steps/GroupTest.php index 78e11f2..7f4a773 100644 --- a/tests/Steps/GroupTest.php +++ b/tests/Steps/GroupTest.php @@ -8,22 +8,16 @@ use Crwlr\Crawler\Loader\Http\HttpLoader; use Crwlr\Crawler\Logger\CliLogger; use Crwlr\Crawler\Output; -use Crwlr\Crawler\Result; use Crwlr\Crawler\Steps\Group; -use Crwlr\Crawler\Steps\Loading\LoadingStepInterface; use Crwlr\Crawler\Steps\Refiners\StringRefiner; use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\Steps\StepInterface; use Crwlr\Crawler\UserAgents\BotUserAgent; -use Crwlr\Crawler\UserAgents\UserAgent; -use Exception; use Generator; use Mockery; -use tests\_Stubs\LoaderCollectingStep; -use tests\_Stubs\PhantasyLoader; - use function tests\helper_getInputReturningStep; +use function tests\helper_getLoadingStep; use function 
tests\helper_getStdClassWithData; use function tests\helper_getStepYieldingObjectWithNumber; use function tests\helper_getValueReturningStep; @@ -64,7 +58,7 @@ protected function invoke(mixed $input): Generator $step->shouldReceive('addLogger')->once(); - $step->shouldNotReceive('addLoader'); + $step->shouldNotReceive('setLoader'); $group = new Group(); @@ -91,55 +85,44 @@ protected function invoke(mixed $input): Generator $group->addLogger(new CliLogger()); }); -it('also passes on the loader to the step when addLoader method exists in step', function () { - $step = Mockery::mock(LoadingStepInterface::class); +it('also passes on the loader to the step when setLoader method exists in step', function () { + $step = Mockery::mock(helper_getLoadingStep()); $step->shouldReceive('addLogger')->once(); - $step->shouldReceive('addLoader')->once(); + $step->shouldReceive('setLoader')->once(); $group = new Group(); $group->addLogger(new CliLogger()); - $group->addLoader(new HttpLoader(new BotUserAgent('MyBot'))); + $group->setLoader(new HttpLoader(new BotUserAgent('MyBot'))); + + /** @var Step $step */ $group->addStep($step); }); it('also passes on a new loader to all steps when it is added after the steps', function () { - $step1 = Mockery::mock(LoadingStepInterface::class); - - $step1->shouldReceive('addLoader')->once(); - - $step2 = Mockery::mock(LoadingStepInterface::class); - - $step2->shouldReceive('addLoader')->once(); + $step1 = Mockery::mock(helper_getLoadingStep()); - $group = new Group(); + $step1->shouldReceive('setLoader')->once(); - $group->addStep($step1); + $step2 = Mockery::mock(helper_getLoadingStep()); - $group->addStep($step2); - - $group->addLoader(new HttpLoader(new BotUserAgent('MyBot'))); -}); + $step2->shouldReceive('setLoader')->once(); -test('you can add multiple loaders', function () { $group = new Group(); - $loaders = [ - 'http' => new HttpLoader(new UserAgent('Youseragent')), - 'phantasy' => new PhantasyLoader(new UserAgent('Youseragent')), - ]; 
+ /** @var Step $step1 */ - $step = new LoaderCollectingStep(); + $group->addStep($step1); - $group->addStep($step); + /** @var Step $step2 */ - $group->addLoaders($loaders); + $group->addStep($step2); - expect($step->loaders)->toHaveCount(2); + $group->setLoader(new HttpLoader(new BotUserAgent('MyBot'))); }); test('The factory method returns a Group object instance', function () { @@ -159,22 +142,15 @@ protected function invoke(mixed $input): Generator helper_invokeStepWithInput($group); - expect($step1->called)->toBeTrue(); // @phpstan-ignore-line - - expect($step2->called)->toBeTrue(); // @phpstan-ignore-line - - expect($step3->called)->toBeTrue(); // @phpstan-ignore-line + expect($step1->called)->toBeTrue() // @phpstan-ignore-line + ->and($step2->called)->toBeTrue() // @phpstan-ignore-line + ->and($step3->called)->toBeTrue(); // @phpstan-ignore-line }); it('combines the outputs of all it\'s steps into one output containing an array', function () { $step1 = helper_getValueReturningStep('lorem'); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield 'ipsum'; - } - }; + $step2 = helper_getValueReturningStep('ipsum'); $step3 = helper_getValueReturningStep('dolor'); @@ -184,11 +160,9 @@ protected function invoke(mixed $input): Generator $output = helper_invokeStepWithInput($group, 'gogogo'); - expect($output)->toHaveCount(1); - - expect($output[0])->toBeInstanceOf(Output::class); - - expect($output[0]->get())->toBe(['lorem', 'ipsum', 'dolor']); + expect($output)->toHaveCount(1) + ->and($output[0])->toBeInstanceOf(Output::class) + ->and($output[0]->get())->toBe(['lorem', 'ipsum', 'dolor']); }); test( @@ -196,25 +170,19 @@ protected function invoke(mixed $input): Generator function () { $step1 = helper_getValueReturningStep('ich'); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield 'bin'; - } - }; + $step2 = helper_getValueReturningStep('bin'); $step3 = 
helper_getValueReturningStep('ein berliner'); $group = (new Group()) - ->addStep('foo', $step1) - ->addStep('bar', $step2) - ->addStep('baz', $step3); + ->addStep($step1->outputKey('foo')) + ->addStep($step2->outputKey('bar')) + ->addStep($step3->outputKey('baz')); $output = helper_invokeStepWithInput($group, 'https://www.gogo.go'); - expect($output)->toHaveCount(1); - - expect($output[0])->toBeInstanceOf(Output::class); + expect($output)->toHaveCount(1) + ->and($output[0])->toBeInstanceOf(Output::class); $expectedOutputAndResultArray = ['foo' => 'ich', 'bar' => 'bin', 'baz' => 'ein berliner']; @@ -233,14 +201,13 @@ protected function invoke(mixed $input): Generator $output = helper_invokeStepWithInput($group); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe([ - 'foo' => 'fooValue', - 'bar' => 'barValue', - 'baz' => 'bazValue', - 'yo' => 'lo', - ]); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe([ + 'foo' => 'fooValue', + 'bar' => 'barValue', + 'baz' => 'bazValue', + 'yo' => 'lo', + ]); }); it('doesn\'t invoke twice with duplicate inputs when uniqueInput was called', function () { @@ -411,9 +378,8 @@ function () { $group->uniqueOutputs(); - expect(helper_invokeStepWithInput($group))->toHaveCount(1); - - expect(helper_invokeStepWithInput($group))->toHaveCount(0); + expect(helper_invokeStepWithInput($group))->toHaveCount(1) + ->and(helper_invokeStepWithInput($group))->toHaveCount(0); $incrementNumberCallback = function (mixed $input) { return $input + 1; @@ -440,9 +406,8 @@ function () { $group->uniqueOutputs('number'); - expect(helper_invokeStepWithInput($group))->toHaveCount(1); - - expect(helper_invokeStepWithInput($group))->toHaveCount(0); + expect(helper_invokeStepWithInput($group))->toHaveCount(1) + ->and(helper_invokeStepWithInput($group))->toHaveCount(0); $group->resetAfterRun(); @@ -469,275 +434,103 @@ function () { $outputs = helper_invokeStepWithInput($group); - expect($outputs)->toHaveCount(1); - - 
expect($outputs[0]->get())->toBe(['foo' => 'one', 'baz' => 'three']); + expect($outputs)->toHaveCount(1) + ->and($outputs[0]->get())->toBe(['foo' => 'one', 'baz' => 'three']); }, ); test('You can update the input for further steps with the output of a step that is before those steps', function () { $step1 = helper_getValueReturningStep(' rocks') ->updateInputUsingOutput(function (mixed $input, mixed $output) { - return $input . $output; + return $input . $output['foo']; }); $step2 = helper_getInputReturningStep(); $group = (new Group()) - ->addStep('foo', $step1) - ->addStep('bar', $step2); + ->addStep($step1->outputKey('foo')) + ->addStep($step2->outputKey('bar')); $outputs = helper_invokeStepWithInput($group, 'crwlr.software'); - expect($outputs)->toHaveCount(1); - - expect($outputs[0]->get())->toBe(['foo' => ' rocks', 'bar' => 'crwlr.software rocks']); -}); - -it('knows when at least one of the steps adds something to the final result', function () { - $step1 = helper_getValueReturningStep('Tick'); - - $step2 = helper_getValueReturningStep('Trick'); - - $step3 = helper_getValueReturningStep('Track')->addToResult('foo'); - - $group = (new Group()) - ->addStep($step1) - ->addStep($step2); - - expect($group->addsToOrCreatesResult())->toBeFalse(); - - $group->addStep($step3); - - expect($group->addsToOrCreatesResult())->toBeTrue(); - - $outputs = helper_invokeStepWithInput($group, 'ducks'); - - expect($outputs)->toHaveCount(1); - - expect($outputs[0]->get())->toBe(['Tick', 'Trick', 'Track']); - - expect($outputs[0]->result?->toArray())->toBe(['foo' => 'Track']); + expect($outputs)->toHaveCount(1) + ->and($outputs[0]->get())->toBe(['foo' => ' rocks', 'bar' => 'crwlr.software rocks']); }); -it( - 'knows when at least one of the steps adds something to the final result when addToResult is used without argument', - function () { - $step1 = helper_getValueReturningStep('Tick'); - - $step2 = helper_getValueReturningStep('Trick'); - - $step3 = 
helper_getValueReturningStep(['duck' => 'Track'])->addToResult(); - - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->addStep($step3); - - expect($group->addsToOrCreatesResult())->toBe(true); - - $outputs = helper_invokeStepWithInput($group, 'ducks'); - - expect($outputs)->toHaveCount(1); - - expect($outputs[0]->get())->toBe(['Tick', 'Trick', 'duck' => 'Track']); - - expect($outputs[0]->result?->toArray())->toBe(['duck' => 'Track']); - }, -); - -test( - 'addsToOrCreatesResult() returns true when addLaterToResult() was called', - function () { - $step1 = helper_getValueReturningStep('Tick'); - - $step2 = helper_getValueReturningStep('Trick'); - - $step3 = helper_getValueReturningStep(['duck' => 'Track'])->addLaterToResult(); - - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->addStep($step3); - - expect($group->addsToOrCreatesResult())->toBe(true); - }, -); - -test( - 'createsResult() returns true when addToResult() was called', - function () { - $step1 = helper_getValueReturningStep('Tick'); - - $step2 = helper_getValueReturningStep('Trick'); - - $step3 = helper_getValueReturningStep(['duck' => 'Track']); - - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->addStep($step3) - ->addToResult(['duck']); - - expect($group->createsResult())->toBe(true); - }, -); - -test( - 'createsResult() returns false when addLaterToResult() was called', - function () { - $step1 = helper_getValueReturningStep('Tick'); - - $step2 = helper_getValueReturningStep('Trick'); - - $step3 = helper_getValueReturningStep(['duck' => 'Track'])->addLaterToResult(); - - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->addStep($step3); - - expect($group->createsResult())->toBeFalse(); - }, -); - it('uses a key from array input when defined', function () { $step = helper_getInputReturningStep(); $group = (new Group()) - ->addStep('test', $step) + ->addStep($step->outputKey('test')) ->useInputKey('bar'); $outputs = 
helper_invokeStepWithInput($group, new Input( ['foo' => 'fooValue', 'bar' => 'barValue', 'baz' => 'bazValue'], )); - expect($outputs)->toHaveCount(1); - - expect($outputs[0]->get())->toBe(['test' => 'barValue']); + expect($outputs)->toHaveCount(1) + ->and($outputs[0]->get())->toBe(['test' => 'barValue']); }); -it( - 'adds the combined output to result with a certain key when addToResult() is used with a key as argument', - function () { - $step1 = helper_getValueReturningStep('foo'); +it('keeps the combined output with a certain key when keepAs() is used', function () { + $step1 = helper_getValueReturningStep('foo'); - $step2 = helper_getValueReturningStep('bar'); + $step2 = helper_getValueReturningStep('bar'); - $group = (new Group()) - ->addStep('key1', $step1) - ->addStep('key2', $step2) - ->addToResult('test'); - - $output = helper_invokeStepWithInput($group); - - expect($output)->toHaveCount(1); - - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['test' => ['key1' => 'foo', 'key2' => 'bar']]); - }, -); + $group = (new Group()) + ->addStep($step1->outputKey('key1')) + ->addStep($step2->outputKey('key2')) + ->keepAs('test'); -it( - 'adds all keys from a combined array output to the Result when addToResult() was called without argument', - function () { - $step1 = helper_getValueReturningStep(['foo' => 'fooValue', 'bar' => 'barValue']); + $output = helper_invokeStepWithInput($group); - $step2 = helper_getValueReturningStep(['baz' => 'bazValue', 'yo' => 'lo']); + expect($output)->toHaveCount(1) + ->and($output[0]->keep)->toBe(['test' => ['key1' => 'foo', 'key2' => 'bar']]); +}); - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->addToResult(); +it('keeps all keys from a combined array output when keep() was called without argument', function () { + $step1 = helper_getValueReturningStep(['foo' => 'fooValue', 'bar' => 'barValue']); - $output = helper_invokeStepWithInput($group); + $step2 
= helper_getValueReturningStep(['baz' => 'bazValue', 'yo' => 'lo']); - expect($output)->toHaveCount(1); + $group = (new Group()) + ->addStep($step1) + ->addStep($step2) + ->keep(); - expect($output[0]->result)->toBeInstanceOf(Result::class); + $output = helper_invokeStepWithInput($group); - expect($output[0]->result?->toArray())->toBe([ + expect($output)->toHaveCount(1) + ->and($output[0]->keep)->toBe([ 'foo' => 'fooValue', 'bar' => 'barValue', 'baz' => 'bazValue', 'yo' => 'lo', ]); - }, -); - -it( - 'adds all defined keys from a combined array output to the Result when addToResult() was called with argument', - function () { - $step1 = helper_getValueReturningStep(['foo' => 'fooValue', 'bar' => 'barValue']); - - $step2 = helper_getValueReturningStep(['baz' => 'bazValue', 'yo' => 'lo']); +}); - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->addToResult(['foo', 'baz', 'yo']); +it('keeps all defined keys from a combined array output when keep() was called with keys', function () { + $step1 = helper_getValueReturningStep(['foo' => 'fooValue', 'bar' => 'barValue']); - $output = helper_invokeStepWithInput($group); + $step2 = helper_getValueReturningStep(['baz' => 'bazValue', 'yo' => 'lo']); - expect($output)->toHaveCount(1); + $group = (new Group()) + ->addStep($step1) + ->addStep($step2) + ->keep(['foo', 'baz', 'yo']); - expect($output[0]->result)->toBeInstanceOf(Result::class); + $output = helper_invokeStepWithInput($group); - expect($output[0]->result?->toArray())->toBe([ + expect($output)->toHaveCount(1) + ->and($output[0]->keep)->toBe([ 'foo' => 'fooValue', 'baz' => 'bazValue', 'yo' => 'lo', ]); - }, -); - -it( - 'adds a secondary Result object with data to add later to main Result objects when addLaterToResult() is called', - function () { - $step1 = helper_getValueReturningStep(['foo' => 'one', 'bar' => 'two']); - - $step2 = helper_getValueReturningStep(['baz' => 'three', 'four' => 'quz']); - - $group = (new Group()) - ->addStep($step1) - 
->addStep($step2) - ->addLaterToResult(['foo', 'baz']); - - $outputs = helper_invokeStepWithInput($group); - - expect($outputs[0]->result)->toBeNull(); - - expect($outputs[0]->addLaterToResult)->toBeInstanceOf(Result::class); - - expect($outputs[0]->addLaterToResult?->toArray())->toBe(['foo' => 'one', 'baz' => 'three']); - }, -); - -it( - 'adds a secondary Result object with data to add later to main Result objects when addLaterToResult() is called ' . - 'on one of the steps in the group', - function () { - $step1 = helper_getValueReturningStep(['foo' => 'one', 'bar' => 'two'])->addLaterToResult(); - - $step2 = helper_getValueReturningStep(['baz' => 'three', 'four' => 'quz']); - - $group = (new Group()) - ->addStep($step1) - ->addStep($step2); - - $outputs = helper_invokeStepWithInput($group); - - expect($outputs[0]->result)->toBeNull(); - - expect($outputs[0]->addLaterToResult)->toBeInstanceOf(Result::class); - - expect($outputs[0]->addLaterToResult?->toArray())->toBe(['foo' => 'one', 'bar' => 'two']); - }, -); +}); test( - 'When steps yield multiple outputs it combines the first output from first step with first output from second ' . + 'when steps yield multiple outputs it combines the first output from first step with first output from second ' . 
'step and so on.', function () { $step1 = new class () extends Step { @@ -764,28 +557,16 @@ protected function invoke(mixed $input): Generator $output = helper_invokeStepWithInput($group); - expect($output)->toHaveCount(2); - - expect($output[0]->get())->toBe(['one' => 'foo', 'three' => 'baz']); - - expect($output[1]->get())->toBe(['two' => 'bar', 'four' => 'quz']); + expect($output)->toHaveCount(2) + ->and($output[0]->get())->toBe(['one' => 'foo', 'three' => 'baz']) + ->and($output[1]->get())->toBe(['two' => 'bar', 'four' => 'quz']); }, ); it('ignores the key set via outputKey because group step output is always an array', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['one' => 'foo']; - } - }; + $step1 = helper_getValueReturningStep(['one' => 'foo']); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['two' => 'bar']; - } - }; + $step2 = helper_getValueReturningStep(['two' => 'bar']); $group = (new Group()) ->addStep($step1) @@ -794,221 +575,132 @@ protected function invoke(mixed $input): Generator $output = helper_invokeStepWithInput($group); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe(['one' => 'foo', 'two' => 'bar']); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe(['one' => 'foo', 'two' => 'bar']); }); it( - 'keeps input data in output when keepInputData() was called when outputs are combined', + 'keeps input data when keepFromInput() was called when outputs are combined', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['foo' => 'one']; - } - }; + $step1 = helper_getValueReturningStep(['foo' => 'one']); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; + $step2 = helper_getValueReturningStep(['bar' => 'two']); $group = (new Group()) 
->addStep($step1) ->addStep($step2) - ->keepInputData(); + ->keepFromInput(); $output = helper_invokeStepWithInput($group, new Input(['baz' => 'three'])); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two', 'baz' => 'three']); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two']) + ->and($output[0]->keep)->toBe(['baz' => 'three']); }, ); it('keeps non array input data in array output with key', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['foo' => 'one']; - } - }; + $step1 = helper_getValueReturningStep(['foo' => 'one']); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; + $step2 = helper_getValueReturningStep(['bar' => 'two']); $group = (new Group()) ->addStep($step1) ->addStep($step2) - ->keepInputData('baz'); + ->keepInputAs('baz'); $output = helper_invokeStepWithInput($group, new Input('three')); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two', 'baz' => 'three']); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two']) + ->and($output[0]->keep)->toBe(['baz' => 'three']); }); -it('throws an error when non array input should be kept but no key is defined', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['foo' => 'one']; - } - }; - - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; - - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->keepInputData(); - - helper_invokeStepWithInput($group, new Input('three')); -})->throws(Exception::class); - -it('does not replace output data when a key from input to keep is also defined in output', function () { - 
$step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['foo' => 'one']; - } - }; +it('keeps a value with unnamed key, when non array input should be kept but no key is defined', function () { + $step1 = helper_getValueReturningStep(['foo' => 'one']); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; + $step2 = helper_getValueReturningStep(['bar' => 'two']); $group = (new Group()) ->addStep($step1) ->addStep($step2) - ->keepInputData(); - - $output = helper_invokeStepWithInput($group, new Input(['foo' => 'four', 'baz' => 'three'])); + ->keepFromInput(); - expect($output)->toHaveCount(1); + $output = helper_invokeStepWithInput($group, new Input('three')); - expect($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two', 'baz' => 'three']); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two']) + ->and($output[0]->keep)->toBe(['unnamed1' => 'three']); }); it('contains an element with a numeric key when it contains a step that yields non array output', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield 'one'; - } - }; + $step1 = helper_getValueReturningStep('one'); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; + $step2 = helper_getValueReturningStep(['bar' => 'two']); $group = (new Group()) ->addStep($step1) - ->addStep($step2) - ->keepInputData(); - - $output = helper_invokeStepWithInput($group, new Input(['baz' => 'three'])); + ->addStep($step2); - expect($output)->toHaveCount(1); + $output = helper_invokeStepWithInput($group); - expect($output[0]->get())->toBe([0 => 'one', 'bar' => 'two', 'baz' => 'three']); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe([0 => 'one', 'bar' => 'two']); }); it('keeps array input data when some 
output is non array but converted to array using outputKey()', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield 'one'; - } - }; + $step1 = helper_getValueReturningStep('one'); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; + $step2 = helper_getValueReturningStep(['bar' => 'two']); $group = (new Group()) ->addStep($step1->outputKey('foo')) ->addStep($step2) - ->keepInputData(); + ->keepFromInput(); $output = helper_invokeStepWithInput($group, new Input(['baz' => 'three'])); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two', 'baz' => 'three']); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two']) + ->and($output[0]->keep)->toBe(['baz' => 'three']); }); -it('throws an exception when input should be kept, is non array and no key is defined', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield 'one'; - } - }; +it( + 'keeps an input value with an unnamed key, when it is a non array value and no key is defined (via keepInputAs())', + function () { + $step1 = helper_getValueReturningStep('one'); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; + $step2 = helper_getValueReturningStep(['bar' => 'two']); - $group = (new Group()) - ->addStep($step1) - ->addStep($step2) - ->keepInputData(); + $group = (new Group()) + ->addStep($step1) + ->addStep($step2) + ->keepFromInput(); - helper_invokeStepWithInput($group, new Input('three')); -})->throws(Exception::class); + $output = helper_invokeStepWithInput($group, new Input('three')); + + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe([0 => 'one', 'bar' => 'two']) + ->and($output[0]->keep)->toBe(['unnamed1' => 'three']); 
+ }, +); it('keeps the original input data when useInputKey() is used', function () { - $step1 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['foo' => 'one']; - } - }; + $step1 = helper_getValueReturningStep(['foo' => 'one']); - $step2 = new class () extends Step { - protected function invoke(mixed $input): Generator - { - yield ['bar' => 'two']; - } - }; + $step2 = helper_getValueReturningStep(['bar' => 'two']); $group = (new Group()) ->addStep($step1) ->addStep($step2) ->useInputKey('baz') - ->keepInputData(); + ->keepFromInput(); $output = helper_invokeStepWithInput($group, new Input(['baz' => 'three', 'quz' => 'four'])); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two', 'baz' => 'three', 'quz' => 'four']); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe(['foo' => 'one', 'bar' => 'two']) + ->and($output[0]->keep)->toBe(['baz' => 'three', 'quz' => 'four']); }); it('applies a Closure refiner to the steps output', function () { diff --git a/tests/Steps/Loading/GetSitemapsFromRobotsTxtTest.php b/tests/Steps/Loading/GetSitemapsFromRobotsTxtTest.php index 04aa415..6994b21 100644 --- a/tests/Steps/Loading/GetSitemapsFromRobotsTxtTest.php +++ b/tests/Steps/Loading/GetSitemapsFromRobotsTxtTest.php @@ -36,7 +36,7 @@ $loader = new HttpLoader(new UserAgent('SomeUserAgent'), $httpClient); - $step = Sitemap::getSitemapsFromRobotsTxt()->addLoader($loader); + $step = Sitemap::getSitemapsFromRobotsTxt()->setLoader($loader); $outputs = helper_invokeStepWithInput($step, new Input('https://www.crwlr.software/packages')); diff --git a/tests/Steps/Loading/HttpTest.php b/tests/Steps/Loading/HttpTest.php index 85a9899..eaee5ee 100644 --- a/tests/Steps/Loading/HttpTest.php +++ b/tests/Steps/Loading/HttpTest.php @@ -5,7 +5,6 @@ use Crwlr\Crawler\Input; use Crwlr\Crawler\Loader\Http\HttpLoader; use Crwlr\Crawler\Loader\Http\Messages\RespondedRequest; -use 
Crwlr\Crawler\Result; use Crwlr\Crawler\Steps\Loading\Http; use Crwlr\Url\Url; use GuzzleHttp\Psr7\Request; @@ -25,7 +24,7 @@ $loader->shouldReceive('load')->once(); - $step = (new Http('GET'))->addLoader($loader); + $step = (new Http('GET'))->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input('https://www.foo.bar/baz'))); }); @@ -35,7 +34,7 @@ $loader->shouldReceive('load')->once(); - $step = (new Http('GET'))->addLoader($loader); + $step = (new Http('GET'))->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input(Url::parsePsr7('https://www.linkedin.com/')))); }); @@ -43,7 +42,7 @@ it('throws an InvalidArgumentException when invoked with something else as input', function () { $loader = Mockery::mock(HttpLoader::class); - $step = (new Http('GET'))->addLoader($loader); + $step = (new Http('GET'))->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input(new stdClass()))); })->throws(InvalidArgumentException::class); @@ -55,7 +54,7 @@ return $request->getMethod() === $httpMethod; })->once(); - $step = (new Http($httpMethod))->addLoader($loader); + $step = (new Http($httpMethod))->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input('https://www.foo.bar/baz'))); })->with(['GET', 'POST', 'PUT', 'PATCH', 'DELETE']); @@ -88,7 +87,7 @@ return true; })->once(); - $step = (new Http('GET', $headers))->addLoader($loader); + $step = (new Http('GET', $headers))->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input('https://www.crwlr.software/packages/url'))); }); @@ -102,7 +101,7 @@ return $request->getBody()->getContents() === $body; })->once(); - $step = (new Http('PATCH', [], $body))->addLoader($loader); + $step = (new Http('PATCH', [], $body))->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input('https://github.com/'))); }); @@ -114,7 +113,7 @@ return $request->getProtocolVersion() === $httpVersion; })->once(); - $step = (new Http('PATCH', [], 'body', 
$httpVersion))->addLoader($loader); + $step = (new Http('PATCH', [], 'body', $httpVersion))->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input('https://packagist.org/packages/crwlr/url'))); })->with(['1.0', '1.1', '2.0']); @@ -126,7 +125,7 @@ return $request->getMethod() === $httpMethod; })->once(); - $step = (Http::{strtolower($httpMethod)}())->addLoader($loader); + $step = (Http::{strtolower($httpMethod)}())->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input('https://dev.to/otsch'))); })->with(['GET', 'POST', 'PUT', 'PATCH', 'DELETE']); @@ -141,43 +140,38 @@ function (string $httpMethod) { })->once()->andReturn(new RespondedRequest(new Request('GET', '/foo'), new Response(200))); $step = (Http::{strtolower($httpMethod)}()) - ->addLoader($loader) + ->setLoader($loader) ->stopOnErrorResponse(); helper_traverseIterable($step->invokeStep(new Input('https://example.com/otsch'))); }, )->with(['GET', 'POST', 'PUT', 'PATCH', 'DELETE']); -test( - 'you can add response properties to the result with their aliases', - function () { - $loader = Mockery::mock(HttpLoader::class); - - $loader->shouldReceive('load')->once()->andReturn( - new RespondedRequest( - new Request('GET', 'https://www.example.com/testresponse'), - new Response(202, ['foo' => 'bar'], Utils::streamFor('testbody')), - ), - ); - - $step = Http::get() - ->addLoader($loader) - ->addToResult(['url', 'status', 'headers', 'body']); +test('you can keep response properties with their aliases', function () { + $loader = Mockery::mock(HttpLoader::class); - $outputs = helper_invokeStepWithInput($step); + $loader->shouldReceive('load')->once()->andReturn( + new RespondedRequest( + new Request('GET', 'https://www.example.com/testresponse'), + new Response(202, ['foo' => 'bar'], Utils::streamFor('testbody')), + ), + ); - expect($outputs)->toHaveCount(1); + $step = Http::get() + ->setLoader($loader) + ->keep(['url', 'status', 'headers', 'body']); - 
expect($outputs[0]->result)->toBeInstanceOf(Result::class); + $outputs = helper_invokeStepWithInput($step); - expect($outputs[0]->result?->toArray())->toBe([ + expect($outputs)->toHaveCount(1) + ->and($outputs[0]->keep)->toBe([ 'url' => 'https://www.example.com/testresponse', 'status' => 202, 'headers' => ['foo' => ['bar']], 'body' => 'testbody', ]); - }, -); + +}); test( 'the value behind url and uri is the effectiveUri', @@ -194,16 +188,13 @@ function (string $outputKey) { $loader->shouldReceive('load')->once()->andReturn($respondedRequest); $step = Http::get() - ->addLoader($loader) - ->addToResult([$outputKey]); + ->setLoader($loader) + ->keep([$outputKey]); $outputs = helper_invokeStepWithInput($step); - expect($outputs)->toHaveCount(1); - - expect($outputs[0]->result)->toBeInstanceOf(Result::class); - - expect($outputs[0]->result?->toArray())->toBe([$outputKey => 'https://www.example.com/testresponseredirect']); + expect($outputs)->toHaveCount(1) + ->and($outputs[0]->keep)->toBe([$outputKey => 'https://www.example.com/testresponseredirect']); }, )->with(['url', 'uri']); @@ -220,7 +211,7 @@ function (string $outputKey) { })->once()->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get() - ->addLoader($loader) + ->setLoader($loader) ->useInputKeyAsUrl('someUrl'); helper_invokeStepWithInput($step, $inputArray); @@ -241,7 +232,7 @@ function ($key) { })->once()->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get() - ->addLoader($loader); + ->setLoader($loader); helper_invokeStepWithInput($step, $inputArray); }, @@ -265,7 +256,7 @@ function ($key) { ->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get() - ->addLoader($loader) + ->setLoader($loader) ->useInputKeyAsUrl('someUrl') ->useInputKeyAsBody('someBodyThatIUsedToKnow'); @@ -290,7 +281,7 @@ function ($key) 
{ ->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get() - ->addLoader($loader) + ->setLoader($loader) ->useInputKeyAsUrl('someUrl') ->useInputKeyAsHeader('someHeader', 'header-name-x'); @@ -315,7 +306,7 @@ function ($key) { ->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get() - ->addLoader($loader) + ->setLoader($loader) ->useInputKeyAsHeader('header-name'); helper_invokeStepWithInput($step, $inputArray); @@ -339,7 +330,7 @@ function ($key) { ->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get(['header-name-x' => 'foo']) - ->addLoader($loader) + ->setLoader($loader) ->useInputKeyAsUrl('someUrl') ->useInputKeyAsHeader('someHeader', 'header-name-x'); @@ -366,7 +357,7 @@ function ($key) { ->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get() - ->addLoader($loader) + ->setLoader($loader) ->useInputKeyAsUrl('someUrl') ->useInputKeyAsHeader('someHeader', 'header-name-x') ->useInputKeyAsHeader('anotherHeader', 'header-name-y'); @@ -400,7 +391,7 @@ function ($key) { ->andReturn(new RespondedRequest(new Request('GET', 'https://www.example.com/baz'), new Response(200))); $step = Http::get(['header-name-y' => 'quz']) - ->addLoader($loader) + ->setLoader($loader) ->useInputKeyAsUrl('someUrl') ->useInputKeyAsHeaders('customHeaders'); diff --git a/tests/Steps/Loading/LoadingStepTest.php b/tests/Steps/Loading/LoadingStepTest.php index 566d649..d76d68c 100644 --- a/tests/Steps/Loading/LoadingStepTest.php +++ b/tests/Steps/Loading/LoadingStepTest.php @@ -4,17 +4,22 @@ use Crwlr\Crawler\Input; use Crwlr\Crawler\Loader\Http\HttpLoader; +use Crwlr\Crawler\Loader\Loader; use Crwlr\Crawler\Steps\Loading\LoadingStep; +use Crwlr\Crawler\Steps\Step; use Generator; use Mockery; +use function 
tests\helper_invokeStepWithInput; use function tests\helper_traverseIterable; test('you can add a loader', function () { - $step = new class () extends LoadingStep { + $step = new class () extends Step { + use LoadingStep; + protected function invoke(mixed $input): Generator { - $this->loader->load($input); + $this->getLoader()->load($input); yield []; } @@ -24,22 +29,39 @@ protected function invoke(mixed $input): Generator $loader->shouldReceive('load')->once(); - $step->addLoader($loader); + $step->setLoader($loader); helper_traverseIterable($step->invokeStep(new Input('https://www.digitalocean.com/blog'))); }); -test('you can set the key of the loader that it should use', function () { - $step = new class () extends LoadingStep { - protected function invoke(mixed $input): Generator - { - yield 'yo'; - } - }; +test( + 'you can provide a custom loader to a step via the withLoader() method, and it will be preferred to the loader ' . + 'provided via setLoader()', + function () { + $loaderOne = Mockery::mock(Loader::class); - expect($step->usesLoader())->toBeNull(); + $loaderOne->shouldNotReceive('load'); - $step->useLoader('ftp'); + $loaderTwo = Mockery::mock(Loader::class); - expect($step->usesLoader())->toBe('ftp'); -}); + $loaderTwo->shouldReceive('load')->once()->andReturn('Hi'); + + $step = new class () extends Step { + use LoadingStep; + + protected function invoke(mixed $input): Generator + { + yield $this->getLoader()->load($input); + } + }; + + $step->withLoader($loaderTwo); + + // The crawler will call the setLoader() method of the step after the step was added to the crawler. + // So, the call to withLoader() will happen before that. + // Nevertheless, the loader passed to withLoader() should be preferred. 
+ $step->setLoader($loaderOne); + + helper_invokeStepWithInput($step); + }, +); diff --git a/tests/Steps/StepTest.php b/tests/Steps/StepTest.php index 9c89ff7..5698be4 100644 --- a/tests/Steps/StepTest.php +++ b/tests/Steps/StepTest.php @@ -6,13 +6,11 @@ use Crwlr\Crawler\Loader\Http\Messages\RespondedRequest; use Crwlr\Crawler\Logger\CliLogger; use Crwlr\Crawler\Output; -use Crwlr\Crawler\Result; use Crwlr\Crawler\Steps\Filters\Filter; use Crwlr\Crawler\Steps\Loading\Http; use Crwlr\Crawler\Steps\Refiners\StringRefiner; use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\Steps\StepOutputType; -use Exception; use Generator; use GuzzleHttp\Psr7\Request; use GuzzleHttp\Psr7\Response; @@ -54,60 +52,17 @@ protected function invoke(mixed $input): Generator expect($output)->toContain('logging works'); }); -test( - 'The invokeStep method wraps the values returned by invoke in Output objects by default without Result objects', - function () { - $step = helper_getValueReturningStep('returnValue'); - - $output = helper_invokeStepWithInput($step); - - expect($output)->toHaveCount(1); - - expect($output[0])->toBeInstanceOf(Output::class); - - expect($output[0]->get())->toBe('returnValue'); - - expect($output[0]->result)->toBeNull(); - }, -); - -test( - 'The invokeStep method creates a Result object that is added to the Output when you set a property name', - function () { - $step = helper_getValueReturningStep('returnValue') - ->addToResult('property'); - - $output = helper_invokeStepWithInput($step); - - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['property' => 'returnValue']); - }, -); - -it('creates a Result object with the data from yielded array when addToResult() is used', function () { - $step = helper_getValueReturningStep(['foo' => 'bar', 'baz' => 'yo']) - ->addToResult(); +test('The invokeStep method wraps the values returned by invoke in Output objects', function () { + $step = 
helper_getValueReturningStep('returnValue'); $output = helper_invokeStepWithInput($step); - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['foo' => 'bar', 'baz' => 'yo']); -}); - -it('picks keys from the output array when you pass an array of keys to addToResult()', function () { - $step = helper_getValueReturningStep(['user' => 'otsch', 'firstname' => 'Christian', 'surname' => 'Olear']) - ->addToResult(['firstname', 'surname']); - - $output = helper_invokeStepWithInput($step); - - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['firstname' => 'Christian', 'surname' => 'Olear']); + expect($output)->toHaveCount(1) + ->and($output[0])->toBeInstanceOf(Output::class) + ->and($output[0]->get())->toBe('returnValue'); }); -it('is able to pick keys from nested (array) output using dot notation', function () { +test('keep() can pick keys from nested (array) output using dot notation', function () { $step = helper_getValueReturningStep([ 'users' => [ ['user' => 'otsch', 'firstname' => 'Christian', 'surname' => 'Olear'], @@ -116,16 +71,14 @@ function () { ], 'foo' => 'bar', ]) - ->addToResult(['nickname' => 'users.0.user', 'foo']); + ->keep(['nickname' => 'users.0.user', 'foo']); $output = helper_invokeStepWithInput($step); - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['foo' => 'bar', 'nickname' => 'otsch']); + expect($output[0]->keep)->toBe(['nickname' => 'otsch', 'foo' => 'bar']); }); -it('picks keys from nested output including a RespondedRequest object', function () { +test('keep() picks keys from nested output including a RespondedRequest object', function () { $step = helper_getValueReturningStep([ 'response' => new RespondedRequest( new Request('GET', 'https://www.example.com/something'), @@ -133,90 +86,20 @@ function () { ), 'foo' => 'bar', ]) - ->addToResult(['content' => 
'response.body']); + ->keep(['content' => 'response.body']); $output = helper_invokeStepWithInput($step); - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['content' => 'Hi :)']); + expect($output[0]->keep)->toBe(['content' => 'Hi :)']); }); -it('maps output keys to different result keys when defined in the array passed to addToResult()', function () { +it('maps output keys to different keys when defined in the array passed to keep()', function () { $step = helper_getValueReturningStep(['user' => 'otsch', 'firstname' => 'Christian', 'surname' => 'Olear']) - ->addToResult(['foo' => 'firstname', 'bar' => 'surname']); + ->keep(['foo' => 'firstname', 'bar' => 'surname']); $output = helper_invokeStepWithInput($step); - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['foo' => 'Christian', 'bar' => 'Olear']); -}); - -test( - 'The addsToOrCreatesResult() method returns false when addToResult() and addLaterToResult() have not been called', - function () { - $step = helper_getValueReturningStep('lol'); - - expect($step->addsToOrCreatesResult())->toBeFalse(); - }, -); - -test('The addsToOrCreatesResult() method returns true when addToResult() was called with a string key', function () { - $step = helper_getValueReturningStep('test')->addToResult('test'); - - expect($step->addsToOrCreatesResult())->toBeTrue(); -}); - -test('The addsToOrCreatesResult() method returns true when addLaterToResult() was called with a string key', function () { - $step = helper_getValueReturningStep('test')->addLaterToResult('test'); - - expect($step->addsToOrCreatesResult())->toBeTrue(); -}); - -test('The addsToOrCreatesResult() method returns true when addToResult() was called without an argument', function () { - $step = helper_getValueReturningStep(['test' => 'yo'])->addToResult(); - - expect($step->addsToOrCreatesResult())->toBeTrue(); -}); - -test( - 'The 
addsToOrCreatesResult() method returns true when addLaterToResult() was called without an argument', - function () { - $step = helper_getValueReturningStep(['test' => 'yo'])->addLaterToResult(); - - expect($step->addsToOrCreatesResult())->toBeTrue(); - }, -); - -test('The createsResult() method returns false when addToResult() has not been called', function () { - $step = helper_getValueReturningStep('lol'); - - expect($step->createsResult())->toBeFalse(); -}); - -test('The createsResult() method returns true when addToResult() was called with a string key', function () { - $step = helper_getValueReturningStep('test')->addToResult('test'); - - expect($step->createsResult())->toBeTrue(); -}); - -test('The createsResult() method returns false when addLaterToResult() was called with a string key', function () { - $step = helper_getValueReturningStep('test')->addLaterToResult('test'); - - expect($step->createsResult())->toBeFalse(); -}); - -test('The createsResult() method returns true when addToResult() was called without an argument', function () { - $step = helper_getValueReturningStep(['test' => 'yo'])->addToResult(); - - expect($step->createsResult())->toBeTrue(); -}); - -test('The createsResult() method returns false when addLaterToResult() was called without an argument', function () { - $step = helper_getValueReturningStep(['test' => 'yo'])->addLaterToResult(); - - expect($step->createsResult())->toBeFalse(); + expect($output[0]->keep)->toBe(['foo' => 'Christian', 'bar' => 'Olear']); }); it('uses a key from array input when defined', function () { @@ -226,9 +109,8 @@ function () { ['foo' => 'fooValue', 'bar' => 'barValue', 'baz' => 'bazValue'], )); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe('barValue'); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe('barValue'); }); it('logs a warning message when the input key to use does not exist in input array', function () { @@ -264,113 +146,23 @@ function (mixed $inputValue) 
{ [new stdClass()], ]); -it('doesn\'t add the result object to the Input object only to the Output', function () { - $step = helper_getValueReturningStep('Stand with Ukraine!') - ->addToResult('property'); - - $input = new Input('inputValue'); +it('does not lose previously kept data, when it uses the useInputKey() method', function () { + $step = helper_getValueReturningStep(['test' => 'test'])->useInputKey('foo'); - $output = helper_invokeStepWithInput($step); + $outputs = helper_invokeStepWithInput($step, new Input(['foo' => 'test'], ['some' => 'thing'])); - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($input->result)->toBe(null); -}); - -it('appends properties to a result object that was already included with the Input object', function () { - $step = helper_getValueReturningStep('returnValue') - ->addToResult('property'); - - $prevResult = new Result(); - - $prevResult->set('prevProperty', 'foobar'); - - $input = new Input('inputValue', $prevResult); - - $output = helper_invokeStepWithInput($step, $input); - - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe([ - 'prevProperty' => 'foobar', - 'property' => 'returnValue', - ]); + expect($outputs[0]->keep)->toBe(['some' => 'thing']); }); it( - 'adds a secondary Result object with data to add later to main Result objects when addLaterToResult() is called', - function () { - $step = helper_getValueReturningStep('returnValue') - ->addLaterToResult('property'); - - $outputs = helper_invokeStepWithInput($step); - - expect($outputs[0]->result)->toBeNull(); - - expect($outputs[0]->addLaterToResult)->toBeInstanceOf(Result::class); - - expect($outputs[0]->addLaterToResult?->toArray())->toBe([ - 'property' => 'returnValue', - ]); - }, -); - -test('addLaterToResult() works with array output and no argument', function () { - $step = helper_getValueReturningStep(['foo' => 'bar']) - ->addLaterToResult(); - - $outputs = 
helper_invokeStepWithInput($step); - - expect($outputs[0]->result)->toBeNull(); - - expect($outputs[0]->addLaterToResult)->toBeInstanceOf(Result::class); - - expect($outputs[0]->addLaterToResult?->toArray())->toBe([ - 'foo' => 'bar', - ]); -}); - -test('with addLaterToResult() you can also pick some keys from array output', function () { - $step = helper_getValueReturningStep(['foo' => 'one', 'bar' => 'two', 'baz' => 'three', 'quz' => 'four']) - ->addLaterToResult(['foo', 'baz', 'yolo']); - - $outputs = helper_invokeStepWithInput($step); - - expect($outputs[0]->result)->toBeNull(); - - expect($outputs[0]->addLaterToResult)->toBeInstanceOf(Result::class); - - expect($outputs[0]->addLaterToResult?->toArray())->toBe([ - 'foo' => 'one', - 'baz' => 'three', - ]); -}); - -it('does not lose previously added result to add later, when it uses the useInputKey() method', function () { - $step = helper_getValueReturningStep(['test' => 'test']) - ->useInputKey('foo'); - - $addLaterToResult = new Result(); - - $outputs = helper_invokeStepWithInput($step, new Input(['foo' => 'test'], addLaterToResult: $addLaterToResult)); - - expect($outputs[0]->addLaterToResult)->toBe($addLaterToResult); -}); - -it( - 'also passes on Result objects through further steps when they don\'t define further result resource properties', + 'also passes on kept data through further steps when they don\'t define any further data to keep', function () { $step = helper_getValueReturningStep('returnValue'); - $prevResult = new Result(); - - $prevResult->set('prevProperty', 'foobar'); - - $output = helper_invokeStepWithInput($step, new Input('inputValue', $prevResult)); + $output = helper_invokeStepWithInput($step, new Input('inputValue', ['prevProperty' => 'foobar'])); - expect($output[0]->result)->toBeInstanceOf(Result::class); - - expect($output[0]->result?->toArray())->toBe(['prevProperty' => 'foobar']); + expect($output)->toHaveCount(1) + ->and($output[0]->keep)->toBe(['prevProperty' => 'foobar']); }, ); 
@@ -461,17 +253,12 @@ function () { $output = helper_invokeStepWithInput($step, new Input('anything')); - expect($output)->toHaveCount(5); - - expect($output[0]->get())->toBe('one'); - - expect($output[1]->get())->toBe('two'); - - expect($output[2]->get())->toBe('three'); - - expect($output[3]->get())->toBe('four'); - - expect($output[4]->get())->toBe('five'); + expect($output)->toHaveCount(5) + ->and($output[0]->get())->toBe('one') + ->and($output[1]->get())->toBe('two') + ->and($output[2]->get())->toBe('three') + ->and($output[3]->get())->toBe('four') + ->and($output[4]->get())->toBe('five'); }); it('makes outputs unique when providing a key name to uniqueOutput to use from array output', function () { @@ -598,9 +385,8 @@ protected function invoke(mixed $input): Generator $output = helper_invokeStepWithInput($step, 'foo'); - expect($output)->toHaveCount(1); - - expect($output[0]->get())->toBe('bar'); + expect($output)->toHaveCount(1) + ->and($output[0]->get())->toBe('bar'); }); test('You can add and call an updateInputUsingOutput callback', function () { @@ -612,26 +398,23 @@ protected function invoke(mixed $input): Generator $updatedInput = $step->callUpdateInputUsingOutput(new Input('Boo'), new Output('Yah!')); - expect($updatedInput)->toBeInstanceOf(Input::class); - - expect($updatedInput->get())->toBe('Boo Yah!'); + expect($updatedInput)->toBeInstanceOf(Input::class) + ->and($updatedInput->get())->toBe('Boo Yah!'); }); -it('does not lose previously added result to add later, when updateInputUsingOutput() is called', function () { +it('does not lose previously kept data, when updateInputUsingOutput() is called', function () { $step = helper_getValueReturningStep('something'); $step->updateInputUsingOutput(function (mixed $input, mixed $output) { return $input . ' ' . 
$output; }); - $addLaterToResult = new Result(); - $updatedInput = $step->callUpdateInputUsingOutput( - new Input('Some', addLaterToResult: $addLaterToResult), + new Input('Some', ['foo' => 'bar']), new Output('thing'), ); - expect($updatedInput->addLaterToResult)->toBe($addLaterToResult); + expect($updatedInput->keep)->toBe(['foo' => 'bar']); }); it('does not yield more outputs than defined via maxOutputs() method', function () { @@ -724,63 +507,6 @@ protected function invoke(mixed $input): Generator expect($outputs[0]->get())->toBe(['foo' => 'bar']); }); -it('keeps input data in output when keepInputData() was called', function () { - $step = helper_getValueReturningStep(['bar' => 'baz']) - ->keepInputData(); - - $output = helper_invokeStepWithInput($step, new Input(['foo' => 'quz'])); - - expect($output[0]->get())->toBe(['bar' => 'baz', 'foo' => 'quz']); -}); - -it('keeps non array input data in array output with key', function () { - $step = helper_getValueReturningStep(['bar' => 'baz']) - ->keepInputData('foo'); - - $output = helper_invokeStepWithInput($step, new Input('quz')); - - expect($output[0]->get())->toBe(['bar' => 'baz', 'foo' => 'quz']); -}); - -it('throws an error when non array input should be kept but no key is defined', function () { - $step = helper_getValueReturningStep(['bar' => 'baz']) - ->keepInputData(); - - helper_invokeStepWithInput($step, new Input('quz')); -})->throws(Exception::class); - -it('does not replace output data when a key from input to keep is also defined in output', function () { - $step = helper_getValueReturningStep(['foo' => 'four', 'bar' => 'five']) - ->keepInputData('foo'); - - $output = helper_invokeStepWithInput($step, new Input(['foo' => 'one', 'bar' => 'two', 'baz' => 'three'])); - - expect($output[0]->get())->toBe(['foo' => 'four', 'bar' => 'five', 'baz' => 'three']); -}); - -it( - 'throws an exception when input should be kept, output is non array value and no output key is defined', - function () { - $step = 
helper_getValueReturningStep('three') - ->keepInputData(); - - helper_invokeStepWithInput($step, new Input(['foo' => 'one', 'bar' => 'two'])); - }, -)->throws(Exception::class); - -it( - 'works when output is non array value but it\'s converted to an array using the outputKey() method', - function () { - $step = helper_getValueReturningStep('three') - ->keepInputData() - ->outputKey('baz'); - - $outputs = helper_invokeStepWithInput($step, new Input(['foo' => 'one', 'bar' => 'two'])); - - expect($outputs[0]->get())->toBe(['baz' => 'three', 'foo' => 'one', 'bar' => 'two']); - }, -); - test('keeping a scalar output value with keep() also works when outputKey() was used', function () { $step = new class () extends Step { protected function invoke(mixed $input): Generator @@ -807,12 +533,13 @@ public function outputType(): StepOutputType it('keeps the original input data when useInputKey() is used', function () { $step = helper_getValueReturningStep(['baz' => 'three']) - ->keepInputData() + ->keepFromInput() ->useInputKey('bar'); $outputs = helper_invokeStepWithInput($step, ['foo' => 'one', 'bar' => 'two']); - expect($outputs[0]->get())->toBe(['baz' => 'three', 'foo' => 'one', 'bar' => 'two']); + expect($outputs[0]->get())->toBe(['baz' => 'three']) + ->and($outputs[0]->keep)->toBe(['foo' => 'one', 'bar' => 'two']); }); it('applies a Closure refiner to the steps output', function () { @@ -926,7 +653,7 @@ function () { expect($outputs[0]->get())->toBe('baz'); }); -test('you can define aliases for output keys for addToResult()', function () { +test('you can define aliases for output keys and they are considered when using keep()', function () { $step = new class () extends Step { protected function invoke(mixed $input): Generator { @@ -947,13 +674,11 @@ protected function outputKeyAliases(): array } }; - $step->addToResult(['woo', 'far' => 'war', 'waz']); + $step->keep(['woo', 'far' => 'war', 'waz']); $outputs = helper_invokeStepWithInput($step); - 
expect($outputs[0]->result)->toBeInstanceOf(Result::class); - - expect($outputs[0]->result?->toArray())->toBe([ + expect($outputs[0]->keep)->toBe([ 'woo' => 'one', 'far' => 'two', 'waz' => 'three', diff --git a/tests/Utils/OutputTypeHelperTest.php b/tests/Utils/OutputTypeHelperTest.php index b2e975c..1edd6aa 100644 --- a/tests/Utils/OutputTypeHelperTest.php +++ b/tests/Utils/OutputTypeHelperTest.php @@ -33,20 +33,6 @@ public function toArray(): array expect(OutputTypeHelper::objectToArray($object))->toBe(['foo' => 'bar']); }); -it('converts an object with a toArrayForAddToResult() method to an array', function () { - $object = new class () { - /** - * @return string[] - */ - public function toArrayForAddToResult(): array - { - return ['yo' => 'lo']; - } - }; - - expect(OutputTypeHelper::objectToArray($object))->toBe(['yo' => 'lo']); -}); - it('converts an object with a __serialize() method to an array', function () { $object = new class () { public function __serialize(): array diff --git a/tests/_Integration/GroupTest.php b/tests/_Integration/GroupTest.php index 8359f08..dfcedae 100644 --- a/tests/_Integration/GroupTest.php +++ b/tests/_Integration/GroupTest.php @@ -32,7 +32,8 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): $crawler->input('http://localhost:8000/blog-post-with-json-ld'); - $crawler->addStep(Http::get()) + $crawler + ->addStep(Http::get()) ->addStep( Crawler::group() ->addStep( @@ -47,7 +48,7 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): 'keywords', ]), ) - ->addToResult(), + ->keep(), ); $result = helper_generatorToArray($crawler->run()); diff --git a/tests/_Integration/Http/CrawlingTest.php b/tests/_Integration/Http/CrawlingTest.php index c6497b5..1ea7235 100644 --- a/tests/_Integration/Http/CrawlingTest.php +++ b/tests/_Integration/Http/CrawlingTest.php @@ -408,7 +408,7 @@ function () { // discovered URLs by default. 
$crawler = (new Crawler()) ->input('http://www.example.com/crawling/main') - ->addStep(Http::crawl()->keepUrlFragment()->addToResult(['url'])); + ->addStep(Http::crawl()->keepUrlFragment()->keep(['url'])); $results = helper_generatorToArray($crawler->run()); @@ -420,11 +420,9 @@ function () { $urls[] = $result->get('url'); } - expect($urls)->toContain('http://www.example.com/crawling/sub2'); - - expect($urls)->toContain('http://www.example.com/crawling/sub2#fragment1'); - - expect($urls)->toContain('http://www.example.com/crawling/sub2#fragment2'); + expect($urls)->toContain('http://www.example.com/crawling/sub2') + ->and($urls)->toContain('http://www.example.com/crawling/sub2#fragment1') + ->and($urls)->toContain('http://www.example.com/crawling/sub2#fragment2'); }, ); @@ -439,9 +437,8 @@ function () { $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(4); - - expect($crawler->getLoader()->loadedUrls)->toHaveCount(4); + expect($results)->toHaveCount(4) + ->and($crawler->getLoader()->loadedUrls)->toHaveCount(4); }); it('uses canonical links when useCanonicalLinks() is called', function () { @@ -450,7 +447,7 @@ function () { ->addStep( Http::crawl() ->useCanonicalLinks() - ->addToResult(['url']), + ->keep(['url']), ); $results = helper_generatorToArray($crawler->run()); @@ -479,7 +476,7 @@ function () { it('does not yield the same page twice when a URL was redirected to an already loaded page', function () { $crawler = (new Crawler()) ->input('http://www.example.com/crawling/redirect') - ->addStep(Http::crawl()->addToResult(['url'])); + ->addStep(Http::crawl()->keep(['url'])); $results = helper_generatorToArray($crawler->run()); @@ -499,7 +496,7 @@ function () { it('does not produce a fatal error when the initial request fails', function () { $crawler = (new Crawler()) ->input('http://www.example.com/not-allowed') - ->addStep(Http::crawl()->addToResult(['url'])); + ->addStep(Http::crawl()->keep(['url'])); $results = 
helper_generatorToArray($crawler->run()); diff --git a/tests/_Integration/Http/ErrorResponsesTest.php b/tests/_Integration/Http/ErrorResponsesTest.php index 193e685..882b2f9 100644 --- a/tests/_Integration/Http/ErrorResponsesTest.php +++ b/tests/_Integration/Http/ErrorResponsesTest.php @@ -30,7 +30,7 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): $crawler = new ErrorCrawler(); $crawler->inputs(['http://localhost:8000/client-error-response']) - ->addStep('response', Http::{$method}()); + ->addStep(Http::{$method}()->keepAs('response')); $results = helper_generatorToArray($crawler->run()); @@ -41,7 +41,7 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): $crawler = new ErrorCrawler(); $crawler->inputs(['http://localhost:8000/server-error-response']) - ->addStep('response', Http::{$method}()); + ->addStep(Http::{$method}()->keepAs('response')); $results = helper_generatorToArray($crawler->run()); @@ -52,7 +52,7 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): $crawler = new ErrorCrawler(); $crawler->inputs(['http://localhost:8000/client-error-response']) - ->addStep('response', Http::{$method}()->yieldErrorResponses()); + ->addStep(Http::{$method}()->yieldErrorResponses()->keepAs('response')); $results = helper_generatorToArray($crawler->run()); @@ -63,7 +63,7 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): $crawler = new ErrorCrawler(); $crawler->inputs(['http://localhost:8000/server-error-response']) - ->addStep('response', Http::{$method}()->yieldErrorResponses()); + ->addStep(Http::{$method}()->yieldErrorResponses()->keepAs('response')); $results = helper_generatorToArray($crawler->run()); @@ -76,7 +76,7 @@ function (string $method) { $crawler = new ErrorCrawler(); $crawler->inputs(['http://localhost:8000/client-error-response', 'http://localhost:8000/simple-listing']) - ->addStep('response', Http::{$method}()); + 
->addStep(Http::{$method}()->keepAs('response')); $results = helper_generatorToArray($crawler->run()); @@ -90,7 +90,7 @@ function (string $method) { $crawler = new ErrorCrawler(); $crawler->inputs(['http://localhost:8000/server-error-response', 'http://localhost:8000/simple-listing']) - ->addStep('response', Http::{$method}()); + ->addStep(Http::{$method}()->keepAs('response')); $results = helper_generatorToArray($crawler->run()); @@ -104,10 +104,7 @@ function (string $method) { $crawler = new ErrorCrawler(); $crawler->inputs(['http://localhost:8000/client-error-response', 'http://localhost:8000/simple-listing']) - ->addStep( - Http::{$method}() - ->stopOnErrorResponse(), - ); + ->addStep(Http::{$method}()->stopOnErrorResponse()); $crawler->runAndTraverse(); }, diff --git a/tests/_Integration/Http/GzipTest.php b/tests/_Integration/Http/GzipTest.php index 0dd7d9c..19f0c52 100644 --- a/tests/_Integration/Http/GzipTest.php +++ b/tests/_Integration/Http/GzipTest.php @@ -31,13 +31,11 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): $crawler = new GzipCrawler(); $crawler->input('http://localhost:8000/gzip') - ->addStep('response', Http::get()); + ->addStep(Http::get()->keepAs('response')); $results = helper_generatorToArray($crawler->run()); - expect($results[0])->toBeInstanceOf(Result::class); - - expect($results[0]->get('response'))->toBeInstanceOf(RespondedRequest::class); - - expect(Http::getBodyString($results[0]->get('response')))->toBe('This is a gzip compressed string'); + expect($results[0])->toBeInstanceOf(Result::class) + ->and($results[0]->get('response'))->toBeInstanceOf(RespondedRequest::class) + ->and(Http::getBodyString($results[0]->get('response')))->toBe('This is a gzip compressed string'); }); diff --git a/tests/_Integration/Http/HeadlessBrowserTest.php b/tests/_Integration/Http/HeadlessBrowserTest.php index 725f8c2..a43800f 100644 --- a/tests/_Integration/Http/HeadlessBrowserTest.php +++ 
b/tests/_Integration/Http/HeadlessBrowserTest.php @@ -61,23 +61,21 @@ protected function invoke(mixed $input): Generator $crawler->input('http://localhost:8000/print-headers') ->addStep(Http::get()) - ->addStep('responseBody', new GetJsonFromResponseHtmlBody()); + ->addStep((new GetJsonFromResponseHtmlBody())->keepAs('responseBody')); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(1); - - expect($results[0]->get('responseBody'))->toBeArray(); - - expect($results[0]->get('responseBody'))->toHaveKey('User-Agent'); - - expect($results[0]->get('responseBody')['User-Agent'])->toBe('HeadlessBrowserBot'); + expect($results)->toHaveCount(1) + ->and($results[0]->get('responseBody'))->toBeArray() + ->and($results[0]->get('responseBody'))->toHaveKey('User-Agent') + ->and($results[0]->get('responseBody')['User-Agent'])->toBe('HeadlessBrowserBot'); }); it('uses cookies', function () { $crawler = new HeadlessBrowserCrawler(); - $crawler->input('http://localhost:8000/set-cookie') + $crawler + ->input('http://localhost:8000/set-cookie') ->addStep(Http::get()) ->addStep(new class () extends Step { protected function invoke(mixed $input): Generator @@ -86,15 +84,13 @@ protected function invoke(mixed $input): Generator } }) ->addStep(Http::get()) - ->addStep('printed-cookie', new GetStringFromResponseHtmlBody()); + ->addStep((new GetStringFromResponseHtmlBody())->keepAs('printed-cookie')); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(1); - - expect($results[0]->get('printed-cookie'))->toBeString(); - - expect($results[0]->get('printed-cookie'))->toBe('foo123'); + expect($results)->toHaveCount(1) + ->and($results[0]->get('printed-cookie'))->toBeString() + ->and($results[0]->get('printed-cookie'))->toBe('foo123'); }); it('renders javascript', function () { @@ -109,17 +105,17 @@ protected function invoke(mixed $input): Generator $results = helper_generatorToArray($crawler->run()); - 
expect($results)->toHaveCount(1); - - expect($results[0]->toArray())->toBe([ - 'content' => 'This was added through javascript', - ]); + expect($results)->toHaveCount(1) + ->and($results[0]->toArray())->toBe([ + 'content' => 'This was added through javascript', + ]); }); it('also gets cookies that are set via javascript', function () { $crawler = new HeadlessBrowserCrawler(); - $crawler->input('http://localhost:8000/set-js-cookie') + $crawler + ->input('http://localhost:8000/set-js-cookie') ->addStep(Http::get()) ->addStep(new class () extends Step { protected function invoke(mixed $input): Generator @@ -128,13 +124,11 @@ protected function invoke(mixed $input): Generator } }) ->addStep(Http::get()) - ->addStep('printed-cookie', new GetStringFromResponseHtmlBody()); + ->addStep((new GetStringFromResponseHtmlBody())->keepAs('printed-cookie')); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(1); - - expect($results[0]->get('printed-cookie'))->toBeString(); - - expect($results[0]->get('printed-cookie'))->toBe('javascriptcookie'); + expect($results)->toHaveCount(1) + ->and($results[0]->get('printed-cookie'))->toBeString() + ->and($results[0]->get('printed-cookie'))->toBe('javascriptcookie'); }); diff --git a/tests/_Integration/Http/Html/PaginatedListingTest.php b/tests/_Integration/Http/Html/PaginatedListingTest.php index ee3c865..621842f 100644 --- a/tests/_Integration/Http/Html/PaginatedListingTest.php +++ b/tests/_Integration/Http/Html/PaginatedListingTest.php @@ -31,27 +31,25 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): $crawler ->addStep(Http::get()->paginate('#nextPage')) - ->addStep('url', Html::getLinks('#listing .item a')) + ->addStep(Html::getLinks('#listing .item a')->keepAs('url')) ->addStep(Http::get()) ->addStep( Html::first('article') ->extract(['title' => 'h1', 'number' => '.someNumber']) - ->addToResult(), + ->keep(), ); $results = helper_generatorToArray($crawler->run()); - 
expect($results)->toHaveCount(10); - - expect($results[0]->toArray())->toBe([ - 'url' => 'http://localhost:8000/paginated-listing/items/1', - 'title' => 'Some Item 1', - 'number' => '10', - ]); - - expect($results[9]->toArray())->toBe([ - 'url' => 'http://localhost:8000/paginated-listing/items/10', - 'title' => 'Some Item 10', - 'number' => '100', - ]); + expect($results)->toHaveCount(10) + ->and($results[0]->toArray())->toBe([ + 'url' => 'http://localhost:8000/paginated-listing/items/1', + 'title' => 'Some Item 1', + 'number' => '10', + ]) + ->and($results[9]->toArray())->toBe([ + 'url' => 'http://localhost:8000/paginated-listing/items/10', + 'title' => 'Some Item 10', + 'number' => '100', + ]); }); diff --git a/tests/_Integration/Http/Html/SimpleListingTest.php b/tests/_Integration/Http/Html/SimpleListingTest.php index 1d21715..7f00f88 100644 --- a/tests/_Integration/Http/Html/SimpleListingTest.php +++ b/tests/_Integration/Http/Html/SimpleListingTest.php @@ -39,28 +39,25 @@ public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): 'date' => '.date', 'author' => '.articleAuthor', ]) - ->addToResult(), + ->keep(), ); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(3); - - expect($results[0]->toArray())->toBe([ - 'title' => 'Some Article 1', - 'date' => '2022-04-13', - 'author' => 'Christian Olear', - ]); - - expect($results[1]->toArray())->toBe([ - 'title' => 'Some Article 2', - 'date' => '2022-04-14', - 'author' => 'Christian Olear', - ]); - - expect($results[2]->toArray())->toBe([ - 'title' => 'Some Article 3', - 'date' => '2022-04-15', - 'author' => 'Christian Olear', - ]); + expect($results)->toHaveCount(3) + ->and($results[0]->toArray())->toBe([ + 'title' => 'Some Article 1', + 'date' => '2022-04-13', + 'author' => 'Christian Olear', + ]) + ->and($results[1]->toArray())->toBe([ + 'title' => 'Some Article 2', + 'date' => '2022-04-14', + 'author' => 'Christian Olear', + ]) + 
->and($results[2]->toArray())->toBe([ + 'title' => 'Some Article 3', + 'date' => '2022-04-15', + 'author' => 'Christian Olear', + ]); }); diff --git a/tests/_Integration/Http/ProxyingTest.php b/tests/_Integration/Http/ProxyingTest.php index 78f0e44..d6b824f 100644 --- a/tests/_Integration/Http/ProxyingTest.php +++ b/tests/_Integration/Http/ProxyingTest.php @@ -1,6 +1,5 @@ getLoader(); - - /** @var HttpLoader $loader */ - - $loader->useProxy('http://localhost:8001'); + $crawler->getLoader()->useProxy('http://localhost:8001'); $crawler ->input('http://www.crwlr.software/packages') - ->addStep(Http::get()->addToResult(['body'])); + ->addStep(Http::get()->keep(['body'])); $results = iterator_to_array($crawler->run()); @@ -69,17 +64,13 @@ class ProxyServerProcesses it('uses correct method, headers and HTTP version in the proxied request', function () { $crawler = helper_getFastCrawler(); - $loader = $crawler->getLoader(); - - /** @var HttpLoader $loader */ - - $loader->useProxy('http://localhost:8001'); + $crawler->getLoader()->useProxy('http://localhost:8001'); $crawler ->input('http://www.crwlr.software/packages') ->addStep( Http::put(['Accept-Encoding' => 'gzip, deflate, br'], 'Hello World', '1.0') - ->addToResult(['body']), + ->keep(['body']), ); $results = iterator_to_array($crawler->run()); @@ -96,11 +87,7 @@ class ProxyServerProcesses it('uses rotating proxies when the useRotatingProxies() method of the loader was called', function () { $crawler = helper_getFastCrawler(); - $loader = $crawler->getLoader(); - - /** @var HttpLoader $loader */ - - $loader->useRotatingProxies([ + $crawler->getLoader()->useRotatingProxies([ 'http://localhost:8001', 'http://localhost:8002', 'http://localhost:8003', @@ -113,7 +100,7 @@ class ProxyServerProcesses 'http://www.crwlr.software/packages/query-string/v1.0/getting-started', 'http://www.crwlr.software/packages/robots-txt/v1.1/getting-started', ]) - ->addStep(Http::get()->addToResult(['body'])); + 
->addStep(Http::get()->keep(['body'])); $results = iterator_to_array($crawler->run()); @@ -139,11 +126,8 @@ class ProxyServerProcesses it('can also use a proxy when using the headless browser', function () { $crawler = helper_getFastCrawler(); - $loader = $crawler->getLoader(); - - /** @var HttpLoader $loader */ - - $loader + $crawler + ->getLoader() ->useHeadlessBrowser() ->useProxy('http://localhost:8001'); @@ -151,7 +135,7 @@ class ProxyServerProcesses ->input('http://www.crwlr.software/blog') ->addStep( Http::get(['Accept-Language' => 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7']) - ->addToResult(['body']), + ->keep(['body']), ); $results = iterator_to_array($crawler->run()); @@ -165,11 +149,8 @@ class ProxyServerProcesses it('can also use rotating proxies when using the headless browser', function () { $crawler = helper_getFastCrawler(); - $loader = $crawler->getLoader(); - - /** @var HttpLoader $loader */ - - $loader + $crawler + ->getLoader() ->useHeadlessBrowser() ->useRotatingProxies([ 'http://localhost:8001', @@ -182,7 +163,7 @@ class ProxyServerProcesses 'http://www.crwlr.software/packages/url/v2.0', 'http://www.crwlr.software/packages/query-string/v1.0', ]) - ->addStep(Http::get()->addToResult(['body'])); + ->addStep(Http::get()->keep(['body'])); $results = iterator_to_array($crawler->run()); diff --git a/tests/_Integration/Http/PublisherExampleTest.php b/tests/_Integration/Http/PublisherExampleTest.php index f867b6a..87bb366 100644 --- a/tests/_Integration/Http/PublisherExampleTest.php +++ b/tests/_Integration/Http/PublisherExampleTest.php @@ -38,36 +38,41 @@ protected function userAgent(): UserAgentInterface ->addStep( Html::root() ->extract([ - 'name' => 'h1', - 'age' => '#author-data .age', - 'bornIn' => '#author-data .born-in', + 'author' => 'h1', 'bookUrls' => Dom::cssSelector('#author-data .books a.book')->attribute('href')->toAbsoluteUrl(), ]) - ->addToResult(['name', 'age', 'bornIn']), + ->keep(['author']), ) 
->addStep(Http::get()->useInputKey('bookUrls')) ->addStep( Html::root() - ->extract(['books' => 'h1']) - ->addToResult(), + ->extract(['book' => 'h1']) + ->keep(), ); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(2) + expect($results)->toHaveCount(5) ->and($results[0]->toArray())->toBe([ - 'name' => 'John Example', - 'age' => '51', - 'bornIn' => 'Lisbon', - 'books' => ['Some novel', 'Another novel'], + 'author' => 'John Example', + 'book' => 'Some novel', ]) ->and($results[1]->toArray())->toBe([ - 'name' => 'Susan Example', - 'age' => '49', - 'bornIn' => 'Athens', - 'books' => ['Poems #1', 'Poems #2', 'Poems #3'], + 'author' => 'John Example', + 'book' => 'Another novel', + ]) + ->and($results[2]->toArray())->toBe([ + 'author' => 'Susan Example', + 'book' => 'Poems #1', + ]) + ->and($results[3]->toArray())->toBe([ + 'author' => 'Susan Example', + 'book' => 'Poems #2', + ]) + ->and($results[4]->toArray())->toBe([ + 'author' => 'Susan Example', + 'book' => 'Poems #3', ]); - }); it('turns an array of URLs to nested extracted data from those child pages using sub crawlers', function () { diff --git a/tests/_Integration/Http/QueryParamPaginationTest.php b/tests/_Integration/Http/QueryParamPaginationTest.php index 17c62cc..5168d8a 100644 --- a/tests/_Integration/Http/QueryParamPaginationTest.php +++ b/tests/_Integration/Http/QueryParamPaginationTest.php @@ -43,7 +43,7 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger ->inBody() ->increase('page') ->stopWhen(PaginatorStopRules::isEmptyInJson('data.items')), - )->addToResult(['body']), + )->keep(['body']), ); $results = helper_generatorToArray($crawler->run()); @@ -63,7 +63,7 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger ->inUrl() ->increase('page') ->stopWhen(PaginatorStopRules::isEmptyInJson('data.items')), - )->addToResult(['body']), + )->keep(['body']), ); $results = helper_generatorToArray($crawler->run()); 
@@ -82,7 +82,7 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger QueryParamsPaginator::paramsInUrl(2) ->increase('page') ->stopWhen(PaginatorStopRules::isEmptyInJson('data.items')), - )->addToResult(['body']), + )->keep(['body']), ); $results = helper_generatorToArray($crawler->run()); @@ -104,7 +104,7 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger QueryParamsPaginator::paramsInUrl(2) ->increase('page') ->stopWhen(PaginatorStopRules::isEmptyInJson('data.items')), - )->addToResult(['body']), + )->keep(['body']), ); $results = helper_generatorToArray($crawler->run()); diff --git a/tests/_Integration/Http/RedirectTest.php b/tests/_Integration/Http/RedirectTest.php index 8049eb5..9e5a9cb 100644 --- a/tests/_Integration/Http/RedirectTest.php +++ b/tests/_Integration/Http/RedirectTest.php @@ -2,6 +2,7 @@ namespace tests\_Integration\Http; +use Crwlr\Crawler\Cache\Exceptions\MissingZlibExtensionException; use Crwlr\Crawler\HttpCrawler; use Crwlr\Crawler\Loader\Http\HttpLoader; use Crwlr\Crawler\Loader\Http\Messages\RespondedRequest; @@ -28,6 +29,7 @@ class GetResponseBodyAsString extends Step { /** * @param RespondedRequest $input + * @throws MissingZlibExtensionException */ protected function invoke(mixed $input): Generator { @@ -43,13 +45,12 @@ protected function invoke(mixed $input): Generator $crawler ->input('http://localhost:8000/redirect?stopAt=5') ->addStep(Http::get()) - ->addStep('body', new GetResponseBodyAsString()); + ->addStep((new GetResponseBodyAsString())->keepAs('body')); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(1); - - expect($results[0]->get('body'))->toBe('success after 5 redirects'); + expect($results)->toHaveCount(1) + ->and($results[0]->get('body'))->toBe('success after 5 redirects'); }); it('stops at 10 redirects by default', function () { @@ -58,7 +59,7 @@ protected function invoke(mixed $input): Generator $crawler 
->input('http://localhost:8000/redirect?stopAt=11') ->addStep(Http::get()) - ->addStep('body', new GetResponseBodyAsString()); + ->addStep((new GetResponseBodyAsString())->keepAs('body')); $results = helper_generatorToArray($crawler->run()); @@ -76,7 +77,7 @@ protected function userAgent(): UserAgentInterface return new UserAgent('RedirectBot'); } - protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface|array + protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger): LoaderInterface { $loader = parent::loader($userAgent, $logger); @@ -91,11 +92,10 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger $crawler ->input('http://localhost:8000/redirect?stopAt=11') ->addStep(Http::get()) - ->addStep('body', new GetResponseBodyAsString()); + ->addStep((new GetResponseBodyAsString())->keepAs('body')); $results = helper_generatorToArray($crawler->run()); - expect($results)->toHaveCount(1); - - expect($results[0]->get('body'))->toBe('success after 11 redirects'); + expect($results)->toHaveCount(1) + ->and($results[0]->get('body'))->toBe('success after 11 redirects'); }); diff --git a/tests/_Stubs/LoaderCollectingStep.php b/tests/_Stubs/LoaderCollectingStep.php deleted file mode 100644 index e6cf54c..0000000 --- a/tests/_Stubs/LoaderCollectingStep.php +++ /dev/null @@ -1,29 +0,0 @@ -loaders[] = $loader; - - $this->loader = $loader; - - return $this; - } - - protected function invoke(mixed $input): Generator - { - yield 'foo'; - } -} diff --git a/tests/_Stubs/MultiLoaderCrawler.php b/tests/_Stubs/MultiLoaderCrawler.php deleted file mode 100644 index 43fac62..0000000 --- a/tests/_Stubs/MultiLoaderCrawler.php +++ /dev/null @@ -1,27 +0,0 @@ - new HttpLoader($userAgent, logger: $logger), - 'phantasy' => new PhantasyLoader($userAgent, $logger), - 'phantasy2' => new PhantasyLoader($userAgent, $logger), - ]; - } -} From 3cd99e8829c8f4a2f4a3895ed3ae4c30eb8ffb5b Mon Sep 17 00:00:00 2001 
From: otsch Date: Thu, 8 Aug 2024 00:58:54 +0200 Subject: [PATCH 2/2] Changes after PHP CS Fixer update --- .../AnonymousHttpCrawlerBuilder.php | 4 +-- tests/CrawlerTest.php | 32 +++++++++---------- tests/Loader/Http/HttpLoaderTest.php | 4 +-- tests/Pest.php | 14 ++++---- tests/Steps/BaseStepTest.php | 10 +++--- tests/Steps/CsvTest.php | 2 +- tests/Steps/GroupTest.php | 6 ++-- tests/Steps/Loading/LoadingStepTest.php | 4 +-- tests/Steps/StepTest.php | 24 +++++++------- tests/Utils/OutputTypeHelperTest.php | 8 ++--- tests/_Integration/GroupTest.php | 2 +- tests/_Integration/Http/CrawlingTest.php | 2 +- .../_Integration/Http/HeadlessBrowserTest.php | 4 +-- .../Http/Html/PaginatedListingTest.php | 2 +- .../Http/Html/SimpleListingTest.php | 2 +- .../Http/PublisherExampleTest.php | 4 +-- tests/_Integration/Http/RedirectTest.php | 2 +- .../Http/RequestParamsFromInputTest.php | 2 +- tests/_Integration/Http/TimeoutTest.php | 2 +- 19 files changed, 65 insertions(+), 65 deletions(-) diff --git a/src/HttpCrawler/AnonymousHttpCrawlerBuilder.php b/src/HttpCrawler/AnonymousHttpCrawlerBuilder.php index a2f3819..ee43bfc 100644 --- a/src/HttpCrawler/AnonymousHttpCrawlerBuilder.php +++ b/src/HttpCrawler/AnonymousHttpCrawlerBuilder.php @@ -13,7 +13,7 @@ public function __construct() {} public function withBotUserAgent(string $productToken): HttpCrawler { - $instance = new class () extends HttpCrawler { + $instance = new class extends HttpCrawler { protected function userAgent(): UserAgentInterface { return new UserAgent('temp'); @@ -27,7 +27,7 @@ protected function userAgent(): UserAgentInterface public function withUserAgent(string|UserAgentInterface $userAgent): HttpCrawler { - $instance = new class () extends HttpCrawler { + $instance = new class extends HttpCrawler { protected function userAgent(): UserAgentInterface { return new UserAgent('temp'); diff --git a/tests/CrawlerTest.php b/tests/CrawlerTest.php index ec3ce78..360e24c 100644 --- a/tests/CrawlerTest.php +++ 
b/tests/CrawlerTest.php @@ -280,7 +280,7 @@ function () { it('immediately stops when keepAs() is not used with a scalar value output step', function () { $crawler = helper_getDummyCrawler(); - $step1 = new class () extends Step { + $step1 = new class extends Step { public bool $wasCalled = false; protected function invoke(mixed $input): Generator @@ -296,7 +296,7 @@ public function outputType(): StepOutputType } }; - $step2 = new class () extends Step { + $step2 = new class extends Step { protected function invoke(mixed $input): Generator { yield 'foo'; @@ -333,7 +333,7 @@ public function outputType(): StepOutputType $crawler->setStore($store); - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { yield 'one'; @@ -398,7 +398,7 @@ function () { it( 'cascades step outputs immediately and doesn\'t wait for the current step being called with all the inputs', function () { - $step1 = new class () extends Step { + $step1 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step1 called'); @@ -409,7 +409,7 @@ protected function invoke(mixed $input): Generator } }; - $step2 = new class () extends Step { + $step2 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step2 called'); @@ -418,7 +418,7 @@ protected function invoke(mixed $input): Generator } }; - $store = new class () extends Store { + $store = new class extends Store { public function store(Result $result): void { $this->logger?->info('Stored a result'); @@ -453,7 +453,7 @@ public function store(Result $result): void it( 'immediately calls the store for each final output', function () { - $step1 = new class () extends Step { + $step1 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step1 called'); @@ -464,7 +464,7 @@ protected function invoke(mixed $input): Generator } }; - $step2 = new class 
() extends Step { + $step2 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step2 called: ' . $input); @@ -475,7 +475,7 @@ protected function invoke(mixed $input): Generator } }; - $step3 = new class () extends Step { + $step3 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step3 called: ' . $input); @@ -486,7 +486,7 @@ protected function invoke(mixed $input): Generator } }; - $step4 = new class () extends Step { + $step4 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step4 called: ' . $input); @@ -497,7 +497,7 @@ protected function invoke(mixed $input): Generator } }; - $store = new class () extends Store { + $store = new class extends Store { public function store(Result $result): void { $this->logger?->info('Stored a result: ' . $result->get('unnamed')); @@ -557,7 +557,7 @@ public function store(Result $result): void 'does not wait for all child outputs originating from an output of a step where keepAs() was called before ' . 'calling the store', function () { - $step1 = new class () extends Step { + $step1 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step1 called'); @@ -568,7 +568,7 @@ protected function invoke(mixed $input): Generator } }; - $step2 = new class () extends Step { + $step2 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step2 called: ' . $input); @@ -581,7 +581,7 @@ protected function invoke(mixed $input): Generator $step2->keepAs('foo'); - $step3 = new class () extends Step { + $step3 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step3 called: ' . 
$input); @@ -592,7 +592,7 @@ protected function invoke(mixed $input): Generator } }; - $step4 = new class () extends Step { + $step4 = new class extends Step { protected function invoke(mixed $input): Generator { $this->logger?->info('step4 called: ' . $input); @@ -605,7 +605,7 @@ protected function invoke(mixed $input): Generator $step4->keepAs('bar'); - $store = new class () extends Store { + $store = new class extends Store { public function store(Result $result): void { $this->logger?->info('Stored a result: ' . $result->get('bar')); diff --git a/tests/Loader/Http/HttpLoaderTest.php b/tests/Loader/Http/HttpLoaderTest.php index 03381d8..fdba5bd 100644 --- a/tests/Loader/Http/HttpLoaderTest.php +++ b/tests/Loader/Http/HttpLoaderTest.php @@ -282,7 +282,7 @@ public function isAllowedToBeLoaded(UriInterface $uri, bool $throwsException = f $httpClient->shouldReceive('sendRequest')->once()->andReturn(new Response(200)); - $throttler = new class () extends Throttler { + $throttler = new class extends Throttler { public function trackRequestStartFor(UriInterface $url): void { echo 'Track request start ' . $url . PHP_EOL; @@ -330,7 +330,7 @@ function (string $loadingMethod) { }) ->andReturn(new Response(200)); - $throttler = new class () extends Throttler { + $throttler = new class extends Throttler { public function trackRequestEndFor(UriInterface $url): void { echo 'Track request end ' . $url . 
PHP_EOL; diff --git a/tests/Pest.php b/tests/Pest.php index 1c9e8ee..fd95956 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -85,7 +85,7 @@ public function outputType(): StepOutputType function helper_getInputReturningStep(): Step { - return new class () extends Step { + return new class extends Step { protected function invoke(mixed $input): Generator { yield $input; @@ -95,7 +95,7 @@ protected function invoke(mixed $input): Generator function helper_getNumberIncrementingStep(): Step { - return new class () extends Step { + return new class extends Step { protected function invoke(mixed $input): Generator { yield $input + 1; @@ -105,7 +105,7 @@ protected function invoke(mixed $input): Generator function helper_getStepYieldingMultipleNumbers(): Step { - return new class () extends Step { + return new class extends Step { protected function invoke(mixed $input): Generator { foreach (['one', 'two', 'two', 'three', 'four', 'three', 'five', 'three'] as $number) { @@ -117,7 +117,7 @@ protected function invoke(mixed $input): Generator function helper_getStepYieldingMultipleArraysWithNumber(): Step { - return new class () extends Step { + return new class extends Step { protected function invoke(mixed $input): Generator { foreach (['one', 'two', 'two', 'three', 'four', 'three', 'five', 'three'] as $key => $number) { @@ -143,7 +143,7 @@ protected function invoke(mixed $input): Generator function helper_getStepYieldingMultipleObjectsWithNumber(): Step { - return new class () extends Step { + return new class extends Step { protected function invoke(mixed $input): Generator { foreach (['one', 'two', 'two', 'three', 'four', 'three', 'five', 'three'] as $key => $number) { @@ -157,7 +157,7 @@ protected function invoke(mixed $input): Generator function helper_getLoadingStep(): Step { - return new class () extends Step { + return new class extends Step { use LoadingStep; protected function invoke(mixed $input): Generator @@ -275,7 +275,7 @@ function helper_getFastLoader( 
function helper_getFastCrawler(): HttpCrawler { - return new class () extends HttpCrawler { + return new class extends HttpCrawler { protected function userAgent(): UserAgentInterface { return new UserAgent('TestBot'); diff --git a/tests/Steps/BaseStepTest.php b/tests/Steps/BaseStepTest.php index 18cb20f..8b07865 100644 --- a/tests/Steps/BaseStepTest.php +++ b/tests/Steps/BaseStepTest.php @@ -158,7 +158,7 @@ function () { })->throws(InvalidArgumentException::class); it('removes an UTF-8 byte order mark from the beginning of a string', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { yield $input; @@ -200,7 +200,7 @@ protected function validateAndSanitizeInput(mixed $input): mixed it( 'throws an exception in validateBeforeRun() when output type is scalar and keep() was used but not keepAs()', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { yield $input; @@ -292,7 +292,7 @@ protected function invoke(mixed $input): Generator })->throwsNoExceptions(); it('logs a warning, when keepFromInput() was called and previous step yields mixed outputs', function () { - $stepWithMixedOutputType = new class () extends Step { + $stepWithMixedOutputType = new class extends Step { protected function invoke(mixed $input): Generator { yield 'yo'; @@ -327,7 +327,7 @@ public function outputType(): StepOutputType it('adds all from object output to the keep array in the output object, when keep() is called', function () { $step = helper_getInputReturningStep()->keep(); - $outputObject = new class () { + $outputObject = new class { /** * @return array */ @@ -422,7 +422,7 @@ public function toArray(): array it('adds all from object input to the keep array in the output object, when keepFromInput() is called', function () { $step = helper_getValueReturningStep('foo')->keepFromInput(); - $inputObject = new class () 
{ + $inputObject = new class { /** * @return array */ diff --git a/tests/Steps/CsvTest.php b/tests/Steps/CsvTest.php index 3a89c02..f3c7d06 100644 --- a/tests/Steps/CsvTest.php +++ b/tests/Steps/CsvTest.php @@ -73,7 +73,7 @@ function helper_csvFilePath(string $fileName): string }); it('works with an object having a __toString method', function () { - $object = new class () { + $object = new class { public function __toString(): string { return << 'foo']; @@ -542,7 +542,7 @@ protected function invoke(mixed $input): Generator } }; - $step2 = new class () extends Step { + $step2 = new class extends Step { protected function invoke(mixed $input): Generator { yield ['three' => 'baz']; diff --git a/tests/Steps/Loading/LoadingStepTest.php b/tests/Steps/Loading/LoadingStepTest.php index d76d68c..4094f60 100644 --- a/tests/Steps/Loading/LoadingStepTest.php +++ b/tests/Steps/Loading/LoadingStepTest.php @@ -14,7 +14,7 @@ use function tests\helper_traverseIterable; test('you can add a loader', function () { - $step = new class () extends Step { + $step = new class extends Step { use LoadingStep; protected function invoke(mixed $input): Generator @@ -46,7 +46,7 @@ function () { $loaderTwo->shouldReceive('load')->once()->andReturn('Hi'); - $step = new class () extends Step { + $step = new class extends Step { use LoadingStep; protected function invoke(mixed $input): Generator diff --git a/tests/Steps/StepTest.php b/tests/Steps/StepTest.php index 5698be4..8e7887c 100644 --- a/tests/Steps/StepTest.php +++ b/tests/Steps/StepTest.php @@ -31,7 +31,7 @@ /** @var TestCase $this */ test('You can add a logger and it is available within the invoke method', function () { - $step = new class () extends Step { + $step = new class extends Step { /** * @return Generator */ @@ -310,7 +310,7 @@ function () { }); it('calls the validateAndSanitizeInput method', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function validateAndSanitizeInput(mixed 
$input): string { return $input . ' validated and sanitized'; @@ -331,7 +331,7 @@ protected function invoke(mixed $input): Generator 'when calling validateAndSanitizeStringOrStringable() and the input is array with a single element it tries to ' . 'use that element as input value', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function validateAndSanitizeInput(mixed $input): string { return $this->validateAndSanitizeStringOrStringable($input); @@ -353,7 +353,7 @@ protected function invoke(mixed $input): Generator 'when calling validateAndSanitizeStringOrStringable() and the input is array with multiple elements it throws ' . 'an InvalidArgumentException', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function validateAndSanitizeInput(mixed $input): string { return $this->validateAndSanitizeStringOrStringable($input); @@ -370,7 +370,7 @@ protected function invoke(mixed $input): Generator )->throws(InvalidArgumentException::class); it('is possible that a step does not produce any output at all', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { if ($input === 'foo') { @@ -435,7 +435,7 @@ protected function invoke(mixed $input): Generator 'does not yield more outputs than defined via maxOutputs() when step yields multiple outputs per input and the ' . 
'limit is reached in the middle of the outputs resulting from one input', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { yield 'one'; @@ -463,7 +463,7 @@ protected function invoke(mixed $input): Generator ); test('When a step has max outputs defined, it won\'t call the invoke method after the limit was reached', function () { - $step = new class () extends Step { + $step = new class extends Step { public int $_invokeCallCount = 0; protected function invoke(mixed $input): Generator @@ -508,7 +508,7 @@ protected function invoke(mixed $input): Generator }); test('keeping a scalar output value with keep() also works when outputKey() was used', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { yield 'hey'; @@ -654,7 +654,7 @@ function () { }); test('you can define aliases for output keys and they are considered when using keep()', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { yield [ @@ -686,7 +686,7 @@ protected function outputKeyAliases(): array }); test('you can filter outputs using an output key alias', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { yield [ @@ -711,10 +711,10 @@ protected function outputKeyAliases(): array }); it('can filter by a key that only exists in the serialized version of an output object', function () { - $step = new class () extends Step { + $step = new class extends Step { protected function invoke(mixed $input): Generator { - yield new class () { + yield new class { public string $foo = 'one'; public string $bar = 'two'; diff --git a/tests/Utils/OutputTypeHelperTest.php b/tests/Utils/OutputTypeHelperTest.php index 1edd6aa..25e8d13 100644 --- 
a/tests/Utils/OutputTypeHelperTest.php +++ b/tests/Utils/OutputTypeHelperTest.php @@ -6,7 +6,7 @@ use stdClass; it('converts an object with a toArrayForResult() method to an array', function () { - $object = new class () { + $object = new class { /** * @return string[] */ @@ -20,7 +20,7 @@ public function toArrayForResult(): array }); it('converts an object with a toArray() method to an array', function () { - $object = new class () { + $object = new class { /** * @return string[] */ @@ -34,7 +34,7 @@ public function toArray(): array }); it('converts an object with a __serialize() method to an array', function () { - $object = new class () { + $object = new class { public function __serialize(): array { return ['winnie' => 'the pooh']; @@ -45,7 +45,7 @@ public function __serialize(): array }); it('converts an object to an array by just casting it', function () { - $object = new class () { + $object = new class { public string $foo = 'one'; public string $bar = 'two'; diff --git a/tests/_Integration/GroupTest.php b/tests/_Integration/GroupTest.php index dfcedae..505b100 100644 --- a/tests/_Integration/GroupTest.php +++ b/tests/_Integration/GroupTest.php @@ -18,7 +18,7 @@ it( 'gets both, data from html and the enclosed json-ld using two steps in a group and combines the results', function () { - $crawler = new class () extends HttpCrawler { + $crawler = new class extends HttpCrawler { protected function userAgent(): UserAgentInterface { return new BotUserAgent('MyBot'); diff --git a/tests/_Integration/Http/CrawlingTest.php b/tests/_Integration/Http/CrawlingTest.php index 1ea7235..cdaa076 100644 --- a/tests/_Integration/Http/CrawlingTest.php +++ b/tests/_Integration/Http/CrawlingTest.php @@ -92,7 +92,7 @@ class Crawler extends HttpCrawler { public function loader(UserAgentInterface $userAgent, LoggerInterface $logger): TestLoader { - $client = new class () implements ClientInterface { + $client = new class implements ClientInterface { private Client $guzzleClient; 
public function __construct() diff --git a/tests/_Integration/Http/HeadlessBrowserTest.php b/tests/_Integration/Http/HeadlessBrowserTest.php index a43800f..8ad4673 100644 --- a/tests/_Integration/Http/HeadlessBrowserTest.php +++ b/tests/_Integration/Http/HeadlessBrowserTest.php @@ -77,7 +77,7 @@ protected function invoke(mixed $input): Generator $crawler ->input('http://localhost:8000/set-cookie') ->addStep(Http::get()) - ->addStep(new class () extends Step { + ->addStep(new class extends Step { protected function invoke(mixed $input): Generator { yield 'http://localhost:8000/print-cookie'; @@ -117,7 +117,7 @@ protected function invoke(mixed $input): Generator $crawler ->input('http://localhost:8000/set-js-cookie') ->addStep(Http::get()) - ->addStep(new class () extends Step { + ->addStep(new class extends Step { protected function invoke(mixed $input): Generator { yield 'http://localhost:8000/print-cookie'; diff --git a/tests/_Integration/Http/Html/PaginatedListingTest.php b/tests/_Integration/Http/Html/PaginatedListingTest.php index 621842f..dedb6f7 100644 --- a/tests/_Integration/Http/Html/PaginatedListingTest.php +++ b/tests/_Integration/Http/Html/PaginatedListingTest.php @@ -15,7 +15,7 @@ use function tests\helper_getFastLoader; it('paginates through pagination', function () { - $crawler = new class () extends HttpCrawler { + $crawler = new class extends HttpCrawler { protected function userAgent(): UserAgentInterface { return new BotUserAgent('MyBot'); diff --git a/tests/_Integration/Http/Html/SimpleListingTest.php b/tests/_Integration/Http/Html/SimpleListingTest.php index 7f00f88..d053808 100644 --- a/tests/_Integration/Http/Html/SimpleListingTest.php +++ b/tests/_Integration/Http/Html/SimpleListingTest.php @@ -15,7 +15,7 @@ use function tests\helper_getFastLoader; it('gets all the links from a listing and gets data from the detail pages', function () { - $crawler = new class () extends HttpCrawler { + $crawler = new class extends HttpCrawler { protected 
function userAgent(): UserAgentInterface { return new BotUserAgent('MyBot'); diff --git a/tests/_Integration/Http/PublisherExampleTest.php b/tests/_Integration/Http/PublisherExampleTest.php index 87bb366..6392438 100644 --- a/tests/_Integration/Http/PublisherExampleTest.php +++ b/tests/_Integration/Http/PublisherExampleTest.php @@ -76,7 +76,7 @@ protected function userAgent(): UserAgentInterface }); it('turns an array of URLs to nested extracted data from those child pages using sub crawlers', function () { - $crawlerBuilder = new class () { + $crawlerBuilder = new class { public function build(): \Crwlr\Crawler\Crawler { $crawler = new PublisherExampleCrawler(); @@ -181,7 +181,7 @@ private function extractEditionData(): Html }); test('it can also keep the URLs, provided to the sub crawler', function () { - $crawlerBuilder = new class () { + $crawlerBuilder = new class { public function build(): \Crwlr\Crawler\Crawler { $crawler = new PublisherExampleCrawler(); diff --git a/tests/_Integration/Http/RedirectTest.php b/tests/_Integration/Http/RedirectTest.php index 9e5a9cb..c241b8b 100644 --- a/tests/_Integration/Http/RedirectTest.php +++ b/tests/_Integration/Http/RedirectTest.php @@ -71,7 +71,7 @@ protected function invoke(mixed $input): Generator }); test('you can set your own max redirects limit', function () { - $crawler = new class () extends HttpCrawler { + $crawler = new class extends HttpCrawler { protected function userAgent(): UserAgentInterface { return new UserAgent('RedirectBot'); diff --git a/tests/_Integration/Http/RequestParamsFromInputTest.php b/tests/_Integration/Http/RequestParamsFromInputTest.php index 8a08d03..b5f36dc 100644 --- a/tests/_Integration/Http/RequestParamsFromInputTest.php +++ b/tests/_Integration/Http/RequestParamsFromInputTest.php @@ -11,7 +11,7 @@ use function tests\helper_getFastCrawler; test('Http steps can receive url, body and headers from an input array', function () { - $paramsStep = new class () extends Step { + $paramsStep = 
new class extends Step { protected function invoke(mixed $input): Generator { yield [ diff --git a/tests/_Integration/Http/TimeoutTest.php b/tests/_Integration/Http/TimeoutTest.php index 25f5c4c..17cda18 100644 --- a/tests/_Integration/Http/TimeoutTest.php +++ b/tests/_Integration/Http/TimeoutTest.php @@ -14,7 +14,7 @@ /** @var TestCase $this */ it('Fails when timeout is exceeded', function () { - $crawler = new class () extends HttpCrawler { + $crawler = new class extends HttpCrawler { protected function userAgent(): UserAgentInterface { return new UserAgent('SomeUserAgent');