From 00bfcb5eae81cfd0c36e5ee84c6599098c2c6023 Mon Sep 17 00:00:00 2001 From: Paul Mitchum Date: Thu, 14 Dec 2023 08:56:37 -0700 Subject: [PATCH] Finalize filefetcher custom processor DX (#4074) --- composer.json | 2 +- .../source/user-guide/guide_file_fetchers.rst | 66 +++++++++++++++ docs/source/user-guide/index.rst | 1 + .../src/FileFetcher/DkanFileFetcher.php | 66 +-------------- .../src/FileFetcher/FileFetcherFactory.php | 32 +++++-- .../custom_processor_test.info.yml | 5 ++ .../custom_processor_test.services.yml | 5 ++ .../FileFetcher/CustomFileFetcherFactory.php | 45 ++++++++++ .../src/FileFetcher/NonProcessor.php | 27 ++++++ .../src/FileFetcher/YesProcessor.php | 27 ++++++ .../FileFetcher/CustomProcessorTest.php | 84 +++++++++++++++++++ .../FileFetcher}/FileFetcherFactoryTest.php | 15 +++- .../ImportLocalCopyOfRemoteFileTest.php | 33 +++----- 13 files changed, 308 insertions(+), 100 deletions(-) create mode 100644 docs/source/user-guide/guide_file_fetchers.rst create mode 100644 modules/common/tests/modules/custom_processor_test/custom_processor_test.info.yml create mode 100644 modules/common/tests/modules/custom_processor_test/custom_processor_test.services.yml create mode 100644 modules/common/tests/modules/custom_processor_test/src/FileFetcher/CustomFileFetcherFactory.php create mode 100644 modules/common/tests/modules/custom_processor_test/src/FileFetcher/NonProcessor.php create mode 100644 modules/common/tests/modules/custom_processor_test/src/FileFetcher/YesProcessor.php create mode 100644 modules/common/tests/src/Kernel/FileFetcher/CustomProcessorTest.php rename modules/common/tests/src/{Functional => Kernel/FileFetcher}/FileFetcherFactoryTest.php (83%) diff --git a/composer.json b/composer.json index 029784a498..010046d513 100644 --- a/composer.json +++ b/composer.json @@ -15,7 +15,7 @@ "fmizzell/maquina": "^1.1.1", "getdkan/contracts": "^1.0.0", "getdkan/csv-parser": "^1.3.0", - "getdkan/file-fetcher" : "^5.0.0", + "getdkan/file-fetcher" : "^5.0.2", "getdkan/harvest": "^1.0.0", "getdkan/procrastinator": "^5.0.0", "getdkan/rooted-json-data": "^0.1", diff --git a/docs/source/user-guide/guide_file_fetchers.rst b/docs/source/user-guide/guide_file_fetchers.rst new file mode 100644 index 0000000000..98fe75f3a6 --- /dev/null +++ b/docs/source/user-guide/guide_file_fetchers.rst @@ -0,0 +1,66 @@ +Implementing a custom file fetcher +---------------------------------- + +DKAN uses a library called [getdkan/file-fetcher](https://github.com/GetDKAN/file-fetcher). This library allows developers to extend the file transfer functionality for their specialized needs. + +This library is used to download a resource, such as a CSV file, so that it can be loaded into the database and presented through the UI and API. This process is called "localization," because the source resource is copied to the local file system. Usually, this downloaded copy is temporary and is eventually removed. + +The standard file fetcher processors will probably be adequate for most uses, but there could be other use cases, such as needing to authenticate, or getting a file from S3 instead of HTTP. + +In cases such as these, we might want to add our own processor class to extend the file fetcher functionality. + +How-to: +======= + +Note that a code example can be found in the ``custom_processor_test`` module, which is used to test this functionality. + +Create a file processor class +_____________________________ + +To implement a new file processor, a create a custom file fetcher processor class. This class could extend ``FileFetcher\Processor\Remote`` or ``FileFetcher\Processor\Local``, or be a totally new implementation of ``FileFetcher\Processor\ProcessorInterface``. + +Create a FileFetcherFactory +___________________________ + +Next, create a new file fetcher factory class. This class should emulate ``Drupal\common\FileFetcher\FileFetcherFactory``. There is example code in the ``custom_processor_test`` module which demonstrates how to do this. + +The new factory should create and configure a ``FileFetcher\FileFetcher`` object to use your new custom processor. Do this by merging configuration for your new processor into the ``$config['processors']`` array that is passed to ``FileFetcherFactory::getInstance()``: + + .. code-block:: php + + public function getInstance(string $identifier, array $config = []) { + // Add OurProcessor as a custom processor. + $config['processors'] = array_merge( + [OurProcessor::class], + $config['processors'] ?? [] + ); + // Get the instance from the decorated factory, using our modified config. + return $this->decoratedFactory->getInstance($identifier, $config); + } + +Declare your factory as a service +_________________________________ + +It is also very important to declare your new factory class as a service. You accomplish this by decorating ``dkan.common.file_fetcher`` in your module's ``*.services.yml`` file, something like this: + + .. code-block:: yaml + + our_module.file_fetcher: + class: Drupal\our_module\FileFetcher\FileFetcherFactory + decorates: dkan.common.file_fetcher + arguments: ['@our_module.file_fetcher.inner'] + +Now whenever DKAN uses the ``dkan.common.file_fetcher`` service, your file fetcher factory will be used instead, and your new processor will find its way into use. + +Processor negotiation +===================== + +It's important to know how ``FileFetcher`` goes about choosing a processor. + +File fetcher knows about two processors by default: ``FileFetcher\Processor\Local`` and ``FileFetcher\Processor\Remote``. It also knows about whichever custom processor class names you configured in the ``processors`` array in configuration. + +When you ask a file fetcher object to perform the transfer (using ``FileFetcher::run()``), it will instantiate all the different types of processors it knows about. + +Then it will loop through them and use the ``ProcessorInterface::isServerCompatible()`` method to determine if the given ``source`` is suitable for use with that processor object. The file fetcher will use the first processor that answers ``true``. + +You can look at the implementations of ``FileFetcher\Processor\Local::isServerCompatible()`` or ``FileFetcher\Processor\Remote::isServerCompatible()`` to see how they each handle the question of whether they're suitable for the ``source``. diff --git a/docs/source/user-guide/index.rst b/docs/source/user-guide/index.rst index d4725c276a..17b794befa 100644 --- a/docs/source/user-guide/index.rst +++ b/docs/source/user-guide/index.rst @@ -13,4 +13,5 @@ User Guide guide_harvest guide_queues guide_local_files + guide_file_fetchers guide_revisions diff --git a/modules/common/src/FileFetcher/DkanFileFetcher.php b/modules/common/src/FileFetcher/DkanFileFetcher.php index fadde0b601..0307e4798e 100644 --- a/modules/common/src/FileFetcher/DkanFileFetcher.php +++ b/modules/common/src/FileFetcher/DkanFileFetcher.php @@ -5,72 +5,10 @@ use FileFetcher\FileFetcher; /** - * Allows FileFetcher to be reconfigured for using existing local files. + * Formerly needed to support using local files with FileFetcher. * - * This DKAN-specific extension of the FileFetcher (which comes from an - * external library) applies the DKAN configuration - * common.settings.always_use_existing_local_perspective - * when selecting the processor. The configuration itself is retrieved - * in FileFetcherFactory and passed to DkanFileFetcher on getInstance(). + * @deprecated Uneeded. Use FileFetcher. */ class DkanFileFetcher extends FileFetcher { - /** - * Tell this file fetcher whether to use local files if they exist. - * - * @param bool $use_local_file - * (optional) Whether to use the local file. If TRUE, we'll use the file - * processor that prefers to use local files. Defaults to TRUE. - * - * @return self - * Fluent interface. - * - * @see https://dkan.readthedocs.io/en/2.x/user-guide/guide_local_files.html - */ - public function setAlwaysUseExistingLocalPerspective(bool $use_local_file = TRUE) : self { - // @todo Re-computing the custom processor classes should be in another - // method that is in the parent class. - if ($use_local_file) { - $this->dkanUseLocalFileProcessor(); - } - else { - $this->dkanUseDefaultFileProcessor(); - } - return $this; - } - - /** - * Configure the processor to respect the local file if it already exists. - */ - protected function dkanUseLocalFileProcessor() { - // Set the state/config to use our remote class. - $this->setProcessors(['processors' => [FileFetcherRemoteUseExisting::class]]); - $this->setStateProperty('processor', FileFetcherRemoteUseExisting::class); - // At this very early stage, update the status if the file already exists. - /** @var \Drupal\common\FileFetcher\FileFetcherRemoteUseExisting $processor */ - $processor = $this->getProcessor(); - $existing_status = $processor->discoverStatusForExistingFile( - $this->getState(), - $this->getResult() - ); - $this->setState($existing_status['state']); - } - - /** - * Configure the processor to use its default behavior. - */ - protected function dkanUseDefaultFileProcessor() { - // @todo This ignores any other custom processor classes that might have - // been configured. Improve this situation. - $this->customProcessorClasses = []; - $state = $this->getState(); - foreach ($this->getProcessors() as $processor) { - if ($processor->isServerCompatible($state)) { - $state['processor'] = get_class($processor); - break; - } - } - $this->getResult()->setData(json_encode($state)); - } - } diff --git a/modules/common/src/FileFetcher/FileFetcherFactory.php b/modules/common/src/FileFetcher/FileFetcherFactory.php index fce72719a4..3737ccd43f 100644 --- a/modules/common/src/FileFetcher/FileFetcherFactory.php +++ b/modules/common/src/FileFetcher/FileFetcherFactory.php @@ -48,18 +48,32 @@ public function __construct(JobStoreFactory $jobStoreFactory, ConfigFactoryInter * {@inheritDoc} */ public function getInstance(string $identifier, array $config = []) { - $config = array_merge($this->configDefault, $config); - $file_fetcher = DkanFileFetcher::get( + return FileFetcher::get( $identifier, $this->jobStoreFactory->getInstance(FileFetcher::class), - $config - ); - // Inject our special configuration into the file fetcher, so it can use - // local files rather than re-downloading them. - $file_fetcher->setAlwaysUseExistingLocalPerspective( - (bool) $this->dkanConfig->get('always_use_existing_local_perspective') + $this->getFileFetcherConfig($config) ); - return $file_fetcher; + } + + /** + * Adjust the provided config for our defaults and DKAN configuration. + * + * @param array $config + * Configuration provided by the caller to getInstance(). + * + * @return array + * Modified configuration array. + */ + protected function getFileFetcherConfig(array $config): array { + // Merge in our defaults. + $config = array_merge($this->configDefault, $config); + // Add our special custom processor to the config if we're configured to + // always use the local perspective file. + if ($this->dkanConfig->get('always_use_existing_local_perspective')) { + $processors = [FileFetcherRemoteUseExisting::class] + ($config['processors'] ?? []); + $config['processors'] = $processors; + } + return $config; } } diff --git a/modules/common/tests/modules/custom_processor_test/custom_processor_test.info.yml b/modules/common/tests/modules/custom_processor_test/custom_processor_test.info.yml new file mode 100644 index 0000000000..19a3c13c2a --- /dev/null +++ b/modules/common/tests/modules/custom_processor_test/custom_processor_test.info.yml @@ -0,0 +1,5 @@ +name: 'Processor API Test' +type: module +description: 'Support module for testing custom processor APIs.' +package: Testing +core_version_requirement: ^9 || ^10 diff --git a/modules/common/tests/modules/custom_processor_test/custom_processor_test.services.yml b/modules/common/tests/modules/custom_processor_test/custom_processor_test.services.yml new file mode 100644 index 0000000000..29a3572024 --- /dev/null +++ b/modules/common/tests/modules/custom_processor_test/custom_processor_test.services.yml @@ -0,0 +1,5 @@ +services: + custom_processor_test.file_fetcher: + class: Drupal\custom_processor_test\FileFetcher\CustomFileFetcherFactory + decorates: dkan.common.file_fetcher + arguments: ['@custom_processor_test.file_fetcher.inner'] diff --git a/modules/common/tests/modules/custom_processor_test/src/FileFetcher/CustomFileFetcherFactory.php b/modules/common/tests/modules/custom_processor_test/src/FileFetcher/CustomFileFetcherFactory.php new file mode 100644 index 0000000000..7a2d888f37 --- /dev/null +++ b/modules/common/tests/modules/custom_processor_test/src/FileFetcher/CustomFileFetcherFactory.php @@ -0,0 +1,45 @@ +decoratedFactory = $decoratedFactory; + } + + /** + * {@inheritDoc} + */ + public function getInstance(string $identifier, array $config = []) { + // Add NonProcessor as a custom processor. + $config['processors'] = array_merge( + [NonProcessor::class], + $config['processors'] ?? [] + ); + // Get the instance from the decorated factory, using our modified config. + return $this->decoratedFactory->getInstance($identifier, $config); + } + +} diff --git a/modules/common/tests/modules/custom_processor_test/src/FileFetcher/NonProcessor.php b/modules/common/tests/modules/custom_processor_test/src/FileFetcher/NonProcessor.php new file mode 100644 index 0000000000..c516028b3c --- /dev/null +++ b/modules/common/tests/modules/custom_processor_test/src/FileFetcher/NonProcessor.php @@ -0,0 +1,27 @@ +setStatus(Result::DONE); + return [$state, $result]; + } + + public function isTimeLimitIncompatible(): bool { + return FALSE; + } + +} diff --git a/modules/common/tests/modules/custom_processor_test/src/FileFetcher/YesProcessor.php b/modules/common/tests/modules/custom_processor_test/src/FileFetcher/YesProcessor.php new file mode 100644 index 0000000000..65090e02fb --- /dev/null +++ b/modules/common/tests/modules/custom_processor_test/src/FileFetcher/YesProcessor.php @@ -0,0 +1,27 @@ +setStatus(Result::DONE); + return [$state, $result]; + } + + public function isTimeLimitIncompatible(): bool { + return FALSE; + } + +} diff --git a/modules/common/tests/src/Kernel/FileFetcher/CustomProcessorTest.php b/modules/common/tests/src/Kernel/FileFetcher/CustomProcessorTest.php new file mode 100644 index 0000000000..cc66e8cbb0 --- /dev/null +++ b/modules/common/tests/src/Kernel/FileFetcher/CustomProcessorTest.php @@ -0,0 +1,84 @@ +container->get('dkan.common.file_fetcher'); + $this->assertInstanceOf(CustomFileFetcherFactory::class, $factory); + + /** @var \FileFetcher\FileFetcher $instance */ + $instance = $factory->getInstance($identifier, ['filePath' => 'asdf']); + $this->assertInstanceOf(FileFetcher::class, $instance); + + $ref_custom_processors = new \ReflectionProperty($instance, 'customProcessorClasses'); + $ref_custom_processors->setAccessible(TRUE); + + // NonProcessor is always set by our custom file fetcher factory. + $this->assertContains( + NonProcessor::class, + $ref_custom_processors->getValue($instance) + ); + + $ref_get_processor = new \ReflectionMethod($instance, 'getProcessor'); + $ref_get_processor->setAccessible(TRUE); + + // NonProcessor will not process because it always returns false from + // isServerCompatible(). Also our file path of 'asdf' results in false from + // isServerCompatible() from the default processors as well, so we get NULL. + $this->assertNull($ref_get_processor->invoke($instance)); + + // Gather a file fetcher again, specifying another custom processor. + $instance = $factory->getInstance($identifier, [ + 'filePath' => 'asdf', + 'processors' => [YesProcessor::class], + ]); + + // Both custom processors are still available because our factory always + // specifies NonProcessor in addition to whatever is in $config. + $this->assertEquals([ + NonProcessor::class, + YesProcessor::class, + ], $ref_custom_processors->getValue($instance)); + $this->assertInstanceOf( + YesProcessor::class, + $ref_get_processor->invoke($instance) + ); + + // Gather a third time, and now don't specify a custom processor. + $instance = $factory->getInstance($identifier, ['filePath' => 'asdf']); + + // NonProcessor and YesProcessor are still available because they're both + // serialized into the jobstore for this file fetcher. + $this->assertEquals([ + NonProcessor::class, + YesProcessor::class, + ], $ref_custom_processors->getValue($instance)); + // Processor will still be YesProcessor because NonProcessor always returns + // false for isServerCompatible() which rules it out. + $this->assertInstanceOf(YesProcessor::class, $ref_get_processor->invoke($instance)); + } + +} diff --git a/modules/common/tests/src/Functional/FileFetcherFactoryTest.php b/modules/common/tests/src/Kernel/FileFetcher/FileFetcherFactoryTest.php similarity index 83% rename from modules/common/tests/src/Functional/FileFetcherFactoryTest.php rename to modules/common/tests/src/Kernel/FileFetcher/FileFetcherFactoryTest.php index 19cae4bf37..8424775cd8 100644 --- a/modules/common/tests/src/Functional/FileFetcherFactoryTest.php +++ b/modules/common/tests/src/Kernel/FileFetcher/FileFetcherFactoryTest.php @@ -2,7 +2,6 @@ namespace Drupal\Tests\common\Kernel\FileFetcher; -use Drupal\common\FileFetcher\DkanFileFetcher; use Drupal\common\FileFetcher\FileFetcherFactory; use Drupal\KernelTests\KernelTestBase; use FileFetcher\FileFetcher; @@ -49,6 +48,15 @@ public function testOurRemote($use_existing, $remote_class) { $factory = $this->container->get('dkan.common.file_fetcher'); $this->assertInstanceOf(FileFetcherFactory::class, $factory); + $ref_get_config = new \ReflectionMethod($factory, 'getFileFetcherConfig'); + $ref_get_config->setAccessible(TRUE); + + $ff_config = $ref_get_config->invokeArgs($factory, [[]]); + if ($use_existing) { + $this->assertArrayHasKey('processors', $ff_config); + $this->assertContains($remote_class, $ff_config['processors']); + } + // Set up an existing file. $tmp = sys_get_temp_dir(); $dest_file_path = $tmp . '/' . basename(self::DATA_FILE_URL); @@ -64,11 +72,12 @@ public function testOurRemote($use_existing, $remote_class) { ]; $ff = $factory->getInstance('identifier', $config); $this->assertInstanceOf(FileFetcher::class, $ff); - $this->assertInstanceOf(DkanFileFetcher::class, $ff); // Make sure we have the correct processor class that corresponds to our // config. - $this->assertEquals($remote_class, $ff->getState()['processor']); + $ref_get_processor = new \ReflectionMethod($ff, 'getProcessor'); + $ref_get_processor->setAccessible(TRUE); + $this->assertInstanceOf($remote_class, $ref_get_processor->invoke($ff)); // Did it work? $result = $ff->run(); diff --git a/modules/datastore/tests/src/Functional/ImportLocalCopyOfRemoteFileTest.php b/modules/datastore/tests/src/Functional/ImportLocalCopyOfRemoteFileTest.php index b9f67e2a1f..0d3af69fc9 100644 --- a/modules/datastore/tests/src/Functional/ImportLocalCopyOfRemoteFileTest.php +++ b/modules/datastore/tests/src/Functional/ImportLocalCopyOfRemoteFileTest.php @@ -3,10 +3,10 @@ namespace Drupal\Tests\datastore\Functional; use Drupal\common\DataResource; -use Drupal\common\FileFetcher\DkanFileFetcher; use Drupal\common\FileFetcher\FileFetcherRemoteUseExisting; use Drupal\datastore\Service\ResourceLocalizer; use Drupal\Tests\BrowserTestBase; +use FileFetcher\FileFetcher; use FileFetcher\Processor\Remote; use Procrastinator\Result; use RootedData\RootedJsonData; @@ -102,13 +102,18 @@ public function test() { /** @var \Drupal\datastore\Service\ResourceLocalizer $resource_localizer */ $resource_localizer = $this->container->get('dkan.datastore.service.resource_localizer'); $file_fetcher = $resource_localizer->getFileFetcher($source_resource); - $job_data = json_decode($file_fetcher->getResult()->getData()); - $this->assertEquals(Remote::class, $job_data->processor); + $ref_get_processor = new \ReflectionMethod($file_fetcher, 'getProcessor'); + $ref_get_processor->setAccessible(TRUE); + // Should be Remote. + $this->assertInstanceOf( + Remote::class, + $ref_get_processor->invoke($file_fetcher) + ); // Result should be 'waiting,' which is the default value if there is no // actual result object. $some_info = $dataset_info_service->gather($identifier); $this->assertEquals( - 'waiting', + Result::WAITING, $some_info['latest_revision']['distributions'][0]['fetcher_status'] ?? NULL ); @@ -123,20 +128,7 @@ public function test() { // We should get our FileFetcherRemoteUseExisting when we get another // file fetcher. $file_fetcher = $resource_localizer->getFileFetcher($source_resource); - $this->assertInstanceOf(DkanFileFetcher::class, $file_fetcher); - $job_data = json_decode($file_fetcher->getResult()->getData()); - $this->assertEquals( - FileFetcherRemoteUseExisting::class, - $job_data->processor - ); - - // Get the file fetcher again so we can test getProcessor(). - $file_fetcher = $resource_localizer->getFileFetcher($source_resource); - $this->assertInstanceOf(DkanFileFetcher::class, $file_fetcher); - // Access getProcessor(). - $ref_get_processor = new \ReflectionMethod($file_fetcher, 'getProcessor'); - $ref_get_processor->setAccessible(TRUE); - // Now we get our special processor. + $this->assertInstanceOf(FileFetcher::class, $file_fetcher); $this->assertInstanceOf( FileFetcherRemoteUseExisting::class, $ref_get_processor->invoke($file_fetcher) @@ -148,11 +140,6 @@ public function test() { ->set('always_use_existing_local_perspective', FALSE) ->save(); $file_fetcher = $resource_localizer->getFileFetcher($source_resource); - $job_data = json_decode($file_fetcher->getResult()->getData()); - $this->assertEquals( - Remote::class, - $job_data->processor - ); $this->assertInstanceOf( Remote::class, $ref_get_processor->invoke($file_fetcher)