From 828192ba9fa25c9d42c6b68a07496d5b98d69e5f Mon Sep 17 00:00:00 2001 From: mshroom <32199029+mshroom@users.noreply.github.com> Date: Wed, 26 Jun 2024 17:24:43 +0300 Subject: [PATCH] [FINNA-1729] LIDO: Drop support for splitting titles (#160) --- src/RecordManager/Base/Record/Lido.php | 20 +----- .../Base/Utils/MetadataUtils.php | 62 ------------------- .../Base/Record/LidoTest.php | 2 + 3 files changed, 3 insertions(+), 81 deletions(-) diff --git a/src/RecordManager/Base/Record/Lido.php b/src/RecordManager/Base/Record/Lido.php index c0bf5a350..faa27e47d 100644 --- a/src/RecordManager/Base/Record/Lido.php +++ b/src/RecordManager/Base/Record/Lido.php @@ -146,29 +146,11 @@ public function toSolrArray(Database $db = null) $data['record_format'] = 'lido'; $title = $this->getTitle(false); - if ($this->getDriverParam('splitTitles', false)) { - $titlePart = $this->metadataUtils->splitTitle($title); - if ($titlePart) { - $data['description'] = $title; - $title = $titlePart; - } - } $data['title'] = $data['title_short'] = $data['title_full'] = $title; - // Create sort title from the title that may have been split above: $data['title_sort'] = $this->metadataUtils->createSortTitle($title); $data['title_alt'] = $this->getAltTitles(); - $description = $this->getDescription(); - if ($description) { - if ( - !empty($data['description']) - && !str_starts_with($description, $data['description']) - ) { - $data['description'] .= " -- $description"; - } else { - $data['description'] = $description; - } - } + $data['description'] = $this->getDescription(); $data['format'] = $this->getObjectWorkType(); $data['identifier'] = $this->getIdentifier(); diff --git a/src/RecordManager/Base/Utils/MetadataUtils.php b/src/RecordManager/Base/Utils/MetadataUtils.php index 659809e87..60331b476 100644 --- a/src/RecordManager/Base/Utils/MetadataUtils.php +++ b/src/RecordManager/Base/Utils/MetadataUtils.php @@ -935,68 +935,6 @@ function (&$val, $key, $chars) { return $array; } - /** - * Split title to main title and description. Tries to find the first sentence - * break where the title can be split. - * - * @param string $title Title to split - * - * @return null|string Null if title was not split, otherwise the initial - * title part - */ - public function splitTitle($title) - { - $i = 0; - $parenLevel = 0; - $bracketLevel = 0; - // Make sure the title has single spaces for whitespace - $title = preg_replace('/\s+/', ' ', $title); - $titleWords = explode(' ', $title); - foreach ($titleWords as $word) { - ++$i; - $parenLevel += substr_count($word, '('); - $parenLevel -= substr_count($word, ')'); - $bracketLevel += substr_count($word, '['); - $bracketLevel -= substr_count($word, ']'); - if ($parenLevel == 0 && $bracketLevel == 0) { - // Try to avoid splitting at short words or the very beginning - if ( - substr($word, -1) == '.' && strlen($word) > 2 - && ($i > 1 || strlen($word) > 4) - ) { - // Verify that the word is strippable (not abbreviation etc.) - $leadStripped = $this->stripLeadingPunctuation( - $word - ); - $stripped = $this->stripTrailingPunctuation( - $leadStripped - ); - $nextFirst = isset($titleWords[$i]) - ? substr($titleWords[$i], 0, 1) - : ''; - // 1.) There has to be something following this word. - // 2.) The trailing period must be strippable or end with a year. - // 3.) Next word has to start with a capital or digit - // 4.) Not something like 12-p. - // 5.) Not initials like A.N. - if ( - $nextFirst - && ($leadStripped != $stripped - || preg_match('/^\d{4}\.$/', $word)) - && (is_numeric($nextFirst) || !ctype_lower($nextFirst)) - && !preg_match('/.+\-\w{1,2}\.$/', $word) - && !preg_match('/^\w\.\w\.$/', $word) // initials - ) { - return $this->stripTrailingPunctuation( - implode(' ', array_splice($titleWords, 0, $i)) - ); - } - } - } - } - return null; - } - /** * Determine if a record is a hidden component part * diff --git a/tests/RecordManagerTest/Base/Record/LidoTest.php b/tests/RecordManagerTest/Base/Record/LidoTest.php index 0dfb9dd68..ca4dfcfa9 100644 --- a/tests/RecordManagerTest/Base/Record/LidoTest.php +++ b/tests/RecordManagerTest/Base/Record/LidoTest.php @@ -64,6 +64,7 @@ public function testLido1() 'title_sort' => 'luonnonsuojelusäädökset toimittanut raimo luhtanen' . ' säädökset', 'title_alt' => [], + 'description' => '', 'format' => 'Kirja', 'identifier' => '26054', 'institution' => 'Test Institution', @@ -179,6 +180,7 @@ public function testLido1NonMergedTitle() 'title_alt' => [ 'Säädökset', ], + 'description' => '', 'format' => 'Kirja', 'identifier' => '26054', 'institution' => 'Test Institution',