From cb103cac8ba1669cf6a06b07aa2af6053f2447c7 Mon Sep 17 00:00:00 2001
From: Martynas Bagdonas
Date: Tue, 30 May 2017 21:40:30 +0300
Subject: [PATCH 1/2] File identifier prototype

---
 controllers/StorageController.php | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/config/routes.inc.php     |  1 +
 model/Storage.inc.php             | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+)

diff --git a/controllers/StorageController.php b/controllers/StorageController.php
index f1f2075c..016d3bc8 100644
--- a/controllers/StorageController.php
+++ b/controllers/StorageController.php
@@ -164,4 +164,79 @@ public function storagetransferbucket() {
 		Zotero_Storage::transferBucket('zoterofilestorage', 'zoterofilestoragetest');
 		exit;
 	}
+
+	/**
+	 * Returns bibliographic identifiers (ISBN, ISSN, DOI) known for a file hash
+	 *
+	 * Accepts POST only and is rate-limited per client IP. The 'hash' request
+	 * parameter is looked up in up to 5 libraries that contain the file, and the
+	 * distinct identifier values found on the attachments' source items are
+	 * echoed as JSON, grouped by field name. Nothing is echoed if 'hash' is
+	 * missing, empty or not a string.
+	 */
+	public function identify() {
+		$this->allowMethods(array('POST'));
+
+		// 10 requests per second per IP with 100 requests burst
+		$rateLimit = [
+			'logOnly' => false,
+			'bucket' => $_SERVER['REMOTE_ADDR'],
+			'capacity' => 100,
+			'rate' => 10
+		];
+
+		// $this->requestLimiter must be initialized in ApiController
+		$requestsRemaining = $this->requestLimiter->checkBucketRate($rateLimit);
+
+		if (!$requestsRemaining) {
+			StatsD::increment("request.limit.identify.rate.rejected", 1);
+			Z_Core::logError('Request rate limit exceeded for ' . $rateLimit['bucket']);
+
+			if (!$rateLimit['logOnly']) {
+				// Cast the whole quotient (not just the capacity) so the header is always whole seconds
+				header("Retry-After: " . (int) ceil($rateLimit['capacity'] / $rateLimit['rate']));
+				$this->e429();
+			}
+		}
+
+		if (!empty($_REQUEST['hash']) && is_string($_REQUEST['hash'])) {
+			$hash = $_REQUEST['hash'];
+			$fieldTypes = [
+				11, // ISBN
+				13, // ISSN
+				26 // DOI
+			];
+			$identifiers = [];
+
+			// Checks up to 5 libraries and stops querying further ones once 10 identifiers are collected
+			$numIdentifiers = 0;
+			$numLibraries = 0;
+
+			// NOTE(review): falls back to [] in case the lookup returns a non-array for an unknown hash -- confirm
+			$libraryIDs = Zotero_Storage::getHashLibraries($hash) ?: [];
+			while (($libraryID = array_shift($libraryIDs))
+				&& $numIdentifiers < 10
+				&& $numLibraries++ < 5) {
+				$fields = Zotero_Storage::getFileSourceFields($libraryID, $hash, $fieldTypes);
+				foreach ($fields as $fieldID => $values) {
+					$fieldName = Zotero_ItemFields::getName($fieldID);
+					foreach ($values as $value) {
+						if (!isset($identifiers[$fieldName])) {
+							$identifiers[$fieldName] = [];
+						}
+
+						// Strict comparison: numeric-looking strings such as '0123' and '123' must stay distinct
+						if (!in_array($value, $identifiers[$fieldName], true)) {
+							$identifiers[$fieldName][] = $value;
+							$numIdentifiers++;
+						}
+					}
+				}
+			}
+
+			echo Zotero_Utilities::formatJSON([
+				'identifiers' => $identifiers
+			]);
+		}
+	}
 }
diff --git a/include/config/routes.inc.php b/include/config/routes.inc.php
index 3c609c64..f0fe879b 100644
--- a/include/config/routes.inc.php
+++ b/include/config/routes.inc.php
@@ -34,6 +34,7 @@
 	$router->map('/users/i:objectUserID/publications/items/:objectKey/file/view', ['controller' => 'Items', 'extra' => ['allowHTTP' => true, 'file' => true, 'view' => true, 'publications' => true]]);
 	$router->map('/groups/i:objectGroupID/items/:objectKey/file', array('controller' => 'Items', 'extra' => array('allowHTTP' => true, 'file' => true)));
 	$router->map('/groups/i:objectGroupID/items/:objectKey/file/view', array('controller' => 'Items', 'extra' => array('allowHTTP' => true, 'file' => true, 'view' => true)));
+	$router->map('/identify', array('controller' => 'Storage', 'action' => 'identify'));
 
 	// Full-text content
 	$router->map('/users/i:objectUserID/items/:objectKey/fulltext', array('controller' => 'FullText', 'action' => 'itemContent'));
diff --git a/model/Storage.inc.php b/model/Storage.inc.php
index da7f1003..f06018c5 100644
--- a/model/Storage.inc.php
+++ b/model/Storage.inc.php
@@ -904,6 +904,46 @@ public static function getUserUsage($userID) {
 		return $usage;
 	}
 
+	/**
+	 * Get the IDs of libraries that have a stored file with the given hash
+	 *
+	 * @param string $hash File hash to look up in storageFiles
+	 * @return mixed Result of Zotero_DB::columnQuery() for the libraryID column
+	 */
+	public static function getHashLibraries($hash) {
+		$sql = "SELECT libraryID FROM storageFiles JOIN storageFileLibraries USING (storageFileID) WHERE hash = ?";
+		return Zotero_DB::columnQuery($sql, $hash);
+	}
+
+	/**
+	 * Get identifier field values from the source items of attachments
+	 * with the given file hash in one library
+	 *
+	 * @param int $libraryID Library to search; also selects the shard to query
+	 * @param string $hash Storage hash of the attachment file
+	 * @param int[] $fieldTypes Field IDs to return (e.g. ISBN, ISSN, DOI)
+	 * @return array Map of fieldID => list of values; the query is capped at 2 rows
+	 */
+	public static function getFileSourceFields($libraryID, $hash, $fieldTypes) {
+		// Limit the number of source-item metadata fields taken per library so
+		// that other libraries still get a chance to influence the result in
+		// case one library has incorrect metadata.
+		// Only non-empty fields exist in itemData
+		// Field IDs are interpolated into the SQL, so force them to integers
+		$sql = "SELECT fieldID, `value` FROM itemData WHERE itemID IN
+				(SELECT MAX(sourceItemID) FROM itemAttachments
+				JOIN items USING (itemID) WHERE libraryID = ? AND storageHash = ?)
+				AND fieldID IN (" . implode(',', array_map('intval', $fieldTypes)) . ") LIMIT 2";
+		$rows = Zotero_DB::query($sql, array($libraryID, $hash), Zotero_Shards::getByLibraryID($libraryID));
+		$fields = [];
+		foreach ($rows as $row) {
+			if (!isset($fields[$row['fieldID']])) {
+				$fields[$row['fieldID']] = [];
+			}
+			$fields[$row['fieldID']][] = $row['value'];
+		}
+		return $fields;
+	}
 
 	private static function updateLastAdded($storageFileID) {
 		$sql = "UPDATE storageFiles SET lastAdded=NOW() WHERE storageFileID=?";

From f708f3ba2603bd387ccad779a1c16ea14c3a1545 Mon Sep 17 00:00:00 2001
From: Martynas Bagdonas
Date: Wed, 31 May 2017 08:07:44 +0300
Subject: [PATCH 2/2] Removed 'MAX' from subquery which was temporarily used to limit itemAttachment results to only one attachment

---
 model/Storage.inc.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model/Storage.inc.php b/model/Storage.inc.php
index f06018c5..8d06b6ea 100644
--- a/model/Storage.inc.php
+++ b/model/Storage.inc.php
@@ -931,7 +931,7 @@ public static function getFileSourceFields($libraryID, $hash, $fieldTypes) {
 		// Only non-empty fields exist in itemData
 		// Field IDs are interpolated into the SQL, so force them to integers
 		$sql = "SELECT fieldID, `value` FROM itemData WHERE itemID IN
-				(SELECT MAX(sourceItemID) FROM itemAttachments
+				(SELECT sourceItemID FROM itemAttachments
 				JOIN items USING (itemID) WHERE libraryID = ? AND storageHash = ?)
 				AND fieldID IN (" . implode(',', array_map('intval', $fieldTypes)) . ") LIMIT 2";
 		$rows = Zotero_DB::query($sql, array($libraryID, $hash), Zotero_Shards::getByLibraryID($libraryID));