Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add keywords to files during creation #4625

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
40ad9e6
Allow keywords to be specified at point of file creation that can be …
shinyhappydan Jan 5, 2024
d1a25ef
Exclude _original_source from ES queries on the default view
shinyhappydan Jan 8, 2024
540b6ac
scalafmt
shinyhappydan Jan 9, 2024
ef6e2dd
Use keywords rather than vocabulary
shinyhappydan Jan 9, 2024
2a1748d
Add test for keyword search returning nothing
shinyhappydan Jan 9, 2024
6a5c2dc
Use Label for metadata keys
shinyhappydan Jan 10, 2024
6476548
Ensure metadata is copied with files
shinyhappydan Jan 11, 2024
5387384
Remove duplicated test
shinyhappydan Jan 12, 2024
cdf3c44
Fix copy tests
shinyhappydan Jan 17, 2024
8fff4f7
Simplify resources search to only accept keywords
shinyhappydan Jan 17, 2024
de8f9a8
Move FileUserMetadata to storages plugin
shinyhappydan Jan 17, 2024
de025b7
Remove unused handlebars helper
shinyhappydan Jan 17, 2024
ee364d8
Refactor file operations to pass/return more accurate data types, mak…
shinyhappydan Jan 24, 2024
b7d790f
Fix serialization/deser issues
shinyhappydan Jan 25, 2024
bc5311e
scalafmt
shinyhappydan Jan 25, 2024
ff7b9ce
keywords context
shinyhappydan Jan 25, 2024
a612769
Fix scapegoat error
shinyhappydan Jan 25, 2024
2cfef15
Use underscore fields in file serialization
shinyhappydan Jan 25, 2024
03b922c
Fix one test
shinyhappydan Jan 25, 2024
ea9f870
Only refer to docker after it starts
shinyhappydan Jan 25, 2024
4349269
_keywords not keywords
shinyhappydan Jan 26, 2024
c1ec3d0
Fix PR issues
shinyhappydan Jan 26, 2024
aed5448
Add @vocab to indexing-metadata.json
shinyhappydan Jan 29, 2024
7ab0340
Add full text test
shinyhappydan Jan 29, 2024
e791aea
Use flattened
shinyhappydan Feb 1, 2024
9f15932
Rename rejection
shinyhappydan Feb 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,11 @@ abstract class Rejection extends Exception with Product with Serializable { self
def reason: String

}

/**
  * Extractor matching any [[Throwable]] that is not a [[Rejection]], so that a `case NotARejection(e)` clause only
  * applies to non-rejection errors.
  */
object NotARejection {

  /**
    * @param throwable
    *   the error to inspect
    * @return
    *   `None` when the error is a [[Rejection]], the error itself wrapped in a `Some` otherwise
    */
  def unapply(throwable: Throwable): Option[Throwable] =
    Some(throwable).filter {
      case _: Rejection => false
      case _            => true
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ package ch.epfl.bluebrain.nexus.delta.kernel.utils

import cats.effect.{IO, Resource}
import ch.epfl.bluebrain.nexus.delta.kernel.utils.ClasspathResourceError.{InvalidJson, InvalidJsonObject, ResourcePathNotFound}
import ch.epfl.bluebrain.nexus.delta.kernel.utils.ClasspathResourceLoader.handleBars
import com.github.jknack.handlebars.{EscapingStrategy, Handlebars}
import ch.epfl.bluebrain.nexus.delta.kernel.utils.ClasspathResourceLoader.handlebarsExpander
import fs2.text
import io.circe.parser.parse
import io.circe.{Json, JsonObject}
Expand Down Expand Up @@ -54,10 +53,8 @@ class ClasspathResourceLoader private (classLoader: ClassLoader) {
resourcePath: String,
attributes: (String, Any)*
): IO[String] = {
resourceAsTextFrom(resourcePath).map {
case text if attributes.isEmpty => text
case text => handleBars.compileInline(text).apply(attributes.toMap.asJava)
}
resourceAsTextFrom(resourcePath)
.map(handlebarsExpander.expand(_, attributes.toMap))
}

/**
Expand Down Expand Up @@ -124,7 +121,7 @@ class ClasspathResourceLoader private (classLoader: ClassLoader) {
}

object ClasspathResourceLoader {
private[utils] val handleBars = new Handlebars().`with`(EscapingStrategy.NOOP)
private val handlebarsExpander = new HandlebarsExpander
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was the issue here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had previously added something else to the handlebars instance which I then removed, although I think this interface is still better

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the handlebars thing is only used here, maybe the HandlebarsExpander can live here too?
It does not really matter though.


/**
* Creates a resource loader using the standard ClassLoader
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package ch.epfl.bluebrain.nexus.delta.kernel.utils

import com.github.jknack.handlebars.{EscapingStrategy, Handlebars}

import scala.jdk.CollectionConverters._

/**
  * Expands Handlebars templates using a Handlebars instance configured with the NOOP escaping strategy.
  */
class HandlebarsExpander {

  // NOOP escaping strategy: the substituted values are not escaped
  private val handleBars = new Handlebars()
    .`with`(EscapingStrategy.NOOP)

  /**
    * Expands the given template with the provided attributes.
    *
    * @param templateText
    *   the text of the Handlebars template
    * @param attributes
    *   the values made available to the template; when empty, the text is returned as is without being compiled
    * @return
    *   the expanded text
    */
  def expand(templateText: String, attributes: Map[String, Any]): String =
    if (attributes.isEmpty) templateText
    else handleBars.compileInline(templateText).apply(attributes.asJava)
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class ArchiveDownloadSpec
Uri.Path("file.txt"),
filename,
Some(`text/plain(UTF-8)`),
Map.empty,
bytes,
Digest.NotComputedDigest,
Client
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class ArchiveRoutesSpec extends BaseRouteSpec with StorageFixtures with ArchiveH
Uri.Path("file.txt"),
"myfile",
Some(`text/plain(UTF-8)`),
Map.empty,
12L,
ComputedDigest(DigestAlgorithm.default, "digest"),
Client
Expand Down Expand Up @@ -307,7 +308,6 @@ class ArchiveRoutesSpec extends BaseRouteSpec with StorageFixtures with ArchiveH
createdBy = subject,
updatedBy = subject
)
.accepted
val actualMetadata = result.entryAsJson(s"${project.ref}/compacted/${encode(fileId.toString)}.json")
actualMetadata shouldEqual expectedMetadata
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"prefLabel": "http://www.w3.org/2004/02/skos/core#prefLabel",
"name": "http://schema.org/name",
"label": "http://www.w3.org/2000/01/rdf-schema#label",
"description": "http://schema.org/description"
"description": "http://schema.org/description",
"@vocab": "https://bluebrain.github.io/nexus/keywords/"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one should go with indexing-metadata too

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can now be deleted from here

},
"@id": "https://bluebrain.github.io/nexus/contexts/elasticsearch-indexing.json"
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"@context": {
"@vocab": "https://bluebrain.github.io/nexus/keywords/",
"_path": "https://bluebrain.github.io/nexus/vocabulary/path",
"_fileId": {
"@id": "https://bluebrain.github.io/nexus/vocabulary/fileId",
Expand All @@ -18,6 +19,7 @@
"_location": "https://bluebrain.github.io/nexus/vocabulary/location",
"_filename": "https://bluebrain.github.io/nexus/vocabulary/filename",
"_mediaType": "https://bluebrain.github.io/nexus/vocabulary/mediaType",
"_keywords": "https://bluebrain.github.io/nexus/vocabulary/keywords",
"_uuid": "https://bluebrain.github.io/nexus/vocabulary/uuid",
"_storage": {
"@id": "https://bluebrain.github.io/nexus/vocabulary/storage",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@
}
}
},
"_keywords": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

keywords.
Also:

  • You will need a multi-field here to enable full text search properly
  • Flattened could be an option here; we have to be careful that, when searching, the ES query does not fail because a keyword value does not exist but returns an empty result instead

"type": "flattened"
},
"_storage": {
"properties": {
"_rev": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import io.circe.syntax._
/**
* Enumeration type for all possible bulk operations
*/
sealed trait ElasticSearchBulk extends Product with Serializable {
sealed trait ElasticSearchAction extends Product with Serializable {

/**
* @return
Expand All @@ -30,22 +30,22 @@ sealed trait ElasticSearchBulk extends Product with Serializable {
Json.obj("_index" -> index.value.asJson, "_id" -> id.asJson)
}

object ElasticSearchBulk {
object ElasticSearchAction {

private val newLine = System.lineSeparator()

final case class Index(index: IndexLabel, id: String, content: Json) extends ElasticSearchBulk {
final case class Index(index: IndexLabel, id: String, content: Json) extends ElasticSearchAction {
def payload: String = Json.obj("index" -> json).noSpaces + newLine + content.noSpaces
}
final case class Create(index: IndexLabel, id: String, content: Json) extends ElasticSearchBulk {
final case class Create(index: IndexLabel, id: String, content: Json) extends ElasticSearchAction {
def payload: String = Json.obj("create" -> json).noSpaces + newLine + content.noSpaces
}
final case class Update(index: IndexLabel, id: String, content: Json, retry: Int = 0) extends ElasticSearchBulk {
final case class Update(index: IndexLabel, id: String, content: Json, retry: Int = 0) extends ElasticSearchAction {
val modified = if (retry > 0) json deepMerge Json.obj("retry_on_conflict" -> retry.asJson) else json

def payload: String = Json.obj("update" -> modified).noSpaces + newLine + content.asJson.noSpaces
}
final case class Delete(index: IndexLabel, id: String) extends ElasticSearchBulk {
final case class Delete(index: IndexLabel, id: String) extends ElasticSearchAction {
def payload: String = Json.obj("delete" -> json).noSpaces + newLine
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ class ElasticSearchClient(client: HttpClient, endpoint: Uri, maxIndexPathLength:
* @param refresh
* the value for the `refresh` Elasticsearch parameter
*/
def bulk(ops: Seq[ElasticSearchBulk], refresh: Refresh = Refresh.False): IO[BulkResponse] = {
def bulk(ops: Seq[ElasticSearchAction], refresh: Refresh = Refresh.False): IO[BulkResponse] = {
if (ops.isEmpty) IO.pure(BulkResponse.Success)
else {
val bulkEndpoint = (endpoint / bulkPath).withQuery(Query(refreshParam -> refresh.value))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ final case class QueryBuilder private[client] (private val query: JsonObject) {
range(nxv.createdAt.prefix, params.createdAt) ++
params.updatedBy.map(term(nxv.updatedBy.prefix, _)) ++
range(nxv.updatedAt.prefix, params.updatedAt) ++
params.tag.map(term(nxv.tags.prefix, _)),
params.tag.map(term(nxv.tags.prefix, _)) ++
params.keywords.map { case (key, value) =>
term(s"_keywords.$key", value)
},
mustNotTerms = typesTerms(params.typeOperator.negate, excludeTypes),
withScore = params.q.isDefined
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import cats.effect.IO
import ch.epfl.bluebrain.nexus.delta.kernel.kamon.KamonMetricComponent
import ch.epfl.bluebrain.nexus.delta.kernel.syntax.kamonSyntax
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.ElasticSearchViews
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.ElasticSearchAction.{Delete, Index}
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.ElasticSearchClient.BulkResponse.{MixedOutcomes, Success}
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.ElasticSearchClient.{BulkResponse, Refresh}
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.{ElasticSearchBulk, ElasticSearchClient, IndexLabel}
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.{ElasticSearchAction, ElasticSearchClient, IndexLabel}
import ch.epfl.bluebrain.nexus.delta.sdk.implicits._
import ch.epfl.bluebrain.nexus.delta.sourcing.stream.Elem
import ch.epfl.bluebrain.nexus.delta.sourcing.stream.Elem.FailedElem
Expand Down Expand Up @@ -49,20 +50,20 @@ final class ElasticSearchSink private (
KamonMetricComponent(ElasticSearchViews.entityType.value)

override def apply(elements: Chunk[Elem[Json]]): IO[Chunk[Elem[Unit]]] = {
val bulk = elements.foldLeft(Vector.empty[ElasticSearchBulk]) {
case (acc, successElem @ Elem.SuccessElem(_, _, _, _, _, json, _)) =>
val actions = elements.foldLeft(Vector.empty[ElasticSearchAction]) {
case (actions, successElem @ Elem.SuccessElem(_, _, _, _, _, json, _)) =>
if (json.isEmpty()) {
acc :+ ElasticSearchBulk.Delete(index, documentId(successElem))
actions :+ Delete(index, documentId(successElem))
} else
acc :+ ElasticSearchBulk.Index(index, documentId(successElem), json)
case (acc, droppedElem: Elem.DroppedElem) =>
acc :+ ElasticSearchBulk.Delete(index, documentId(droppedElem))
case (acc, _: Elem.FailedElem) => acc
actions :+ Index(index, documentId(successElem), json)
case (actions, droppedElem: Elem.DroppedElem) =>
actions :+ Delete(index, documentId(droppedElem))
case (actions, _: Elem.FailedElem) => actions
}

if (bulk.nonEmpty) {
if (actions.nonEmpty) {
client
.bulk(bulk, refresh)
.bulk(actions, refresh)
.map(ElasticSearchSink.markElems(_, elements, documentId))
} else {
IO.pure(elements.map(_.void))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode.Iri
import ch.epfl.bluebrain.nexus.delta.sdk.marshalling.QueryParamsUnmarshalling.{iriFromStringUnmarshaller, iriVocabFromStringUnmarshaller => iriUnmarshaller}
import ch.epfl.bluebrain.nexus.delta.sdk.projects.model.ProjectContext
import ch.epfl.bluebrain.nexus.delta.sourcing.model.Identity.Subject
import ch.epfl.bluebrain.nexus.delta.sourcing.model.ResourceRef
import ch.epfl.bluebrain.nexus.delta.sourcing.model.Tag.UserTag
import ch.epfl.bluebrain.nexus.delta.sourcing.model.{Label, ResourceRef}

/**
* Search parameters for any generic resource type.
Expand Down Expand Up @@ -49,6 +49,7 @@ final case class ResourcesSearchParams(
updatedAt: TimeRange = TimeRange.Anytime,
types: List[Type] = List.empty,
typeOperator: TypeOperator = TypeOperator.Or,
keywords: Map[Label, String] = Map.empty,
schema: Option[ResourceRef] = None,
q: Option[String] = None,
tag: Option[UserTag] = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ object DefaultViewsQuery {

type Elasticsearch = DefaultViewsQuery[SearchResults[JsonObject], AggregationResult]

// Query parameter (name -> value) asking Elasticsearch to leave `_original_source` out of the returned `_source`
private val excludeOriginalSource: (String, String) = ("_source_excludes", "_original_source")

def apply(
aclCheck: AclCheck,
client: ElasticSearchClient,
Expand All @@ -52,7 +54,10 @@ object DefaultViewsQuery {
aclCheck,
(request: DefaultSearchRequest, views: Set[IndexingView]) =>
client
.search(request.params, views.map(_.index), Uri.Query.Empty)(request.pagination, request.sort)
.search(request.params, views.map(_.index), Uri.Query(excludeOriginalSource))(
request.pagination,
request.sort
)
.adaptError { case e: HttpClientError => ElasticSearchClientError(e) },
(request: DefaultSearchRequest, views: Set[IndexingView]) =>
client
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ import ch.epfl.bluebrain.nexus.delta.sdk.marshalling.QueryParamsUnmarshalling.Ir
import ch.epfl.bluebrain.nexus.delta.sdk.model.BaseUri
import ch.epfl.bluebrain.nexus.delta.sdk.model.search.{Sort, SortList}
import ch.epfl.bluebrain.nexus.delta.sdk.projects.model.ProjectContext
import ch.epfl.bluebrain.nexus.delta.sourcing.model.ResourceRef
import ch.epfl.bluebrain.nexus.delta.sourcing.model.{Label, ResourceRef}
import io.circe.parser

trait ElasticSearchViewsDirectives extends UriDirectives {

Expand All @@ -29,6 +30,18 @@ trait ElasticSearchViewsDirectives extends UriDirectives {
// Gathers every occurrence of the `type` query parameter; the gathered values are handed back in reversed order
private def types(implicit um: FromStringUnmarshaller[Type]): Directive1[List[Type]] =
  parameter("type".as[Type].*).map { collected => collected.toList.reverse }

/**
  * Unmarshaller reading keywords from a string holding a JSON object whose values are strings. A parsing or decoding
  * failure is rethrown as is.
  */
implicit val keywordsFromStringUnmarshaller: FromStringUnmarshaller[Map[Label, String]] =
  Unmarshaller.strict { raw =>
    parser
      .parse(raw)
      .flatMap(json => json.as[Map[Label, String]])
      .fold[Map[Label, String]](failure => throw failure, decoded => decoded)
  }

// Optional `keywords` query parameter; an empty map is used when it is not provided
private def keywords: Directive1[Map[Label, String]] = {
  val noKeywords = Map.empty[Label, String]
  parameter("keywords".as[Map[Label, String]].withDefault(noKeywords))
}

// Optional `typeOperator` query parameter, falling back to `Or` when it is not provided
private def typeOperator(implicit um: FromStringUnmarshaller[TypeOperator]): Directive1[TypeOperator] =
  parameter("typeOperator".as[TypeOperator].?[TypeOperator](Or))
Expand Down Expand Up @@ -68,7 +81,9 @@ trait ElasticSearchViewsDirectives extends UriDirectives {
baseUri: BaseUri,
pc: ProjectContext
): Directive1[ResourcesSearchParams] = {
(searchParams & createdAt & updatedAt & types & typeOperator & schema & id & locate & parameter("q".?) & tagParam)
(searchParams & createdAt & updatedAt & types & typeOperator & keywords & schema & id & locate & parameter(
"q".?
) & tagParam)
.tmap {
case (
deprecated,
Expand All @@ -79,6 +94,7 @@ trait ElasticSearchViewsDirectives extends UriDirectives {
updatedAt,
types,
typeOperator,
keywords,
schema,
id,
locate,
Expand All @@ -97,6 +113,7 @@ trait ElasticSearchViewsDirectives extends UriDirectives {
updatedAt,
types,
typeOperator,
keywords,
schema,
qq,
tag
Expand All @@ -111,7 +128,9 @@ trait ElasticSearchViewsDirectives extends UriDirectives {
implicit val baseIriUm: FromStringUnmarshaller[IriBase] =
DeltaSchemeDirectives.iriBaseFromStringUnmarshallerNoExpansion

(searchParams & createdAt & updatedAt & types & typeOperator & schema & id & locate & parameter("q".?) & tagParam)
(searchParams & createdAt & updatedAt & types & typeOperator & keywords & schema & id & locate & parameter(
"q".?
) & tagParam)
.tmap {
case (
deprecated,
Expand All @@ -122,6 +141,7 @@ trait ElasticSearchViewsDirectives extends UriDirectives {
updatedAt,
types,
typeOperator,
keywords,
schema,
id,
locate,
Expand All @@ -140,6 +160,7 @@ trait ElasticSearchViewsDirectives extends UriDirectives {
updatedAt,
types,
typeOperator,
keywords,
schema,
qq,
tag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import cats.effect.IO
import cats.syntax.all._
import ch.epfl.bluebrain.nexus.delta.kernel.utils.UUIDF
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.ElasticSearchViewsQuerySuite.Sample
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.ElasticSearchBulk
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.ElasticSearchAction
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.model.ElasticSearchViewRejection.{DifferentElasticSearchViewType, ViewIsDeprecated, ViewNotFound}
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.model.ElasticSearchViewValue.{AggregateElasticSearchViewValue, IndexingElasticSearchViewValue}
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.model.{defaultViewId, permissions, ElasticSearchViewType}
Expand Down Expand Up @@ -265,7 +265,7 @@ class ElasticSearchViewsQuerySuite
bulk <- allResources.traverse { r =>
r.asDocument(ref).map { d =>
// We create a unique id across all indices
ElasticSearchBulk.Index(view.index, genString(), d)
ElasticSearchAction.Index(view.index, genString(), d)
}
}
_ <- client.bulk(bulk)
Expand Down
Loading