Skip to content

Commit

Permalink
Revert "Allow basename lookups for DRS paths [BW-790, SUP-534] (broad…
Browse files Browse the repository at this point in the history
  • Loading branch information
cahrens authored Sep 29, 2021
1 parent 8022438 commit e2c72e1
Show file tree
Hide file tree
Showing 14 changed files with 31 additions and 151 deletions.
9 changes: 0 additions & 9 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
# Cromwell Change Log

## 69 Release Notes

### Bug Fixes

### DRS/`basename` Fix

The WDL `basename` function should now work as expected with DRS paths, giving the basename of the
resolved file, not just a substring of the DRS path.

## 68 Release Notes

### Virtual Private Cloud
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,13 @@ package cromwell.backend

import cromwell.core.io.AsyncIoFunctions
import cromwell.core.path.PathFactory
import cromwell.filesystems.drs.{DrsPath, DrsResolver}
import wom.expression.IoFunctionSet

import scala.concurrent.Future
import scala.util.Try

trait ReadLikeFunctions extends PathFactory with IoFunctionSet with AsyncIoFunctions {

override def resolvedFileBasename(value: String): Future[String] = buildPath(value) match {
case drsPath: DrsPath => DrsResolver.getResolvedBasename(drsPath).unsafeToFuture()
case path =>
val name = path.name
if (name.nonEmpty) Future.successful(name)
else Future.failed(new Exception(s"No resolvable basename for $value. Was it a directory?"))
}

override def readFile(path: String, maxBytes: Option[Int], failOnOverflow: Boolean): Future[String] =
Future.fromTry(Try(buildPath(path))) flatMap { p => asyncIo.contentAsStringAsync(p, maxBytes, failOnOverflow) }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import wom.expression.IoFunctionSet.{IoDirectory, IoFile}

import scala.concurrent.{Await, Future}
import scala.concurrent.Await
import scala.concurrent.duration.Duration

class DirectoryFunctionsSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers {
Expand All @@ -18,8 +18,7 @@ class DirectoryFunctionsSpec extends AnyFlatSpec with CromwellTimeoutSpec with M
override def copyFile(source: String, destination: String) = throw new UnsupportedOperationException()
override def glob(pattern: String) = throw new UnsupportedOperationException()
override def size(path: String) = throw new UnsupportedOperationException()
override def resolvedFileBasename(path: String): Future[String] = throw new UnsupportedOperationException()
override def readFile(path: String, maxBytes: Option[Int], failOnOverflow: Boolean) = throw new UnsupportedOperationException()
override def readFile(path: String, maxBytes: Option[Int], failOnOverflow: Boolean) = throw new UnsupportedOperationException()
override def pathFunctions = throw new UnsupportedOperationException()
override def writeFile(path: String, content: String) = throw new UnsupportedOperationException()
override implicit def ec = throw new UnsupportedOperationException()
Expand All @@ -33,7 +32,7 @@ class DirectoryFunctionsSpec extends AnyFlatSpec with CromwellTimeoutSpec with M
val innerDir = (rootDir / "innerDir").createDirectories()
val link = innerDir / "linkToRootDirInInnerDir"
link.symbolicLinkTo(rootDir)

def listRecursively(path: String)(visited: Vector[String] = Vector.empty): Iterator[String] = {
Await.result(functions.listDirectory(path)(visited), Duration.Inf) flatMap {
case IoFile(v) => List(v)
Expand Down
1 change: 0 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ lazy val backend = project
.dependsOn(services)
.dependsOn(core % "test->test")
.dependsOn(common % "test->test")
.dependsOn(drsFileSystem)

lazy val googlePipelinesCommon = (project in backendRoot / "google" / "pipelines" / "common")
.withLibrarySettings("cromwell-pipelines-common")
Expand Down
22 changes: 0 additions & 22 deletions centaur/src/main/resources/standardTestCases/drs_basename.test

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ workflow taskless_engine_functions {

Array[String] strings = ["a", "b"]

# This is a local file path so that all test backends can interpret it. In Cromwell instances without a local
# backend, we might expect this to fail because "that doesn't look like a path"
String filepath = "/not/a/real/file.txt"
String filepath = "gs://not/a/real/file.txt"

Array[Array[Int]] matrix = [
[1, 0],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,37 @@ object DrsResolver {
} yield drsFileSystemProvider.drsPathResolver
}

final case class PartialMarthaResponse(gsUri: Option[String], fileName: Option[String], bondProvider: Option[String])
private def getGsUriFileNameBondProvider(pathAsString: String,
drsPathResolver: DrsPathResolver
): IO[PartialMarthaResponse] = {
): IO[(Option[String], Option[String], Option[String])] = {
val fields = NonEmptyList.of(MarthaField.GsUri, MarthaField.FileName, MarthaField.BondProvider)
for {
marthaResponse <- drsPathResolver.resolveDrsThroughMartha(pathAsString, fields)
} yield PartialMarthaResponse(marthaResponse.gsUri, marthaResponse.fileName, marthaResponse.bondProvider)
} yield (marthaResponse.gsUri, marthaResponse.fileName, marthaResponse.bondProvider)
}

/** Returns the `gsUri` if it ends in the `fileName` and the `bondProvider` is empty. */
private def getSimpleGsUri(gsUriFileNameAndBondProvider: PartialMarthaResponse): Option[String] = {
private def getSimpleGsUri(gsUriOption: Option[String],
fileNameOption: Option[String],
bondProviderOption: Option[String],
): Option[String] = {
for {
// Only return gsUri that do not use Bond
gsUri <- if (gsUriFileNameAndBondProvider.bondProvider.isEmpty) gsUriFileNameAndBondProvider.gsUri else None
gsUri <- if (bondProviderOption.isEmpty) gsUriOption else None
// Only return the gsUri if there is no fileName or if gsUri ends in /fileName
if gsUriFileNameAndBondProvider.fileName.forall(fileName => gsUri.endsWith(s"/$fileName"))
if fileNameOption.forall(fileName => gsUri.endsWith(s"/$fileName"))
} yield gsUri
}

/** Returns the `gsUri` if it ends in the `fileName` and the `bondProvider` is empty. */
def getSimpleGsUri(pathAsString: String,
drsPathResolver: DrsPathResolver): IO[Option[String]] = {
getGsUriFileNameBondProvider(pathAsString, drsPathResolver).map(getSimpleGsUri)
.handleErrorWith(resolveError(pathAsString))
val gsUriIO = for {
tuple <- getGsUriFileNameBondProvider(pathAsString, drsPathResolver)
(gsUriOption, fileNameOption, bondProviderOption) = tuple
} yield getSimpleGsUri(gsUriOption, fileNameOption, bondProviderOption)

gsUriIO.handleErrorWith(resolveError(pathAsString))
}

/** Returns the `gsUri` if it ends in the `fileName` and the `bondProvider` is empty. */
Expand All @@ -70,29 +76,22 @@ object DrsResolver {
def getContainerRelativePath(drsPath: DrsPath): IO[String] = {
val pathIO = for {
drsPathResolver <- getDrsPathResolver(drsPath)
gsUriFileNameAndBondProvider <- getGsUriFileNameBondProvider(drsPath.pathAsString, drsPathResolver)
tuple <- getGsUriFileNameBondProvider(drsPath.pathAsString, drsPathResolver)
(gsUriOption, fileNameOption, _) = tuple
/*
In the DOS/DRS spec file names are safe for file systems but not necessarily the DRS URIs.
Reuse the regex defined for ContentsObject.name, plus add "/" for directory separators.
https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/#_contentsobject
*/
rootPath = DefaultPathBuilder.get(drsPath.pathWithoutScheme.replaceAll("[^/A-Za-z0-9._-]", "_"))
fileName <- getFileName(gsUriFileNameAndBondProvider.fileName, gsUriFileNameAndBondProvider.gsUri)
fileName <- getFileName(fileNameOption, gsUriOption)
fullPath = rootPath.resolve(fileName)
fullPathString = fullPath.pathAsString
} yield fullPathString

pathIO.handleErrorWith(resolveError(drsPath.pathAsString))
}

def getResolvedBasename(drsPath: DrsPath): IO[String] = {
for {
drsPathResolver <- getDrsPathResolver(drsPath)
gsUriFileNameAndBondProvider <- getGsUriFileNameBondProvider(drsPath.pathAsString, drsPathResolver)
filename <- getFileName(gsUriFileNameAndBondProvider.fileName, gsUriFileNameAndBondProvider.gsUri)
} yield filename
}

/**
* Return the file name returned from the martha response or get it from the gsUri
*/
Expand Down
1 change: 0 additions & 1 deletion runConfigurations/Cromwell_server.run.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
<env name="CROMWELL_BUILD_RESOURCES_DIRECTORY" value="target/ci/resources" />
<env name="CROMWELL_BUILD_PAPI_JSON_FILE" value="target/ci/resources/cromwell-centaur-service-account.json" />
<env name="CROMWELL_BUILD_CENTAUR_READ_LINES_LIMIT" value="128000" />
<env name="CROMWELL_BUILD_CENTAUR_256_BITS_KEY" value="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" />
</envs>
<option name="MAIN_CLASS_NAME" value="cromwell.CromwellApp" />
<module name="cromwell" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,19 +595,17 @@ object EngineFunctionEvaluators {
ioFunctionSet: IoFunctionSet,
forCommandInstantiationOptions: Option[ForCommandInstantiationOptions])
(implicit expressionValueEvaluator: ValueEvaluator[ExpressionElement]): ErrorOr[EvaluatedValue[WomString]] = {
def simpleBasename(fileNameAsString: WomString): Try[String] = {
Try(Await.result(ioFunctionSet.resolvedFileBasename(fileNameAsString.valueString), 60.seconds))
}
def simpleBasename(fileNameAsString: WomString) = fileNameAsString.valueString.split('/').last

a.suffixToRemove match {
case None => processValidatedSingleValue[WomString, WomString](a.param.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)) { filePathString =>
simpleBasename(filePathString).map(basename => EvaluatedValue(WomString(basename), Seq.empty)).toErrorOrWithContext(s"interpret '${filePathString.valueString}' as a file path input for basename")
case None => processValidatedSingleValue[WomString, WomString](a.param.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)) { str =>
EvaluatedValue(WomString(simpleBasename(str)), Seq.empty).validNel
}
case Some(suffixToRemove) => processTwoValidatedValues[WomString, WomString, WomString](
a.param.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions),
suffixToRemove.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)) { (filePathString, suffix) =>
simpleBasename(filePathString).map(basename => EvaluatedValue(WomString(basename.stripSuffix(suffix.valueString)), Seq.empty)).toErrorOrWithContext(s"interpret '${filePathString.valueString}' as a file path input for basename")
}
suffixToRemove.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)) { (name, suffix) =>
EvaluatedValue(WomString(simpleBasename(name).stripSuffix(suffix.valueString)), Seq.empty).validNel
}
}
}
}
Expand Down
6 changes: 2 additions & 4 deletions wom/src/main/scala/wom/expression/IoFunctionSetAdapter.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
package wom.expression
import scala.concurrent.Future

class IoFunctionSetAdapter(delegate: IoFunctionSet) extends IoFunctionSet {
override def pathFunctions: PathFunctionSet = delegate.pathFunctions
override def resolvedFileBasename(path: String): Future[String] = delegate.resolvedFileBasename(path)
override def readFile(path: String, maxBytes: Option[Int], failOnOverflow: Boolean): Future[String] = delegate.readFile(path, maxBytes, failOnOverflow)
override def pathFunctions = delegate.pathFunctions
override def readFile(path: String, maxBytes: Option[Int], failOnOverflow: Boolean) = delegate.readFile(path, maxBytes, failOnOverflow)
override def writeFile(path: String, content: String) = delegate.writeFile(path, content)
override def createTemporaryDirectory(name: Option[String]) = delegate.createTemporaryDirectory(name)
override def copyFile(source: String, destination: String) = delegate.copyFile(source, destination)
Expand Down
2 changes: 0 additions & 2 deletions wom/src/main/scala/wom/expression/NoIoFunctionSet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ object EmptyIoFunctionSet {
}

class EmptyIoFunctionSet extends IoFunctionSet {

override def resolvedFileBasename(path: String): Future[String] = Future.failed(new UnsupportedOperationException("resolvedPath is not available here"))
override def readFile(path: String, maxBytes: Option[Int] = None, failOnOverflow: Boolean = false): Future[String] = Future.failed(new UnsupportedOperationException("readFile is not available here"))

override def writeFile(path: String, content: String): Future[WomSingleFile] = {
Expand Down
13 changes: 2 additions & 11 deletions wom/src/main/scala/wom/expression/WomExpression.scala
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ trait PathFunctionSet {
def relativeToHostCallRoot(path: String): String

/**
* Similar to java.nio.Path.getFileName.
*
* Note: Does NOT run DRS resolution so will return the wrong value for DRS files.
* Similar to java.nio.Path.getFileName
*/
def name(path: String): String

Expand Down Expand Up @@ -130,13 +128,6 @@ trait IoFunctionSet {
// Functions that do NOT necessitate network I/O but are only manipulating paths
def pathFunctions: PathFunctionSet

/**
* Get the basename of this path. If a DRS path, resolve to a real URL and get the basename
* @param path The input path
* @return The base filename of the object at the (fully resolved) path
*/
def resolvedFileBasename(path: String): Future[String]

// Functions that (possibly) necessitate I/O operation (on local, network, or cloud filesystems)
/**
* Read the content of a file
Expand Down Expand Up @@ -196,7 +187,7 @@ trait IoFunctionSet {
* To map/flatMap over IO results
*/
implicit def ec: ExecutionContext

implicit def cs = IO.contextShift(ec)

/**
Expand Down

0 comments on commit e2c72e1

Please sign in to comment.