diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index efb7abf30066..0efefc19a132 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -99,11 +99,14 @@ jobs: ./joern --src /tmp/foo --run scan ./joern-scan /tmp/foo ./joern-scan --dump - - run: | + - name: Joern Slice Testing + run: | mkdir /tmp/slice ./joern-slice data-flow tests/code/javasrc/SliceTest.java -o /tmp/slice/dataflow-slice-javasrc.json echo "checking that the script output contains the content we expect:" ./joern --script "tests/test-dataflow-slice.sc" --param sliceFile=/tmp/slice/dataflow-slice-javasrc.json | grep 'List(boolean b, b, this, s, "MALICIOUS", s, new Foo("MALICIOUS"), s, s, "SAFE", s, b, this, this, b, s, System.out)' + - name: SARIF Export Testing + run: ./tests/finding-to-sarif-test.sh - run: | cd joern-cli/target/universal/stage ./schema-extender/test.sh diff --git a/console/src/main/scala/io/joern/console/BridgeBase.scala b/console/src/main/scala/io/joern/console/BridgeBase.scala index fde3833282f6..bb9f948227f4 100644 --- a/console/src/main/scala/io/joern/console/BridgeBase.scala +++ b/console/src/main/scala/io/joern/console/BridgeBase.scala @@ -232,6 +232,9 @@ trait BridgeBase extends InteractiveShell with ScriptExecution with PluginHandli builder += s"""openForInputPath("$name")""".stripMargin } builder ++= config.runBefore + builder ++= "import _root_.io.shiftleft.semanticcpg.sarif.SarifConfig" + :: "implicit val sarifConfig: SarifConfig = SarifConfig(semanticVersion = version)" + :: Nil builder.result() } diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/SarifExtension.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/SarifExtension.scala new file mode 100644 index 000000000000..a60fddda5a3e --- /dev/null +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/SarifExtension.scala @@ -0,0 +1,68 @@ +package io.shiftleft.semanticcpg.language + +import io.shiftleft.codepropertygraph.generated.Cpg 
/** Extension methods that export `Finding` nodes stored in a CPG as a SARIF log.
  *
  * @param traversal
  *   the findings to export
  * @see
  *   https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html
  */
class SarifExtension(val traversal: Iterator[Finding]) extends AnyVal {

  /** Consumes the traversal and builds a SARIF log holding exactly one run.
    *
    * Every finding is converted with `config.resultConverter`. The first value of `MetaData.root` of the graph the
    * first finding lives in is registered, as a file URI string, under the `PROJECT_ROOT` key of
    * `originalUriBaseIds`; an empty string is used when there are no findings or no root is recorded.
    *
    * @param config
    *   tool metadata, SARIF version and result converter to use.
    * @return
    *   the SARIF model for the findings.
    */
  @Doc(info = "execute this traversal and convert findings to SARIF format")
  def toSarif(implicit config: SarifConfig = SarifConfig()): Sarif = {
    // The iterator can only be walked once, so materialise it before deriving anything from it.
    val findings = traversal.l

    val projectRoot = findings.headOption
      .flatMap(first => Cpg(first.graph).metaData.root.headOption)
      .map(rootPath => java.io.File(rootPath).toURI)

    val convertedResults = findings.map(config.resultConverter.convertFindingToResult)

    config.sarifVersion match {
      case SarifVersion.V2_1_0 =>
        val rootLocation = v2_1_0.Schema.ArtifactLocation(uriBaseId = Some(projectRoot.fold("")(_.toString)))
        val driver = v2_1_0.Schema.ToolComponent(
          name = config.toolName,
          fullName = config.toolFullName,
          organization = config.organization,
          semanticVersion = config.semanticVersion,
          informationUri = config.toolInformationUri
        )
        val run = v2_1_0.Schema.Run(
          tool = v2_1_0.Schema.Tool(driver = driver),
          originalUriBaseIds = Map("PROJECT_ROOT" -> rootLocation),
          results = convertedResults
        )
        Sarif2_1_0(runs = List(run))
    }
  }

  /** Consumes the traversal and renders the SARIF log as a JSON string.
    *
    * @param pretty
    *   when true the JSON is pretty-printed, otherwise it is written compactly.
    * @param config
    *   export configuration; its `customSerializers` are appended to the default json4s formats.
    * @return
    *   the serialized SARIF log.
    */
  @Doc(info = "execute this traversal and convert findings to SARIF format as JSON")
  def toSarifJson(pretty: Boolean = false)(implicit config: SarifConfig = SarifConfig()): String = {
    implicit val formats: Formats = org.json4s.DefaultFormats ++ config.customSerializers

    if (pretty) writePretty(toSarif)
    else write(toSarif)
  }

}
/** Tool-specific metadata and conversion settings that control how findings are converted to SARIF.
  *
  * @param toolName
  *   The name of the tool component.
  * @param toolFullName
  *   The name of the tool component along with its version and any other useful identifying information, such as its
  *   locale.
  * @param toolInformationUri
  *   The absolute URI at which information about this version of the tool component can be found.
  * @param organization
  *   The organization or company that produced the tool component.
  * @param semanticVersion
  *   The tool component version in the format specified by Semantic Versioning 2.0.
  * @param sarifVersion
  *   The SARIF format version of the resulting log file.
  * @param resultConverter
  *   The converter used to map each Finding node to a SARIF `Result`.
  * @param customSerializers
  *   JSON serializers added to the default json4s formats when the log is written, e.g. for additional properties of
  *   [[io.shiftleft.semanticcpg.sarif.SarifSchema.Sarif]] derived classes.
  */
case class SarifConfig(
  toolName: String = "Joern",
  toolFullName: String = "Joern - The Bug Hunter's Workbench",
  toolInformationUri: URI = URI("https://joern.io"),
  organization: String = "Joern.io",
  semanticVersion: String = "0.0.1",
  sarifVersion: SarifVersion = SarifVersion.V2_1_0,
  resultConverter: ScanResultToSarifConverter = JoernScanResultToSarifConverter(),
  customSerializers: List[Serializer[?]] = SarifSchema.serializers
)

object SarifConfig {

  /** The SARIF format versions that can be selected through [[SarifConfig.sarifVersion]].
    */
  enum SarifVersion {
    case V2_1_0
  }

}
+ */ + def text: String + } + + /** Specifies the location of an artifact. + */ + trait ArtifactLocation private[sarif] { + + /** @return + * A string containing a valid relative or absolute URI. + */ + def uri: Option[URI] + + /** @return + * A string which indirectly specifies the absolute URI with respect to which a relative URI in the "uri" + * property is interpreted. + */ + def uriBaseId: Option[String] + } + + /** A set of threadFlows which together describe a pattern of code execution relevant to detecting a result. + */ + trait CodeFlow private[sarif] { + + /** @return + * A message relevant to the code flow. + */ + def message: Message + + /** @return + * An array of one or more unique threadFlow objects, each of which describes the progress of a program through a + * thread of execution. + */ + def threadFlows: List[ThreadFlow] + } + + /** A location within a programming artifact. + */ + trait Location private[sarif] { + + /** @return + * Identifies the artifact and region. + */ + def physicalLocation: PhysicalLocation + } + + /** Encapsulates a message intended to be read by the end user. + */ + trait Message private[sarif] { + + /** @return + * A plain text message string. + */ + def text: String + } + + /** A physical location relevant to a result. Specifies a reference to a programming artifact together with a range of + * bytes or characters within that artifact. + */ + trait PhysicalLocation private[sarif] { + + /** @return + * The location of the artifact. + */ + def artifactLocation: ArtifactLocation + + /** @return + * Specifies a portion of the artifact. + */ + def region: Region + } + + /** A region within an artifact where a result was detected. + */ + trait Region private[sarif] { + + /** @return + * The line number of the first character in the region. + */ + def startLine: Option[Int] + + /** @return + * The column number of the first character in the region. 
+ */ + def startColumn: Option[Int] + + /** @return + * The line number of the last character in the region. + */ + def endLine: Option[Int] + + /** @return + * The column number of the character following the end of the region. + */ + def endColumn: Option[Int] + + /** @return + * The portion of the artifact contents within the specified region. + */ + def snippet: Option[ArtifactContent] + } + + /** A result produced by an analysis tool. + */ + trait Result private[sarif] { + + /** @return + * The stable, unique identifier of the rule, if any, to which this result is relevant. + */ + def ruleId: String + + /** @return + * A message that describes the result. The first sentence of the message only will be displayed when visible + * space is limited. + */ + def message: Message + + /** @return + * A value specifying the severity level of the result. + */ + def level: String + + /** @return + * The set of locations where the result was detected. Specify only one location unless the problem indicated by + * the result can only be corrected by making a change at every specified location. + */ + def locations: List[Location] + + /** @return + * A set of locations relevant to this result. + */ + def relatedLocations: List[Location] + + /** @return + * An array of 'codeFlow' objects relevant to the result. + */ + def codeFlows: List[CodeFlow] + } + + /** Describes a single run of an analysis tool, and contains the reported output of that run. + */ + trait Run private[sarif] { + + /** @return + * Information about the tool or tool pipeline that generated the results in this run. A run can only contain + * results produced by a single tool or tool pipeline. A run can aggregate results from multiple log files, as + * long as context around the tool run (tool command-line arguments and the like) is identical for all aggregated + * files. + */ + def tool: Tool + + /** @return + * The set of results contained in an SARIF log. 
The results array can be omitted when a run is solely exporting + * rules metadata. It must be present (but may be empty) if a log file represents an actual scan. + */ + def results: List[Result] + + /** @return + * The artifact location specified by each uriBaseId symbol on the machine where the tool originally ran. + */ + def originalUriBaseIds: Map[String, ArtifactLocation] + } + + /** Describes a sequence of code locations that specify a path through a single thread of execution such as an + * operating system or fiber. + */ + trait ThreadFlow private[sarif] { + + /** @return + * A temporally ordered array of 'threadFlowLocation' objects, each of which describes a location visited by the + * tool while producing the result. + */ + def locations: List[ThreadFlowLocation] + } + + /** A location visited by an analysis tool while simulating or monitoring the execution of a program. + */ + trait ThreadFlowLocation private[sarif] { + + /** @return + * The code location. + */ + def location: Location + } + + /** The analysis tool that was run. + */ + trait Tool private[sarif] { + def driver: ToolComponent + } + + /** A component, such as a plug-in or the driver, of the analysis tool that was run. + */ + trait ToolComponent private[sarif] { + + /** @return + * The name of the tool component. + */ + def name: String + + /** @return + * The name of the tool component along with its version and any other useful identifying information, such as + * its locale. + */ + def fullName: String + + /** @return + * The organization or company that produced the tool component. + */ + def organization: String + + /** @return + * The tool component version in the format specified by Semantic Versioning 2.0. + */ + def semanticVersion: String + + /** @return + * The absolute URI at which information about this version of the tool component can be found. + */ + def informationUri: URI + } + + /** A value specifying the severity level of the result. 
/** A value specifying the severity level of the result.
  */
object Level {
  val None    = "none"
  val Note    = "note"
  val Warning = "warning"
  val Error   = "error"

  /** Maps a CVSS score to a SARIF severity level.
    *
    * A score of exactly 0.0 maps to `none`, scores up to 3.9 to `note`, scores up to 6.9 to `warning` and anything
    * above that, up to 10.0, to `error`.
    *
    * @param cvssScore
    *   the CVSS score, expected to lie in the range [0.0, 10.0].
    * @return
    *   the SARIF level; `warning` (after logging an error) if the score is NaN or outside the valid range.
    */
  def cvssToLevel(cvssScore: Double): String = {
    cvssScore match {
      // NaN fails every ordering comparison, so it has to be rejected explicitly; otherwise it would fall through
      // all guarded cases and raise a MatchError.
      case score if score.isNaN || score < 0.0 || score > 10.0 =>
        logger.error(s"Score '$score' is not a valid CVSS score! Defaulting to 'warning' SARIF level.")
        Warning
      case score if score == 0.0 => None
      case score if score <= 3.9 => Note
      case score if score <= 6.9 => Warning
      // Everything left is in (6.9, 10.0]; an unguarded case keeps the match total.
      case _ => Error
    }
  }

}
/** A component that converts a CPG finding to a SARIF result.
  *
  * The return type is the version-independent [[SarifSchema.Result]], so an implementation may target any SARIF
  * version.
  */
trait ScanResultToSarifConverter {

  /** Converts the given finding to a SARIF result.
    * @param finding
    *   the finding to convert.
    * @return
    *   a SARIF compliant result object.
    */
  def convertFindingToResult(finding: Finding): Result

}
/** Converts a finding node to the SARIF v2.1.0 model.
  */
class JoernScanResultToSarifConverter extends ScanResultToSarifConverter {

  import JoernScanResultToSarifConverter.*

  /** Builds a SARIF result from a finding.
    *
    * The last evidence node becomes the result's location, the first evidence node its related location, and all
    * evidence nodes together form a single code flow.
    *
    * @param finding
    *   the finding to convert.
    * @return
    *   the SARIF v2.1.0 result.
    */
  override def convertFindingToResult(finding: Finding): SarifSchema.Result = {
    val locations        = finding.evidence.lastOption.map(nodeToLocation).toList
    val relatedLocations = finding.evidence.headOption.map(nodeToLocation).toList
    Schema.Result(
      ruleId = finding.name,
      message = Schema.Message(text = finding.title),
      level = SarifSchema.Level.cvssToLevel(finding.score),
      locations = locations,
      relatedLocations = relatedLocations,
      codeFlows = evidenceToCodeFlow(finding) :: Nil
    )
  }

  /** Wraps all evidence nodes of the finding, in order, in one thread flow; the finding's description is used as the
    * code flow message.
    */
  protected def evidenceToCodeFlow(finding: Finding): Schema.CodeFlow = {
    Schema.CodeFlow(
      message = Schema.Message(text = finding.description),
      threadFlows = Schema.ThreadFlow(
        finding.evidence.map(node => Schema.ThreadFlowLocation(location = nodeToLocation(node))).l
      ) :: Nil
    )
  }

  /** Describes where the given node lives: the artifact (file) it belongs to plus the region inside it.
    */
  protected def nodeToLocation(node: StoredNode): Schema.Location = {
    Schema.Location(physicalLocation =
      Schema.PhysicalLocation(
        artifactLocation = Schema.ArtifactLocation(uri = nodeToUri(node)),
        region = nodeToRegion(node)
      )
    )
  }

  /** Determines the file URI of a node.
    *
    * @return
    *   the URI for internal type declarations and methods with a non-empty file name and for expressions with an
    *   associated file; `None` for every other node and for file names that cannot be expressed as a URI.
    */
  protected def nodeToUri(node: StoredNode): Option[URI] = {
    node match {
      case t: TypeDecl if !t.isExternal => Option(t.filename).filterNot(_ == "").flatMap(filenameToUri)
      case m: Method if !m.isExternal   => Option(m.filename).filterNot(_ == "").flatMap(filenameToUri)
      case expr: Expression             => expr.file.headOption.flatMap(file => filenameToUri(file.name))
      case _                            => None
    }
  }

  /** Turns a file name into a URI without throwing.
    *
    * File names that are already valid URI references are parsed unchanged. Names containing characters that are
    * illegal in a URI (spaces, `<`, `>`, backslashes, ...) make the single-argument `URI` constructor throw, so for
    * those the multi-argument constructor is used, which percent-encodes illegal characters in the path.
    *
    * @return
    *   the URI, or `None` if the name is null or cannot be represented even after encoding.
    */
  private def filenameToUri(filename: String): Option[URI] = {
    Option(filename).flatMap { name =>
      scala.util
        .Try(new URI(name))
        .orElse(scala.util.Try(new URI(null, null, name, null)))
        .toOption
    }
  }

  /** Determines the source region of a node.
    *
    * Type declarations and generic CFG nodes contribute their start position and code, methods additionally their end
    * position. Nodes of any other kind yield a region without any position information instead of `null`, because
    * `PhysicalLocation.region` is not optional and consumers dereference it.
    */
  protected def nodeToRegion(node: StoredNode): Schema.Region = {
    node match {
      case t: TypeDecl =>
        Schema.Region(
          startLine = t.lineNumber,
          startColumn = t.columnNumber,
          snippet = Option(Schema.ArtifactContent(t.code))
        )
      case m: Method =>
        Schema.Region(
          startLine = m.lineNumber,
          startColumn = m.columnNumber,
          endLine = m.lineNumberEnd,
          endColumn = m.columnNumberEnd,
          snippet = Option(Schema.ArtifactContent(m.code))
        )
      case n: CfgNode =>
        Schema.Region(
          startLine = n.lineNumber,
          startColumn = n.columnNumber,
          snippet = Option(Schema.ArtifactContent(n.code))
        )
      case _ => Schema.Region(startLine = None)
    }
  }

}

/** Due to module dependencies, the following code is lifted from `io.joern.console.scan`.
  */
object JoernScanResultToSarifConverter {

  /** Keys of the key-value pairs a finding is expected to carry. */
  private object FindingKeys {
    val name        = "name"
    val title       = "title"
    val description = "description"
    val score       = "score"
  }

  /** Typed accessors for the key-value pairs attached to a finding node.
    */
  implicit class FindingExtension(val node: Finding) extends AnyRef {

    def name: String = getValue(FindingKeys.name)

    def title: String = getValue(FindingKeys.title)

    def description: String = getValue(FindingKeys.description)

    /** The finding's score, or -1 if it is missing or not parseable as a double. */
    def score: Double = getValue(FindingKeys.score).toDoubleOption.getOrElse(-1d)

    /** Looks up the value stored under `key`, falling back to `default` if the finding has no such pair. */
    protected def getValue(key: String, default: String = ""): String =
      node.keyValuePairs.find(_.key == key).map(_.value).getOrElse(default)

  }
}
+ */ + final case class ArtifactLocation(uri: Option[URI] = None, uriBaseId: Option[String] = Option("PROJECT_ROOT")) + extends SarifSchema.ArtifactLocation + + final case class CodeFlow(message: Message, threadFlows: List[ThreadFlow]) extends SarifSchema.CodeFlow + + final case class Location(physicalLocation: PhysicalLocation) extends SarifSchema.Location + + final case class Message(text: String) extends SarifSchema.Message + + final case class PhysicalLocation(artifactLocation: ArtifactLocation, region: Region) + extends SarifSchema.PhysicalLocation + + final case class Region( + startLine: Option[Int], + startColumn: Option[Int] = None, + endLine: Option[Int] = None, + endColumn: Option[Int] = None, + snippet: Option[ArtifactContent] = None + ) extends SarifSchema.Region + + final case class Result( + ruleId: String, + message: Message, + level: String, + locations: List[Location], + relatedLocations: List[Location], + codeFlows: List[CodeFlow] + ) extends SarifSchema.Result + + final case class Run(tool: Tool, results: List[SarifSchema.Result], originalUriBaseIds: Map[String, ArtifactLocation]) + extends SarifSchema.Run + + final case class ThreadFlow(locations: List[ThreadFlowLocation]) extends SarifSchema.ThreadFlow + + final case class ThreadFlowLocation(location: Location) extends SarifSchema.ThreadFlowLocation + + final case class Tool(driver: ToolComponent) extends SarifSchema.Tool + + final case class ToolComponent( + name: String, + fullName: String, + organization: String, + semanticVersion: String, + informationUri: URI + ) extends SarifSchema.ToolComponent + +} diff --git a/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala b/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala new file mode 100644 index 000000000000..2ab3a9bf4170 --- /dev/null +++ b/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala @@ -0,0 +1,329 @@ +package io.shiftleft.semanticcpg.language + +import 
flatgraph.DiffGraphApplier +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.{NewFinding, NewKeyValuePair, NewMethod} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class SarifTests extends AnyWordSpec with Matchers { + + import SarifTests.* + + "a CPG without finding nodes" should { + val cpg = Cpg.empty + + "create a SARIF file with empty results" in { + val sarif = cpg.finding.toSarif + sarif.version shouldBe "2.1.0" + sarif.schema shouldBe "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json" + sarif.runs.size shouldBe 1 + val run = sarif.runs.head + run.results shouldBe Nil + val tool = run.tool.driver + tool.name shouldBe "Joern" + tool.fullName shouldBe "Joern - The Bug Hunter's Workbench" + tool.organization shouldBe "Joern.io" + } + } + + "an iterable with a single finding node with all expected properties" should { + + val cpg = Cpg.empty + + createValidFindingNode(cpg) + + "create a valid SARIF result" in { + val sarif = cpg.finding.toSarif() + val results = sarif.runs.head.results + results.size shouldBe 1 + val result = results.head + + result.ruleId shouldBe "f1" + result.message.text shouldBe "Finding 1" + result.level shouldBe "error" + + val region = result.locations.head.physicalLocation.region + + region.startLine shouldBe Some(2) + region.snippet.map(_.text) shouldBe Some("public foo()") + + val artifactLocation = result.locations.head.physicalLocation.artifactLocation + artifactLocation.uri.map(_.toString) shouldBe Some("Bar.java") + + result.codeFlows.size shouldBe 1 + result.codeFlows.head.message.text shouldBe "something bad happened" + } + + "create a valid SARIF JSON" in { + cpg.finding.toSarifJson(pretty = true) shouldBe + """{ + | "version":"2.1.0", + | "$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json", + | "runs":[ + | { + | "tool":{ + | 
"driver":{ + | "name":"Joern", + | "fullName":"Joern - The Bug Hunter's Workbench", + | "organization":"Joern.io", + | "semanticVersion":"0.0.1", + | "informationUri":"https://joern.io" + | } + | }, + | "results":[ + | { + | "ruleId":"f1", + | "message":{ + | "text":"Finding 1" + | }, + | "level":"error", + | "locations":[ + | { + | "physicalLocation":{ + | "artifactLocation":{ + | "uri":"Bar.java", + | "uriBaseId":"PROJECT_ROOT" + | }, + | "region":{ + | "startLine":2, + | "snippet":{ + | "text":"public foo()" + | } + | } + | } + | } + | ], + | "relatedLocations":[ + | { + | "physicalLocation":{ + | "artifactLocation":{ + | "uri":"Bar.java", + | "uriBaseId":"PROJECT_ROOT" + | }, + | "region":{ + | "startLine":2, + | "snippet":{ + | "text":"public foo()" + | } + | } + | } + | } + | ], + | "codeFlows":[ + | { + | "message":{ + | "text":"something bad happened" + | }, + | "threadFlows":[ + | { + | "locations":[ + | { + | "location":{ + | "physicalLocation":{ + | "artifactLocation":{ + | "uri":"Bar.java", + | "uriBaseId":"PROJECT_ROOT" + | }, + | "region":{ + | "startLine":2, + | "snippet":{ + | "text":"public foo()" + | } + | } + | } + | } + | } + | ] + | } + | ] + | } + | ] + | } + | ], + | "originalUriBaseIds":{ + | "PROJECT_ROOT":{ + | "uriBaseId":"" + | } + | } + | } + | ] + |} + |""".stripMargin.trim + } + + } + + "an iterable with a single finding node with missing properties" should { + + val cpg = Cpg.empty + + createInvalidFindingNode(cpg) + + "create a valid SARIF result" in { + val sarif = cpg.finding.toSarif() + val results = sarif.runs.head.results + results.size shouldBe 1 + val result = results.head + + result.ruleId shouldBe "f1" + result.message.text shouldBe "" + result.level shouldBe "warning" + + val region = result.locations.head.physicalLocation.region + + region.startLine shouldBe Some(2) + region.snippet.map(_.text) shouldBe Some("public foo()") + + val artifactLocation = result.locations.head.physicalLocation.artifactLocation + 
artifactLocation.uri.map(_.toString) shouldBe None + + result.codeFlows.size shouldBe 1 + result.codeFlows.head.message.text shouldBe "something bad happened" + } + + "create a valid SARIF JSON" in { + cpg.finding.toSarifJson(pretty = true) shouldBe + """ + |{ + | "version":"2.1.0", + | "$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json", + | "runs":[ + | { + | "tool":{ + | "driver":{ + | "name":"Joern", + | "fullName":"Joern - The Bug Hunter's Workbench", + | "organization":"Joern.io", + | "semanticVersion":"0.0.1", + | "informationUri":"https://joern.io" + | } + | }, + | "results":[ + | { + | "ruleId":"f1", + | "message":{ + | "text":"" + | }, + | "level":"warning", + | "locations":[ + | { + | "physicalLocation":{ + | "artifactLocation":{ + | "uriBaseId":"PROJECT_ROOT" + | }, + | "region":{ + | "startLine":2, + | "snippet":{ + | "text":"public foo()" + | } + | } + | } + | } + | ], + | "relatedLocations":[ + | { + | "physicalLocation":{ + | "artifactLocation":{ + | "uriBaseId":"PROJECT_ROOT" + | }, + | "region":{ + | "startLine":2, + | "snippet":{ + | "text":"public foo()" + | } + | } + | } + | } + | ], + | "codeFlows":[ + | { + | "message":{ + | "text":"something bad happened" + | }, + | "threadFlows":[ + | { + | "locations":[ + | { + | "location":{ + | "physicalLocation":{ + | "artifactLocation":{ + | "uriBaseId":"PROJECT_ROOT" + | }, + | "region":{ + | "startLine":2, + | "snippet":{ + | "text":"public foo()" + | } + | } + | } + | } + | } + | ] + | } + | ] + | } + | ] + | } + | ], + | "originalUriBaseIds":{ + | "PROJECT_ROOT":{ + | "uriBaseId":"" + | } + | } + | } + | ] + |} + |""".stripMargin.trim + } + + } + +} + +object SarifTests { + + def createValidFindingNode(cpg: Cpg): Unit = { + val dg = Cpg.newDiffGraphBuilder + val method = NewMethod() + .name("Foo") + .lineNumber(2) + .filename("Bar.java") + .code("public foo()") + val finding = NewFinding() + .evidence(Iterator.single(method)) + .keyValuePairs( + 
#!/usr/bin/env bash
#
# End-to-end check of the SARIF export:
#   1. scan the sample project with joern-scan and store the findings,
#   2. export the findings to a SARIF file via tests/test-sarif.sc,
#   3. upload that file to the online SARIF validator and exit with the exit code it reports.

set -euo pipefail

SCRIPT_ABS_PATH=$(readlink -f "$0")
JOERN_TESTS_DIR=$(dirname "$SCRIPT_ABS_PATH")
JOERN="$JOERN_TESTS_DIR"/..
SARIF_DIR=/tmp/sarif
SARIF_FILE="$SARIF_DIR/test.sarif"

# The launchers are invoked as ./joern-scan and ./joern, and the CPG is read back from $JOERN/workspace, so make
# the run independent of the caller's working directory.
cd "$JOERN"

mkdir -p "$SARIF_DIR"
./joern-scan "$JOERN_TESTS_DIR/code/sarif-test" --store
./joern --script "$JOERN_TESTS_DIR/test-sarif.sc" --param cpgFile="$JOERN/workspace/sarif-test/cpg.bin" --param outFile="$SARIF_FILE"

# --fail turns HTTP errors into a non-zero curl status instead of piping an error page into jq.
exit_code=$(curl --silent --show-error --fail -X POST \
  -F "postedFiles=@${SARIF_FILE};type=application/octet-stream" \
  https://sarifweb.azurewebsites.net/Validation/ValidateFiles | jq -r '.exitCode')

# jq prints "null" when the field is missing; refuse to pass anything non-numeric to `exit`.
if ! [[ "$exit_code" =~ ^[0-9]+$ ]]; then
  echo "SARIF validation service did not report a numeric exit code (got '$exit_code')" >&2
  exit 1
fi

echo "SARIF Validation Exit Code: $exit_code"

exit "$exit_code"