Skip to content

Commit

Permalink
Query EdenFS for File SHA1 instead of Performing a Full Read for Writ…
Browse files Browse the repository at this point in the history
…ing Artifacts

Summary:
## Current

When deciding whether to write an artifact we read the full file into memory and compare it to the new contents.

## Problem

Because Relay writes over 100K artifacts this is causing a serious strain.

## Solution

Switch to using EdenFS that can provide quick access to the SHA-1 for comparison.

https://www.internalfb.com/intern/wiki/EdenFS/guide-for-tool-authors/#what-is-faster-in-edenfs

: Added a new config field for a specifying a artifact file hash fetch function for comparison instead of reading the entire file contents in the artifact writer.

Reviewed By: tyao1

Differential Revision: D47752336

fbshipit-source-id: 16285414e883f4c519d4bd0e612ef662d2e9066f
  • Loading branch information
Jerry Francois authored and facebook-github-bot committed Aug 10, 2023
1 parent a8dc8a9 commit 95ec269
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 29 deletions.
1 change: 1 addition & 0 deletions compiler/crates/relay-compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ relay-docblock = { path = "../relay-docblock" }
relay-schema = { path = "../relay-schema" }
relay-transforms = { path = "../relay-transforms" }
relay-typegen = { path = "../relay-typegen" }
rustc-hash = "1.1.0"
schema = { path = "../schema" }
schema-diff = { path = "../schema-diff" }
serde = { version = "1.0.176", features = ["derive", "rc"] }
Expand Down
113 changes: 85 additions & 28 deletions compiler/crates/relay-compiler/src/build_project/artifact_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::fs::create_dir_all;
use std::fs::File;
use std::io;
use std::io::prelude::*;
use std::path::Path;
use std::path::PathBuf;
use std::sync::atomic::AtomicUsize;
use std::sync::Mutex;
Expand All @@ -19,14 +20,21 @@ use dashmap::DashSet;
use log::info;
use serde::Serialize;
use serde::Serializer;
use sha1::Digest;
use sha1::Sha1;

use crate::errors::BuildProjectError;
use crate::errors::Error;

type BuildProjectResult = Result<(), BuildProjectError>;

pub trait ArtifactWriter {
fn should_write(&self, path: &PathBuf, content: &[u8]) -> Result<bool, BuildProjectError>;
fn should_write(
&self,
path: &Path,
content: &[u8],
hash: Option<String>,
) -> Result<bool, BuildProjectError>;
fn write(&self, path: PathBuf, content: Vec<u8>) -> BuildProjectResult;
fn remove(&self, path: PathBuf) -> BuildProjectResult;
fn finalize(&self) -> crate::errors::Result<()>;
Expand Down Expand Up @@ -78,11 +86,21 @@ impl ArtifactFileWriter {
}
}
impl ArtifactWriter for ArtifactFileWriter {
fn should_write(&self, path: &PathBuf, content: &[u8]) -> Result<bool, BuildProjectError> {
content_is_different(path, content).map_err(|error| BuildProjectError::WriteFileError {
file: path.clone(),
fn should_write(
&self,
path: &Path,
content: &[u8],
hash: Option<String>,
) -> Result<bool, BuildProjectError> {
let op = |error| BuildProjectError::WriteFileError {
file: path.to_owned(),
source: error,
})
};
if let Some(file_hash) = hash {
hash_is_different(file_hash, content).map_err(op)
} else {
content_is_different(path, content).map_err(op)
}
}

fn write(&self, path: PathBuf, content: Vec<u8>) -> BuildProjectResult {
Expand Down Expand Up @@ -160,14 +178,23 @@ impl ArtifactDifferenceWriter {
}

impl ArtifactWriter for ArtifactDifferenceWriter {
fn should_write(&self, path: &PathBuf, content: &[u8]) -> Result<bool, BuildProjectError> {
Ok(!self.verify_changes_against_filesystem
|| content_is_different(path, content).map_err(|error| {
BuildProjectError::WriteFileError {
file: path.clone(),
source: error,
}
})?)
fn should_write(
&self,
path: &Path,
content: &[u8],
hash: Option<String>,
) -> Result<bool, BuildProjectError> {
let op = |error| BuildProjectError::WriteFileError {
file: path.to_owned(),
source: error,
};
if !self.verify_changes_against_filesystem {
Ok(true)
} else if let Some(file_hash) = hash {
hash_is_different(file_hash, content).map_err(op)
} else {
content_is_different(path, content).map_err(op)
}
}

fn write(&self, path: PathBuf, content: Vec<u8>) -> BuildProjectResult {
Expand Down Expand Up @@ -244,14 +271,23 @@ impl ArtifactDifferenceShardedWriter {
}

impl ArtifactWriter for ArtifactDifferenceShardedWriter {
fn should_write(&self, path: &PathBuf, content: &[u8]) -> Result<bool, BuildProjectError> {
Ok(!self.verify_changes_against_filesystem
|| content_is_different(path, content).map_err(|error| {
BuildProjectError::WriteFileError {
file: path.clone(),
source: error,
}
})?)
fn should_write(
&self,
path: &Path,
content: &[u8],
hash: Option<String>,
) -> Result<bool, BuildProjectError> {
let op = |error| BuildProjectError::WriteFileError {
file: path.to_owned(),
source: error,
};
if !self.verify_changes_against_filesystem {
Ok(true)
} else if let Some(file_hash) = hash {
hash_is_different(file_hash, content).map_err(op)
} else {
content_is_different(path, content).map_err(op)
}
}

fn write(&self, path: PathBuf, content: Vec<u8>) -> BuildProjectResult {
Expand All @@ -261,7 +297,7 @@ impl ArtifactWriter for ArtifactDifferenceShardedWriter {
file.write_all(&content)
})()
.map_err(|error| BuildProjectError::WriteFileError {
file: path.clone(),
file: path.to_owned(),
source: error,
})?;
self.codegen_records
Expand Down Expand Up @@ -305,7 +341,7 @@ fn ensure_file_directory_exists(file_path: &PathBuf) -> io::Result<()> {
Ok(())
}

fn content_is_different(path: &PathBuf, content: &[u8]) -> io::Result<bool> {
fn content_is_different(path: &Path, content: &[u8]) -> io::Result<bool> {
if path.exists() {
let existing_content = std::fs::read(path)?;
Ok(existing_content != content)
Expand All @@ -314,9 +350,20 @@ fn content_is_different(path: &PathBuf, content: &[u8]) -> io::Result<bool> {
}
}

fn hash_is_different(file_hash: String, content: &[u8]) -> io::Result<bool> {
let hasher = Sha1::new_with_prefix(content);
let content_hash = format!("{:x}", hasher.finalize());
Ok(file_hash != content_hash)
}

pub struct NoopArtifactWriter;
impl ArtifactWriter for NoopArtifactWriter {
fn should_write(&self, _: &PathBuf, _: &[u8]) -> Result<bool, BuildProjectError> {
fn should_write(
&self,
_: &Path,
_: &[u8],
_: Option<String>,
) -> Result<bool, BuildProjectError> {
Ok(false)
}

Expand All @@ -339,11 +386,21 @@ pub struct ArtifactValidationWriter {
}

impl ArtifactWriter for ArtifactValidationWriter {
fn should_write(&self, path: &PathBuf, content: &[u8]) -> Result<bool, BuildProjectError> {
content_is_different(path, content).map_err(|error| BuildProjectError::WriteFileError {
file: path.clone(),
fn should_write(
&self,
path: &Path,
content: &[u8],
hash: Option<String>,
) -> Result<bool, BuildProjectError> {
let op = |error| BuildProjectError::WriteFileError {
file: path.to_owned(),
source: error,
})
};
if let Some(file_hash) = hash {
hash_is_different(file_hash, content).map_err(op)
} else {
content_is_different(path, content).map_err(op)
}
}

fn write(&self, path: PathBuf, _: Vec<u8>) -> BuildProjectResult {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

use futures::future::BoxFuture;
use rustc_hash::FxHashMap;

use super::Artifact;

pub type GetArtifactsFileHashMapFn = Box<
dyn Send
+ Sync
+ for<'a> Fn(&'a [Artifact]) -> BoxFuture<'a, Option<FxHashMap<String, Option<String>>>>,
>;
21 changes: 20 additions & 1 deletion compiler/crates/relay-compiler/src/build_project/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ mod build_resolvers_schema;
pub mod build_schema;
mod generate_artifacts;
pub mod generate_extra_artifacts;
pub mod get_artifacts_file_hash_map;
mod log_program_stats;
mod persist_operations;
mod project_asts;
Expand Down Expand Up @@ -54,6 +55,7 @@ use relay_transforms::apply_transforms;
use relay_transforms::CustomTransformsConfig;
use relay_transforms::Programs;
use relay_typegen::FragmentLocations;
use rustc_hash::FxHashMap;
use schema::SDLSchema;
pub use source_control::add_to_mercurial;
pub use validate::validate;
Expand Down Expand Up @@ -377,6 +379,11 @@ pub async fn commit_project(
}
};

let artifacts_file_hash_map = match &config.get_artifacts_file_hash_map {
Some(get_fn) => get_fn(&artifacts).await,
_ => None,
};

// Write the generated artifacts to disk. This step is separate from
// generating artifacts or persisting to avoid partial writes in case of
// errors as much as possible.
Expand All @@ -391,6 +398,7 @@ pub async fn commit_project(
should_stop_updating_artifacts,
&artifacts,
&fragment_locations,
&artifacts_file_hash_map,
)?;
for artifact in &artifacts {
if !existing_artifacts.remove(&artifact.path) {
Expand Down Expand Up @@ -426,6 +434,7 @@ pub async fn commit_project(
should_stop_updating_artifacts,
&artifacts,
&fragment_locations,
&artifacts_file_hash_map,
)?;
artifacts.into_par_iter().for_each(|artifact| {
current_paths_map.insert(artifact);
Expand Down Expand Up @@ -524,6 +533,7 @@ fn write_artifacts<F: Fn() -> bool + Sync + Send>(
should_stop_updating_artifacts: F,
artifacts: &[Artifact],
fragment_locations: &FragmentLocations,
artifacts_file_hash_map: &Option<FxHashMap<String, Option<String>>>,
) -> Result<(), BuildProjectFailure> {
artifacts.par_chunks(8192).try_for_each_init(
|| Printer::with_dedupe(project_config),
Expand All @@ -541,7 +551,16 @@ fn write_artifacts<F: Fn() -> bool + Sync + Send>(
artifact.source_file,
fragment_locations,
);
if config.artifact_writer.should_write(&path, &content)? {
let file_hash = match artifact.path.to_str() {
Some(key) => artifacts_file_hash_map
.as_ref()
.and_then(|map| map.get(key).cloned().flatten()),
_ => None,
};
if config
.artifact_writer
.should_write(&path, &content, file_hash)?
{
config.artifact_writer.write(path, content)?;
}
}
Expand Down
4 changes: 4 additions & 0 deletions compiler/crates/relay-compiler/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ use watchman_client::pdu::ScmAwareClockData;
use crate::build_project::artifact_writer::ArtifactFileWriter;
use crate::build_project::artifact_writer::ArtifactWriter;
use crate::build_project::generate_extra_artifacts::GenerateExtraArtifactsFn;
use crate::build_project::get_artifacts_file_hash_map::GetArtifactsFileHashMapFn;
use crate::build_project::AdditionalValidations;
use crate::compiler_state::CompilerState;
use crate::compiler_state::ProjectName;
Expand Down Expand Up @@ -112,6 +113,8 @@ pub struct Config {

/// Path to which to write the output of the compilation
pub artifact_writer: Box<dyn ArtifactWriter + Send + Sync>,
// Function to get the file hash for an artifact file.
pub get_artifacts_file_hash_map: Option<GetArtifactsFileHashMapFn>,

/// Compile all files. Persist ids are still re-used unless
/// `Config::repersist_operations` is also set.
Expand Down Expand Up @@ -404,6 +407,7 @@ Example file:
load_saved_state_file: None,
generate_extra_artifacts: None,
generate_virtual_id_file_name: None,
get_artifacts_file_hash_map: None,
saved_state_config: config_file.saved_state_config,
saved_state_loader: None,
saved_state_version: hex::encode(hash.finalize()),
Expand Down
1 change: 1 addition & 0 deletions compiler/crates/relay-compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub use build_project::build_schema;
pub use build_project::find_duplicates;
pub use build_project::generate_artifacts;
pub use build_project::generate_extra_artifacts::GenerateExtraArtifactsFn;
pub use build_project::get_artifacts_file_hash_map::GetArtifactsFileHashMapFn;
pub use build_project::transform_program;
pub use build_project::validate;
pub use build_project::validate_program;
Expand Down

0 comments on commit 95ec269

Please sign in to comment.