Skip to content

Commit

Permalink
Add instance and config metadata to user agent
Browse files Browse the repository at this point in the history
We'd like to collect the same metadata that AWS SDKs gather so we can
better understand how different Mountpoint features are used. This
change adds support for detecting platform and instance metadata and
including it in HTTP User-agents. We follow the SDK template for
serializing this metadata.

To make this cleaner, I moved the instance info logic into the client
crate so that all users can get this kind of user agent. The new
`UserAgent` struct supports adding arbitrary key/value pairs, and we
use that in Mountpoint to record basic configurations.

User agents are always a bit annoying to test, but I manually verified
in a few cases (caching enabled/disabled) that this change was sending
the expected headers.

Signed-off-by: James Bornholt <[email protected]>
  • Loading branch information
jamesbornholt committed Nov 15, 2023
1 parent a1e4d86 commit 8b32850
Show file tree
Hide file tree
Showing 11 changed files with 353 additions and 131 deletions.
13 changes: 12 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions mountpoint-s3-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ libc = "0.2.126"
libc-stdhandle = "0.1.0"
md-5 = "0.10.5"
metrics = "0.20.1"
once_cell = "1.16.0"
percent-encoding = "2.2.0"
pin-project = "1.0.12"
platform-info = "2.0.2"
regex = "1.7.1"
static_assertions = "1.1.0"
thiserror = "1.0.34"
Expand Down
88 changes: 88 additions & 0 deletions mountpoint-s3-client/src/instance_info.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
//! A simple interface to retrieve information about the EC2 instance this client is running on by
//! querying the Instance Metadata Service (IMDS).
use std::env;

use once_cell::unsync::Lazy;
use thiserror::Error;

use crate::imds_crt_client::{IdentityDocument, ImdsCrtClient, ImdsQueryRequestError};

/// Information on the EC2 instance from the IMDS client.
///
/// The client is queried lazily, on the first call to one of the accessor methods, and the outcome
/// (success or failure) is stored so later calls reuse it. No query is made when the
/// `AWS_EC2_METADATA_DISABLED` environment variable is set to anything other than `false`
/// (compared case-insensitively); in that case the accessors report
/// [`InstanceInfoError::ImdsDisabled`].
#[derive(Debug)]
pub struct InstanceInfo {
/// Identity document fetched from IMDS on first access, or the reason it could not be fetched.
/// Held in a `once_cell::unsync::Lazy`, i.e. the non-synchronized variant.
document: Lazy<Result<IdentityDocument, InstanceInfoError>>,
}

impl InstanceInfo {
    /// Create a new instance without contacting IMDS.
    ///
    /// The IMDS client will only be queried when a method on the instance is called, and only if
    /// IMDS has not been disabled through the `AWS_EC2_METADATA_DISABLED` environment variable.
    /// Whatever the first query produces, including an error, is what every later call observes.
    pub fn new() -> Self {
        Self {
            // The initializer must stay inside the struct literal: the field's default `fn()`
            // initializer type is what lets this non-capturing closure coerce to a function pointer.
            document: Lazy::new(|| {
                if imds_disabled() {
                    tracing::debug!("EC2 instance info not retrieved: IMDS was disabled");
                    return Err(InstanceInfoError::ImdsDisabled);
                }

                let outcome = retrieve_instance_identity_document();
                match &outcome {
                    Ok(identity_document) => {
                        tracing::debug!(?identity_document, "got instance info from IMDS")
                    }
                    Err(err) => tracing::warn!("EC2 instance info not retrieved: {err:?}"),
                }
                outcome
            }),
        }
    }

    /// The region for the current instance, if it can be retrieved using the IMDS client.
    ///
    /// # Errors
    ///
    /// Returns a reference to the stored [`InstanceInfoError`] when IMDS is disabled or the
    /// identity document could not be fetched.
    pub fn region(&self) -> Result<&str, &InstanceInfoError> {
        match &*self.document {
            Ok(doc) => Ok(doc.region.as_str()),
            Err(err) => Err(err),
        }
    }

    /// The instance type for the current instance, if it can be retrieved using the IMDS client.
    ///
    /// # Errors
    ///
    /// Returns a reference to the stored [`InstanceInfoError`] when IMDS is disabled or the
    /// identity document could not be fetched.
    pub fn instance_type(&self) -> Result<&str, &InstanceInfoError> {
        match &*self.document {
            Ok(doc) => Ok(doc.instance_type.as_str()),
            Err(err) => Err(err),
        }
    }
}

impl Default for InstanceInfo {
fn default() -> Self {
Self::new()
}
}

/// Build an IMDS client and block the calling thread until the instance identity document has
/// been fetched.
///
/// A failure to construct the client is reported as [`InstanceInfoError::ImdsClientFailed`]; a
/// failure of the query itself is converted into an [`InstanceInfoError`] through `From`.
fn retrieve_instance_identity_document() -> Result<IdentityDocument, InstanceInfoError> {
    let client = ImdsCrtClient::new().map_err(InstanceInfoError::ImdsClientFailed)?;

    futures::executor::block_on(client.get_identity_document()).map_err(InstanceInfoError::from)
}

/// Whether IMDS access has been turned off through the environment.
///
/// IMDS counts as disabled when `AWS_EC2_METADATA_DISABLED` is set to any value other than
/// `false` (ASCII case-insensitive). An unset variable leaves IMDS enabled.
fn imds_disabled() -> bool {
    env::var_os("AWS_EC2_METADATA_DISABLED").map_or(false, |value| !value.eq_ignore_ascii_case("false"))
}

/// Errors returned by instance info queries.
///
/// The accessors on [`InstanceInfo`] return a shared reference to one of these values; the same
/// stored error is reported on every call once the first lookup has failed.
#[derive(Debug, Error)]
pub enum InstanceInfoError {
/// IMDS is disabled: the `AWS_EC2_METADATA_DISABLED` environment variable was set to a value
/// other than `false`, so no query was attempted.
#[error("IMDS is disabled")]
ImdsDisabled,

/// A query to IMDS failed. The `#[from]` attribute provides the conversion from
/// [`ImdsQueryRequestError`] into this variant.
#[error("IMDS query failed: {0}")]
ImdsQueryFailed(#[from] ImdsQueryRequestError),

/// The IMDS client couldn't be constructed; carries the error reported by the CRT.
#[error("could not construct IMDS client: {0}")]
ImdsClientFailed(mountpoint_s3_crt::common::error::Error),
}
3 changes: 3 additions & 0 deletions mountpoint-s3-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@ mod endpoint_config;
#[doc(hidden)]
pub mod failure_client;
pub mod imds_crt_client;
pub mod instance_info;
#[doc(hidden)]
pub mod mock_client;
mod object_client;
mod s3_crt_client;
#[doc(hidden)]
pub mod user_agent;
mod util;

pub use object_client::{ObjectClient, PutObjectRequest};
Expand Down
21 changes: 9 additions & 12 deletions mountpoint-s3-client/src/s3_crt_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ use tracing::{debug, error, trace, Span};

use self::get_object::S3GetObjectRequest;
use self::put_object::S3PutObjectRequest;
use crate::build_info;
use crate::endpoint_config::EndpointConfig;
use crate::endpoint_config::EndpointError;
use crate::object_client::*;
use crate::user_agent::UserAgent;

macro_rules! request_span {
($self:expr, $method:expr, $($field:tt)*) => {{
Expand Down Expand Up @@ -84,7 +84,7 @@ pub struct S3ClientConfig {
throughput_target_gbps: f64,
part_size: usize,
endpoint_config: EndpointConfig,
user_agent_prefix: Option<String>,
user_agent: Option<UserAgent>,
request_payer: Option<String>,
bucket_owner: Option<String>,
}
Expand All @@ -96,7 +96,7 @@ impl Default for S3ClientConfig {
throughput_target_gbps: 10.0,
part_size: 8 * 1024 * 1024,
endpoint_config: EndpointConfig::new("us-east-1"),
user_agent_prefix: None,
user_agent: None,
request_payer: None,
bucket_owner: None,
}
Expand Down Expand Up @@ -136,10 +136,10 @@ impl S3ClientConfig {
self
}

/// Set a prefix to prepend to the User-agent HTTP header for S3 requests
/// Set a constructor for the HTTP User-agent header for S3 requests
#[must_use = "S3ClientConfig follows a builder pattern"]
pub fn user_agent_prefix(mut self, user_agent_prefix: &str) -> Self {
self.user_agent_prefix = Some(user_agent_prefix.to_owned());
pub fn user_agent(mut self, user_agent: UserAgent) -> Self {
self.user_agent = Some(user_agent);
self
}

Expand Down Expand Up @@ -291,11 +291,8 @@ impl S3CrtClientInner {
}
client_config.part_size(config.part_size);

let client_agent = format!("mountpoint-s3-client/{}", build_info::FULL_VERSION);
let user_agent_header = match config.user_agent_prefix {
Some(prefix) => format!("{prefix} {client_agent}"),
None => client_agent,
};
let user_agent = config.user_agent.unwrap_or_else(|| UserAgent::new(None));
let user_agent_header = user_agent.build();

let s3_client = Client::new(&allocator, client_config).unwrap();

Expand Down Expand Up @@ -1031,7 +1028,7 @@ mod tests {
let expected_user_agent = "someprefix mountpoint-s3-client/";

let config = S3ClientConfig {
user_agent_prefix: Some(user_agent_prefix),
user_agent: Some(UserAgent::new(Some(user_agent_prefix))),
..Default::default()
};

Expand Down
167 changes: 167 additions & 0 deletions mountpoint-s3-client/src/user_agent.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
//! Utilities to construct a HTTP User-agent header in the AWS SDK format
use platform_info::{PlatformInfo, PlatformInfoAPI, UNameAPI};

use crate::build_info;
use crate::instance_info::InstanceInfo;

/// A builder for AWS SDK-style user agent headers.
///
/// The built header is a space-separated list: the optional prefix, then the
/// `mountpoint-s3-client/<version>` token, then every metadata field in the order it was added.
#[derive(Debug, Clone)]
pub struct UserAgent {
/// Already-formatted metadata entries (for example `os/...` or `md/...`), kept in insertion order.
fields: Vec<String>,
/// Optional text placed, as provided, at the very start of the header.
prefix: Option<String>,
}

impl UserAgent {
    /// Create a new User-agent builder with no metadata fields.
    ///
    /// `prefix`, when present, is emitted as-is (it is not sanitized) ahead of the client version
    /// token in the header produced by [`UserAgent::build`].
    pub fn new(prefix: Option<String>) -> Self {
        Self { fields: vec![], prefix }
    }

    /// Create a new User-agent builder with the default platform metadata fields.
    ///
    /// Platform details come from the local system and the instance type from `instance_info`.
    /// Any detail that cannot be determined is left out of the header.
    pub fn new_with_instance_info(prefix: Option<String>, instance_info: &InstanceInfo) -> Self {
        let user_agent_info = UserAgentInfo::new(instance_info);
        Self::new_with_user_agent_info(prefix, user_agent_info)
    }

    /// Build the default metadata fields from an explicit [`UserAgentInfo`].
    ///
    /// Fields are emitted in a fixed order: `os/<family>[#<release>]`, `md/arch#<machine>`, then
    /// `md/instance#<instance type>`. Each one is skipped when its source value is `None`. A
    /// release without a sysname produces no `os/` field at all.
    fn new_with_user_agent_info(prefix: Option<String>, user_agent_info: UserAgentInfo) -> Self {
        let mut fields = vec![];

        if let Some(sysname) = user_agent_info.sysname {
            // Canonicalize the OS family whether or not a release is known, so the `os/` field
            // always carries the same family token (e.g. `linux`, never the raw `Linux`).
            let family = sanitize_string(canonicalize_sysname(sysname));
            match user_agent_info.release {
                Some(release) => fields.push(format!("os/{}#{}", family, sanitize_string(release))),
                None => fields.push(format!("os/{}", family)),
            }
        }

        if let Some(machine) = user_agent_info.machine {
            fields.push(format!("md/arch#{}", sanitize_string(machine)));
        }

        if let Some(instance_type) = user_agent_info.instance_type {
            fields.push(format!("md/instance#{}", sanitize_string(instance_type)));
        }

        Self { fields, prefix }
    }

    /// Add a key-value metadata field to the header.
    ///
    /// The field is rendered as `md/<key>#<value>`, with both parts sanitized so they cannot
    /// contain the delimiters or any other disallowed character.
    pub fn key_value(&mut self, key: &str, value: &str) -> &mut Self {
        self.fields
            .push(format!("md/{}#{}", sanitize_string(key), sanitize_string(value)));
        self
    }

    /// Add a value-only metadata field to the header.
    ///
    /// The field is rendered as `md/<value>`, with the value sanitized.
    pub fn value(&mut self, value: &str) -> &mut Self {
        self.fields.push(format!("md/{}", sanitize_string(value)));
        self
    }

    /// Construct the final User-agent header string.
    ///
    /// The result is the optional prefix, the `mountpoint-s3-client/<version>` token, and then
    /// all metadata fields in insertion order, joined by single spaces.
    pub fn build(self) -> String {
        // Room for every field plus the prefix and the client version token.
        let mut fields = Vec::with_capacity(self.fields.len() + 2);
        if let Some(prefix) = self.prefix {
            fields.push(prefix);
        }
        fields.push(format!("mountpoint-s3-client/{}", build_info::FULL_VERSION));
        fields.extend(self.fields);
        fields.join(" ")
    }
}

/// Replace every character that is not allowed in a user agent component with `-`.
///
/// Allowed characters are ASCII letters, ASCII digits and the punctuation listed in
/// `VALID_CHARS`. Everything else is replaced one-for-one, including whitespace, non-ASCII
/// characters, and the `/` and `#` delimiters used by the field formats in this file. The output
/// therefore always has the same number of characters as the input.
fn sanitize_string(s: impl AsRef<str>) -> String {
    const VALID_CHARS: &[char] = &['!', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~'];
    // Use the ASCII-only check: `char::is_alphanumeric` also accepts Unicode letters and digits,
    // which are not valid in an HTTP header token.
    s.as_ref()
        .replace(|c: char| !c.is_ascii_alphanumeric() && !VALID_CHARS.contains(&c), "-")
}

/// Map a platform-reported system name onto the lowercase OS family token used in the `os/` field.
///
/// Recognized names are `Linux`, `Darwin` and `Windows_NT`; the match is exact and
/// case-sensitive. Any other name maps to `other`.
fn canonicalize_sysname(sysname: impl AsRef<str>) -> &'static str {
    match sysname.as_ref() {
        "Linux" => "linux",
        "Darwin" => "macos",
        // https://github.com/uutils/platform-info/blob/755cdc7d597469962a08a3f88f838c7cc8d2c0cb/src/platform/windows.rs#L523
        // Lowercase, like every other family token.
        "Windows_NT" => "windows",
        _ => "other",
    }
}

/// Platform and instance details that feed the default user agent fields.
///
/// To make this code testable we factor out the platform queries so we can mock them in tests.
/// Every field is optional; a `None` simply means the matching header field is omitted.
struct UserAgentInfo {
/// System name as reported by the platform query (e.g. `Linux`, `Darwin`).
sysname: Option<String>,
/// OS release string as reported by the platform query (e.g. `23.1.0`).
release: Option<String>,
/// Machine architecture as reported by the platform query (e.g. `aarch64`).
machine: Option<String>,
/// EC2 instance type, when the instance info lookup succeeded (e.g. `t4g.large`).
instance_type: Option<String>,
}

impl UserAgentInfo {
    /// Gather the platform details of the running system plus the instance type from
    /// `instance_info`.
    ///
    /// If the platform query fails, `sysname`, `release` and `machine` are all `None`. If the
    /// instance type lookup fails, `instance_type` is `None`. Neither failure is reported.
    fn new(instance_info: &InstanceInfo) -> Self {
        let (sysname, release, machine) = match PlatformInfo::new() {
            Ok(platform) => (
                Some(platform.sysname().to_string_lossy().into_owned()),
                Some(platform.release().to_string_lossy().into_owned()),
                Some(platform.machine().to_string_lossy().into_owned()),
            ),
            Err(_) => (None, None, None),
        };
        let instance_type = instance_info.instance_type().ok().map(String::from);

        Self {
            sysname,
            release,
            machine,
            instance_type,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a mocked [`UserAgentInfo`] with all platform fields present.
    fn mock_info(sysname: &str, release: &str, machine: &str, instance_type: Option<&str>) -> UserAgentInfo {
        UserAgentInfo {
            sysname: Some(sysname.to_string()),
            release: Some(release.to_string()),
            machine: Some(machine.to_string()),
            instance_type: instance_type.map(String::from),
        }
    }

    /// The platform fields appear in the expected order and format, after the client token.
    #[test]
    fn test_platform_fields() {
        // Linux ip-172-31-29-144.us-west-2.compute.internal 6.1.61-85.141.amzn2023.aarch64 #1 SMP Wed Nov 8 00:38:50 UTC 2023 aarch64 aarch64 aarch64 GNU/Linux
        let linux_release = "6.1.61-85.141.amzn2023.aarch64";

        let info = mock_info("Linux", linux_release, "aarch64", None);
        let header = UserAgent::new_with_user_agent_info(None, info).build();
        assert!(header.contains("os/linux#6.1.61-85.141.amzn2023.aarch64 md/arch#aarch64"));
        assert!(header.starts_with("mountpoint-s3-client/"));

        // Same platform, now with an instance type and a caller-supplied prefix.
        let info = mock_info("Linux", linux_release, "aarch64", Some("t4g.large"));
        let header = UserAgent::new_with_user_agent_info(Some("prefix".to_string()), info).build();
        assert!(header.contains("os/linux#6.1.61-85.141.amzn2023.aarch64 md/arch#aarch64 md/instance#t4g.large"));
        assert!(header.starts_with("prefix mountpoint-s3-client/"));

        // Darwin abcdefg.amazon.com 23.1.0 Darwin Kernel Version 23.1.0: Mon Oct 9 21:27:24 PDT 2023; root:xnu-10002.41.9~6/RELEASE_ARM64_T6000 arm64
        let info = mock_info("Darwin", "23.1.0", "arm64", None);
        let header = UserAgent::new_with_user_agent_info(None, info).build();
        assert!(header.contains("os/macos#23.1.0 md/arch#arm64"));
        assert!(header.starts_with("mountpoint-s3-client/"));
    }

    /// Characters outside the allowed set are replaced with `-`.
    #[test]
    fn test_sanitize() {
        let sanitized = sanitize_string("Java_HotSpot_(TM)_64-Bit_Server_VM");
        assert_eq!(sanitized, "Java_HotSpot_-TM-_64-Bit_Server_VM");
    }
}
1 change: 0 additions & 1 deletion mountpoint-s3/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ hdrhistogram = { version = "7.5.2", default-features = false }
lazy_static = "1.4.0"
libc = "0.2.126"
metrics = "0.20.1"
once_cell = "1.16.0"
regex = "1.7.1"
supports-color = "2.0.0"
syslog = "6.1.0"
Expand Down
Loading

0 comments on commit 8b32850

Please sign in to comment.