Skip to content

Commit

Permalink
perform traffic metering on vfs
Browse files Browse the repository at this point in the history
  • Loading branch information
byteocean committed Jan 11, 2024
1 parent ac63b51 commit f67513e
Show file tree
Hide file tree
Showing 12 changed files with 181 additions and 18 deletions.
1 change: 1 addition & 0 deletions include/dp_error.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ const char *dp_strerror_verbose(int error);
ERR(VNI_FREE6, 364) \
ERR(PORT_START, 381) \
ERR(PORT_STOP, 382) \
ERR(PORT_METER, 383) \
ERR(VNF_INSERT, 401) \
ERR(VM_HANDLE, 402) \
ERR(NO_BACKIP, 421) \
Expand Down
1 change: 1 addition & 0 deletions include/dp_mbuf_dyn.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct dp_flow {
enum dp_vnf_type vnf_type : 3;

uint16_t l3_type; //layer-3 for inner packets. it can be crafted or extracted from raw frames
uint32_t l3_payload_length; //layer-3 payload length for inner packets.
union {
rte_be32_t dst_addr;
uint8_t dst_addr6[16];
Expand Down
39 changes: 22 additions & 17 deletions include/dp_port.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <stdbool.h>
#include <net/if.h>
#include <rte_pci.h>
#include <rte_meter.h>
#include "dp_conf.h"
#include "dp_util.h"
#include "dp_firewall.h"
Expand Down Expand Up @@ -43,23 +44,26 @@ struct dp_port_iface {
};

// Per-port state. The last three members hold the software traffic-metering state
// that dp_port_meter_config() sets up.
struct dp_port {
bool is_pf;
uint16_t port_id;
char port_name[IF_NAMESIZE];
int socket_id;
uint8_t link_status;
bool allocated;
char vf_name[IF_NAMESIZE];
char dev_name[RTE_ETH_NAME_MAX_LEN];
uint8_t peer_pf_hairpin_tx_rx_queue_offset;
uint16_t peer_pf_port_id;
struct rte_ether_addr own_mac;
struct rte_ether_addr neigh_mac;
struct dp_port_iface iface;
struct rte_flow *default_jump_flow;
struct rte_flow *default_capture_flow;
bool captured;
struct dp_port_stats stats;
bool is_pf;
uint16_t port_id;
char port_name[IF_NAMESIZE];
int socket_id;
uint8_t link_status;
bool allocated;
char vf_name[IF_NAMESIZE];
char dev_name[RTE_ETH_NAME_MAX_LEN];
uint8_t peer_pf_hairpin_tx_rx_queue_offset;
uint16_t peer_pf_port_id;
struct rte_ether_addr own_mac;
struct rte_ether_addr neigh_mac;
struct dp_port_iface iface;
struct rte_flow *default_jump_flow;
struct rte_flow *default_capture_flow;
bool captured;
struct dp_port_stats stats;
// true once a non-zero public flow rate cap has been configured; checked on the datapath before metering
bool soft_metering_enabled;
// run-time state of the single-rate three-color meter (set by rte_meter_srtcm_config())
struct rte_meter_srtcm port_srtcm;
// meter profile derived from the configured public flow rate cap (set by rte_meter_srtcm_profile_config())
struct rte_meter_srtcm_profile port_srtcm_profile;
};

struct dp_ports {
Expand All @@ -81,6 +85,7 @@ void dp_ports_free(void);
int dp_start_port(struct dp_port *port);
int dp_stop_port(struct dp_port *port);

// Configure traffic metering for a port.
// total_flow_rate_cap is applied as the VF's maximum TX rate; public_flow_rate_cap configures
// the software meter and must not exceed total_flow_rate_cap (0 turns software metering off).
// Both caps are given in Mbits/s. Returns DP_OK on success, DP_ERROR otherwise.
int dp_port_meter_config(struct dp_port *port, uint64_t total_flow_rate_cap, uint64_t public_flow_rate_cap);

static __rte_always_inline
int dp_load_mac(struct dp_port *port)
Expand Down
2 changes: 2 additions & 0 deletions include/dp_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ struct rte_hash *dp_create_jhash_table(int entries, size_t key_len, const char *

void dp_free_jhash_table(struct rte_hash *table);

// Write 'rate' as the maximum TX rate of the VF behind 'port_id' through the SR-IOV sysfs interface.
// Returns DP_OK on success, DP_ERROR otherwise.
int dp_set_vf_rate_limit(uint16_t port_id, uint64_t rate);


// inspired by DPDK's RTE_ETHER_ADDR_PRT_FMT and RTE_ETHER_ADDR_BYTES
// network byte-order!
Expand Down
2 changes: 2 additions & 0 deletions include/grpc/dp_grpc_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ struct dpgrpc_iface {
char pxe_str[DP_IFACE_PXE_MAX_LEN]; // request (create) only
char pci_name[RTE_ETH_NAME_MAX_LEN];
uint8_t ul_addr6[DP_IPV6_ADDR_SIZE]; // reply only
uint64_t total_flow_rate_cap;
uint64_t public_flow_rate_cap;
};

struct dpgrpc_iface_id {
Expand Down
7 changes: 7 additions & 0 deletions proto/dpdk.proto
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,12 @@ message ProtocolFilter {
}
}

// Rate limits for traffic sent by an interface; only for virtual interfaces.
// public_rate must not be greater than total_rate; a public_rate of 0 leaves public traffic unmetered.
message MeteringParams {
uint64 total_rate = 1; // in Mbits/s, regulated rate for all outgoing traffic
uint64 public_rate = 2; // in Mbits/s, regulated rate for outgoing traffic to public networks
}

// Firewall Rules can be inserted and removed while the interface is running. Linked list is used despite O(n) nature, as the list
// needs to be traversed only once for each packet. (Stateful Firewall and each pass after that is O(1))
// Firewall rules have a priority. "0" being the highest and "65536" being the lowest. Default is "1000".
Expand Down Expand Up @@ -215,6 +221,7 @@ message CreateInterfaceRequest {
PxeConfig pxe_config = 6;
// Device with this name will be assigned if it exists, otherwise error is returned.
string device_name = 7;
MeteringParams metering_parameters = 8;
}

message CreateInterfaceResponse {
Expand Down
70 changes: 70 additions & 0 deletions src/dp_port.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#define DP_PORT_INIT_PF true
#define DP_PORT_INIT_VF false

// Unit-conversion constants for port metering.
// Every expansion is parenthesized so it stays a single operand wherever it is used
// (e.g. as a divisor or next to a unary operator).
#define DP_METER_CIR_BASE_VALUE (1024 * 1024) // number of bits in 1 Mbit
#define DP_METER_EBS_BREAK_VALUE 100 // 100 Mbits/s
#define DP_METER_MBITS_TO_BYTES (1024 * 1024 / 8) // number of bytes in 1 Mbit

static const struct rte_eth_conf port_conf_default = {
.rxmode = {
.mq_mode = RTE_ETH_MQ_RX_NONE,
Expand All @@ -44,6 +48,12 @@ static const struct rte_eth_conf port_conf_default = {
},
};

// Baseline single-rate three-color meter parameters.
// Per-port configuration starts from a copy of this and overrides the rate-dependent fields.
static const struct rte_meter_srtcm_params dp_srtcm_params_base = {
	// committed information rate: 100 Mbits/s expressed in bytes/s
	.cir = 100 * DP_METER_MBITS_TO_BYTES,
	// committed burst size: 4 KBytes
	.cbs = 4 * 1024,
	// excess burst size: the number of bytes in DP_METER_EBS_BREAK_VALUE Mbits
	.ebs = DP_METER_MBITS_TO_BYTES * DP_METER_EBS_BREAK_VALUE,
};

struct dp_port *_dp_port_table[DP_MAX_PORTS];
struct dp_port *_dp_pf_ports[DP_MAX_PF_PORTS];
struct dp_ports _dp_ports;
Expand Down Expand Up @@ -458,3 +468,63 @@ int dp_stop_port(struct dp_port *port)
port->allocated = false;
return DP_OK;
}

// Applies the cap for all traffic of the port by delegating to the VF rate limit.
// Returns whatever dp_set_vf_rate_limit() returns.
static int dp_port_total_flow_meter_config(struct dp_port *port, uint64_t total_flow_rate_cap)
{
	uint16_t vf_port_id = port->port_id;

	return dp_set_vf_rate_limit(vf_port_id, total_flow_rate_cap);
}

// Configures the port's software single-rate three-color meter for public traffic.
// public_flow_rate_cap is given in Mbits/s and must be non-zero (the caller handles 0 separately).
// Fills port->port_srtcm_profile and port->port_srtcm.
// Returns DP_OK on success, DP_ERROR when the cap is out of range or DPDK rejects the configuration.
static int dp_port_public_flow_meter_config(struct dp_port *port, uint64_t public_flow_rate_cap)
{
	struct rte_meter_srtcm_params srtcm_params = dp_srtcm_params_base;
	int ret;

	// the conversion below must not wrap around
	if (public_flow_rate_cap > UINT64_MAX / DP_METER_MBITS_TO_BYTES) {
		DPS_LOG_ERR("Public flow rate cap is too large", DP_LOG_PORT(port));
		return DP_ERROR;
	}

	// Mbits/s -> bytes/s
	// Multiply by the bytes-per-Mbit factor instead of dividing the cap by 8 first:
	// the integer division truncated, turning caps below 8 Mbits/s into a zero rate
	// and rounding every other cap down to a multiple of 8 Mbits/s.
	srtcm_params.cir = public_flow_rate_cap * DP_METER_MBITS_TO_BYTES;

	// for rates below the break value, shrink the excess burst along with the rate
	if (public_flow_rate_cap < DP_METER_EBS_BREAK_VALUE)
		srtcm_params.ebs = public_flow_rate_cap * DP_METER_MBITS_TO_BYTES;

	ret = rte_meter_srtcm_profile_config(&port->port_srtcm_profile, &srtcm_params);
	if (DP_FAILED(ret)) {
		DPS_LOG_ERR("Cannot configure meter profile", DP_LOG_PORT(port), DP_LOG_RET(ret));
		return DP_ERROR;
	}

	ret = rte_meter_srtcm_config(&port->port_srtcm, &port->port_srtcm_profile);
	if (DP_FAILED(ret)) {
		DPS_LOG_ERR("Cannot configure meter", DP_LOG_PORT(port), DP_LOG_RET(ret));
		return DP_ERROR;
	}

	return DP_OK;
}

// Configures both rate caps of a port.
// The total cap is applied first; the public cap then either enables the software meter
// (non-zero) or switches it off (zero). If the software meter cannot be set up, the total
// cap is reset to 0 again before the error is reported.
// Returns DP_OK on success, DP_ERROR otherwise.
int dp_port_meter_config(struct dp_port *port, uint64_t total_flow_rate_cap, uint64_t public_flow_rate_cap)
{
	if (public_flow_rate_cap > total_flow_rate_cap) {
		DPS_LOG_ERR("Public flow rate cap cannot be greater than total flow rate cap", DP_LOG_PORT(port));
		return DP_ERROR;
	}

	if (DP_FAILED(dp_port_total_flow_meter_config(port, total_flow_rate_cap))) {
		DPS_LOG_ERR("Cannot set total flow meter", DP_LOG_PORT(port));
		return DP_ERROR;
	}

	if (public_flow_rate_cap != 0) {
		if (DP_FAILED(dp_port_public_flow_meter_config(port, public_flow_rate_cap))) {
			DPS_LOG_ERR("Cannot set public flow meter", DP_LOG_PORT(port));
			// roll back the total cap; the call fails regardless of the rollback result
			if (DP_FAILED(dp_port_total_flow_meter_config(port, 0)))
				DPS_LOG_ERR("Cannot reset total flow meter", DP_LOG_PORT(port));
			return DP_ERROR;
		}
		port->soft_metering_enabled = true;
	} else {
		port->soft_metering_enabled = false;
	}

	return DP_OK;
}
50 changes: 49 additions & 1 deletion src/dp_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@
#include "dp_conf.h"
#include "dp_error.h"
#include "dp_log.h"
#include "dp_port.h"
#include "rte_flow/dp_rte_flow.h"

#define DP_SYSFS_PREFIX_MLX_VF_COUNT "/sys/class/net/"
#define DP_SYSFS_PREFIX_MLX_DEVICE "/sys/class/net/"

#define DP_SYSFS_PREFIX_MLX_VF_COUNT DP_SYSFS_PREFIX_MLX_DEVICE
#define DP_SYSFS_SUFFIX_MLX_VF_COUNT "/device/sriov_numvfs"

#define DP_SYSFS_PREFIX_MLX_MAX_TX_RATE "/device/sriov/"
#define DP_SYSFS_SUFFIX_MLX_MAX_TX_RATE "/max_tx_rate"
#define DP_SYSFS_MAX_PATH 256

// makes sure there is enough space to prevent collisions
Expand Down Expand Up @@ -160,3 +166,45 @@ void dp_free_jhash_table(struct rte_hash *table)
{
rte_hash_free(table);
}

// Sets the maximum TX rate of the VF behind port_id by writing 'rate' to the SR-IOV sysfs
// attribute of PF0:
//   DP_SYSFS_PREFIX_MLX_DEVICE <pf0 name> DP_SYSFS_PREFIX_MLX_MAX_TX_RATE <vf suffix> DP_SYSFS_SUFFIX_MLX_MAX_TX_RATE
// where <vf suffix> is the part of the port's VF name that follows the configured VF pattern.
// 'rate' is written verbatim as a decimal number (callers in this project pass Mbits/s).
// Returns DP_OK on success; DP_ERROR if the port is unknown, the path cannot be built,
// or the value cannot be written.
int dp_set_vf_rate_limit(uint16_t port_id, uint64_t rate)
{
	char filename[DP_SYSFS_MAX_PATH];
	const char *pattern = dp_conf_get_vf_pattern();
	struct dp_port *port = dp_get_port_by_id(port_id);
	size_t vf_pattern_len = 0;
	int path_len;
	int ret = DP_OK;
	FILE *fp;

	if (!port) {
		DPS_LOG_ERR("Cannot get port by id", DP_LOG_PORTID(port_id));
		return DP_ERROR;
	}

	// measure the pattern prefix, but never step past the end of the VF name
	while (pattern[vf_pattern_len] != '\0') {
		if (vf_pattern_len >= sizeof(port->vf_name) || port->vf_name[vf_pattern_len] == '\0') {
			DPS_LOG_ERR("VF name is shorter than the configured VF pattern", DP_LOG_PORTID(port_id));
			return DP_ERROR;
		}
		vf_pattern_len++;
	}

	path_len = snprintf(filename, sizeof(filename),
			    "%s%s%s%s%s",
			    DP_SYSFS_PREFIX_MLX_DEVICE,
			    dp_conf_get_pf0_name(),
			    DP_SYSFS_PREFIX_MLX_MAX_TX_RATE,
			    port->vf_name + vf_pattern_len,
			    DP_SYSFS_SUFFIX_MLX_MAX_TX_RATE);
	if (path_len < 0 || path_len >= (int)sizeof(filename)) {
		DPS_LOG_ERR("SR-IOV sysfs path to vf's max tx rate is too long");
		return DP_ERROR;
	}

	// the attribute is only written, so write-only access is sufficient
	fp = fopen(filename, "w");
	if (!fp) {
		DPS_LOG_ERR("Cannot open SR-IOV sysfs path to vf's max tx rate", DP_LOG_RET(errno));
		return DP_ERROR;
	}

	if (fprintf(fp, "%llu\n", (unsigned long long)rate) < 0) {
		DPS_LOG_ERR("Cannot write vf's max tx rate", DP_LOG_RET(errno));
		ret = DP_ERROR;
	}

	// stdio buffers the value, so a write failure may only be reported once fclose() flushes it
	if (fclose(fp) != 0 && ret == DP_OK) {
		DPS_LOG_ERR("Cannot write vf's max tx rate", DP_LOG_RET(errno));
		ret = DP_ERROR;
	}

	return ret;
}
4 changes: 4 additions & 0 deletions src/grpc/dp_async_grpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ const char* CreateInterfaceCall::FillRequest(struct dpgrpc_request* request)
return "Invalid ipv6_config.primary_address";
if (SNPRINTF_FAILED(request->add_iface.iface_id, request_.interface_id()))
return "Invalid interface_id";

request->add_iface.total_flow_rate_cap = request_.metering_parameters().total_rate();
request->add_iface.public_flow_rate_cap = request_.metering_parameters().public_rate();

return NULL;
}
void CreateInterfaceCall::ParseReply(struct dpgrpc_reply* reply)
Expand Down
11 changes: 11 additions & 0 deletions src/grpc/dp_grpc_impl.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,12 @@ static int dp_process_create_interface(struct dp_grpc_responder *responder)
goto route6_err;
}

if (!port->is_pf)
if (DP_FAILED(dp_port_meter_config(port, request->total_flow_rate_cap, request->public_flow_rate_cap))) {
ret = DP_GRPC_ERR_PORT_METER;
goto err;
}

rte_memcpy(reply->ul_addr6, port->iface.ul_ipv6, sizeof(reply->ul_addr6));
snprintf(reply->name, sizeof(reply->name), "%s", port->vf_name);
return DP_GRPC_OK;
Expand Down Expand Up @@ -546,6 +552,11 @@ static int dp_process_delete_interface(struct dp_grpc_responder *responder)
dp_virtsvc_del_iface(port->port_id);
#endif
dp_remove_iface_flows(port->port_id, ipv4, vni);

if (!port->is_pf)
if (DP_FAILED(dp_port_meter_config(port, 0, 0)))
ret = DP_GRPC_ERR_PORT_METER;

return ret;
}

Expand Down
1 change: 1 addition & 0 deletions src/nodes/cls_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod
}
#endif
df->l3_type = ntohs(ether_hdr->ether_type);
df->l3_payload_length = rte_pktmbuf_pkt_len(m) - (uint32_t)sizeof(struct rte_ether_hdr);
return CLS_NEXT_CONNTRACK;
}

Expand Down
11 changes: 11 additions & 0 deletions src/nodes/snat_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
#include <rte_graph.h>
#include <rte_graph_worker.h>
#include <rte_mbuf.h>
#include <rte_meter.h>
#include "dp_error.h"
#include "dp_flow.h"
#include "dp_log.h"
#include "dp_mbuf_dyn.h"
#include "dp_nat.h"
#include "dp_vni.h"
#include "nodes/common_node.h"
#include "rte_flow/dp_rte_flow.h"
#include "protocols/dp_icmpv6.h"
Expand Down Expand Up @@ -148,6 +150,15 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod
struct dp_port *port;
rte_be32_t dest_ip4;
uint32_t src_ip;
struct dp_port *in_port = dp_get_in_port(m);
enum rte_color color;

if (!in_port->is_pf && in_port->soft_metering_enabled && df->flow_type == DP_FLOW_SOUTH_NORTH
&& (df->l3_type == RTE_ETHER_TYPE_IPV4 || df->l3_type == RTE_ETHER_TYPE_IPV6)) {
color = rte_meter_srtcm_color_blind_check(&in_port->port_srtcm, &in_port->port_srtcm_profile, rte_rdtsc(), df->l3_payload_length);
if (color == RTE_COLOR_RED)
return SNAT_NEXT_DROP;
}

if (!cntrack)
return SNAT_NEXT_FIREWALL;
Expand Down

0 comments on commit f67513e

Please sign in to comment.