From f4ddeacbb6628eb4f5ff057da4e199f766bd24dc Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Sun, 3 Dec 2023 21:15:36 +0100 Subject: [PATCH 1/8] defrag: match up v4 and v6 packet setup v4 was doing redundant recursion level setup. v6 was missing PKT_REBUILT_FRAGMENT flag. --- src/defrag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/defrag.c b/src/defrag.c index 71cf4204c17a..36d6b8420712 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -288,7 +288,6 @@ Defrag4Reassemble(ThreadVars *tv, DefragTracker *tracker, Packet *p) } PKT_SET_SRC(rp, PKT_SRC_DEFRAG); rp->flags |= PKT_REBUILT_FRAGMENT; - rp->recursion_level = p->recursion_level; int fragmentable_offset = 0; uint16_t fragmentable_len = 0; @@ -430,6 +429,7 @@ Defrag6Reassemble(ThreadVars *tv, DefragTracker *tracker, Packet *p) goto error_remove_tracker; } PKT_SET_SRC(rp, PKT_SRC_DEFRAG); + rp->flags |= PKT_REBUILT_FRAGMENT; uint16_t unfragmentable_len = 0; int fragmentable_offset = 0; From 029035b9600ca09051806186480404fd203256ef Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Sun, 3 Dec 2023 21:48:44 +0100 Subject: [PATCH 2/8] nfq: use bool for verdicted packet var --- src/source-nfq.c | 4 ++-- src/source-nfq.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/source-nfq.c b/src/source-nfq.c index 5293f2529a4c..88793896dc80 100644 --- a/src/source-nfq.c +++ b/src/source-nfq.c @@ -439,7 +439,7 @@ static int NFQSetupPkt (Packet *p, struct nfq_q_handle *qh, void *data) p->ReleasePacket = NFQReleasePacket; p->nfq_v.ifi = nfq_get_indev(tb); p->nfq_v.ifo = nfq_get_outdev(tb); - p->nfq_v.verdicted = 0; + p->nfq_v.verdicted = false; #ifdef NFQ_GET_PAYLOAD_SIGNED ret = nfq_get_payload(tb, &pktdata); @@ -1082,7 +1082,7 @@ TmEcode NFQSetVerdict(Packet *p) /* we could also have a direct pointer but we need to have a ref count in this case */ NFQQueueVars *t = g_nfq_q + p->nfq_v.nfq_index; - p->nfq_v.verdicted = 1; + p->nfq_v.verdicted = true; /* can't verdict a "fake" packet */ if (PKT_IS_PSEUDOPKT(p)) { diff --git a/src/source-nfq.h b/src/source-nfq.h index b60e6d9cc537..8a83c0328927 100644 --- a/src/source-nfq.h +++ b/src/source-nfq.h @@ -40,7 +40,7 @@ typedef struct NFQPacketVars_ { int id; /* this nfq packets id */ uint16_t nfq_index; /* index in NFQ array */ - uint8_t verdicted; + bool verdicted; uint32_t mark; uint32_t ifi; From 5833d1e9ac59fff34bc3ca189a0f53d0eea06e23 Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Mon, 4 Dec 2023 09:18:00 +0100 Subject: [PATCH 3/8] nfq: minor code cleanup --- src/source-nfq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/source-nfq.c b/src/source-nfq.c index 88793896dc80..53e2c8b5a4e2 100644 --- a/src/source-nfq.c +++ b/src/source-nfq.c @@ -1073,10 +1073,10 @@ static inline void UpdateCounters(NFQQueueVars *t, const Packet *p) } #endif /* COUNTERS */ -/** - * \brief NFQ verdict function +/** \internal + * \brief NFQ verdict function */ -TmEcode NFQSetVerdict(Packet *p) +static TmEcode NFQSetVerdict(Packet *p) { int iter = 0; /* we could also have a direct pointer but we need to have a ref count in this case */ From 245fc7da99752ef6ec7d8b0bd0d29e32095389ba Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Mon, 4 Dec 2023 14:53:00 +0100 Subject: [PATCH 4/8] nfq: remove obsolete comment --- src/source-nfq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/source-nfq.c b/src/source-nfq.c index 53e2c8b5a4e2..9c9fdd256c7d 100644 --- a/src/source-nfq.c +++ b/src/source-nfq.c @@ -947,8 +947,6 @@ void *NFQGetThread(int number) /** * \brief NFQ function to get a packet from the kernel - * - * \note separate functions for Linux and Win32 for readability. */ static void NFQRecvPkt(NFQQueueVars *t, NFQThreadVars *tv) { From c7544d54bee2e1e02f69568693aa2edaa8a8e466 Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Mon, 4 Dec 2023 17:59:22 +0100 Subject: [PATCH 5/8] decode/tunnel: move tunnel verdicted logic In preparation of cleaning up thread safety, move "verdicted" logic out of Packet::flags. Unsafe writes to "flags" can potentially have side effects. --- src/decode.h | 9 ++++++--- src/packet.c | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/decode.h b/src/decode.h index 6392f3361e58..445b9d747680 100644 --- a/src/decode.h +++ b/src/decode.h @@ -618,6 +618,9 @@ typedef struct Packet_ /* enum PacketDropReason::PKT_DROP_REASON_* as uint8_t for compactness */ uint8_t drop_reason; + /* has tunnel been verdicted? */ + bool tunnel_verdicted; + /* tunnel/encapsulation handling */ struct Packet_ *root; /* in case of tunnel this is a ptr * to the 'real' packet, the one we @@ -801,8 +804,8 @@ static inline void TUNNEL_INCR_PKT_TPR(Packet *p) #define UNSET_TUNNEL_PKT(p) ((p)->flags &= ~PKT_TUNNEL) #define IS_TUNNEL_ROOT_PKT(p) (IS_TUNNEL_PKT(p) && (p)->root == NULL) -#define IS_TUNNEL_PKT_VERDICTED(p) (((p)->flags & PKT_TUNNEL_VERDICTED)) -#define SET_TUNNEL_PKT_VERDICTED(p) ((p)->flags |= PKT_TUNNEL_VERDICTED) +#define IS_TUNNEL_PKT_VERDICTED(p) (p)->tunnel_verdicted +#define SET_TUNNEL_PKT_VERDICTED(p) (p)->tunnel_verdicted = true enum DecodeTunnelProto { DECODE_TUNNEL_ETHERNET, @@ -1015,7 +1018,7 @@ void DecodeUnregisterCounters(void); #define PKT_STREAM_NOPCAPLOG BIT_U32(12) #define PKT_TUNNEL BIT_U32(13) -#define PKT_TUNNEL_VERDICTED BIT_U32(14) +// vacancy /** Packet checksum is not computed (TX packet for example) */ #define PKT_IGNORE_CHECKSUM BIT_U32(15) diff --git a/src/packet.c b/src/packet.c index 40f3bdfcf385..c798a0d11ea0 100644 --- a/src/packet.c +++ b/src/packet.c @@ -157,6 +157,7 @@ void PacketReinit(Packet *p) AppLayerDecoderEventsResetEvents(p->app_layer_events); p->next = NULL; p->prev = NULL; + p->tunnel_verdicted = false; p->root = NULL; p->livedev = NULL; PACKET_RESET_CHECKSUMS(p); From 263f442919abd667cbff5b281892bea846f527ae Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Sun, 3 Dec 2023 19:37:31 +0100 Subject: [PATCH 6/8] decode/tunnel: improve tunnel handling Give each packet explicit tunnel type `ttype`: none, root, child. Assigning happens when a (tunnel) packet is set up and is thread safe. --- src/decode.c | 35 +++++++++++++------------ src/decode.h | 58 +++++++++++++++++++++++++++++++---------- src/defrag.c | 10 ++----- src/detect-mark.c | 2 +- src/log-pcap.c | 10 +++---- src/output-json-alert.c | 2 +- src/packet.c | 1 + src/respond-reject.c | 2 +- src/source-af-packet.c | 4 +-- src/source-ipfw.c | 7 +++-- src/source-nfq.c | 4 +-- src/source-pfring.c | 2 +- src/source-windivert.c | 2 +- src/stream-tcp-list.c | 2 +- src/tmqh-packetpool.c | 20 +++++++------- src/util-profiling.c | 4 +-- 16 files changed, 95 insertions(+), 70 deletions(-) diff --git a/src/decode.c b/src/decode.c index d302c7654675..70ef282b9f1a 100644 --- a/src/decode.c +++ b/src/decode.c @@ -335,13 +335,15 @@ Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *pare p->livedev = parent->livedev; /* set the root ptr to the lowest layer */ - if (parent->root != NULL) + if (parent->root != NULL) { p->root = parent->root; - else + BUG_ON(parent->ttype != PacketTunnelChild); + } else { p->root = parent; - + parent->ttype = PacketTunnelRoot; + } /* tell new packet it's part of a tunnel */ - SET_TUNNEL_PKT(p); + p->ttype = PacketTunnelChild; ret = DecodeTunnel(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), proto); @@ -351,18 +353,15 @@ Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *pare { /* Not a (valid) tunnel packet */ SCLogDebug("tunnel packet is invalid"); - p->root = NULL; - UNSET_TUNNEL_PKT(p); TmqhOutputPacketpool(tv, p); SCReturnPtr(NULL, "Packet"); } - - /* tell parent packet it's part of a tunnel */ - SET_TUNNEL_PKT(parent); - - /* increment tunnel packet refcnt in the root packet */ + /* Update tunnel settings in parent */ + if (parent->root == NULL) { + parent->ttype = PacketTunnelRoot; + } TUNNEL_INCR_PKT_TPR(p); /* disable payload (not packet) inspection on the parent, as the payload @@ -397,10 +396,15 @@ Packet *PacketDefragPktSetup(Packet *parent, const uint8_t *pkt, uint32_t len, u } /* set the root ptr to the lowest layer */ - if (parent->root != NULL) + if (parent->root != NULL) { p->root = parent->root; - else + BUG_ON(parent->ttype != PacketTunnelChild); + } else { p->root = parent; + // we set parent->ttype later + } + /* tell new packet it's part of a tunnel */ + p->ttype = PacketTunnelChild; /* copy packet and set length, proto */ if (pkt && len) { @@ -410,8 +414,6 @@ Packet *PacketDefragPktSetup(Packet *parent, const uint8_t *pkt, uint32_t len, u p->ts = parent->ts; p->datalink = DLT_RAW; p->tenant_id = parent->tenant_id; - /* tell new packet it's part of a tunnel */ - SET_TUNNEL_PKT(p); memcpy(&p->vlan_id[0], &parent->vlan_id[0], sizeof(p->vlan_id)); p->vlan_idx = parent->vlan_idx; p->livedev = parent->livedev; @@ -426,7 +428,8 @@ Packet *PacketDefragPktSetup(Packet *parent, const uint8_t *pkt, uint32_t len, u void PacketDefragPktSetupParent(Packet *parent) { /* tell parent packet it's part of a tunnel */ - SET_TUNNEL_PKT(parent); + if (parent->ttype == PacketTunnelNone) + parent->ttype = PacketTunnelRoot; /* increment tunnel packet refcnt in the root packet */ TUNNEL_INCR_PKT_TPR(parent); diff --git a/src/decode.h b/src/decode.h index 445b9d747680..822025709048 100644 --- a/src/decode.h +++ b/src/decode.h @@ -407,6 +407,12 @@ enum PacketDropReason { PKT_DROP_REASON_MAX, }; +enum PacketTunnelType { + PacketTunnelNone, + PacketTunnelRoot, + PacketTunnelChild, +}; + /* forward declaration since Packet struct definition requires this */ struct PacketQueue_; @@ -472,6 +478,9 @@ typedef struct Packet_ * hash size still */ uint32_t flow_hash; + /* tunnel type: none, root or child */ + enum PacketTunnelType ttype; + SCTime_t ts; union { @@ -618,7 +627,7 @@ typedef struct Packet_ /* enum PacketDropReason::PKT_DROP_REASON_* as uint8_t for compactness */ uint8_t drop_reason; - /* has tunnel been verdicted? */ + /** has verdict on this tunneled packet been issued? */ bool tunnel_verdicted; /* tunnel/encapsulation handling */ @@ -649,8 +658,8 @@ typedef struct Packet_ /** lock to protect access to: * - tunnel_rtv_cnt * - tunnel_tpr_cnt - * - nfq_v.mark - * - flags + * - tunnel_verdicted + * - nfq_v.mark (if p->ttype != PacketTunnelNone) */ SCSpinlock tunnel_lock; } persistent; @@ -799,13 +808,14 @@ static inline void TUNNEL_INCR_PKT_TPR(Packet *p) #define TUNNEL_PKT_RTV(p) ((p)->root ? (p)->root->tunnel_rtv_cnt : (p)->tunnel_rtv_cnt) #define TUNNEL_PKT_TPR(p) ((p)->root ? (p)->root->tunnel_tpr_cnt : (p)->tunnel_tpr_cnt) -#define IS_TUNNEL_PKT(p) (((p)->flags & PKT_TUNNEL)) -#define SET_TUNNEL_PKT(p) ((p)->flags |= PKT_TUNNEL) -#define UNSET_TUNNEL_PKT(p) ((p)->flags &= ~PKT_TUNNEL) -#define IS_TUNNEL_ROOT_PKT(p) (IS_TUNNEL_PKT(p) && (p)->root == NULL) - -#define IS_TUNNEL_PKT_VERDICTED(p) (p)->tunnel_verdicted -#define SET_TUNNEL_PKT_VERDICTED(p) (p)->tunnel_verdicted = true +static inline bool PacketTunnelIsVerdicted(const Packet *p) +{ + return p->tunnel_verdicted; +} +static inline void PacketTunnelSetVerdicted(Packet *p) +{ + p->tunnel_verdicted = true; +} enum DecodeTunnelProto { DECODE_TUNNEL_ETHERNET, @@ -1017,8 +1027,7 @@ void DecodeUnregisterCounters(void); depth reached. */ #define PKT_STREAM_NOPCAPLOG BIT_U32(12) -#define PKT_TUNNEL BIT_U32(13) -// vacancy +// vacancy 2x /** Packet checksum is not computed (TX packet for example) */ #define PKT_IGNORE_CHECKSUM BIT_U32(15) @@ -1094,6 +1103,26 @@ static inline void DecodeSetNoPacketInspectionFlag(Packet *p) p->flags |= PKT_NOPACKET_INSPECTION; } +static inline bool PacketIsTunnelRoot(const Packet *p) +{ + return (p->ttype == PacketTunnelRoot); +} + +static inline bool PacketIsTunnelChild(const Packet *p) +{ + return (p->ttype == PacketTunnelChild); +} + +static inline bool PacketIsTunnel(const Packet *p) +{ + return (p->ttype != PacketTunnelNone); +} + +static inline bool PacketIsNotTunnel(const Packet *p) +{ + return (p->ttype == PacketTunnelNone); +} + /** \brief return true if *this* packet needs to trigger a verdict. * * If we have the root packet, and we have none outstanding, @@ -1113,10 +1142,11 @@ static inline bool VerdictTunnelPacket(Packet *p) SCLogDebug("tunnel: outstanding %u", outstanding); /* if there are packets outstanding, we won't verdict this one */ - if (IS_TUNNEL_ROOT_PKT(p) && !IS_TUNNEL_PKT_VERDICTED(p) && !outstanding) { + if (PacketIsTunnelRoot(p) && !PacketTunnelIsVerdicted(p) && !outstanding) { // verdict SCLogDebug("root %p: verdict", p); - } else if (!IS_TUNNEL_ROOT_PKT(p) && outstanding == 1 && p->root && IS_TUNNEL_PKT_VERDICTED(p->root)) { + } else if (PacketIsTunnelChild(p) && outstanding == 1 && p->root && + PacketTunnelIsVerdicted(p->root)) { // verdict SCLogDebug("tunnel %p: verdict", p); } else { diff --git a/src/defrag.c b/src/defrag.c index 36d6b8420712..9918747e7d6d 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -873,10 +873,7 @@ DefragInsertFrag(ThreadVars *tv, DecodeThreadVars *dtv, DefragTracker *tracker, r = Defrag4Reassemble(tv, tracker, p); if (r != NULL && tv != NULL && dtv != NULL) { StatsIncr(tv, dtv->counter_defrag_ipv4_reassembled); - if (DecodeIPV4(tv, dtv, r, (void *)r->ip4h, - IPV4_GET_IPLEN(r)) != TM_ECODE_OK) { - - UNSET_TUNNEL_PKT(r); + if (DecodeIPV4(tv, dtv, r, (void *)r->ip4h, IPV4_GET_IPLEN(r)) != TM_ECODE_OK) { r->root = NULL; TmqhOutputPacketpool(tv, r); r = NULL; @@ -890,10 +887,7 @@ DefragInsertFrag(ThreadVars *tv, DecodeThreadVars *dtv, DefragTracker *tracker, if (r != NULL && tv != NULL && dtv != NULL) { StatsIncr(tv, dtv->counter_defrag_ipv6_reassembled); if (DecodeIPV6(tv, dtv, r, (uint8_t *)r->ip6h, - IPV6_GET_PLEN(r) + IPV6_HEADER_LEN) - != TM_ECODE_OK) { - - UNSET_TUNNEL_PKT(r); + IPV6_GET_PLEN(r) + IPV6_HEADER_LEN) != TM_ECODE_OK) { r->root = NULL; TmqhOutputPacketpool(tv, r); r = NULL; diff --git a/src/detect-mark.c b/src/detect-mark.c index 90ed7750a4e5..9c5b9f46ea8e 100644 --- a/src/detect-mark.c +++ b/src/detect-mark.c @@ -228,7 +228,7 @@ static int DetectMarkPacket(DetectEngineThreadCtx *det_ctx, Packet *p, #ifdef NFQ const DetectMarkData *nf_data = (const DetectMarkData *)ctx; if (nf_data->mask) { - if (!(IS_TUNNEL_PKT(p))) { + if (PacketIsNotTunnel(p)) { /* coverity[missing_lock] */ p->nfq_v.mark = (nf_data->mark & nf_data->mask) | (p->nfq_v.mark & ~(nf_data->mask)); diff --git a/src/log-pcap.c b/src/log-pcap.c index 43b93b44defb..fdd0280fc853 100644 --- a/src/log-pcap.c +++ b/src/log-pcap.c @@ -245,7 +245,7 @@ static bool PcapLogCondition(ThreadVars *tv, void *thread_data, const Packet *p) return false; } - if (IS_TUNNEL_PKT(p) && !IS_TUNNEL_ROOT_PKT(p)) { + if (p->ttype == PacketTunnelChild) { return false; } return true; @@ -379,7 +379,7 @@ static int PcapLogOpenHandles(PcapLogData *pl, const Packet *p) PCAPLOG_PROFILE_START; int datalink = p->datalink; - if (IS_TUNNEL_PKT(p) && !IS_TUNNEL_ROOT_PKT(p)) { + if (p->ttype == PacketTunnelChild) { Packet *real_p = p->root; datalink = real_p->datalink; } @@ -588,7 +588,7 @@ static int PcapLog (ThreadVars *t, void *thread_data, const Packet *p) pl->pkt_cnt++; pl->h->ts.tv_sec = SCTIME_SECS(p->ts); pl->h->ts.tv_usec = SCTIME_USECS(p->ts); - if (IS_TUNNEL_PKT(p) && !IS_TUNNEL_ROOT_PKT(p)) { + if (p->ttype == PacketTunnelChild) { rp = p->root; pl->h->caplen = GET_PKT_LEN(rp); pl->h->len = GET_PKT_LEN(rp); @@ -666,7 +666,7 @@ static int PcapLog (ThreadVars *t, void *thread_data, const Packet *p) /* PcapLogDumpSegment has written over the PcapLogData variables so need to update */ pl->h->ts.tv_sec = SCTIME_SECS(p->ts); pl->h->ts.tv_usec = SCTIME_USECS(p->ts); - if (IS_TUNNEL_PKT(p) && !IS_TUNNEL_ROOT_PKT(p)) { + if (p->ttype == PacketTunnelChild) { rp = p->root; pl->h->caplen = GET_PKT_LEN(rp); pl->h->len = GET_PKT_LEN(rp); @@ -679,7 +679,7 @@ static int PcapLog (ThreadVars *t, void *thread_data, const Packet *p) } } - if (IS_TUNNEL_PKT(p) && !IS_TUNNEL_ROOT_PKT(p)) { + if (p->ttype == PacketTunnelChild) { rp = p->root; #ifdef HAVE_LIBLZ4 ret = PcapWrite(pl, comp, GET_PKT_DATA(rp), len); diff --git a/src/output-json-alert.c b/src/output-json-alert.c index 5f511b962d29..1622a323cd3f 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -562,7 +562,7 @@ static int AlertJson(ThreadVars *tv, JsonAlertLogThread *aft, const Packet *p) /* alert */ AlertJsonHeader(json_output_ctx, p, pa, jb, json_output_ctx->flags, &addr, xff_buffer); - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { AlertJsonTunnel(p, jb); } diff --git a/src/packet.c b/src/packet.c index c798a0d11ea0..30ef4f11b31a 100644 --- a/src/packet.c +++ b/src/packet.c @@ -103,6 +103,7 @@ void PacketReinit(Packet *p) p->vlan_id[0] = 0; p->vlan_id[1] = 0; p->vlan_idx = 0; + p->ttype = PacketTunnelNone; SCTIME_INIT(p->ts); p->datalink = 0; p->drop_reason = 0; diff --git a/src/respond-reject.c b/src/respond-reject.c index b53fad9d3f31..e395880ef52f 100644 --- a/src/respond-reject.c +++ b/src/respond-reject.c @@ -69,7 +69,7 @@ static TmEcode RespondRejectFunc(ThreadVars *tv, Packet *p, void *data) return TM_ECODE_OK; } - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { return TM_ECODE_OK; } diff --git a/src/source-af-packet.c b/src/source-af-packet.c index 3c783a149029..cf4cff192545 100644 --- a/src/source-af-packet.c +++ b/src/source-af-packet.c @@ -2194,7 +2194,7 @@ static int AFPBypassCallback(Packet *p) /* Bypassing tunneled packets is currently not supported * because we can't discard the inner packet only due to * primitive parsing in eBPF */ - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { return 0; } if (PKT_IS_IPV4(p)) { @@ -2349,7 +2349,7 @@ static int AFPXDPBypassCallback(Packet *p) /* Bypassing tunneled packets is currently not supported * because we can't discard the inner packet only due to * primitive parsing in eBPF */ - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { return 0; } if (PKT_IS_IPV4(p)) { diff --git a/src/source-ipfw.c b/src/source-ipfw.c index 4bdb7724598a..ecae507c41f3 100644 --- a/src/source-ipfw.c +++ b/src/source-ipfw.c @@ -614,9 +614,8 @@ TmEcode VerdictIPFW(ThreadVars *tv, Packet *p, void *data) } /* This came from NFQ. - * if this is a tunnel packet we check if we are ready to verdict - * already. */ - if (IS_TUNNEL_PKT(p)) { + * If this is a tunnel packet we check if we are ready to verdict already. */ + if (PacketIsTunnel(p)) { bool verdict = VerdictTunnelPacket(p); /* don't verdict if we are not ready */ @@ -628,7 +627,7 @@ TmEcode VerdictIPFW(ThreadVars *tv, Packet *p, void *data) /* no tunnel, verdict normally */ SCLogDebug("Setting verdict on non-tunnel"); retval = IPFWSetVerdict(tv, ptv, p); - } /* IS_TUNNEL_PKT end */ + } SCReturnInt(retval); } diff --git a/src/source-nfq.c b/src/source-nfq.c index 9c9fdd256c7d..a64f02632bc7 100644 --- a/src/source-nfq.c +++ b/src/source-nfq.c @@ -496,7 +496,7 @@ static void NFQReleasePacket(Packet *p) */ static int NFQBypassCallback(Packet *p) { - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { /* real tunnels may have multiple flows inside them, so bypass can't * work for those. Rebuilt packets from IP fragments are fine. */ if (p->flags & PKT_REBUILT_FRAGMENT) { @@ -1186,7 +1186,7 @@ TmEcode VerdictNFQ(ThreadVars *tv, Packet *p, void *data) { /* if this is a tunnel packet we check if we are ready to verdict * already. */ - if (IS_TUNNEL_PKT(p)) { + if (p->ttype != PacketTunnelNone) { SCLogDebug("tunnel pkt: %p/%p %s", p, p->root, p->root ? "upper layer" : "root"); bool verdict = VerdictTunnelPacket(p); /* don't verdict if we are not ready */ diff --git a/src/source-pfring.c b/src/source-pfring.c index 3ec02327f92d..f39574b75327 100644 --- a/src/source-pfring.c +++ b/src/source-pfring.c @@ -310,7 +310,7 @@ static int PfringBypassCallback(Packet *p) } /* Bypassing tunneled packets is currently not supported */ - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { return 0; } diff --git a/src/source-windivert.c b/src/source-windivert.c index 347d2e7a0f2d..3d37b1aaf6e5 100644 --- a/src/source-windivert.c +++ b/src/source-windivert.c @@ -769,7 +769,7 @@ static TmEcode WinDivertVerdictHelper(ThreadVars *tv, Packet *p) /* we can't verdict tunnel packets without ensuring all encapsulated * packets are verdicted */ - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { bool finalVerdict = VerdictTunnelPacket(p); if (!finalVerdict) { SCReturnInt(TM_ECODE_OK); diff --git a/src/stream-tcp-list.c b/src/stream-tcp-list.c index 2b8a4d079cef..0966b9577431 100644 --- a/src/stream-tcp-list.c +++ b/src/stream-tcp-list.c @@ -618,7 +618,7 @@ static void StreamTcpSegmentAddPacketData( return; } - if (IS_TUNNEL_PKT(p) && !IS_TUNNEL_ROOT_PKT(p)) { + if (PacketIsTunnelChild(p)) { Packet *rp = p->root; StreamTcpSegmentAddPacketDataDo(seg, rp, p); } else { diff --git a/src/tmqh-packetpool.c b/src/tmqh-packetpool.c index f71274b4502c..fb2f211012fc 100644 --- a/src/tmqh-packetpool.c +++ b/src/tmqh-packetpool.c @@ -337,7 +337,7 @@ void TmqhOutputPacketpool(ThreadVars *t, Packet *p) SCEnter(); SCLogDebug("Packet %p, p->root %p, alloced %s", p, p->root, BOOL2STR(p->pool == NULL)); - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { SCLogDebug("Packet %p is a tunnel packet: %s", p,p->root ? "upper layer" : "tunnel root"); @@ -345,9 +345,9 @@ void TmqhOutputPacketpool(ThreadVars *t, Packet *p) SCSpinlock *lock = p->root ? &p->root->persistent.tunnel_lock : &p->persistent.tunnel_lock; SCSpinLock(lock); - if (IS_TUNNEL_ROOT_PKT(p)) { + if (PacketIsTunnelRoot(p)) { SCLogDebug("IS_TUNNEL_ROOT_PKT == TRUE"); - CaptureStatsUpdate(t, p); + CaptureStatsUpdate(t, p); // TODO move out of lock const uint16_t outstanding = TUNNEL_PKT_TPR(p) - TUNNEL_PKT_RTV(p); SCLogDebug("root pkt: outstanding %u", outstanding); @@ -366,7 +366,7 @@ void TmqhOutputPacketpool(ThreadVars *t, Packet *p) * packets, return this to the pool. It's still referenced * by the tunnel packets, and we will return it * when we handle them */ - SET_TUNNEL_PKT_VERDICTED(p); + PacketTunnelSetVerdicted(p); PACKET_PROFILING_END(p); SCSpinUnlock(lock); @@ -381,9 +381,7 @@ void TmqhOutputPacketpool(ThreadVars *t, Packet *p) /* all tunnel packets are processed except us. Root already * processed. So return tunnel pkt and root packet to the * pool. */ - if (outstanding == 0 && - p->root && IS_TUNNEL_PKT_VERDICTED(p->root)) - { + if (outstanding == 0 && p->root && PacketTunnelIsVerdicted(p->root)) { SCLogDebug("root verdicted == true && no outstanding"); /* handle freeing the root as well*/ @@ -398,8 +396,8 @@ void TmqhOutputPacketpool(ThreadVars *t, Packet *p) * so get rid of the tunnel pkt only */ SCLogDebug("NOT IS_TUNNEL_PKT_VERDICTED (%s) || " - "outstanding > 0 (%u)", - (p->root && IS_TUNNEL_PKT_VERDICTED(p->root)) ? "true" : "false", + "outstanding > 0 (%u)", + (p->root && PacketTunnelIsVerdicted(p->root)) ? "true" : "false", outstanding); /* fall through */ @@ -414,8 +412,8 @@ void TmqhOutputPacketpool(ThreadVars *t, Packet *p) } SCLogDebug("[packet %p][%s] %s", p, - IS_TUNNEL_PKT(p) ? IS_TUNNEL_ROOT_PKT(p) ? "tunnel::root" : "tunnel::leaf" - : "no tunnel", + PacketIsTunnel(p) ? PacketIsTunnelRoot(p) ? "tunnel::root" : "tunnel::leaf" + : "no tunnel", (p->action & ACTION_DROP) ? "DROP" : "no drop"); /* we're done with the tunnel root now as well */ diff --git a/src/util-profiling.c b/src/util-profiling.c index 5d4bcc8905ce..a4f5bbef0ae7 100644 --- a/src/util-profiling.c +++ b/src/util-profiling.c @@ -1125,7 +1125,7 @@ void SCProfilingAddPacket(Packet *p) pd->tot += delta; pd->cnt ++; - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { pd = &packet_profile_data4[256]; if (pd->min == 0 || delta < pd->min) { @@ -1161,7 +1161,7 @@ void SCProfilingAddPacket(Packet *p) pd->tot += delta; pd->cnt ++; - if (IS_TUNNEL_PKT(p)) { + if (PacketIsTunnel(p)) { pd = &packet_profile_data6[256]; if (pd->min == 0 || delta < pd->min) { From 563a31b363118bf7e3b4135e53ce6223cf25665d Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Mon, 4 Dec 2023 10:46:34 +0100 Subject: [PATCH 7/8] decode/tunnel: split verdict logic Allows caller to take their own lock. --- src/decode.h | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/decode.h b/src/decode.h index 822025709048..63363f57ee3d 100644 --- a/src/decode.h +++ b/src/decode.h @@ -1123,6 +1123,26 @@ static inline bool PacketIsNotTunnel(const Packet *p) return (p->ttype == PacketTunnelNone); } +static inline bool VerdictTunnelPacketInternal(const Packet *p) +{ + const uint16_t outstanding = TUNNEL_PKT_TPR(p) - TUNNEL_PKT_RTV(p); + SCLogDebug("tunnel: outstanding %u", outstanding); + + /* if there are packets outstanding, we won't verdict this one */ + if (PacketIsTunnelRoot(p) && !PacketTunnelIsVerdicted(p) && !outstanding) { + SCLogDebug("root %p: verdict", p); + return true; + + } else if (PacketIsTunnelChild(p) && outstanding == 1 && p->root && + PacketTunnelIsVerdicted(p->root)) { + SCLogDebug("tunnel %p: verdict", p); + return true; + + } else { + return false; + } +} + /** \brief return true if *this* packet needs to trigger a verdict. * * If we have the root packet, and we have none outstanding, @@ -1135,23 +1155,10 @@ static inline bool PacketIsNotTunnel(const Packet *p) */ static inline bool VerdictTunnelPacket(Packet *p) { - bool verdict = true; + bool verdict; SCSpinlock *lock = p->root ? &p->root->persistent.tunnel_lock : &p->persistent.tunnel_lock; SCSpinLock(lock); - const uint16_t outstanding = TUNNEL_PKT_TPR(p) - TUNNEL_PKT_RTV(p); - SCLogDebug("tunnel: outstanding %u", outstanding); - - /* if there are packets outstanding, we won't verdict this one */ - if (PacketIsTunnelRoot(p) && !PacketTunnelIsVerdicted(p) && !outstanding) { - // verdict - SCLogDebug("root %p: verdict", p); - } else if (PacketIsTunnelChild(p) && outstanding == 1 && p->root && - PacketTunnelIsVerdicted(p->root)) { - // verdict - SCLogDebug("tunnel %p: verdict", p); - } else { - verdict = false; - } + verdict = VerdictTunnelPacketInternal(p); SCSpinUnlock(lock); return verdict; } From 3fb4a73bb6c243b8efe9cc8336e8e710f448f05b Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Mon, 4 Dec 2023 06:49:40 +0100 Subject: [PATCH 8/8] nfq: stricter thread sync No longer update `Packet::flags` for tracking packet modifications, as thread safety was not guaranteed. Clearly separate between various kinds of `Packet::nfq_v` accesses for: - mark - mark_modified - verdicted These are either done under lock (Packet::persistent.tunnel_lock) or, if the Packet is not part of a tunnel, not under lock. This is safe as in all the related logic the Packet's tunnel state is fixed and can no longer change. --- src/decode.h | 5 ++- src/detect-mark.c | 9 ++++- src/source-nfq.c | 101 ++++++++++++++++++++++++++++++---------------- src/source-nfq.h | 1 + 4 files changed, 77 insertions(+), 39 deletions(-) diff --git a/src/decode.h b/src/decode.h index 63363f57ee3d..9499303ffb4d 100644 --- a/src/decode.h +++ b/src/decode.h @@ -1021,8 +1021,9 @@ void DecodeUnregisterCounters(void); /** Packet is modified by the stream engine, we need to recalc the csum and \ reinject/replace */ #define PKT_STREAM_MODIFIED BIT_U32(10) -/** Packet mark is modified */ -#define PKT_MARK_MODIFIED BIT_U32(11) + +// vacancy + /** Exclude packet from pcap logging as it's part of a stream that has reassembly \ depth reached. */ #define PKT_STREAM_NOPCAPLOG BIT_U32(12) diff --git a/src/detect-mark.c b/src/detect-mark.c index 9c5b9f46ea8e..cccdefe8fd5d 100644 --- a/src/detect-mark.c +++ b/src/detect-mark.c @@ -229,10 +229,15 @@ static int DetectMarkPacket(DetectEngineThreadCtx *det_ctx, Packet *p, const DetectMarkData *nf_data = (const DetectMarkData *)ctx; if (nf_data->mask) { if (PacketIsNotTunnel(p)) { + /* for a non-tunnel packet we don't need a lock, + * and if we're here we can't turn into a tunnel + * packet anymore. */ + /* coverity[missing_lock] */ p->nfq_v.mark = (nf_data->mark & nf_data->mask) | (p->nfq_v.mark & ~(nf_data->mask)); - p->flags |= PKT_MARK_MODIFIED; + /* coverity[missing_lock] */ + p->nfq_v.mark_modified = true; } else { /* real tunnels may have multiple flows inside them, so marking * might 'mark' too much. Rebuilt packets from IP fragments @@ -242,7 +247,7 @@ static int DetectMarkPacket(DetectEngineThreadCtx *det_ctx, Packet *p, SCSpinLock(&tp->persistent.tunnel_lock); tp->nfq_v.mark = (nf_data->mark & nf_data->mask) | (tp->nfq_v.mark & ~(nf_data->mask)); - tp->flags |= PKT_MARK_MODIFIED; + tp->nfq_v.mark_modified = true; SCSpinUnlock(&tp->persistent.tunnel_lock); } } diff --git a/src/source-nfq.c b/src/source-nfq.c index a64f02632bc7..4e85336e42d6 100644 --- a/src/source-nfq.c +++ b/src/source-nfq.c @@ -142,7 +142,7 @@ static TmEcode DecodeNFQ(ThreadVars *, Packet *, void *); static TmEcode DecodeNFQThreadInit(ThreadVars *, const void *, void **); static TmEcode DecodeNFQThreadDeinit(ThreadVars *tv, void *data); -static TmEcode NFQSetVerdict(Packet *p); +static TmEcode NFQSetVerdict(Packet *p, const uint32_t mark_value, const bool mark_modified); static void NFQReleasePacket(Packet *p); typedef enum NFQMode_ { @@ -324,7 +324,8 @@ static void NFQVerdictCacheFlush(NFQQueueVars *t) #endif } -static int NFQVerdictCacheAdd(NFQQueueVars *t, Packet *p, uint32_t verdict) +static int NFQVerdictCacheAdd(NFQQueueVars *t, Packet *p, const uint32_t verdict, + const uint32_t mark_value, const bool mark_modified) { #ifdef HAVE_NFQ_SET_VERDICT_BATCH if (t->verdict_cache.maxlen == 0) @@ -333,13 +334,13 @@ static int NFQVerdictCacheAdd(NFQQueueVars *t, Packet *p, uint32_t verdict) if (p->flags & PKT_STREAM_MODIFIED || verdict == NF_DROP) goto flush; - if (p->flags & PKT_MARK_MODIFIED) { + if (mark_modified) { if (!t->verdict_cache.mark_valid) { if (t->verdict_cache.len) goto flush; t->verdict_cache.mark_valid = 1; - t->verdict_cache.mark = p->nfq_v.mark; - } else if (t->verdict_cache.mark != p->nfq_v.mark) { + t->verdict_cache.mark = mark_value; + } else if (t->verdict_cache.mark != mark_value) { goto flush; } } else if (t->verdict_cache.mark_valid) { @@ -439,6 +440,7 @@ static int NFQSetupPkt (Packet *p, struct nfq_q_handle *qh, void *data) p->ReleasePacket = NFQReleasePacket; p->nfq_v.ifi = nfq_get_indev(tb); p->nfq_v.ifo = nfq_get_outdev(tb); + /* coverity[missing_lock] */ p->nfq_v.verdicted = false; #ifdef NFQ_GET_PAYLOAD_SIGNED @@ -481,9 +483,28 @@ static int NFQSetupPkt (Packet *p, struct nfq_q_handle *qh, void *data) static void NFQReleasePacket(Packet *p) { - if (unlikely(!p->nfq_v.verdicted)) { - PacketDrop(p, ACTION_DROP, PKT_DROP_REASON_NFQ_ERROR); - NFQSetVerdict(p); + if (PacketIsNotTunnel(p)) { + if (unlikely(!p->nfq_v.verdicted)) { + PacketDrop(p, ACTION_DROP, PKT_DROP_REASON_NFQ_ERROR); + /* coverity[missing_lock] */ + NFQSetVerdict(p, p->nfq_v.mark, p->nfq_v.mark_modified); + /* coverity[missing_lock] */ + p->nfq_v.verdicted = true; + } + } else { + Packet *root_p = p->root ? p->root : p; + SCSpinlock *lock = &root_p->persistent.tunnel_lock; + SCSpinLock(lock); + const bool verdicted = p->nfq_v.verdicted; + // taken from root packet + const bool mark_modified = root_p->nfq_v.mark_modified; + const uint32_t mark_value = root_p->nfq_v.mark; + root_p->nfq_v.verdicted = true; + SCSpinUnlock(lock); + if (!verdicted) { + PacketDrop(p, ACTION_DROP, PKT_DROP_REASON_NFQ_ERROR); + NFQSetVerdict(p, mark_value, mark_modified); + } } PacketFreeOrRelease(p); } @@ -504,7 +525,7 @@ static int NFQBypassCallback(Packet *p) SCSpinLock(&tp->persistent.tunnel_lock); tp->nfq_v.mark = (nfq_config.bypass_mark & nfq_config.bypass_mask) | (tp->nfq_v.mark & ~nfq_config.bypass_mask); - tp->flags |= PKT_MARK_MODIFIED; + tp->nfq_v.mark_modified = true; SCSpinUnlock(&tp->persistent.tunnel_lock); return 1; } @@ -513,7 +534,8 @@ static int NFQBypassCallback(Packet *p) /* coverity[missing_lock] */ p->nfq_v.mark = (nfq_config.bypass_mark & nfq_config.bypass_mask) | (p->nfq_v.mark & ~nfq_config.bypass_mask); - p->flags |= PKT_MARK_MODIFIED; + /* coverity[missing_lock] */ + p->nfq_v.mark_modified = true; } return 1; @@ -1073,15 +1095,14 @@ static inline void UpdateCounters(NFQQueueVars *t, const Packet *p) /** \internal * \brief NFQ verdict function + * \param p Packet to work with. Will be the tunnel root packet in case of tunnel. */ -static TmEcode NFQSetVerdict(Packet *p) +static TmEcode NFQSetVerdict(Packet *p, const uint32_t mark_value, const bool mark_modified) { int iter = 0; /* we could also have a direct pointer but we need to have a ref count in this case */ NFQQueueVars *t = g_nfq_q + p->nfq_v.nfq_index; - p->nfq_v.verdicted = true; - /* can't verdict a "fake" packet */ if (PKT_IS_PSEUDOPKT(p)) { return TM_ECODE_OK; @@ -1101,7 +1122,7 @@ static TmEcode NFQSetVerdict(Packet *p) UpdateCounters(t, p); #endif /* COUNTERS */ - int ret = NFQVerdictCacheAdd(t, p, verdict); + int ret = NFQVerdictCacheAdd(t, p, verdict, mark_value, mark_modified); if (ret == 0) { NFQMutexUnlock(t); return TM_ECODE_OK; @@ -1112,26 +1133,21 @@ static TmEcode NFQSetVerdict(Packet *p) default: case NFQ_ACCEPT_MODE: case NFQ_ROUTE_MODE: - if (p->flags & PKT_MARK_MODIFIED) { + if (mark_modified) { #ifdef HAVE_NFQ_SET_VERDICT2 if (p->flags & PKT_STREAM_MODIFIED) { - ret = nfq_set_verdict2(t->qh, p->nfq_v.id, verdict, - p->nfq_v.mark, + ret = nfq_set_verdict2(t->qh, p->nfq_v.id, verdict, mark_value, GET_PKT_LEN(p), GET_PKT_DATA(p)); } else { - ret = nfq_set_verdict2(t->qh, p->nfq_v.id, verdict, - p->nfq_v.mark, - 0, NULL); + ret = nfq_set_verdict2(t->qh, p->nfq_v.id, verdict, mark_value, 0, NULL); } #else /* fall back to old function */ if (p->flags & PKT_STREAM_MODIFIED) { - ret = nfq_set_verdict_mark(t->qh, p->nfq_v.id, verdict, - htonl(p->nfq_v.mark), + ret = nfq_set_verdict_mark(t->qh, p->nfq_v.id, verdict, htonl(mark_value), GET_PKT_LEN(p), GET_PKT_DATA(p)); } else { - ret = nfq_set_verdict_mark(t->qh, p->nfq_v.id, verdict, - htonl(p->nfq_v.mark), - 0, NULL); + ret = nfq_set_verdict_mark( + t->qh, p->nfq_v.id, verdict, htonl(mark_value), 0, NULL); } #endif /* HAVE_NFQ_SET_VERDICT2 */ } else { @@ -1141,28 +1157,29 @@ static TmEcode NFQSetVerdict(Packet *p) } else { ret = nfq_set_verdict(t->qh, p->nfq_v.id, verdict, 0, NULL); } - } break; case NFQ_REPEAT_MODE: #ifdef HAVE_NFQ_SET_VERDICT2 if (p->flags & PKT_STREAM_MODIFIED) { ret = nfq_set_verdict2(t->qh, p->nfq_v.id, verdict, - (nfq_config.mark & nfq_config.mask) | (p->nfq_v.mark & ~nfq_config.mask), + (nfq_config.mark & nfq_config.mask) | (mark_value & ~nfq_config.mask), GET_PKT_LEN(p), GET_PKT_DATA(p)); } else { ret = nfq_set_verdict2(t->qh, p->nfq_v.id, verdict, - (nfq_config.mark & nfq_config.mask) | (p->nfq_v.mark & ~nfq_config.mask), + (nfq_config.mark & nfq_config.mask) | (mark_value & ~nfq_config.mask), 0, NULL); } #else /* fall back to old function */ if (p->flags & PKT_STREAM_MODIFIED) { ret = nfq_set_verdict_mark(t->qh, p->nfq_v.id, verdict, - htonl((nfq_config.mark & nfq_config.mask) | (p->nfq_v.mark & ~nfq_config.mask)), + htonl((nfq_config.mark & nfq_config.mask) | + (mark_value & ~nfq_config.mask)), GET_PKT_LEN(p), GET_PKT_DATA(p)); } else { ret = nfq_set_verdict_mark(t->qh, p->nfq_v.id, verdict, - htonl((nfq_config.mark & nfq_config.mask) | (p->nfq_v.mark & ~nfq_config.mask)), + htonl((nfq_config.mark & nfq_config.mask) | + (mark_value & ~nfq_config.mask)), 0, NULL); } #endif /* HAVE_NFQ_SET_VERDICT2 */ @@ -1186,19 +1203,33 @@ TmEcode VerdictNFQ(ThreadVars *tv, Packet *p, void *data) { /* if this is a tunnel packet we check if we are ready to verdict * already. */ - if (p->ttype != PacketTunnelNone) { + if (PacketIsTunnel(p)) { + Packet *root_p = p->root ? p->root : p; + SCLogDebug("tunnel pkt: %p/%p %s", p, p->root, p->root ? "upper layer" : "root"); - bool verdict = VerdictTunnelPacket(p); + + SCSpinlock *lock = &root_p->persistent.tunnel_lock; + SCSpinLock(lock); + const bool do_verdict = VerdictTunnelPacketInternal(p); + // taken from root packet + const bool mark_modified = root_p->nfq_v.mark_modified; + const uint32_t mark_value = root_p->nfq_v.mark; + root_p->nfq_v.verdicted = do_verdict; + SCSpinUnlock(lock); /* don't verdict if we are not ready */ - if (verdict == true) { - int ret = NFQSetVerdict(p->root ? p->root : p); + if (do_verdict == true) { + int ret = NFQSetVerdict(root_p, mark_value, mark_modified); if (ret != TM_ECODE_OK) { return ret; } } } else { /* no tunnel, verdict normally */ - int ret = NFQSetVerdict(p); + + /* coverity[missing_lock] */ + p->nfq_v.verdicted = true; + + int ret = NFQSetVerdict(p, p->nfq_v.mark, p->nfq_v.mark_modified); if (ret != TM_ECODE_OK) { return ret; } diff --git a/src/source-nfq.h b/src/source-nfq.h index 8a83c0328927..5b481d3274f2 100644 --- a/src/source-nfq.h +++ b/src/source-nfq.h @@ -41,6 +41,7 @@ typedef struct NFQPacketVars_ int id; /* this nfq packets id */ uint16_t nfq_index; /* index in NFQ array */ bool verdicted; + bool mark_modified; uint32_t mark; uint32_t ifi;