From 95f9bfcb7659008304618a7807a022b6a1c455c4 Mon Sep 17 00:00:00 2001
From: Yael Shamai <111040837+YaelShamai@users.noreply.github.com>
Date: Sun, 17 Nov 2024 12:52:39 +0200
Subject: [PATCH] Ys_splunk_enhance (#37078)

* start
* continue
* add validate indexes
* little fix
* improve
* add exception to validate bathed events
* code improvement
* finish
* build improvements
* fix failing unit tests
* add test
* release notes
* build improvements
* build improvements
* remove dot
* put dot
* build improvements
* build improvements
* fix
* build effort
* pre commit
* pre commit
* pre commit
* pre commit
* pre commit
* pre commit
* improve docs
* CR improvements
* release notes and improvements
* .
* update release notes
* remove unify file
* erase release notes
* readd release notes
* change pack metadata
* change pack metadata
* add dot
* fix readme

---
 .../SplunkPy/Integrations/SplunkPy/README.md  |  16 +-
 .../Integrations/SplunkPy/SplunkPy.py         | 149 ++++++++++--
 .../Integrations/SplunkPy/SplunkPy.yml        |   6 +-
 .../Integrations/SplunkPy/SplunkPy_test.py    | 212 +++++++++++++++++-
 Packs/SplunkPy/ReleaseNotes/3_1_45.md         |   3 +
 Packs/SplunkPy/pack_metadata.json             |   2 +-
 6 files changed, 354 insertions(+), 34 deletions(-)
 create mode 100644 Packs/SplunkPy/ReleaseNotes/3_1_45.md

diff --git a/Packs/SplunkPy/Integrations/SplunkPy/README.md b/Packs/SplunkPy/Integrations/SplunkPy/README.md
index 0e22bf9ddaf9..44ae30b2acf7 100644
--- a/Packs/SplunkPy/Integrations/SplunkPy/README.md
+++ b/Packs/SplunkPy/Integrations/SplunkPy/README.md
@@ -516,7 +516,8 @@ Parses the raw part of the event.
 ### splunk-submit-event-hec
 
 ***
-Sends events to an HTTP event collector using the Splunk platform JSON event protocol.
+Sends events to Splunk. If the `batch_event_data` or `entry_id` argument is provided, all arguments related to a single event are ignored.
+
 ##### Base Command
 
 `splunk-submit-event-hec`
@@ -524,14 +525,23 @@ Sends events to an HTTP event collector using the Splunk platform JSON event pro
 | **Argument Name** | **Description** | **Required** |
 | --- | --- | --- |
-| event | The event payload key-value pair. An example string: "event": "Access log test message.". | Required |
+| event | The event payload key-value pair. An example string: "event": "Access log test message.". | Optional |
 | fields | Fields for indexing that do not occur in the event payload itself. Accepts multiple, comma-separated, fields. | Optional |
 | index | The index name. | Optional |
 | host | The hostname. | Optional |
 | source_type | The user-defined event source type. | Optional |
 | source | The user-defined event source. | Optional |
 | time | The epoch-formatted time. | Optional |
-| request_channel | A channel identifier (ID) where to send the request, must be a Globally Unique Identifier (GUID). **If the indexer acknowledgment is turned on, a channel is required.** | Optional |
+| batch_event_data | A batch of events to send to Splunk. For example, `{"event": "something happened at 14/10/2024 12:29", "fields": {"severity": "INFO", "category": "test2, test2"}, "index": "index0","sourcetype": "sourcetype0","source": "/example/something" } {"event": "something happened at 14/10/2024 13:29", "index": "index1", "sourcetype": "sourcetype1","source": "/example/something", "fields":{ "fields" : "severity: INFO, category: test2, test2"}}`. **If provided, the arguments related to a single event and the `entry_id` argument are ignored.** | Optional |
+| entry_id | The entry ID in Cortex XSOAR of the file containing a batch of events. The content of the file should be valid batch event data, in the same format expected by the `batch_event_data` argument. **If provided, the arguments related to a single event are ignored.** | Optional |
+
+##### Batched events description
+This command allows sending events to Splunk, either as a single event or as a batch of multiple events.
+To send a single event, use the `event`, `fields`, `host`, `index`, `source`, `source_type`, and `time` arguments.
+To send a batch of events, there are two options: either use the `batch_event_data` argument, or use the `entry_id` argument (for a file uploaded to Cortex XSOAR).
+Batch format requirements: The batch must be a single string containing valid dictionaries, each representing an event. Events should not be separated by commas. Each dictionary should include all necessary fields for an event. For example: `{"event": "event occurred at 14/10/2024 12:29", "fields": {"severity": "INFO", "category": "test1"}, "index": "index0", "sourcetype": "sourcetype0", "source": "/path/event1"} {"event": "event occurred at 14/10/2024 13:29", "index": "index1", "sourcetype": "sourcetype1", "source": "/path/event2", "fields": {"severity": "INFO", "category": "test2"}}`.
+This formatted string can be passed directly via `batch_event_data`, or, if saved in a file, the file can be uploaded to Cortex XSOAR and the `entry_id` (e.g., ${File.[4].EntryID}) should be provided.
 
 ##### Context Output
 
diff --git a/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.py b/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.py
index a789353e6e82..7d9a07cd0d4f 100644
--- a/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.py
+++ b/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.py
@@ -15,6 +15,7 @@
 from splunklib.binding import AuthenticationError, HTTPError, namespace
 
+INTEGRATION_LOG = "Splunk- "
 OUTPUT_MODE_JSON = 'json'  # type of response from splunk-sdk query (json/csv/xml)
 # Define utf8 as default encoding
 params = demisto.params()
@@ -2615,6 +2616,81 @@ def splunk_submit_event_command(service: client.Service, args: dict):
     return_results(f'Event was created in Splunk index: {r.name}')
 
 
+def validate_indexes(indexes, service):
+    """Validates that all provided Splunk indexes exist within the Splunk service instance."""
+    real_indexes = service.indexes
+    real_indexes_names_set = set()
+    for real_index in real_indexes:
+        real_indexes_names_set.add(real_index.name)
+    indexes_set = set(indexes)
+    return indexes_set.issubset(real_indexes_names_set)
+
+
+def get_events_from_file(entry_id):
+    """
+    Retrieves event data, as a string, from a file in Cortex XSOAR based on the specified entry ID.
+
+    Args:
+        entry_id (str): The entry ID corresponding to the file containing event data.
+
+    Returns:
+        str: The content of the file as a string.
+ """ + get_file_path_res = demisto.getFilePath(entry_id) + file_path = get_file_path_res["path"] + with open(file_path, encoding='utf-8') as file_data: + return file_data.read() + + +def parse_fields(fields): + """ + Parses the `fields` input into a dictionary. + + - If `fields` is a valid JSON string, it is converted into the corresponding dictionary. + - If `fields` is not valid JSON, it is wrapped as a dictionary with a single key-value pair, + where the key is `"fields"` and the value is the original `fields` string. + + Examples: + 1. Input: '{"severity": "INFO", "category": "test2, test2"}' + Output: {"severity": "INFO", "category": "test2, test2"} + + 2. Input: 'severity: INFO, category: test2, test2' + Output: {"fields": "severity: INFO, category: test2, test2"} + """ + if fields: + try: + parsed_fields = json.loads(fields) + except Exception: + demisto.debug('Fields provided are not valid JSON; treating as a single field') + parsed_fields = {'fields': fields} + return parsed_fields + return None + + +def ensure_valid_json_format(events: str | dict): + """Converts a batch of events to a valid JSON format for processing. + + Args: + events (str): The batch of events to be formatted as JSON. + + Raises: + DemistoException: If the input cannot be converted to a valid JSON format, an exception is raised. + + Returns: + list: A list of JSON objects derived from the input events. + """ + try: + events_str = str(events) + + events_str = events_str.replace("'", '"') + rgx = re.compile(r"}[\s]*{") + valid_json_events = rgx.sub("},{", events_str) + valid_json_events = json.loads(f"[{valid_json_events}]") + return valid_json_events + except Exception as e: + raise DemistoException(f'{str(e)}\nMake sure that the events are in the correct format.') + + def splunk_submit_event_hec( hec_token: str | None, baseurl: str, @@ -2625,27 +2701,39 @@ def splunk_submit_event_hec( source_type: str | None, source: str | None, time_: str | None, - request_channel: str | None + request_channel: str | None, + batch_event_data: str | None, + entry_id: int | None, + service ): if hec_token is None: raise Exception('The HEC Token was not provided') - parsed_fields = None - if fields: - try: - parsed_fields = json.loads(fields) - except Exception: - parsed_fields = {'fields': fields} + if batch_event_data: + events = batch_event_data - args = assign_params( - event=event, - host=host, - fields=parsed_fields, - index=index, - sourcetype=source_type, - source=source, - time=time_ - ) + elif entry_id: + demisto.debug(f'{INTEGRATION_LOG} - loading events data from file with {entry_id=}') + events = get_events_from_file(entry_id) + + else: + parsed_fields = parse_fields(fields) + + events = assign_params( + event=event, + host=host, + fields=parsed_fields, + index=index, + sourcetype=source_type, + source=source, + time=time_ + ) + valid_json_events = ensure_valid_json_format(events) + + indexes = [d.get('index') for d in valid_json_events if d.get('index')] + + if not validate_indexes(indexes, service): + raise DemistoException('Index name does not exist in your splunk instance') headers = { 'Authorization': f'Splunk {hec_token}', @@ -2654,15 +2742,23 @@ def splunk_submit_event_hec( if request_channel: headers['X-Splunk-Request-Channel'] = request_channel + data = '' + if entry_id or batch_event_data: + data = events + else: + data = json.dumps(events) + + demisto.debug(f'{INTEGRATION_LOG} sending {len(valid_json_events)}') + return requests.post( f'{baseurl}/services/collector/event', - data=json.dumps(args), + 
data=data,
         headers=headers,
         verify=VERIFY_CERTIFICATE,
     )
 
 
-def splunk_submit_event_hec_command(params: dict, args: dict):
+def splunk_submit_event_hec_command(params: dict, service, args: dict):
     hec_token = params.get('cred_hec_token', {}).get('password') or params.get('hec_token')
     baseurl = params.get('hec_url')
     if baseurl is None:
@@ -2676,18 +2772,25 @@ def splunk_submit_event_hec_command(params: dict, args: dict):
     source = args.get('source')
     time_ = args.get('time')
     request_channel = args.get('request_channel')
+    batch_event_data = args.get('batch_event_data')
+    entry_id = args.get('entry_id')
+
+    if not event and not batch_event_data and not entry_id:
+        raise DemistoException("Invalid input: Please specify one of the following arguments: `event`, "
+                               "`batch_event_data`, or `entry_id`.")
 
     response_info = splunk_submit_event_hec(hec_token, baseurl, event, fields, host, index, source_type, source, time_,
-                                            request_channel)
+                                            request_channel, batch_event_data, entry_id, service)
 
     if 'Success' not in response_info.text:
         return_error(f"Could not send event to Splunk {response_info.text}")
     else:
         response_dict = json.loads(response_info.text)
         if response_dict and 'ackId' in response_dict:
-            return_results(f"The event was sent successfully to Splunk. AckID: {response_dict['ackId']}")
+            return_results(f"The events were sent successfully to Splunk. AckID: {response_dict['ackId']}")
         else:
-            return_results('The event was sent successfully to Splunk.')
+            return_results('The events were sent successfully to Splunk.')
 
 
 def splunk_edit_notable_event_command(base_url: str, token: str, auth_token: str | None, args: dict) -> None:
@@ -3151,7 +3254,7 @@ def main():  # pragma: no cover
             token = get_auth_session_key(service)
             splunk_edit_notable_event_command(base_url, token, auth_token, args)
         elif command == 'splunk-submit-event-hec':
-            splunk_submit_event_hec_command(params, args)
+            splunk_submit_event_hec_command(params, service, args)
         elif command == 'splunk-job-status':
             return_results(splunk_job_status(service, args))
         elif command.startswith('splunk-kv-') and service is not None:
diff --git a/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.yml b/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.yml
index 0e54e672a0ee..211c0f7e4689 100644
--- a/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.yml
+++ b/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.yml
@@ -454,7 +454,7 @@ script:
       Event payload key-value pair.
      String example: "event": "Access log test message".
    name: event
-    required: true
+    required: false
  - description: Fields for indexing that do not occur in the event payload itself. Accepts multiple, comma-separated, fields.
    name: fields
  - description: The index name.
@@ -469,6 +469,10 @@
    name: time
  - description: A channel identifier (ID) where to send the request, must be a Globally Unique Identifier (GUID). If the indexer acknowledgment is turned on, a channel is required.
    name: request_channel
+  - description: 'A batch of events to send to Splunk. For example, `{"event": "something happened at 14/10/2024 12:29", "fields": {"severity": "INFO", "category": "test2, test2"}, "index": "index0","sourcetype": "sourcetype0","source": "/example/something" } {"event": "something happened at 14/10/2024 13:29", "index": "index1", "sourcetype": "sourcetype1","source": "/example/something", "fields":{ "fields" : "severity: INFO, category: test2, test2"}}`. If provided, the arguments related to a single event and the `entry_id` argument are ignored.'
+ name: batch_event_data + - description: The entry ID in Cortex XSOAR of the file containing a batch of events. If provided, the arguments related to a single event are ignored. + name: entry_id description: Sends events to an HTTP Event Collector using the Splunk platform JSON event protocol. name: splunk-submit-event-hec - arguments: diff --git a/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy_test.py b/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy_test.py index 8cb20e6aaecc..bc1129b6e706 100644 --- a/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy_test.py +++ b/Packs/SplunkPy/Integrations/SplunkPy/SplunkPy_test.py @@ -11,6 +11,8 @@ from splunklib import results import SplunkPy as splunk from pytest_mock import MockerFixture +from unittest.mock import MagicMock, patch + RETURN_ERROR_TARGET = 'SplunkPy.return_error' @@ -358,7 +360,7 @@ def __init__(self, text): mocker.patch.object(splunk, "splunk_submit_event_hec", return_value=MockRes(text)) return_error_mock = mocker.patch(RETURN_ERROR_TARGET) - splunk.splunk_submit_event_hec_command(params={"hec_url": "mock_url"}, args={}) + splunk.splunk_submit_event_hec_command(params={"hec_url": "mock_url"}, args={"entry_id": "some_entry"}, service=Service) err_msg = return_error_mock.call_args[0][0] assert err_msg == f"Could not send event to Splunk {text}" @@ -390,13 +392,13 @@ def test_splunk_submit_event_hec_command_request_channel(mocker): Then - The return result object contains the correct message. """ - args = {"request_channel": "11111111-1111-1111-1111-111111111111"} + args = {"request_channel": "11111111-1111-1111-1111-111111111111", "entry_id": "some_entry"} mocker.patch.object(splunk, "splunk_submit_event_hec", return_value=check_request_channel(args)) moc = mocker.patch.object(demisto, 'results') splunk.splunk_submit_event_hec_command(params={"hec_url": "mock_url"}, - args=args) + args=args, service=Service) readable_output = moc.call_args[0][0] - assert readable_output == "The event was sent successfully to Splunk. AckID: 1" + assert readable_output == "The events were sent successfully to Splunk. AckID: 1" def test_splunk_submit_event_hec_command_without_request_channel(mocker): @@ -408,12 +410,12 @@ def test_splunk_submit_event_hec_command_without_request_channel(mocker): Then - The return result object contains the correct message. 
""" - args = {} + args = {"entry_id": "some_entry"} mocker.patch.object(splunk, "splunk_submit_event_hec", return_value=check_request_channel(args)) return_error_mock = mocker.patch(RETURN_ERROR_TARGET) splunk.splunk_submit_event_hec_command(params={"hec_url": "mock_url"}, - args=args) + args=args, service=Service) err_msg = return_error_mock.call_args[0][0] assert err_msg == 'Could not send event to Splunk {"text":"Data channel is missing","code":10}' @@ -2837,3 +2839,201 @@ def test_escape_invalid_chars_in_drilldown_json(drilldown_search, expected_res): res = splunk.escape_invalid_chars_in_drilldown_json(drilldown_search) assert expected_res in json.loads(res)['query'] + + +# Define minimal classes to simulate the service and index behavior +class Index: + def __init__(self, name): + self.name = name + + +class ServiceIndex: + def __init__(self, indexes): + self.indexes = [Index(name) for name in indexes] + + +@pytest.mark.parametrize( + "given_indexes, service_indexes, expected", + [ + # Test case: All indexes exist in the service + (["index1", "index2"], ["index1", "index2", "index3"], True), + + # Test case: Some indexes do not exist in the service + (["index1", "index4"], ["index1", "index2", "index3"], False), + + # Test case: Empty input indexes list + ([], ["index1", "index2", "index3"], True), + ] +) +def test_validate_indexes(given_indexes, service_indexes, expected): + """ + Given: A list of indexes' names. + When: Calling validate_indexes function. + Then: The function returns `True` if all the given index names exist within the Splunk service instance; + otherwise, it returns `False`. + """ + from SplunkPy import validate_indexes + service = ServiceIndex(service_indexes) + # Assert that the function returns the expected result + assert validate_indexes(given_indexes, service) == expected + + +@pytest.mark.parametrize( + "fields, expected", + [ + # Valid JSON input + ('{"key": "value"}', {"key": "value"}), + + # Valid JSON with multiple key-value pairs + ('{"key1": "value1", "key2": 2}', {"key1": "value1", "key2": 2}), + + # Invalid JSON input (non-JSON string) + ("not a json string", {"fields": "not a json string"}), + + # Another invalid JSON input (partially structured JSON) + ("{'key': 'value'}", {"fields": "{'key': 'value'}"}), + ] +) +def test_parse_fields(fields, expected): + """ + Given: A string representing fields, which may be a valid JSON string or a regular string. + When: The parse_fields function is called with the given string. + Then: If the string is valid JSON, the function returns a dictionary of the parsed fields. If the string is not valid JSON, + the function returns a dictionary with a single key-value pair, where the entire input string is the key. + """ + from SplunkPy import parse_fields + result = parse_fields(fields) + assert result == expected + + +@pytest.mark.parametrize( + "events, expected", + [ + ("{'key1': 'value1'} {'key2': 'value2'}", [{"key1": "value1"}, {"key2": "value2"}]), + ("{'key1': 'value1'}", [{"key1": "value1"}]), + ({"key1": "value1", "key2": "value2"}, [{"key1": "value1", "key2": "value2"}]), + ({"key1": {"nestedKey": "nestedValue"}, "key2": "value2"}, [{"key1": {"nestedKey": "nestedValue"}, "key2": "value2"}]), + ] +) +def test_ensure_valid_json_format_valid_inputs(events, expected): + """ + Given: A string or dictionary representing valid JSON inputs, including single, multiple, and nested events. + When: Calling ensure_valid_json_format. 
+ Then: The function should return a list of dictionaries corresponding to the parsed events. + """ + from SplunkPy import ensure_valid_json_format + assert ensure_valid_json_format(events) == expected + + +@pytest.mark.parametrize( + "invalid_events", + [ + "{key1: {'nestedKey': 'nestedValue'}}", # Missing double quotes on the outer key + "{'key1': {nestedKey: 'nestedValue'}}", # Missing double quotes on nested key + "{'key1': 'value1', 'key2': 'value2'", # Missing closing brace + "{'key1': 'value1', 'key2': 'value2'}, {'key3': 'value3'", # Missing closing brace on one event + "{'key1': 'value1' 'key2': 'value2'}", # Missing comma between key-value pairs + ] +) +def test_ensure_valid_json_format_invalid_inputs(invalid_events): + """ + Given: A string representing various invalid JSON formats (e.g., missing quotes, missing commas, unmatched braces). + When: Calling ensure_valid_json_format. + Then: The function should raise a DemistoException due to invalid JSON format. + """ + from SplunkPy import ensure_valid_json_format + with pytest.raises(DemistoException, match=r"Make sure that the events are in the correct format"): + ensure_valid_json_format(invalid_events) + + +@pytest.mark.parametrize("event, batch_event_data, entry_id, expected_data", [ + ("Somthing happened", None, None, '{"event": "Somthing happened", "fields": {"field1": "value1"}, "index": "main"}'), + (None, "{'event': 'some event', 'index': 'some index'} {'event': 'some event', 'index': 'some index'}", None, + "{'event': 'some event', 'index': 'some index'} {'event': 'some event', 'index': 'some index'}"), # Batch event data + (None, None, "some entry_id", "{'event': 'some event', 'index': 'some index'} {'event': 'some event', 'index': 'some index'}") +]) +@patch("requests.post") +@patch("SplunkPy.get_events_from_file") # Replace with the actual module +@patch("SplunkPy.ensure_valid_json_format") +@patch("SplunkPy.validate_indexes") +@patch("SplunkPy.parse_fields") +def test_splunk_submit_event_hec( + mock_parse_fields, + mock_validate_indexes, + mock_ensure_valid_json_format, + mock_get_events_from_file, + mock_post, + event, + batch_event_data, + entry_id, + expected_data +): + """ + Given: Different types of event submission (single event, batch event, entry_id). + When: Calling splunk_submit_event_hec. + Then: Ensure a POST request is sent with the correct data and headers. 
+ """ + from SplunkPy import splunk_submit_event_hec + # Arrange + hec_token = "valid_token" + baseurl = "https://splunk.example.com" + fields = '{"field1": "value1"}' + parsed_fields = {"field1": "value1"} + + # Mocks + mock_parse_fields.return_value = parsed_fields + mock_validate_indexes.return_value = True + + if event: + # Single event + mock_ensure_valid_json_format.return_value = [{"event": event}] + elif batch_event_data: + # Batch event data + mock_ensure_valid_json_format.return_value = [{'event': 'some event', 'index': 'some index'}, + {'event': 'some event', 'index': 'some index'}] + elif entry_id: + # Entry ID + mock_get_events_from_file.return_value =\ + "{'event': 'some event', 'index': 'some index'} {'event': 'some event', 'index': 'some index'}" + mock_ensure_valid_json_format.return_value =\ + [{'event': 'some event', 'index': 'some index'}, {'event': 'some event', 'index': 'some index'}] + + # Act + splunk_submit_event_hec( + hec_token=hec_token, + baseurl=baseurl, + event=event, + fields=fields, + host=None, + index="main", + source_type=None, + source=None, + time_=None, + request_channel="test_channel", + batch_event_data=batch_event_data, + entry_id=entry_id, + service=MagicMock(), + ) + + mock_post.assert_called_once_with( + f"{baseurl}/services/collector/event", + data=expected_data, + headers={ + "Authorization": f"Splunk {hec_token}", + "Content-Type": "application/json", + "X-Splunk-Request-Channel": "test_channel", + }, + verify=True, + ) + + +def test_splunk_submit_event_hec_command_no_required_arguments(): + """ Given: none of these arguments: 'entry_id', 'event', 'batch_event_data' + When: Runing splunk-submit-event-hec command + Then: An exception is thrown + """ + from SplunkPy import splunk_submit_event_hec_command + with pytest.raises(DemistoException, + match=r"Invalid input: Please specify one of the following arguments: `event`, " + r"`batch_event_data`, or `entry_id`."): + splunk_submit_event_hec_command({'hec_url': 'hec_url'}, None, {}) diff --git a/Packs/SplunkPy/ReleaseNotes/3_1_45.md b/Packs/SplunkPy/ReleaseNotes/3_1_45.md new file mode 100644 index 000000000000..aa6fcf3ed868 --- /dev/null +++ b/Packs/SplunkPy/ReleaseNotes/3_1_45.md @@ -0,0 +1,3 @@ +#### Integrations +##### SplunkPy +- Added *batch_event_data* and *entry_id* arguments to **splunk-submit-event-hec** command. \ No newline at end of file diff --git a/Packs/SplunkPy/pack_metadata.json b/Packs/SplunkPy/pack_metadata.json index 64cebd6f0957..ca109bfe417c 100644 --- a/Packs/SplunkPy/pack_metadata.json +++ b/Packs/SplunkPy/pack_metadata.json @@ -2,7 +2,7 @@ "name": "Splunk", "description": "Run queries on Splunk servers.", "support": "xsoar", - "currentVersion": "3.1.44", + "currentVersion": "3.1.45", "author": "Cortex XSOAR", "url": "https://www.paloaltonetworks.com/cortex", "email": "",