diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 0000000..32b7d56 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,40 @@ +name: "Bug Report" +description: Report software deficiencies +labels: ["bug"] +body: +- type: markdown + attributes: + value: | + Use this form to report any functional or performance bugs you've found in the software. + + Be sure to check if your [issue](https://github.com/y-scope/clp-ffi-go/issues) has already been reported. + +- type: textarea + attributes: + label: Bug + description: "Describe what's wrong and if applicable, what you expected instead." + validations: + required: true + +- type: input + attributes: + label: clp-ffi-go version + description: "The release version number or development commit hash that has the bug." + placeholder: "Version number or commit hash" + validations: + required: true + +- type: textarea + attributes: + label: Environment + description: "The environment in which you're running/using clp-ffi-go." + placeholder: "OS version, docker version, etc." + validations: + required: true + +- type: textarea + attributes: + label: Reproduction steps + description: "List each step required to reproduce the bug." + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..0086358 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: true diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml new file mode 100644 index 0000000..9484ca6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -0,0 +1,23 @@ +name: "Feature/Change Request" +description: Request a feature or change +labels: ["enhancement"] +body: +- type: markdown + attributes: + value: | + Use this form to request a feature/change in the software, or the project as a whole. 
+ +- type: textarea + attributes: + label: Request + description: "Describe your request and why it's important." + validations: + required: true + +- type: textarea + attributes: + label: Possible implementation + description: "Describe any implementations you have in mind." + validations: + required: true + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..9672f6d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,9 @@ +# References + + +# Description + + +# Validation performed + + diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..50fb14f --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,66 @@ +name: Build + +on: + pull_request: + push: + workflow_call: + +# TODO: add separate jobs for building, linting, and testing c++ +jobs: + prebuilt-test: + strategy: + matrix: + os: [macos-latest, ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + + - uses: actions/setup-go@v4 + with: + go-version: '1.20.x' + check-latest: true + + - run: go clean -cache && go build ./... + + - run: go test -count=1 ./... + + build-lint-test: + strategy: + matrix: + os: [macos-latest, ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + + - uses: actions/setup-go@v4 + with: + go-version: '1.20.x' + check-latest: true + + - if: ${{ 'macos-latest' == matrix.os }} + run: | + brew update + brew install llvm + + - name: Remove repo's generated c++ libraries and go code + run: | + rm ./lib/* ./**/*_string.go + + - run: | + go install mvdan.cc/gofumpt@latest + go install github.com/segmentio/golines@latest + go install golang.org/x/tools/cmd/stringer@latest + + - run: go clean -cache && go generate ./... 
+ + - run: | + diff="$(golines -m 100 -t 4 --base-formatter='gofumpt' --dry-run .)" + if [[ -n "$diff" ]]; then echo "$diff"; exit 1; fi + + # - run: cmake -S ./cpp -B ./cpp/build -DCMAKE_EXPORT_COMPILE_COMMANDS=1 + + - run: go test -count=1 ./... diff --git a/.gitignore b/.gitignore index 567609b..cae36d6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ build/ +cpp/.cache/ +cpp/clp/ +**/compile_commands.json diff --git a/.gitmodules b/.gitmodules index 22bf0de..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "clp"] - path = cpp/clp - url = git@github.com:y-scope/clp.git diff --git a/BUILD.bazel b/BUILD.bazel index c132f2e..79dee54 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,53 +1,18 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") load("@rules_cc//cc:defs.bzl", "cc_library") cc_library( name = "libclp_ffi", - srcs = select({ - "@io_bazel_rules_go//go/platform:android_amd64": [ - "lib/libclp_ffi_linux_amd64.so", - ], - "@io_bazel_rules_go//go/platform:android_arm64": [ - "lib/libclp_ffi_linux_arm64.so", - ], - "@io_bazel_rules_go//go/platform:darwin_amd64": [ - "lib/libclp_ffi_darwin_amd64.so", - ], - "@io_bazel_rules_go//go/platform:darwin_arm64": [ - "lib/libclp_ffi_darwin_arm64.so", - ], - "@io_bazel_rules_go//go/platform:ios_amd64": [ - "lib/libclp_ffi_darwin_amd64.so", - ], - "@io_bazel_rules_go//go/platform:ios_arm64": [ - "lib/libclp_ffi_darwin_arm64.so", - ], - "@io_bazel_rules_go//go/platform:linux_amd64": [ - "lib/libclp_ffi_linux_amd64.so", - ], - "@io_bazel_rules_go//go/platform:linux_arm64": [ - "lib/libclp_ffi_linux_arm64.so", - ], - "//conditions:default": [], - }), - hdrs = glob([ - "cpp/src/**/*.h", - ]), + srcs = glob(["cpp/src/ffi_go/**"]) + [ + ], + hdrs = glob(["cpp/src/ffi_go/**/*.h"]), includes = [ "cpp/src", ], - visibility = ["//visibility:public"], -) - -go_library( - name = "clp-ffi-go", - srcs = ["generate.go"], - importpath = "github.com/y-scope/clp-ffi-go", - visibility = 
["//visibility:public"], -) - -alias( - name = "go_default_library", - actual = ":clp-ffi-go", + deps = [ + "@com_github_y_scope_clp//:libclp_core", + ], + copts = [ + "-std=c++20", + ], visibility = ["//visibility:public"], ) diff --git a/LICENSE b/LICENSE index e1c64ab..d645695 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,4 @@ + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -186,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2022 YScope Inc. + Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.rst b/README.rst index 0893714..1db71d0 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,7 @@ Getting started To add the module to your project run: ``go get github.com/y-scope/clp-ffi-go`` Here's an example showing how to decode each log event containing "ERROR" from -a CLP IR stream. +a CLP IR byte stream. .. code:: golang @@ -24,21 +24,29 @@ a CLP IR stream. 
"time" "github.com/klauspost/compress/zstd" + "github.com/y-scope/clp-ffi-go/ffi" "github.com/y-scope/clp-ffi-go/ir" ) file, _ := os.Open("log-file.clp.zst") + defer file.Close() zstdReader, _ := zstd.NewReader(file) + defer zstdReader.Close() + irReader, _ := ir.NewReader(zstdReader) + defer irReader.Close() - irReader, _ := ir.ReadPreamble(zstdReader, 4096) + var err error for { - // To read every log event replace ReadToContains with - // ReadNextLogEvent(zstdReader) - log, err := irReader.ReadToContains(zstdReader, []byte("ERROR")) - if ir.Eof == err || io.EOF == err { + var log *ffi.LogEventView + // To read every log event replace ReadToContains with Read() + log, err = irReader.ReadToContains("ERROR") + if nil != err { break } - fmt.Printf("%v %v", time.UnixMilli(int64(log.Timestamp)), string(log.Msg)) + fmt.Printf("%v %v", time.UnixMilli(int64(log.Timestamp)), log.LogMessageView) + } + if ir.EndOfIr != err { + fmt.Printf("Reader.Read failed: %v", err) } Building @@ -49,15 +57,11 @@ as well as stringify ``Enum`` style types. 1. Install requirements: a. A C++ compiler that supports C++17 - #. CMake 3.5.1 or higher + #. CMake 3.23 or higher #. The Stringer tool: https://pkg.go.dev/golang.org/x/tools/cmd/stringer - ``go install golang.org/x/tools/cmd/stringer@latest`` -#. ``git submodule update --init --recursive`` - - - Pull all submodules in preparation for building - #. ``go generate ./...`` - Run all generate directives (note the 3 dots after '/') @@ -70,15 +74,6 @@ arguments or modifications. __ https://github.com/bazelbuild/rules_go/blob/master/docs/go/core/rules.md#go_library-deps -Testing -------- -To run all unit tests run: ``go test ./... -args $(readlink -f clp-ir-stream.clp.zst)`` - -- The ``ir`` package's tests currently requries an existing CLP IR file - compressed with zstd. This file's path is taken as the only argument to the - test and is supplied after ``-args``. It can be an absolute path or a path - relative to the ``ir`` directory. 
- Why not build with cgo? ''''''''''''''''''''''' The primary reason we choose to build with CMake rather than directly with cgo, @@ -86,6 +81,15 @@ is to ease code maintenance by maximizing the reuse of CLP's code with no modifications. If a platform you use is not supported by the pre-built libraries, please open an issue and we can integrate it into our build process. +Testing +------- +To run all unit tests run: ``go_test_ir="/path/to/my-ir.clp.zst" go test ./...`` + +- Some of the ``ir`` package's tests currently require an existing CLP IR file + compressed with zstd. This file's path is taken as an environment variable + named ``go_test_ir``. It can be an absolute path or a path relative to the + ``ir`` directory. + Using an external C++ library ----------------------------- Use the ``external`` build tag to link with different CLP FFI library instead @@ -100,5 +104,6 @@ For example, to run the tests using the ``external`` you can run: .. code:: bash - CGO_LDFLAGS="-L./lib -lclp_ffi_linux_amd64 -lstdc++" \ - go test -tags external,test ./... -args $(readlink -f clp-ir-stream.clp.zst) + CGO_LDFLAGS="-L/path/to/external_libs -lclp_ffi_linux_amd64 -Wl,-rpath=/path/to/external_libs" \ + go_test_ir="/path/to/my-ir.clp.zst" \ + go test -tags external ./... 
diff --git a/cpp/.clang-format b/cpp/.clang-format index e390658..1e8df9a 100644 --- a/cpp/.clang-format +++ b/cpp/.clang-format @@ -1,26 +1,157 @@ --- -BasedOnStyle: LLVM ColumnLimit: 100 IndentWidth: 4 --- Language: Cpp AccessModifierOffset: -4 +AlignAfterOpenBracket: BlockIndent +AlignArrayOfStructures: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignConsecutiveMacros: None +AlignEscapedNewlines: DontAlign +AlignOperands: Align +AlignTrailingComments: Never +AllowAllArgumentsOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Always +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: Yes BinPackArguments: false BinPackParameters: false +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: MultiLine + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterExternBlock: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyNamespace: false + SplitEmptyRecord: false +BreakAfterAttributes: Never +BreakBeforeBinaryOperators: All +BreakBeforeBraces: Custom +BreakBeforeConceptDeclarations: Always +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +CompactNamespaces: true +ConstructorInitializerIndentWidth: 8 ContinuationIndentWidth: 8 -NamespaceIndentation: Inner +Cpp11BracedListStyle: true +DerivePointerAlignment: false 
+DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +FixNamespaceComments: true +IncludeBlocks: Regroup +IncludeCategories: + # NOTE: A header is grouped by first matching regex + # Project headers + - Regex: '^$' + Priority: 1 + # C++ standard libraries + - Regex: '^<.*>$' + Priority: 2 +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: true +IndentExternBlock: Indent +IndentGotoLabels: false +IndentPPDirectives: None +IndentRequiresClause: false +IndentWrappedFunctionNames: false +InsertBraces: true +InsertNewlineAtEOF: true +IntegerLiteralSeparator: + Binary: 4 + BinaryMinDigits: 4 + Decimal: 3 + DecimalMinDigits: 5 + Hex: 4 + HexMinDigits: 4 +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +LineEnding: LF +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +PPIndentWidth: -1 PackConstructorInitializers: CurrentLine +PenaltyBreakAssignment: 50 +PenaltyBreakOpenParenthesis: 25 +PenaltyBreakBeforeFirstCallParameter: 25 +PenaltyReturnTypeOnItsOwnLine: 100 PointerAlignment: Left -QualifierAlignment: Right -ReflowComments: false +QualifierAlignment: Custom +QualifierOrder: + - static + - friend + - inline + # constexpr west as explained in https://www.youtube.com/watch?v=z6s6bacI424 + - constexpr + - type + - const + - volatile +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveSemicolon: true +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Always +ShortNamespaceLines: 0 +SortIncludes: CaseInsensitive +SortUsingDeclarations: Lexicographic +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true -SpaceBeforeParens: Custom 
-SpaceBeforeParensOptions: - AfterControlStatements: true -# AfterFunctionDeclarationName: true -# AfterFunctionDefinitionName: true +SpaceBeforeParens: ControlStatements SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false Standard: Latest +TabWidth: 4 +UseTab: Never diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy index 989dd55..5d30670 100644 --- a/cpp/.clang-tidy +++ b/cpp/.clang-tidy @@ -1,9 +1,25 @@ --- -Checks: 'cert-*,clang-analyzer-*,clang-diagnostic-*,cppcoreguidelines-*,modernize-*,performance-*,readability-*,-readability-identifier-length,-readability-simplify-boolean-expr' +Checks: >- + bugprone-*, + -bugprone-easily-swappable-parameters, + cert-*, + clang-analyzer-*, + clang-diagnostic-*, + concurrency-*, + cppcoreguidelines-*, + misc-*, + modernize-*, + performance-*, + portability-*, + readability-*, + -readability-identifier-length, + -readability-named-parameter, + -readability-simplify-boolean-expr, + +WarningsAsErrors: '*' FormatStyle: file -HeaderFileExtensions: ['','h','hh','hpp','hxx','tpp'] -ImplementationFileExtensions: ['','c','cc','cpp','cxx'] CheckOptions: + misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: true readability-identifier-naming.ClassCase: 'CamelCase' readability-identifier-naming.ClassMemberCase: 'lower_case' readability-identifier-naming.ClassMemberPrefix: 'm_' @@ -23,4 +39,5 @@ CheckOptions: readability-identifier-naming.ParameterCase: 'lower_case' readability-identifier-naming.StructCase: 'CamelCase' readability-identifier-naming.TypedefCase: 'CamelCase' + readability-identifier-naming.TypedefIgnoredRegexp: '[a-z_]+_t' 
readability-identifier-naming.UnionCase: 'CamelCase' diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 27bb313..13ba499 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,18 +1,27 @@ -cmake_minimum_required(VERSION 3.5.1) +cmake_minimum_required(VERSION 3.23) +include(FetchContent) project(clp_ffi LANGUAGES CXX C ) -# Set default build type +# Enable compile commands by default if the generator supports it. +if (NOT CMAKE_EXPORT_COMPILE_COMMANDS AND CMAKE_GENERATOR MATCHES "Ninja|Unix Makefiles") + set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL + "Enable/Disable output of compile commands during generation." FORCE) +endif() + +# Set default build type to Release if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(default_build_type "Release") message(STATUS "No build type specified. Setting to '${default_build_type}'.") set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE) endif() -option(BUILD_SHARED_LIBS "Build using shared libraries" ON) +# Build/package static by default to simplify compatibility in other systems +option(BUILD_SHARED_LIBS "Build using shared libraries" OFF) +# Setup library name based on Go environment variables set by `go generate` set(LIB_NAME "clp_ffi" CACHE STRING "Library name containing os and arch.") if (DEFINED ENV{GOOS}) string(APPEND LIB_NAME "_$ENV{GOOS}") @@ -21,49 +30,98 @@ if (DEFINED ENV{GOARCH}) string(APPEND LIB_NAME "_$ENV{GOARCH}") endif() -add_library(${LIB_NAME} - clp/components/core/src/Defs.h - clp/components/core/src/ffi/ir_stream/encoding_methods.cpp - clp/components/core/src/ffi/ir_stream/encoding_methods.hpp - clp/components/core/src/ffi/ir_stream/decoding_methods.cpp - clp/components/core/src/ffi/ir_stream/decoding_methods.hpp - clp/components/core/src/ffi/encoding_methods.cpp - clp/components/core/src/ffi/encoding_methods.hpp - clp/components/core/src/ffi/encoding_methods.tpp - src/log_event.h - src/LogEvent.cpp - src/LogEvent.hpp - 
src/ir/encoding.cpp - src/ir/encoding.h - src/ir/decoding.cpp - src/ir/decoding.h - src/message/encoding.cpp - src/message/encoding.h +set(CLP_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/clp" CACHE STRING + "Directory containing CLP source, fetched from github if non existant.") +if (NOT EXISTS ${CLP_SRC_DIR}) + FetchContent_Declare( + clp-core + GIT_REPOSITORY https://github.com/y-scope/clp.git + GIT_TAG 084efa35b7e9a63aecc5e327b97aea2a1cef83bc + SOURCE_DIR ${CLP_SRC_DIR} + ) + message(STATUS "Fetching CLP from github.") + FetchContent_MakeAvailable(clp-core) +endif() + +add_library(${LIB_NAME}) + +set_target_properties(${LIB_NAME} + PROPERTIES + POSITION_INDEPENDENT_CODE ON +) + +# Macro providing the length of the absolute source directory path so we can +# create a relative (rather than absolute) __FILE__ macro +string(LENGTH "${CMAKE_CURRENT_SOURCE_DIR}/" SOURCE_PATH_SIZE) +target_compile_definitions(${LIB_NAME} + PUBLIC + SOURCE_PATH_SIZE=${SOURCE_PATH_SIZE} ) target_compile_features(${LIB_NAME} PRIVATE - cxx_std_17 + cxx_std_20 ) -target_include_directories(${LIB_NAME} +# Set warnings as errors +target_compile_options(${LIB_NAME} PRIVATE - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/src + $<$:/W4 /WX> + $<$>:-Wall -Wextra -Wpedantic -Werror> ) -# target_compile_options(${LIB_NAME} -# PRIVATE -# $<$:/W4 /WX> -# $<$>:-Wall -Wextra -Wpedantic -Werror> -# ) +target_include_directories(${LIB_NAME} + PRIVATE + ${CLP_SRC_DIR}/components/core/submodules + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/src +) -# Macro providing the length of the absolute source directory path so we can -# create a relative (rather than absolute) __FILE__ macro -string(LENGTH "${CMAKE_SOURCE_DIR}/" SOURCE_PATH_SIZE) -target_compile_definitions(${LIB_NAME} +target_sources(${LIB_NAME} PUBLIC PUBLIC - SOURCE_PATH_SIZE=${SOURCE_PATH_SIZE} + FILE_SET HEADERS + BASE_DIRS src/ + FILES + src/ffi_go/api_decoration.h + src/ffi_go/defs.h + src/ffi_go/ir/decoder.h + src/ffi_go/ir/deserializer.h 
+ src/ffi_go/ir/encoder.h + src/ffi_go/ir/serializer.h + src/ffi_go/search/wildcard_query.h + PRIVATE + ${CLP_SRC_DIR}/components/core/src/BufferReader.cpp + ${CLP_SRC_DIR}/components/core/src/BufferReader.hpp + ${CLP_SRC_DIR}/components/core/src/Defs.h + ${CLP_SRC_DIR}/components/core/src/ErrorCode.hpp + ${CLP_SRC_DIR}/components/core/src/ReaderInterface.cpp + ${CLP_SRC_DIR}/components/core/src/ReaderInterface.hpp + ${CLP_SRC_DIR}/components/core/src/string_utils.cpp + ${CLP_SRC_DIR}/components/core/src/string_utils.hpp + ${CLP_SRC_DIR}/components/core/src/string_utils.inc + ${CLP_SRC_DIR}/components/core/src/TraceableException.hpp + ${CLP_SRC_DIR}/components/core/src/type_utils.hpp + ${CLP_SRC_DIR}/components/core/src/ffi/encoding_methods.cpp + ${CLP_SRC_DIR}/components/core/src/ffi/encoding_methods.hpp + ${CLP_SRC_DIR}/components/core/src/ffi/encoding_methods.inc + ${CLP_SRC_DIR}/components/core/src/ffi/ir_stream/byteswap.hpp + ${CLP_SRC_DIR}/components/core/src/ffi/ir_stream/encoding_methods.cpp + ${CLP_SRC_DIR}/components/core/src/ffi/ir_stream/encoding_methods.hpp + ${CLP_SRC_DIR}/components/core/src/ffi/ir_stream/decoding_methods.cpp + ${CLP_SRC_DIR}/components/core/src/ffi/ir_stream/decoding_methods.hpp + ${CLP_SRC_DIR}/components/core/src/ffi/ir_stream/decoding_methods.inc + ${CLP_SRC_DIR}/components/core/src/ffi/ir_stream/protocol_constants.hpp + src/ffi_go/types.hpp + src/ffi_go/ir/decoder.cpp + src/ffi_go/ir/deserializer.cpp + src/ffi_go/ir/encoder.cpp + src/ffi_go/ir/types.hpp + src/ffi_go/ir/serializer.cpp + src/ffi_go/search/wildcard_query.cpp ) -install(TARGETS ${LIB_NAME}) +include(GNUInstallDirs) +install(TARGETS ${LIB_NAME} + ARCHIVE + FILE_SET HEADERS +) diff --git a/cpp/clp b/cpp/clp deleted file mode 160000 index 7e7fbc5..0000000 --- a/cpp/clp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7e7fbc5e412b0c949b707027ea1cdbe14adb29d4 diff --git a/cpp/src/LogEvent.cpp b/cpp/src/LogEvent.cpp deleted file mode 100644 index a9e3d82..0000000 --- 
a/cpp/src/LogEvent.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include -#include - -void delete_log_event(void* log_event) { - delete reinterpret_cast(log_event); -} diff --git a/cpp/src/LogEvent.hpp b/cpp/src/LogEvent.hpp deleted file mode 100644 index 4ea8beb..0000000 --- a/cpp/src/LogEvent.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef LOG_EVENT_HPP -#define LOG_EVENT_HPP - -#include - -struct LogEvent { - LogEvent() : msg{} {} - LogEvent(size_t cap) { msg.reserve(cap); } - - std::string msg; -}; - -#endif // LOG_EVENT_HPP diff --git a/cpp/src/ffi_go/api_decoration.h b/cpp/src/ffi_go/api_decoration.h new file mode 100644 index 0000000..0a5ff4a --- /dev/null +++ b/cpp/src/ffi_go/api_decoration.h @@ -0,0 +1,21 @@ +#ifndef FFI_GO_API_DECORATION_H +#define FFI_GO_API_DECORATION_H + +/** + * If the file is compiled with a C++ compiler, `extern "C"` must be defined to + * ensure C linkage. + */ +#ifdef __cplusplus +#define CLP_FFI_GO_EXTERN_C extern "C" +#else +#define CLP_FFI_GO_EXTERN_C +#endif + +/** + * `CLP_FFI_GO_METHOD` should be added at the beginning of a function's + * declaration/implementation to decorate any APIs that are exposed to the + * Golang layer. + */ +#define CLP_FFI_GO_METHOD CLP_FFI_GO_EXTERN_C + +#endif diff --git a/cpp/src/ffi_go/defs.h b/cpp/src/ffi_go/defs.h new file mode 100644 index 0000000..563c89a --- /dev/null +++ b/cpp/src/ffi_go/defs.h @@ -0,0 +1,72 @@ +#ifndef FFI_GO_DEF_H +#define FFI_GO_DEF_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-using) + +#include +#include +#include + +// TODO: replace with clp c-compatible header once it exists +typedef int64_t epoch_time_ms_t; + +/** + * A span of a bool array passed down through Cgo. + */ +typedef struct { + bool* m_data; + size_t m_size; +} BoolSpan; + +/** + * A span of a byte array passed down through Cgo. 
+ */ +typedef struct { + void* m_data; + size_t m_size; +} ByteSpan; + +/** + * A span of a Go int32 array passed down through Cgo. + */ +typedef struct { + int32_t* m_data; + size_t m_size; +} Int32tSpan; + +/** + * A span of a Go int64 array passed down through Cgo. + */ +typedef struct { + int64_t* m_data; + size_t m_size; +} Int64tSpan; + +/** + * A span of a Go int/C.size_t array passed down through Cgo. + */ +typedef struct { + size_t* m_data; + size_t m_size; +} SizetSpan; + +/** + * A view of a Go string passed down through Cgo. + */ +typedef struct { + char const* m_data; + size_t m_size; +} StringView; + +/** + * A view of a Go ffi.LogEvent passed down through Cgo. + */ +typedef struct { + StringView m_log_message; + epoch_time_ms_t m_timestamp; +} LogEventView; + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_DEF_H diff --git a/cpp/src/ffi_go/ir/decoder.cpp b/cpp/src/ffi_go/ir/decoder.cpp new file mode 100644 index 0000000..3f47ee3 --- /dev/null +++ b/cpp/src/ffi_go/ir/decoder.cpp @@ -0,0 +1,106 @@ +#include "decoder.h" + +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace ffi_go::ir { +using namespace ffi::ir_stream; + +namespace { +/** + * Generic helper for ir_decoder_decode_*_log_message + */ +template +[[nodiscard]] auto decode_log_message( + StringView logtype, + encoded_var_view_t vars, + StringView dict_vars, + Int32tSpan dict_var_end_offsets, + void* ir_decoder, + StringView* log_msg_view +) -> int { + using encoded_var_t = std::conditional_t< + std::is_same_v, + ffi::eight_byte_encoded_variable_t, + ffi::four_byte_encoded_variable_t>; + if (nullptr == ir_decoder || nullptr == log_msg_view) { + return static_cast(IRErrorCode_Corrupted_IR); + } + Decoder* decoder{static_cast(ir_decoder)}; + auto& log_msg{decoder->m_log_message}; + log_msg.reserve(logtype.m_size + dict_vars.m_size); + + IRErrorCode err{IRErrorCode_Success}; + try { + log_msg = 
ffi::decode_message( + std::string_view(logtype.m_data, logtype.m_size), + vars.m_data, + vars.m_size, + std::string_view(dict_vars.m_data, dict_vars.m_size), + dict_var_end_offsets.m_data, + dict_var_end_offsets.m_size + ); + } catch (ffi::EncodingException const& e) { + err = IRErrorCode_Decode_Error; + } + + log_msg_view->m_data = log_msg.data(); + log_msg_view->m_size = log_msg.size(); + return static_cast(err); +} +} // namespace + +CLP_FFI_GO_METHOD auto ir_decoder_new() -> void* { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + return new Decoder{}; +} + +CLP_FFI_GO_METHOD auto ir_decoder_close(void* ir_decoder) -> void { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + delete static_cast(ir_decoder); +} + +CLP_FFI_GO_METHOD auto ir_decoder_decode_eight_byte_log_message( + StringView logtype, + Int64tSpan vars, + StringView dict_vars, + Int32tSpan dict_var_end_offsets, + void* ir_decoder, + StringView* log_message +) -> int { + return decode_log_message( + logtype, + vars, + dict_vars, + dict_var_end_offsets, + ir_decoder, + log_message + ); +} + +CLP_FFI_GO_METHOD auto ir_decoder_decode_four_byte_log_message( + StringView logtype, + Int32tSpan vars, + StringView dict_vars, + Int32tSpan dict_var_end_offsets, + void* ir_decoder, + StringView* log_message +) -> int { + return decode_log_message( + logtype, + vars, + dict_vars, + dict_var_end_offsets, + ir_decoder, + log_message + ); +} +} // namespace ffi_go::ir diff --git a/cpp/src/ffi_go/ir/decoder.h b/cpp/src/ffi_go/ir/decoder.h new file mode 100644 index 0000000..5d003be --- /dev/null +++ b/cpp/src/ffi_go/ir/decoder.h @@ -0,0 +1,88 @@ +#ifndef FFI_GO_IR_DECODER_H +#define FFI_GO_IR_DECODER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) +// NOLINTBEGIN(modernize-use-using) + +#include +#include + +#include +#include + +/** + * Create a ir::Decoder used as the underlying data 
storage for a Go ir.Decoder. + * @return New ir::Decoder's address + */ +CLP_FFI_GO_METHOD void* ir_decoder_new(); + +/** + * Clean up the underlying ir::Decoder of a Go ir.Decoder. + * @param[in] decoder Address of an ir::Decoder created and returned by + * ir_decoder_new + */ +CLP_FFI_GO_METHOD void ir_decoder_close(void* decoder); + +/** + * Given the fields of a CLP IR encoded log message with eight byte encoding, + * decode it into the original log message. An ir::Decoder must be provided to + * use as the backing storage for the corresponding Go ir.Decoder. All pointer + * parameters must be non-null (non-nil Cgo C. pointer or unsafe.Pointer + * from Go). + * @param[in] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[in] vars Array of encoded variables + * @param[in] dict_vars String containing all dictionary variables concatenated + * together + * @param[in] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @param[in] ir_decoder ir::Decoder to be used as storage for the decoded log + * message + * @param[out] log_message Decoded log message + * @return ffi::ir_stream::IRErrorCode_Decode_Error if ffi::decode_message + * throws or errors + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_decoder_decode_eight_byte_log_message( + StringView logtype, + Int64tSpan vars, + StringView dict_vars, + Int32tSpan dict_var_end_offsets, + void* ir_decoder, + StringView* log_message +); + +/** + * Given the fields of a CLP IR encoded log message with four byte encoding, + * decode it into the original log message. An ir::Decoder must be provided to + * use as the backing storage for the corresponding Go ir.Decoder. All pointer + * parameters must be non-null (non-nil Cgo C. pointer or unsafe.Pointer + * from Go). 
+ * @param[in] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[in] vars Array of encoded variables + * @param[in] dict_vars String containing all dictionary variables concatenated + * together + * @param[in] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @param[in] ir_decoder ir::Decoder to be used as storage for the decoded log + * message + * @param[out] log_message Decoded log message + * @return ffi::ir_stream::IRErrorCode_Decode_Error if ffi::decode_message + * throws or errors + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_decoder_decode_four_byte_log_message( + StringView logtype, + Int32tSpan vars, + StringView dict_vars, + Int32tSpan dict_var_end_offsets, + void* ir_decoder, + StringView* log_message +); + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_DECODER_H diff --git a/cpp/src/ffi_go/ir/deserializer.cpp b/cpp/src/ffi_go/ir/deserializer.cpp new file mode 100644 index 0000000..d7da93a --- /dev/null +++ b/cpp/src/ffi_go/ir/deserializer.cpp @@ -0,0 +1,322 @@ +#include "deserializer.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace ffi_go::ir { +using namespace ffi; +using namespace ffi::ir_stream; + +namespace { +/** + * Generic helper for ir_deserializer_deserialize_*_log_event + */ +template +[[nodiscard]] auto deserialize_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +) -> int; + +/** + * Generic helper for ir_deserializer_deserialize_*_wildcard_match + */ +template +[[nodiscard]] auto deserialize_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + 
TimestampInterval time_interval, + WildcardQueryView queries, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +) -> int; + +template +auto deserialize_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +) -> int { + if (nullptr == ir_deserializer || nullptr == ir_pos || nullptr == log_event) { + return static_cast(IRErrorCode_Corrupted_IR); + } + BufferReader ir_buf{static_cast(ir_view.m_data), ir_view.m_size}; + Deserializer* deserializer{static_cast(ir_deserializer)}; + + IRErrorCode err{}; + epoch_time_ms_t timestamp{}; + if constexpr (std::is_same_v) { + err = eight_byte_encoding::decode_next_message( + ir_buf, + deserializer->m_log_event.m_log_message, + timestamp + ); + } else if constexpr (std::is_same_v) { + epoch_time_ms_t timestamp_delta{}; + err = four_byte_encoding::decode_next_message( + ir_buf, + deserializer->m_log_event.m_log_message, + timestamp_delta + ); + timestamp = deserializer->m_timestamp + timestamp_delta; + } else { + static_assert(cAlwaysFalse, "Invalid/unhandled encoding type"); + } + if (IRErrorCode_Success != err) { + return static_cast(err); + } + deserializer->m_timestamp = timestamp; + + size_t pos{0}; + if (ErrorCode_Success != ir_buf.try_get_pos(pos)) { + return static_cast(IRErrorCode_Decode_Error); + } + *ir_pos = pos; + log_event->m_log_message.m_data = deserializer->m_log_event.m_log_message.data(); + log_event->m_log_message.m_size = deserializer->m_log_event.m_log_message.size(); + log_event->m_timestamp = deserializer->m_timestamp; + return static_cast(IRErrorCode_Success); +} + +template +auto deserialize_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + TimestampInterval time_interval, + MergedWildcardQueryView merged_query, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +) -> int { + if (nullptr == ir_deserializer || nullptr == ir_pos || nullptr == log_event + || nullptr == matching_query) + { + return 
static_cast(IRErrorCode_Corrupted_IR); + } + BufferReader ir_buf{static_cast(ir_view.m_data), ir_view.m_size}; + Deserializer* deserializer{static_cast(ir_deserializer)}; + std::string_view const query_view{merged_query.m_queries.m_data, merged_query.m_queries.m_size}; + std::span const end_offsets{ + merged_query.m_end_offsets.m_data, + merged_query.m_end_offsets.m_size}; + std::span const case_sensitivity{ + merged_query.m_case_sensitivity.m_data, + merged_query.m_case_sensitivity.m_size}; + + std::vector> queries(merged_query.m_end_offsets.m_size); + size_t pos{0}; + for (size_t i{0}; i < merged_query.m_end_offsets.m_size; i++) { + auto& [query_str_view, is_case_sensitive]{queries[i]}; + query_str_view = query_view.substr(pos, end_offsets[i]); + is_case_sensitive = case_sensitivity[i]; + pos += end_offsets[i]; + } + + std::function(ffi_go::LogMessage const&)> query_fn; + if (false == queries.empty()) { + query_fn = [&](ffi_go::LogMessage const& log_message) -> std::pair { + auto const found_query = std::find_if( + queries.cbegin(), + queries.cend(), + [&](std::pair const& query) -> bool { + return wildcard_match_unsafe(log_message, query.first, query.second); + } + ); + return {queries.cend() != found_query, found_query - queries.cbegin()}; + }; + } else { + query_fn = [](ffi_go::LogMessage const&) -> std::pair { return {true, 0}; }; + } + + IRErrorCode err{}; + while (true) { + epoch_time_ms_t timestamp{}; + if constexpr (std::is_same_v) { + err = eight_byte_encoding::decode_next_message( + ir_buf, + deserializer->m_log_event.m_log_message, + timestamp + ); + } else if constexpr (std::is_same_v) { + epoch_time_ms_t timestamp_delta{}; + err = four_byte_encoding::decode_next_message( + ir_buf, + deserializer->m_log_event.m_log_message, + timestamp_delta + ); + timestamp = deserializer->m_timestamp + timestamp_delta; + } else { + static_assert(cAlwaysFalse, "Invalid/unhandled encoding type"); + } + if (IRErrorCode_Success != err) { + return static_cast(err); + } + 
deserializer->m_timestamp = timestamp; + + if (time_interval.m_upper <= deserializer->m_timestamp) { + // TODO this is an extremely fragile hack until the CLP ffi ir + // code is refactored and IRErrorCode includes things beyond + // decoding. + return static_cast(IRErrorCode_Incomplete_IR + 1); + } + if (time_interval.m_lower > deserializer->m_timestamp) { + continue; + } + auto const [has_matching_query, matching_query_idx]{ + query_fn(deserializer->m_log_event.m_log_message)}; + if (false == has_matching_query) { + continue; + } + size_t pos{0}; + if (ErrorCode_Success != ir_buf.try_get_pos(pos)) { + return static_cast(IRErrorCode_Decode_Error); + } + *ir_pos = pos; + log_event->m_log_message.m_data = deserializer->m_log_event.m_log_message.data(); + log_event->m_log_message.m_size = deserializer->m_log_event.m_log_message.size(); + log_event->m_timestamp = deserializer->m_timestamp; + *matching_query = matching_query_idx; + return static_cast(IRErrorCode_Success); + } +} +} // namespace + +CLP_FFI_GO_METHOD auto ir_deserializer_close(void* ir_deserializer) -> void { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + delete static_cast(ir_deserializer); +} + +CLP_FFI_GO_METHOD auto ir_deserializer_new_deserializer_with_preamble( + ByteSpan ir_view, + size_t* ir_pos, + int8_t* ir_encoding, + int8_t* metadata_type, + size_t* metadata_pos, + uint16_t* metadata_size, + void** ir_deserializer_ptr, + void** timestamp_ptr +) -> int { + if (nullptr == ir_pos || nullptr == ir_encoding || nullptr == metadata_type + || nullptr == metadata_pos || nullptr == metadata_size || nullptr == ir_deserializer_ptr + || nullptr == timestamp_ptr) + { + return static_cast(IRErrorCode_Corrupted_IR); + } + BufferReader ir_buf{static_cast(ir_view.m_data), ir_view.m_size}; + + bool four_byte_encoding{}; + if (IRErrorCode const err{get_encoding_type(ir_buf, four_byte_encoding)}; + IRErrorCode_Success != err) + { + return static_cast(err); + } + *ir_encoding = four_byte_encoding ? 
1 : 0; + + if (IRErrorCode const err{ + decode_preamble(ir_buf, *metadata_type, *metadata_pos, *metadata_size)}; + IRErrorCode_Success != err) + { + return static_cast(err); + } + + size_t pos{0}; + if (ErrorCode_Success != ir_buf.try_get_pos(pos)) { + return static_cast(IRErrorCode_Decode_Error); + } + *ir_pos = pos; + auto* deserializer{new Deserializer()}; + *ir_deserializer_ptr = deserializer; + *timestamp_ptr = &deserializer->m_timestamp; + return static_cast(IRErrorCode_Success); +} + +CLP_FFI_GO_METHOD auto ir_deserializer_deserialize_eight_byte_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +) -> int { + return deserialize_log_event( + ir_view, + ir_deserializer, + ir_pos, + log_event + ); +} + +CLP_FFI_GO_METHOD auto ir_deserializer_deserialize_four_byte_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +) -> int { + return deserialize_log_event( + ir_view, + ir_deserializer, + ir_pos, + log_event + ); +} + +CLP_FFI_GO_METHOD auto ir_deserializer_deserialize_eight_byte_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + TimestampInterval time_interval, + MergedWildcardQueryView merged_query, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +) -> int { + return deserialize_wildcard_match( + ir_view, + ir_deserializer, + time_interval, + merged_query, + ir_pos, + log_event, + matching_query + ); +} + +CLP_FFI_GO_METHOD auto ir_deserializer_deserialize_four_byte_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + TimestampInterval time_interval, + MergedWildcardQueryView merged_query, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +) -> int { + return deserialize_wildcard_match( + ir_view, + ir_deserializer, + time_interval, + merged_query, + ir_pos, + log_event, + matching_query + ); +} +} // namespace ffi_go::ir diff --git a/cpp/src/ffi_go/ir/deserializer.h b/cpp/src/ffi_go/ir/deserializer.h 
new file mode 100644 index 0000000..71ae1ab --- /dev/null +++ b/cpp/src/ffi_go/ir/deserializer.h @@ -0,0 +1,158 @@ +#ifndef FFI_GO_IR_DESERIALIZER_H +#define FFI_GO_IR_DESERIALIZER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) + +#include +#include + +#include +#include +#include + +/** + * Clean up the underlying ir::Deserializer of a Go ir.Deserializer. + * @param[in] ir_deserializer The address of a ir::Deserializer created and + * returned by ir_deserializer_new_deserializer_with_preamble + */ +CLP_FFI_GO_METHOD void ir_deserializer_close(void* ir_deserializer); + +/** + * Given a CLP IR buffer (any encoding), attempt to deserialize a preamble and + * extract its information. An ir::Deserializer will be allocated to use as the + * backing storage for a Go ir.Deserializer (i.e. subsequent calls to + * ir_deserializer_deserialize_*_log_event). It is left to the Go layer to read + * the metadata based on the returned type. All pointer parameters must be + * non-null (non-nil Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[out] ir_pos Position in ir_view read to + * @param[out] ir_encoding IR encoding type (1: four byte, 0: eight byte) + * @param[out] metadata_type Type of metadata in preamble (e.g. 
json) + * @param[out] metadata_pos Position in ir_view where the metadata begins + * @param[out] metadata_size Size of the metadata (in bytes) + * @param[out] ir_deserializer_ptr Address of a new ir::Deserializer + * @param[out] timestamp_ptr Address of m_timestamp inside the ir::Deserializer + * to be filled in by Go using the metadata contents + * @return ffi::ir_stream::IRErrorCode forwarded from either + * ffi::ir_stream::get_encoding_type or ffi::ir_stream::decode_preamble + */ +CLP_FFI_GO_METHOD int ir_deserializer_new_deserializer_with_preamble( + ByteSpan ir_view, + size_t* ir_pos, + int8_t* ir_encoding, + int8_t* metadata_type, + size_t* metadata_pos, + uint16_t* metadata_size, + void** ir_deserializer_ptr, + void** timestamp_ptr +); + +/** + * Given a CLP IR buffer with eight byte encoding, deserialize the next log + * event. Returns the components of the found log event and the buffer position + * it ends at. All pointer parameters must be non-null (non-nil Cgo C. + * pointer or unsafe.Pointer from Go). + * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::decode_next_message + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_eight_byte_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +); + +/** + * Given a CLP IR buffer with four byte encoding, deserialize the next log + * event. Returns the components of the found log event and the buffer position + * it ends at. All pointer parameters must be non-null (non-nil Cgo C. + * pointer or unsafe.Pointer from Go). 
+ * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::decode_next_message + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_four_byte_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +); + +/** + * Given a CLP IR buffer with eight byte encoding, deserialize the next log + * event until finding an event that is both within the time interval and + * matches any query. If queries is empty, the first log event within the time + * interval is treated as a match. Returns the components of the found log event + * and the buffer position it ends at. All pointer parameters must be non-null + * (non-nil Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[in] time_interval Timestamp interval: [lower, upper) + * @param[in] merged_query A concatenation of all queries to filter for; if + * empty, any log event is treated as a match + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @param[out] matching_query Index into queries of the first matching query or + * 0 if queries is empty + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::decode_next_message + * @return ffi::ir_stream::IRErrorCode_Incomplete_IR + 1 if no match is + * found before time_interval.m_upper (TODO this should be replaced/fixed in + * clp core) + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_eight_byte_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + TimestampInterval time_interval, + MergedWildcardQueryView merged_query, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +); + +/** + * Given a CLP IR buffer with four byte encoding, deserialize the next log event + * until finding an event that is both within the time interval and matches any + * query. If queries is empty, the first log event within the time interval is + * treated as a match. Returns the components of the found log event and the + * buffer position it ends at. All pointer parameters must be non-null (non-nil + * Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[in] time_interval Timestamp interval: [lower, upper) + * @param[in] merged_query A concatenation of all queries to filter for; if + * empty, any log event is treated as a match + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @param[out] matching_query Index into queries of the matching query + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::decode_next_message + * @return ffi::ir_stream::IRErrorCode_Incomplete_IR + 1 if no match is + * found before time_interval.m_upper (TODO this should be replaced/fixed in + * clp core) + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_four_byte_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + TimestampInterval time_interval, + MergedWildcardQueryView merged_query, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +); + +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_DESERIALIZER_H diff --git a/cpp/src/ffi_go/ir/encoder.cpp b/cpp/src/ffi_go/ir/encoder.cpp new file mode 100644 index 0000000..073e5f8 --- /dev/null +++ b/cpp/src/ffi_go/ir/encoder.cpp @@ -0,0 +1,140 @@ +#include "encoder.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace ffi_go::ir { +using namespace ffi::ir_stream; + +namespace { +/** + * Generic helper for ir_encoder_encode_*_log_message + */ +template +auto encode_log_message( + StringView log_message, + void* ir_encoder, + StringView* logtype, + encoded_var_view_t* vars, + StringView* dict_vars, + Int32tSpan* dict_var_end_offsets +) -> int { + using encoded_var_t = std::conditional_t< + std::is_same_v, + ffi::eight_byte_encoded_variable_t, 
ffi::four_byte_encoded_variable_t>; + if (nullptr == ir_encoder || nullptr == logtype || nullptr == vars || nullptr == dict_vars + || nullptr == dict_var_end_offsets) + { + return static_cast(IRErrorCode_Corrupted_IR); + } + Encoder* encoder{static_cast*>(ir_encoder)}; + auto& ir_log_msg{encoder->m_log_message}; + ir_log_msg.reserve(log_message.m_size); + + std::string_view const log_msg_view{log_message.m_data, log_message.m_size}; + std::vector dict_var_offsets; + if (false + == ffi::encode_message( + log_msg_view, + ir_log_msg.m_logtype, + ir_log_msg.m_vars, + dict_var_offsets + )) + { + return static_cast(IRErrorCode_Corrupted_IR); + } + + // dict_var_offsets contains begin_pos followed by end_pos of each + // dictionary variable in the message + int32_t prev_end_off{0}; + for (size_t i = 0; i < dict_var_offsets.size(); i += 2) { + int32_t const begin_pos{dict_var_offsets[i]}; + int32_t const end_pos{dict_var_offsets[i + 1]}; + ir_log_msg.m_dict_vars.insert( + ir_log_msg.m_dict_vars.cbegin() + prev_end_off, + log_msg_view.cbegin() + begin_pos, + log_msg_view.cbegin() + end_pos + ); + prev_end_off = prev_end_off + (end_pos - begin_pos); + ir_log_msg.m_dict_var_end_offsets.push_back(prev_end_off); + } + + logtype->m_data = ir_log_msg.m_logtype.data(); + logtype->m_size = ir_log_msg.m_logtype.size(); + vars->m_data = ir_log_msg.m_vars.data(); + vars->m_size = ir_log_msg.m_vars.size(); + dict_vars->m_data = ir_log_msg.m_dict_vars.data(); + dict_vars->m_size = ir_log_msg.m_dict_vars.size(); + dict_var_end_offsets->m_data = ir_log_msg.m_dict_var_end_offsets.data(); + dict_var_end_offsets->m_size = ir_log_msg.m_dict_var_end_offsets.size(); + return static_cast(IRErrorCode_Success); +} +} // namespace + +CLP_FFI_GO_METHOD auto ir_encoder_eight_byte_new() -> void* { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + return new Encoder{}; +} + +CLP_FFI_GO_METHOD auto ir_encoder_four_byte_new() -> void* { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + return 
new Encoder{}; +} + +CLP_FFI_GO_METHOD auto ir_encoder_eight_byte_close(void* ir_encoder) -> void { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + delete static_cast*>(ir_encoder); +} + +CLP_FFI_GO_METHOD auto ir_encoder_four_byte_close(void* ir_encoder) -> void { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + delete static_cast*>(ir_encoder); +} + +CLP_FFI_GO_METHOD auto ir_encoder_encode_eight_byte_log_message( + StringView log_message, + void* ir_encoder, + StringView* logtype, + Int64tSpan* vars_ptr, + StringView* dict_vars, + Int32tSpan* dict_var_end_offsets +) -> int { + return encode_log_message( + log_message, + ir_encoder, + logtype, + vars_ptr, + dict_vars, + dict_var_end_offsets + ); +} + +CLP_FFI_GO_METHOD auto ir_encoder_encode_four_byte_log_message( + StringView log_message, + void* ir_encoder, + StringView* logtype, + Int32tSpan* vars, + StringView* dict_vars, + Int32tSpan* dict_var_end_offsets +) -> int { + return encode_log_message( + log_message, + ir_encoder, + logtype, + vars, + dict_vars, + dict_var_end_offsets + ); +} +} // namespace ffi_go::ir diff --git a/cpp/src/ffi_go/ir/encoder.h b/cpp/src/ffi_go/ir/encoder.h new file mode 100644 index 0000000..d1ae99e --- /dev/null +++ b/cpp/src/ffi_go/ir/encoder.h @@ -0,0 +1,98 @@ +#ifndef FFI_GO_IR_ENCODER_H +#define FFI_GO_IR_ENCODER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) +// NOLINTBEGIN(modernize-use-using) + +#include +#include + +#include +#include + +/** + * Create a ir::Encoder used as the underlying data storage for a Go ir.Encoder. + * @return New ir::Encoder's address + */ +CLP_FFI_GO_METHOD void* ir_encoder_eight_byte_new(); + +/** + * @copydoc ir_encoder_eight_byte_new() + */ +CLP_FFI_GO_METHOD void* ir_encoder_four_byte_new(); + +/** + * Clean up the underlying ir::Encoder of a Go ir.Encoder. 
+ * @param[in] ir_encoder Address of a ir::Encoder created and returned by + * ir_encoder_eight_byte_new + */ +CLP_FFI_GO_METHOD void ir_encoder_eight_byte_close(void* ir_encoder); + +/** + * Clean up the underlying ir::Encoder of a Go ir.Encoder. + * @param[in] ir_encoder Address of a ir::Encoder created and returned by + * ir_encoder_four_byte_new + */ +CLP_FFI_GO_METHOD void ir_encoder_four_byte_close(void* ir_encoder); + +/** + * Given a log message, encode it into a CLP IR object with eight byte encoding. + * An ir::Encoder must be provided to use as the backing storage for the + * corresponding Go ir.Encoder. All pointer parameters must be non-null (non-nil + * Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] log_message Log message to encode + * @param[in] ir_encoder ir::Encoder to be used as storage for the encoded log + * message + * @param[out] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[out] vars Array of encoded variables + * @param[out] dict_vars String containing all dictionary variables concatenated + * together + * @param[out] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @return ffi::ir_stream::IRErrorCode_Corrupted_IR if ffi::encode_message + * returns false + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_encoder_encode_eight_byte_log_message( + StringView log_message, + void* ir_encoder, + StringView* logtype, + Int64tSpan* vars, + StringView* dict_vars, + Int32tSpan* dict_var_end_offsets +); + +/** + * Given a log message, encode it into a CLP IR object with four byte encoding. + * An ir::Encoder must be provided to use as the backing storage for the + * corresponding Go ir.Encoder. All pointer parameters must be non-null (non-nil + * Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] log_message Log message to encode + * @param[in] ir_encoder ir::Encoder to be used as storage for the encoded log + * message + * @param[out] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[out] vars Array of encoded variables + * @param[out] dict_vars String containing all dictionary variables concatenated + * together + * @param[out] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @return ffi::ir_stream::IRErrorCode_Corrupted_IR if ffi::encode_message + * returns false + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_encoder_encode_four_byte_log_message( + StringView log_message, + void* ir_encoder, + StringView* logtype, + Int32tSpan* vars, + StringView* dict_vars, + Int32tSpan* dict_var_end_offsets +); + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_ENCODER_H diff --git a/cpp/src/ffi_go/ir/serializer.cpp b/cpp/src/ffi_go/ir/serializer.cpp new file mode 100644 index 0000000..0cd7bb9 --- /dev/null +++ b/cpp/src/ffi_go/ir/serializer.cpp @@ -0,0 +1,197 @@ +#include "serializer.h" + +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace ffi_go::ir { +using namespace ffi; +using namespace ffi::ir_stream; + +namespace { +/** + * Generic helper for ir_serializer_new_*_serializer_with_preamble functions. + */ +template +[[nodiscard]] auto new_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + [[maybe_unused]] epoch_time_ms_t reference_ts, + void** ir_serializer_ptr, + ByteSpan* ir_view +) -> int; + +/** + * Generic helper for ir_serializer_serialize_*_log_event functions. 
+ */ +template +[[nodiscard]] auto serialize_log_event( + StringView log_message, + epoch_time_ms_t timestamp_or_delta, + void* ir_serializer, + ByteSpan* ir_view +) -> int; + +template +auto new_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + [[maybe_unused]] epoch_time_ms_t reference_ts, + void** ir_serializer_ptr, + ByteSpan* ir_view +) -> int { + if (nullptr == ir_serializer_ptr || nullptr == ir_view) { + return static_cast(IRErrorCode_Corrupted_IR); + } + Serializer* serializer{new Serializer{}}; + if (nullptr == serializer) { + return static_cast(IRErrorCode_Corrupted_IR); + } + *ir_serializer_ptr = serializer; + + bool success{false}; + if constexpr (std::is_same_v) { + success = eight_byte_encoding::encode_preamble( + std::string_view{ts_pattern.m_data, ts_pattern.m_size}, + std::string_view{ts_pattern_syntax.m_data, ts_pattern_syntax.m_size}, + std::string_view{time_zone_id.m_data, time_zone_id.m_size}, + serializer->m_ir_buf + ); + } else if constexpr (std::is_same_v) { + success = four_byte_encoding::encode_preamble( + std::string_view{ts_pattern.m_data, ts_pattern.m_size}, + std::string_view{ts_pattern_syntax.m_data, ts_pattern_syntax.m_size}, + std::string_view{time_zone_id.m_data, time_zone_id.m_size}, + reference_ts, + serializer->m_ir_buf + ); + } else { + static_assert(cAlwaysFalse, "Invalid/unhandled encoding type"); + } + if (false == success) { + return static_cast(IRErrorCode_Corrupted_IR); + } + + ir_view->m_data = serializer->m_ir_buf.data(); + ir_view->m_size = serializer->m_ir_buf.size(); + return static_cast(IRErrorCode_Success); +} + +template +auto serialize_log_event( + StringView log_message, + epoch_time_ms_t timestamp_or_delta, + void* ir_serializer, + ByteSpan* ir_view +) -> int { + if (nullptr == ir_serializer || nullptr == ir_view) { + return static_cast(IRErrorCode_Corrupted_IR); + } + Serializer* serializer{static_cast(ir_serializer)}; + 
serializer->m_ir_buf.clear(); + + bool success{false}; + if constexpr (std::is_same_v) { + success = eight_byte_encoding::encode_message( + timestamp_or_delta, + std::string_view{log_message.m_data, log_message.m_size}, + serializer->m_logtype, + serializer->m_ir_buf + ); + } else if constexpr (std::is_same_v) { + success = four_byte_encoding::encode_message( + timestamp_or_delta, + std::string_view{log_message.m_data, log_message.m_size}, + serializer->m_logtype, + serializer->m_ir_buf + ); + } else { + static_assert(cAlwaysFalse, "Invalid/unhandled encoding type"); + } + if (false == success) { + return static_cast(IRErrorCode_Corrupted_IR); + } + + ir_view->m_data = serializer->m_ir_buf.data(); + ir_view->m_size = serializer->m_ir_buf.size(); + return static_cast(IRErrorCode_Success); +} +} // namespace + +CLP_FFI_GO_METHOD auto ir_serializer_close(void* ir_serializer) -> void { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + delete static_cast(ir_serializer); +} + +CLP_FFI_GO_METHOD auto ir_serializer_new_eight_byte_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + void** ir_serializer_ptr, + ByteSpan* ir_view +) -> int { + return new_serializer_with_preamble( + ts_pattern, + ts_pattern_syntax, + time_zone_id, + 0, + ir_serializer_ptr, + ir_view + ); +} + +CLP_FFI_GO_METHOD auto ir_serializer_new_four_byte_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + epoch_time_ms_t reference_ts, + void** ir_serializer_ptr, + ByteSpan* ir_view +) -> int { + return new_serializer_with_preamble( + ts_pattern, + ts_pattern_syntax, + time_zone_id, + reference_ts, + ir_serializer_ptr, + ir_view + ); +} + +CLP_FFI_GO_METHOD auto ir_serializer_serialize_eight_byte_log_event( + StringView log_message, + epoch_time_ms_t timestamp, + void* ir_serializer, + ByteSpan* ir_view +) -> int { + return serialize_log_event( + log_message, + timestamp, + 
+ ir_serializer, + ir_view + ); +} + +CLP_FFI_GO_METHOD auto ir_serializer_serialize_four_byte_log_event( + StringView log_message, + epoch_time_ms_t timestamp_delta, + void* ir_serializer, + ByteSpan* ir_view +) -> int { + return serialize_log_event( + log_message, + timestamp_delta, + ir_serializer, + ir_view + ); +} +} // namespace ffi_go::ir diff --git a/cpp/src/ffi_go/ir/serializer.h b/cpp/src/ffi_go/ir/serializer.h new file mode 100644 index 0000000..bd02f8c --- /dev/null +++ b/cpp/src/ffi_go/ir/serializer.h @@ -0,0 +1,110 @@ +#ifndef FFI_GO_IR_SERIALIZER_H +#define FFI_GO_IR_SERIALIZER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) + +#include +#include + +#include +#include + +/** + * Clean up the underlying ir::Serializer of a Go ir.Serializer. + * @param[in] ir_serializer Address of a ir::Serializer created and returned by + * ir_serializer_new_*_serializer_with_preamble + */ +CLP_FFI_GO_METHOD void ir_serializer_close(void* ir_serializer); + +/** + * Given the fields of a CLP IR preamble, serialize them into an IR byte stream + * with eight byte encoding. An ir::Serializer will be allocated to use as the + * backing storage for a Go ir.Serializer (i.e. subsequent calls to + * ir_serializer_serialize_*_log_event). All pointer parameters must be non-null + * (non-nil Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] ts_pattern Format string for the timestamp to be used when + * deserializing the IR + * @param[in] ts_pattern_syntax Type of the format string for understanding how + * to parse it + * @param[in] time_zone_id TZID timezone of the timestamps in the IR + * @param[out] ir_serializer_ptr Address of a new ir::Serializer + * @param[out] ir_view View of a IR buffer containing the serialized preamble + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::encode_preamble + */ +CLP_FFI_GO_METHOD int ir_serializer_new_eight_byte_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + void** ir_serializer_ptr, + ByteSpan* ir_view +); + +/** + * Given the fields of a CLP IR preamble, serialize them into an IR byte stream + * with four byte encoding. An ir::Serializer will be allocated to use as the + * backing storage for a Go ir.Serializer (i.e. subsequent calls to + * ir_serializer_serialize_*_log_event). All pointer parameters must be non-null + * (non-nil Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] ts_pattern Format string for the timestamp to be used when + * deserializing the IR + * @param[in] ts_pattern_syntax Type of the format string for understanding how + * to parse it + * @param[in] time_zone_id TZID timezone of the timestamps in the IR + * @param[out] ir_serializer_ptr Address of a new ir::Serializer + * @param[out] ir_view View of a IR buffer containing the serialized preamble + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::encode_preamble + */ +CLP_FFI_GO_METHOD int ir_serializer_new_four_byte_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + epoch_time_ms_t reference_ts, + void** ir_serializer_ptr, + ByteSpan* ir_view +); + +/** + * Given the fields of a log event, serialize them into an IR byte stream with + * eight byte encoding. 
An ir::Serializer must be provided to use as the backing + * storage for the corresponding Go ir.Serializer. All pointer parameters must + * be non-null (non-nil Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] log_message Log message of the log event to serialize + * @param[in] timestamp Timestamp of the log event to serialize + * @param[in] ir_serializer ir::Serializer object to be used as storage + * @param[out] ir_view View of a IR buffer containing the serialized log event + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::encode_message + */ +CLP_FFI_GO_METHOD int ir_serializer_serialize_eight_byte_log_event( + StringView log_message, + epoch_time_ms_t timestamp, + void* ir_serializer, + ByteSpan* ir_view +); + +/** + * Given the fields of a log event, serialize them into an IR byte stream with + * four byte encoding. An ir::Serializer must be provided to use as the backing + * storage for the corresponding Go ir.Serializer. All pointer parameters must + * be non-null (non-nil Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] log_message Log message to serialize + * @param[in] timestamp_delta Timestamp delta to the previous log event in the + * IR stream + * @param[in] ir_serializer ir::Serializer object to be used as storage + * @param[out] ir_view View of a IR buffer containing the serialized log event + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::encode_message + */ +CLP_FFI_GO_METHOD int ir_serializer_serialize_four_byte_log_event( + StringView log_message, + epoch_time_ms_t timestamp_delta, + void* ir_serializer, + ByteSpan* ir_view +); + +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_SERIALIZER_H diff --git a/cpp/src/ffi_go/ir/types.hpp b/cpp/src/ffi_go/ir/types.hpp new file mode 100644 index 0000000..848d08c --- /dev/null +++ b/cpp/src/ffi_go/ir/types.hpp @@ -0,0 +1,80 @@ +#ifndef FFI_GO_IR_LOG_TYPES_HPP +#define FFI_GO_IR_LOG_TYPES_HPP + +#include +#include +#include +#include + +#include + +#include + +namespace ffi_go::ir { + +template +[[maybe_unused]] constexpr bool cAlwaysFalse{false}; + +template +struct LogMessage { + auto reserve(size_t cap) -> void { m_logtype.reserve(cap); } + + std::string m_logtype; + std::vector m_vars; + std::vector m_dict_vars; + std::vector m_dict_var_end_offsets; +}; + +/** + * The backing storage for a Go ir.Decoder. + * Mutating a field will invalidate the corresponding View (slice) stored in the + * ir.Decoder (without any warning or way to guard in Go). + */ +struct Decoder { + ffi_go::LogMessage m_log_message; +}; + +/** + * The backing storage for a Go ir.Encoder. + * Mutating a field will invalidate the corresponding View (slice) stored in the + * ir.Encoder (without any warning or way to guard in Go). + */ +template +struct Encoder { + LogMessage m_log_message; +}; + +/** + * The backing storage for a Go ir.Deserializer. 
+ * Mutating a field will invalidate the corresponding View (slice) stored in the + * ir.Deserializer (without any warning or way to guard in Go). + */ +struct Deserializer { + ffi_go::LogEventStorage m_log_event; + ffi::epoch_time_ms_t m_timestamp{}; +}; + +/** + * The backing storage for a Go ir.Serializer. + * Mutating a field will invalidate the corresponding View (slice) stored in the + * ir.Serializer (without any warning or way to guard in Go). + */ +struct Serializer { + /** + * Reserve capacity for the logtype and ir buffer. + * We reserve 1.5x the size of the log message type as a heuristic for the + * full IR buffer size. The log message type of a log event is not + * guaranteed to be less than or equal to the size of the actual log + * message, but in general this is true. + */ + auto reserve(size_t cap) -> void { + m_logtype.reserve(cap); + m_ir_buf.reserve(cap + cap / 2); + } + + std::string m_logtype; + std::vector m_ir_buf; +}; +} // namespace ffi_go::ir + +#endif // FFI_GO_IR_LOG_TYPES_HPP diff --git a/cpp/src/ffi_go/search/wildcard_query.cpp b/cpp/src/ffi_go/search/wildcard_query.cpp new file mode 100644 index 0000000..8032720 --- /dev/null +++ b/cpp/src/ffi_go/search/wildcard_query.cpp @@ -0,0 +1,32 @@ +#include "wildcard_query.h" + +#include +#include + +#include + +#include +#include + +namespace ffi_go::search { +CLP_FFI_GO_METHOD auto wildcard_query_new(StringView query, void** ptr) -> StringView { + auto* clean{new std::string{ + clean_up_wildcard_search_string(std::string_view{query.m_data, query.m_size}) + }}; + *ptr = clean; + return {clean->data(), clean->size()}; +} + +CLP_FFI_GO_METHOD auto wildcard_query_delete(void* str) -> void { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) + delete static_cast(str); +} + +CLP_FFI_GO_METHOD auto wildcard_query_match(StringView target, WildcardQueryView query) -> int { + return static_cast(wildcard_match_unsafe( + {target.m_data, target.m_size}, + {query.m_query.m_data, query.m_query.m_size}, 
+ query.m_case_sensitive + )); +} +} // namespace ffi_go::search diff --git a/cpp/src/ffi_go/search/wildcard_query.h b/cpp/src/ffi_go/search/wildcard_query.h new file mode 100644 index 0000000..960beb1 --- /dev/null +++ b/cpp/src/ffi_go/search/wildcard_query.h @@ -0,0 +1,71 @@ +#ifndef FFI_GO_IR_WILDCARD_QUERY_H +#define FFI_GO_IR_WILDCARD_QUERY_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) +// NOLINTBEGIN(modernize-use-using) + +#include + +#include +#include + +/** + * A timestamp interval of [m_lower, m_upper). + */ +typedef struct { + epoch_time_ms_t m_lower; + epoch_time_ms_t m_upper; +} TimestampInterval; + +/** + * A view of a wildcard query passed down from Go. The query string is assumed + * to have been cleaned using the CLP function `clean_up_wildcard_search_string` + * on construction. m_case_sensitive is 1 for a case sensitive query (0 for case + * insensitive). + */ +typedef struct { + StringView m_query; + bool m_case_sensitive; +} WildcardQueryView; + +/** + * A view of a Go search.MergedWildcardQuery passed down through Cgo. The + * string is a concatenation of all wildcard queries, while m_end_offsets stores + * the size of each query. + */ +typedef struct { + StringView m_queries; + SizetSpan m_end_offsets; + BoolSpan m_case_sensitivity; +} MergedWildcardQueryView; + +/** + * Given a query string, allocate and return a clean string that is safe for + * matching. See `clean_up_wildcard_search_string` in CLP for more details. + * @param[in] query Query string to clean + * @param[out] ptr Address of a new std::string + * @return New string holding cleaned query + */ +CLP_FFI_GO_METHOD StringView wildcard_query_new(StringView query, void** ptr); + +/** + * Delete a std::string holding a wildcard query.
+ * @param[in] str Address of a std::string created and returned by + * wildcard_query_new + */ +CLP_FFI_GO_METHOD void wildcard_query_delete(void* str); + +/** + * Given a target string perform CLP wildcard matching using query. See + * `wildcard_match_unsafe` in CLP src/string_utils.hpp. + * @param[in] target String to perform matching on + * @param[in] query Query to use for matching + * @return 1 if query matches target, 0 otherwise + */ +CLP_FFI_GO_METHOD int wildcard_query_match(StringView target, WildcardQueryView query); + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_WILDCARD_QUERY_H diff --git a/cpp/src/ffi_go/types.hpp b/cpp/src/ffi_go/types.hpp new file mode 100644 index 0000000..0175249 --- /dev/null +++ b/cpp/src/ffi_go/types.hpp @@ -0,0 +1,27 @@ +#ifndef FFI_GO_LOG_TYPES_HPP +#define FFI_GO_LOG_TYPES_HPP + +#include +#include + +namespace ffi_go { +/** + * The backing storage for a Go ffi.LogMessageView. + * Mutating it will invalidate the corresponding View (slice) stored in the + * ffi.LogMessageView (without any warning or way to guard in Go). + */ +using LogMessage = std::string; + +/** + * The backing storage for a Go ffi.LogEventView. + * Mutating a field will invalidate the corresponding View (slice) stored in the + * ffi.LogEventView (without any warning or way to guard in Go).
+ */ +struct LogEventStorage { + auto reserve(size_t cap) -> void { m_log_message.reserve(cap); } + + LogMessage m_log_message; +}; +} // namespace ffi_go + +#endif // FFI_GO_LOG_TYPES_HPP diff --git a/cpp/src/ir/decoding.cpp b/cpp/src/ir/decoding.cpp deleted file mode 100644 index 7ec821f..0000000 --- a/cpp/src/ir/decoding.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "encoding.h" -#include -#include -#include - -#include -#include -#include - -using namespace ffi::ir_stream; - -int decode_preamble(void* buf_ptr, - size_t buf_size, - size_t* buf_offset, - int8_t* ir_encoding, - int8_t* metadata_type, - size_t* metadata_pos, - uint16_t* metadata_size) { - IrBuffer ir_buf{reinterpret_cast(buf_ptr), buf_size}; - ir_buf.set_cursor_pos(*buf_offset); - - bool four_byte_encoding; - if (IRErrorCode err{get_encoding_type(ir_buf, four_byte_encoding)}; - IRErrorCode_Success != err) { - return static_cast(err); - } - *ir_encoding = four_byte_encoding ? 1 : 0; - - if (IRErrorCode err{decode_preamble(ir_buf, *metadata_type, *metadata_pos, *metadata_size)}; - IRErrorCode_Success != err) { - return static_cast(err); - } - - *buf_offset = ir_buf.get_cursor_pos(); - return static_cast(IRErrorCode_Success); -} - -int decode_next_log_event(IRErrorCode (*decode_fp)(IrBuffer&, std::string&, epoch_time_ms_t&), - void* buf_ptr, - size_t buf_size, - size_t* buf_offset, - void** log_event_ptr, - char** log_event, - size_t* log_event_size, - epoch_time_ms_t* timestamp) { - IrBuffer ir_buf{reinterpret_cast(buf_ptr), buf_size}; - ir_buf.set_cursor_pos(*buf_offset); - auto event = std::make_unique(buf_size); - if (IRErrorCode err{decode_fp(ir_buf, event->msg, *timestamp)}; IRErrorCode_Success != err) { - return static_cast(err); - } - *buf_offset = ir_buf.get_cursor_pos(); - - *log_event = event->msg.data(); - *log_event_size = event->msg.size(); - *log_event_ptr = event.release(); - return static_cast(IRErrorCode_Success); -} - -int eight_byte_decode_next_log_event(void* buf_ptr, - size_t 
buf_size, - size_t* buf_offset, - void** log_event_ptr, - char** log_event, - size_t* log_event_size, - epoch_time_ms_t* timestamp) { - return decode_next_log_event(eight_byte_encoding::decode_next_message, - buf_ptr, - buf_size, - buf_offset, - log_event_ptr, - log_event, - log_event_size, - timestamp); -} - -int four_byte_decode_next_log_event(void* buf_ptr, - size_t buf_size, - size_t* buf_offset, - void** log_event_ptr, - char** log_event, - size_t* log_event_size, - epoch_time_ms_t* timestamp_delta) { - return decode_next_log_event(four_byte_encoding::decode_next_message, - buf_ptr, - buf_size, - buf_offset, - log_event_ptr, - log_event, - log_event_size, - timestamp_delta); -} diff --git a/cpp/src/ir/decoding.h b/cpp/src/ir/decoding.h deleted file mode 100644 index e0e1632..0000000 --- a/cpp/src/ir/decoding.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef IR_DECODING_H -#define IR_DECODING_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -typedef int64_t epoch_time_ms_t; - -int decode_preamble(void* buf_ptr, - size_t buf_size, - size_t* buf_offset, - int8_t* ir_encoding, - int8_t* metadata_type, - size_t* metadata_pos, - uint16_t* metadata_size); - -int eight_byte_decode_next_log_event(void* buf_ptr, - size_t buf_size, - size_t* buf_offset, - void** decoded_message_ptr, - char** message, - size_t* message_size, - epoch_time_ms_t* timestamp); - -int four_byte_decode_next_log_event(void* buf_ptr, - size_t buf_size, - size_t* buf_offset, - void** message_ptr, - char** message, - size_t* message_size, - epoch_time_ms_t* timestamp_delta); - -#ifdef __cplusplus -} -#endif - -#endif // IR_DECODING_H diff --git a/cpp/src/ir/encoding.cpp b/cpp/src/ir/encoding.cpp deleted file mode 100644 index b7070cf..0000000 --- a/cpp/src/ir/encoding.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -struct IrStreamState { - std::string logtype; - std::vector ir_buf; -}; - -void* eight_byte_encode_preamble(void* 
ts_pattern_ptr, - size_t ts_pattern_size, - void* ts_pattern_syntax_ptr, - size_t ts_pattern_syntax_size, - void* time_zone_id_ptr, - size_t time_zone_id_size, - void** ir_buf_ptr, - void* ir_buf_size) { - std::string_view ts_pattern(reinterpret_cast(ts_pattern_ptr), ts_pattern_size); - std::string_view ts_pattern_syntax(reinterpret_cast(ts_pattern_syntax_ptr), - ts_pattern_syntax_size); - std::string_view time_zone_id(reinterpret_cast(time_zone_id_ptr), - time_zone_id_size); - - IrStreamState* irs = new IrStreamState(); - if (false == ffi::ir_stream::eight_byte_encoding::encode_preamble( - ts_pattern, ts_pattern_syntax, time_zone_id, irs->ir_buf)) { - delete irs; - return nullptr; - } - - *ir_buf_ptr = irs->ir_buf.data(); - *static_cast(ir_buf_size) = irs->ir_buf.size(); - return irs; -} - -void* four_byte_encode_preamble(void* ts_pattern_ptr, - size_t ts_pattern_size, - void* ts_pattern_syntax_ptr, - size_t ts_pattern_syntax_size, - void* time_zone_id_ptr, - size_t time_zone_id_size, - ffi::epoch_time_ms_t reference_ts, - void** ir_buf_ptr, - void* ir_buf_size) { - std::string_view ts_pattern(reinterpret_cast(ts_pattern_ptr), ts_pattern_size); - std::string_view ts_pattern_syntax(reinterpret_cast(ts_pattern_syntax_ptr), - ts_pattern_syntax_size); - std::string_view time_zone_id(reinterpret_cast(time_zone_id_ptr), - time_zone_id_size); - - IrStreamState* irs = new IrStreamState(); - if (false == ffi::ir_stream::four_byte_encoding::encode_preamble( - ts_pattern, ts_pattern_syntax, time_zone_id, reference_ts, irs->ir_buf)) { - delete irs; - return nullptr; - } - - *ir_buf_ptr = irs->ir_buf.data(); - *static_cast(ir_buf_size) = irs->ir_buf.size(); - return irs; -} - -int encode_message( - bool (*em_fp)(ffi::epoch_time_ms_t, std::string_view, std::string&, std::vector&), - void* irstream, - ffi::epoch_time_ms_t timestamp_or_delta, - void* message_ptr, - size_t message_size, - void** ir_buf_ptr, - void* ir_buf_size) { - IrStreamState* irs(reinterpret_cast(irstream)); - 
std::string_view message(reinterpret_cast(message_ptr), message_size); - irs->ir_buf.clear(); - if (false == em_fp(timestamp_or_delta, message, irs->logtype, irs->ir_buf)) { - return -1; - } - *ir_buf_ptr = irs->ir_buf.data(); - *static_cast(ir_buf_size) = irs->ir_buf.size(); - return 0; -} - -int eight_byte_encode_message(void* irstream, - ffi::epoch_time_ms_t timestamp, - void* message_ptr, - size_t message_size, - void** ir_buf_ptr, - void* ir_buf_size) { - return encode_message(ffi::ir_stream::eight_byte_encoding::encode_message, - irstream, - timestamp, - message_ptr, - message_size, - ir_buf_ptr, - ir_buf_size); -} - -int four_byte_encode_message(void* irstream, - ffi::epoch_time_ms_t timestamp_delta, - void* message_ptr, - size_t message_size, - void** ir_buf_ptr, - void* ir_buf_size) { - return encode_message(ffi::ir_stream::four_byte_encoding::encode_message, - irstream, - timestamp_delta, - message_ptr, - message_size, - ir_buf_ptr, - ir_buf_size); -} - -void delete_ir_stream_state(void* irs) { delete (IrStreamState*)irs; } diff --git a/cpp/src/ir/encoding.h b/cpp/src/ir/encoding.h deleted file mode 100644 index 774399f..0000000 --- a/cpp/src/ir/encoding.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef IR_ENCODING_H -#define IR_ENCODING_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -// TODO: replace with clp c-compatible header once it is created -typedef int64_t epoch_time_ms_t; - -void* eight_byte_encode_preamble(void* ts_pattern_ptr, - size_t ts_pattern_size, - void* ts_pattern_syntax_ptr, - size_t ts_pattern_syntax_size, - void* time_zone_id_ptr, - size_t time_zone_id_size, - void** ir_buf_ptr, - void* ir_buf_size); -void* four_byte_encode_preamble(void* ts_pattern_ptr, - size_t ts_pattern_size, - void* ts_pattern_syntax_ptr, - size_t ts_pattern_syntax_size, - void* time_zone_id_ptr, - size_t time_zone_id_size, - epoch_time_ms_t reference_ts, - void** ir_buf_ptr, - void* ir_buf_size); - -int eight_byte_encode_message(void* irstream, - 
epoch_time_ms_t timestamp, - void* message_ptr, - size_t message_size, - void** ir_buf_ptr, - void* ir_buf_size); -int four_byte_encode_message(void* irstream, - epoch_time_ms_t timestamp_delta, - void* message_ptr, - size_t message_size, - void** ir_buf_ptr, - void* ir_buf_size); - -void delete_ir_stream_state(void* irs); - -#ifdef __cplusplus -} -#endif - -#endif // IR_ENCODING_H diff --git a/cpp/src/log_event.h b/cpp/src/log_event.h deleted file mode 100644 index f130477..0000000 --- a/cpp/src/log_event.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef LOG_EVENT_H -#define LOG_EVENT_H - -#ifdef __cplusplus -extern "C" { -#endif - -void delete_log_event(void* log_event); - -#ifdef __cplusplus -} -#endif - -#endif // LOG_EVENT_H diff --git a/cpp/src/message/encoding.cpp b/cpp/src/message/encoding.cpp deleted file mode 100644 index f6c868d..0000000 --- a/cpp/src/message/encoding.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -// Note: dict_var_end_offsets is int32_t due to JNI putting limitations on -// encode_message. 
-struct EncodedMessage { - std::string logtype; - std::vector vars; - std::vector dict_vars; - std::vector dict_var_end_offsets; -}; - -int decode_message(void* encoded_msg, - void** log_event_ptr, - char** log_event, - size_t* log_event_size) { - EncodedMessage* em(reinterpret_cast(encoded_msg)); - auto event = std::make_unique(em->logtype.size() * 2); - event->msg = ffi::decode_message( - em->logtype, - em->vars.data(), - em->vars.size(), - std::string_view(reinterpret_cast(em->dict_vars.data()), - em->dict_vars.size()), - em->dict_var_end_offsets.data(), - em->dict_var_end_offsets.size()); - - *log_event = event->msg.data(); - *log_event_size = event->msg.size(); - *log_event_ptr = event.release(); - return 0; -} - -void* encode_message(void* src_msg, - size_t src_size, - void** logtype, - void* logtype_size, - void** vars, - void* vars_size, - void** dict_vars, - void* dict_vars_size, - void** dict_var_end_offsets, - void* dict_var_end_offsets_size) { - // We cannot use unique_ptr here as we want the Go code to hold any - // references. Storing references in cpp (to avoid the unique_ptr falling - // out of scope) means we need to synchronize the updates to that storage - // as different go user threads could either encode a new message or free a - // stored encoded message. We also cannot return/move a unique_ptr back up - // to Go. 
- EncodedMessage* em = new EncodedMessage(); - std::string_view msg(reinterpret_cast(src_msg), src_size); - - std::vector dict_var_offsets; - if (false == ffi::encode_message(msg, em->logtype, em->vars, dict_var_offsets)) { - delete em; - return nullptr; - } - - // dict_var_offsets contains begin_pos followed by end_pos of each - // dictionary variable in msg - int32_t prev_end_off = 0; - for (size_t i = 0; i < dict_var_offsets.size(); i += 2) { - int32_t begin_pos = dict_var_offsets[i]; - int32_t end_pos = dict_var_offsets[i + 1]; - em->dict_vars.insert(em->dict_vars.begin() + prev_end_off, - msg.begin() + begin_pos, - msg.begin() + end_pos); - prev_end_off = prev_end_off + (end_pos - begin_pos); - em->dict_var_end_offsets.push_back(prev_end_off); - } - - *logtype = em->logtype.data(); - *static_cast(logtype_size) = em->logtype.size(); - *vars = em->vars.data(); - *static_cast(vars_size) = em->vars.size(); - *dict_vars = em->dict_vars.data(); - *static_cast(dict_vars_size) = em->dict_vars.size(); - *dict_var_end_offsets = em->dict_var_end_offsets.data(); - *static_cast(dict_var_end_offsets_size) = em->dict_var_end_offsets.size(); - return em; -} - -void delete_encoded_message(void* encoded_msg) { delete (EncodedMessage*)encoded_msg; } diff --git a/cpp/src/message/encoding.h b/cpp/src/message/encoding.h deleted file mode 100644 index b465172..0000000 --- a/cpp/src/message/encoding.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef MESSAGE_ENCODING_H -#define MESSAGE_ENCODING_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -int decode_message(void* encoded_msg, - void** log_event_ptr, - char** log_event, - size_t* log_event_size); - -void* encode_message(void* src_msg, - size_t src_size, - void** logtype, - void* logtype_size, - void** vars, - void* vars_size, - void** dict_vars, - void* dict_vars_size, - void** dict_var_end_offsets, - void* dict_var_end_offsets_size); - -void delete_encoded_message(void* encoded_msg); - -#ifdef __cplusplus -} -#endif - 
-#endif // MESSAGE_ENCODING_H diff --git a/ffi/BUILD.bazel b/ffi/BUILD.bazel index e0c5912..49b023b 100644 --- a/ffi/BUILD.bazel +++ b/ffi/BUILD.bazel @@ -3,12 +3,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "ffi", srcs = [ - "cgo_amd64.go", - "cgo_arm64.go", "ffi.go", ], - cgo = True, - cdeps = ["//:libclp_ffi"], importpath = "github.com/y-scope/clp-ffi-go/ffi", visibility = ["//visibility:public"], ) diff --git a/ffi/cgo_amd64.go b/ffi/cgo_amd64.go deleted file mode 100644 index a698102..0000000 --- a/ffi/cgo_amd64.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !external && amd64 - -package ffi - -/* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ -#cgo linux LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_linux_amd64 -Wl,-rpath=${SRCDIR}/../lib/ -#cgo darwin LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_darwin_amd64 -Wl,-rpath=${SRCDIR}/../lib/ -*/ -import "C" diff --git a/ffi/cgo_arm64.go b/ffi/cgo_arm64.go deleted file mode 100644 index e2e23f0..0000000 --- a/ffi/cgo_arm64.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !external && arm64 - -package ffi - -/* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ -#cgo linux LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_linux_arm64 -Wl,-rpath=${SRCDIR}/../lib/ -#cgo darwin LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_darwin_arm64 -Wl,-rpath=${SRCDIR}/../lib/ -*/ -import "C" diff --git a/ffi/cgo_external.go b/ffi/cgo_external.go deleted file mode 100644 index cd314f6..0000000 --- a/ffi/cgo_external.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build external - -// When using `external` build manually set linkage with `CGO_LDFLAGS`. -package ffi - -/* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ -#cgo external LDFLAGS: -*/ -import "C" diff --git a/ffi/ffi.go b/ffi/ffi.go index 3f8105d..7d19294 100644 --- a/ffi/ffi.go +++ b/ffi/ffi.go @@ -1,69 +1,32 @@ +// The ffi package defines types for log events used in logging functions and +// libraries, without CLP encoding or serialization. 
package ffi -/* -#include -*/ -import "C" - -import ( - "runtime" - "unsafe" -) - -// Mirrors cpp type epoch_time_ms_t defined in: -// src/ir/encoding.h -// src/ir/decoding.h +// Mirrors cpp type epoch_time_ms_t type EpochTimeMs int64 -type cppReference struct { - cptr unsafe.Pointer -} - -type LogMessage struct { - Msg []byte - cref *cppReference -} - -// Creates a new LogMessage backed by C-allocated memory and sets -// [finalizeLogMessage] as a finalizer. -func NewLogMessage (msg unsafe.Pointer, msgSize uint64, obj unsafe.Pointer) LogMessage { - ref := &cppReference{obj} - log := LogMessage{unsafe.Slice((*byte)(msg), msgSize), ref} - runtime.SetFinalizer(ref, finalizeLogMessage) - return log -} - -// DeleteLogMessage calls down to C where any additional clean up occurs before -// calling delete on the stored class pointer. After calling this function log -// is in an empty/nil state and the finalizer is unset. This function is only -// useful if the memory overhead of relying on the finalizer to call delete is -// a concern. -func DeleteLogMessage(log *LogMessage) { - if nil != log.cref { - log.Msg = nil - C.delete_log_event(log.cref.cptr) - runtime.SetFinalizer(log.cref, nil) - log.cref = nil - } -} - -// All LogMessages created with NewLogMessage will use this function as a -// finalizer to mimic GC. If memory overhead is a concern call -// [DeleteLogMessage] to immediately call delete (it will also clean up -// LogMessage and guards against double free). -// -// The rules for finalizers running are not perfectly equivalent to -// Go-allocated memory being GC'd, but in the case of LogMessages the -// C-allocated memory should eventually be deleted in similar fashion to a -// Go-allocated equivalent object. See -// https://pkg.go.dev/runtime#SetFinalizer. -func finalizeLogMessage(obj *cppReference) { - if nil != obj { - C.delete_log_event(obj.cptr) - } -} +// A ffi.LogMessage represents the text (message) component of a log event. 
+// A LogMessageView is a LogMessage that is backed by C++ allocated memory +// rather than the Go heap. A LogMessageView, x, is valid when returned and will +// remain valid until a new LogMessageView is returned by the same object (e.g. +// an ir.Deserializer) that returns x. +type ( + LogMessageView = string + LogMessage = string +) +// LogEvent provides programmatic access to the various components of a log +// event. type LogEvent struct { LogMessage Timestamp EpochTimeMs } + +// LogEventView memory is allocated and owned by the C++ object (e.g., reader, +// deserializer) that returns it. Reusing a LogEventView after the same object +// has issued a new view leads to undefined behavior, as different objects +// manage their own memory independently. +type LogEventView struct { + LogMessageView + Timestamp EpochTimeMs +} diff --git a/generate.go b/generate.go index 260f04c..50602d6 100644 --- a/generate.go +++ b/generate.go @@ -2,4 +2,4 @@ //go:generate cmake --build cpp/build -j //go:generate cmake --install cpp/build --prefix . -package ffi +package clp_ffi_go diff --git a/go.mod b/go.mod index 13e2c3a..3185152 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/y-scope/clp-ffi-go -go 1.19 +go 1.20 require github.com/klauspost/compress v1.16.5 diff --git a/include/ffi_go/api_decoration.h b/include/ffi_go/api_decoration.h new file mode 100644 index 0000000..0a5ff4a --- /dev/null +++ b/include/ffi_go/api_decoration.h @@ -0,0 +1,21 @@ +#ifndef FFI_GO_API_DECORATION_H +#define FFI_GO_API_DECORATION_H + +/** + * If the file is compiled with a C++ compiler, `extern "C"` must be defined to + * ensure C linkage. + */ +#ifdef __cplusplus +#define CLP_FFI_GO_EXTERN_C extern "C" +#else +#define CLP_FFI_GO_EXTERN_C +#endif + +/** + * `CLP_FFI_GO_METHOD` should be added at the beginning of a function's + * declaration/implementation to decorate any APIs that are exposed to the + * Golang layer. 
+ */ +#define CLP_FFI_GO_METHOD CLP_FFI_GO_EXTERN_C + +#endif diff --git a/include/ffi_go/defs.h b/include/ffi_go/defs.h new file mode 100644 index 0000000..563c89a --- /dev/null +++ b/include/ffi_go/defs.h @@ -0,0 +1,72 @@ +#ifndef FFI_GO_DEF_H +#define FFI_GO_DEF_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-using) + +#include +#include +#include + +// TODO: replace with clp c-compatible header once it exists +typedef int64_t epoch_time_ms_t; + +/** + * A span of a bool array passed down through Cgo. + */ +typedef struct { + bool* m_data; + size_t m_size; +} BoolSpan; + +/** + * A span of a byte array passed down through Cgo. + */ +typedef struct { + void* m_data; + size_t m_size; +} ByteSpan; + +/** + * A span of a Go int32 array passed down through Cgo. + */ +typedef struct { + int32_t* m_data; + size_t m_size; +} Int32tSpan; + +/** + * A span of a Go int64 array passed down through Cgo. + */ +typedef struct { + int64_t* m_data; + size_t m_size; +} Int64tSpan; + +/** + * A span of a Go int/C.size_t array passed down through Cgo. + */ +typedef struct { + size_t* m_data; + size_t m_size; +} SizetSpan; + +/** + * A view of a Go string passed down through Cgo. + */ +typedef struct { + char const* m_data; + size_t m_size; +} StringView; + +/** + * A view of a Go ffi.LogEvent passed down through Cgo. 
+ */ +typedef struct { + StringView m_log_message; + epoch_time_ms_t m_timestamp; +} LogEventView; + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_DEF_H diff --git a/include/ffi_go/ir/decoder.h b/include/ffi_go/ir/decoder.h new file mode 100644 index 0000000..5d003be --- /dev/null +++ b/include/ffi_go/ir/decoder.h @@ -0,0 +1,88 @@ +#ifndef FFI_GO_IR_DECODER_H +#define FFI_GO_IR_DECODER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) +// NOLINTBEGIN(modernize-use-using) + +#include +#include + +#include +#include + +/** + * Create a ir::Decoder used as the underlying data storage for a Go ir.Decoder. + * @return New ir::Decoder's address + */ +CLP_FFI_GO_METHOD void* ir_decoder_new(); + +/** + * Clean up the underlying ir::Decoder of a Go ir.Decoder. + * @param[in] decoder Address of an ir::Decoder created and returned by + * ir_decoder_new + */ +CLP_FFI_GO_METHOD void ir_decoder_close(void* decoder); + +/** + * Given the fields of a CLP IR encoded log message with eight byte encoding, + * decode it into the original log message. An ir::Decoder must be provided to + * use as the backing storage for the corresponding Go ir.Decoder. All pointer + * parameters must be non-null (non-nil Cgo C. pointer or unsafe.Pointer + * from Go).
+ * @param[in] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[in] vars Array of encoded variables + * @param[in] dict_vars String containing all dictionary variables concatenated + * together + * @param[in] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @param[in] ir_decoder ir::Decoder to be used as storage for the decoded log + * message + * @param[out] log_message Decoded log message + * @return ffi::ir_stream::IRErrorCode_Decode_Error if ffi::decode_message + * throws or errors + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_decoder_decode_eight_byte_log_message( + StringView logtype, + Int64tSpan vars, + StringView dict_vars, + Int32tSpan dict_var_end_offsets, + void* ir_decoder, + StringView* log_message +); + +/** + * Given the fields of a CLP IR encoded log message with four byte encoding, + * decode it into the original log message. An ir::Decoder must be provided to + * use as the backing storage for the corresponding Go ir.Decoder. All pointer + * parameters must be non-null (non-nil Cgo C. pointer or unsafe.Pointer + * from Go). 
+ * @param[in] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[in] vars Array of encoded variables + * @param[in] dict_vars String containing all dictionary variables concatenated + * together + * @param[in] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @param[in] ir_decoder ir::Decoder to be used as storage for the decoded log + * message + * @param[out] log_message Decoded log message + * @return ffi::ir_stream::IRErrorCode_Decode_Error if ffi::decode_message + * throws or errors + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_decoder_decode_four_byte_log_message( + StringView logtype, + Int32tSpan vars, + StringView dict_vars, + Int32tSpan dict_var_end_offsets, + void* ir_decoder, + StringView* log_message +); + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_DECODER_H diff --git a/include/ffi_go/ir/deserializer.h b/include/ffi_go/ir/deserializer.h new file mode 100644 index 0000000..71ae1ab --- /dev/null +++ b/include/ffi_go/ir/deserializer.h @@ -0,0 +1,158 @@ +#ifndef FFI_GO_IR_DESERIALIZER_H +#define FFI_GO_IR_DESERIALIZER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) + +#include +#include + +#include +#include +#include + +/** + * Clean up the underlying ir::Deserializer of a Go ir.Deserializer. + * @param[in] ir_deserializer The address of a ir::Deserializer created and + * returned by ir_deserializer_new_deserializer_with_preamble + */ +CLP_FFI_GO_METHOD void ir_deserializer_close(void* ir_deserializer); + +/** + * Given a CLP IR buffer (any encoding), attempt to deserialize a preamble and + * extract its information. 
An ir::Deserializer will be allocated to use as the + * backing storage for a Go ir.Deserializer (i.e. subsequent calls to + * ir_deserializer_deserialize_*_log_event). It is left to the Go layer to read + * the metadata based on the returned type. All pointer parameters must be + * non-null (non-nil Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[out] ir_pos Position in ir_view read to + * @param[out] ir_encoding IR encoding type (1: four byte, 0: eight byte) + * @param[out] metadata_type Type of metadata in preamble (e.g. json) + * @param[out] metadata_pos Position in ir_view where the metadata begins + * @param[out] metadata_size Size of the metadata (in bytes) + * @param[out] ir_deserializer_ptr Address of a new ir::Deserializer + * @param[out] timestamp_ptr Address of m_timestamp inside the ir::Deserializer + * to be filled in by Go using the metadata contents + * @return ffi::ir_stream::IRErrorCode forwarded from either + * ffi::ir_stream::get_encoding_type or ffi::ir_stream::decode_preamble + */ +CLP_FFI_GO_METHOD int ir_deserializer_new_deserializer_with_preamble( + ByteSpan ir_view, + size_t* ir_pos, + int8_t* ir_encoding, + int8_t* metadata_type, + size_t* metadata_pos, + uint16_t* metadata_size, + void** ir_deserializer_ptr, + void** timestamp_ptr +); + +/** + * Given a CLP IR buffer with eight byte encoding, deserialize the next log + * event. Returns the components of the found log event and the buffer position + * it ends at. All pointer parameters must be non-null (non-nil Cgo C. + * pointer or unsafe.Pointer from Go). 
+ * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::decode_next_message + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_eight_byte_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +); + +/** + * Given a CLP IR buffer with four byte encoding, deserialize the next log + * event. Returns the components of the found log event and the buffer position + * it ends at. All pointer parameters must be non-null (non-nil Cgo C. + * pointer or unsafe.Pointer from Go). + * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::decode_next_message + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_four_byte_log_event( + ByteSpan ir_view, + void* ir_deserializer, + size_t* ir_pos, + LogEventView* log_event +); + +/** + * Given a CLP IR buffer with eight byte encoding, deserialize the next log + * event until finding an event that is both within the time interval and + * matches any query. If queries is empty, the first log event within the time + * interval is treated as a match. Returns the components of the found log event + * and the buffer position it ends at. All pointer parameters must be non-null + * (non-nil Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[in] time_interval Timestamp interval: [lower, upper) + * @param[in] merged_query A concatenation of all queries to filter for; if + * empty, any log event is treated as a match + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @param[out] matching_query Index into queries of the first matching query or + * 0 if queries is empty + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::decode_next_message + * @return ffi::ir_stream::IRErrorCode_Unsupported_Version + 1 if no query is + * found before time_interval.m_upper (TODO this should be replaced/fixed in + * clp core) + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_eight_byte_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + TimestampInterval time_interval, + MergedWildcardQueryView merged_query, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +); + +/** + * Given a CLP IR buffer with four byte encoding, deserialize the next log event + * until finding an event that is both within the time interval and matches any + * query. If queries is empty, the first log event within the time interval is + * treated as a match. Returns the components of the found log event and the + * buffer position it ends at. All pointer parameters must be non-null (non-nil + * Cgo C. pointer or unsafe.Pointer from Go).
+ * @param[in] ir_view Byte buffer/slice containing CLP IR + * @param[in] ir_deserializer ir::Deserializer to be used as storage for a found + * log event + * @param[in] time_interval Timestamp interval: [lower, upper) + * @param[in] merged_query A concatenation of all queries to filter for; if + * empty any log event as a match + * @param[out] ir_pos Position in ir_view read to + * @param[out] log_event Log event stored in ir_deserializer + * @param[out] matching_query Index into queries of the matching query + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::decode_next_message + * @return ffi::ir_stream::IRErrorCode_Unsupported_Version + 1 if no query is + * found before time_interval.m_upper (TODO this should be replaced/fix in + * clp core) + */ +CLP_FFI_GO_METHOD int ir_deserializer_deserialize_four_byte_wildcard_match( + ByteSpan ir_view, + void* ir_deserializer, + TimestampInterval time_interval, + MergedWildcardQueryView merged_query, + size_t* ir_pos, + LogEventView* log_event, + size_t* matching_query +); + +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_DESERIALIZER_H diff --git a/include/ffi_go/ir/encoder.h b/include/ffi_go/ir/encoder.h new file mode 100644 index 0000000..d1ae99e --- /dev/null +++ b/include/ffi_go/ir/encoder.h @@ -0,0 +1,98 @@ +#ifndef FFI_GO_IR_ENCODER_H +#define FFI_GO_IR_ENCODER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) +// NOLINTBEGIN(modernize-use-using) + +#include +#include + +#include +#include + +/** + * Create a ir::Encoder used as the underlying data storage for a Go ir.Encoder. 
+ * @return New ir::Encoder's address + */ +CLP_FFI_GO_METHOD void* ir_encoder_eight_byte_new(); + +/** + * @copydoc ir_encoder_eight_byte_new() + */ +CLP_FFI_GO_METHOD void* ir_encoder_four_byte_new(); + +/** + * Clean up the underlying ir::Encoder of a Go ir.Encoder. + * @param[in] ir_encoder Address of a ir::Encoder created and returned by + * ir_encoder_eight_byte_new + */ +CLP_FFI_GO_METHOD void ir_encoder_eight_byte_close(void* ir_encoder); + +/** + * Clean up the underlying ir::Encoder of a Go ir.Encoder. + * @param[in] ir_encoder Address of a ir::Encoder created and returned by + * ir_encoder_four_byte_new + */ +CLP_FFI_GO_METHOD void ir_encoder_four_byte_close(void* ir_encoder); + +/** + * Given a log message, encode it into a CLP IR object with eight byte encoding. + * An ir::Encoder must be provided to use as the backing storage for the + * corresponding Go ir.Encoder. All pointer parameters must be non-null (non-nil + * Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] log_message Log message to encode + * @param[in] ir_encoder ir::Encoder to be used as storage for the encoded log + * message + * @param[out] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[out] vars Array of encoded variables + * @param[out] dict_vars String containing all dictionary variables concatenated + * together + * @param[out] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @return ffi::ir_stream::IRErrorCode_Corrupted_IR if ffi::encode_message + * returns false + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_encoder_encode_eight_byte_log_message( + StringView log_message, + void* ir_encoder, + StringView* logtype, + Int64tSpan* vars, + StringView* dict_vars, + Int32tSpan* dict_var_end_offsets +); + +/** + * Given a log message, encode it into a CLP IR object with four byte encoding. 
+ * An ir::Encoder must be provided to use as the backing storage for the + * corresponding Go ir.Encoder. All pointer parameters must be non-null (non-nil + * Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] log_message Log message to encode + * @param[in] ir_encoder ir::Encoder to be used as storage for the encoded log + * message + * @param[out] logtype Type of the log message (the log message with variables + * extracted and replaced with placeholders) + * @param[out] vars Array of encoded variables + * @param[out] dict_vars String containing all dictionary variables concatenated + * together + * @param[out] dict_var_end_offsets Array of offsets into dict_vars marking the + * end of a dictionary variable + * @return ffi::ir_stream::IRErrorCode_Corrupted_IR if ffi::encode_message + * returns false + * @return ffi::ir_stream::IRErrorCode_Success on success + */ +CLP_FFI_GO_METHOD int ir_encoder_encode_four_byte_log_message( + StringView log_message, + void* ir_encoder, + StringView* logtype, + Int32tSpan* vars, + StringView* dict_vars, + Int32tSpan* dict_var_end_offsets +); + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_ENCODER_H diff --git a/include/ffi_go/ir/serializer.h b/include/ffi_go/ir/serializer.h new file mode 100644 index 0000000..bd02f8c --- /dev/null +++ b/include/ffi_go/ir/serializer.h @@ -0,0 +1,110 @@ +#ifndef FFI_GO_IR_SERIALIZER_H +#define FFI_GO_IR_SERIALIZER_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) + +#include +#include + +#include +#include + +/** + * Clean up the underlying ir::Serializer of a Go ir.Serializer. 
+ * @param[in] ir_serializer Address of an ir::Serializer created and returned by + * ir_serializer_new_*_serializer_with_preamble + */ +CLP_FFI_GO_METHOD void ir_serializer_close(void* ir_serializer); + +/** + * Given the fields of a CLP IR preamble, serialize them into an IR byte stream + * with eight byte encoding. An ir::Serializer will be allocated to use as the + * backing storage for a Go ir.Serializer (i.e. subsequent calls to + * ir_serializer_serialize_*_log_event). All pointer parameters must be non-null + * (non-nil Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] ts_pattern Format string for the timestamp to be used when + * deserializing the IR + * @param[in] ts_pattern_syntax Type of the format string for understanding how + * to parse it + * @param[in] time_zone_id TZID timezone of the timestamps in the IR + * @param[out] ir_serializer_ptr Address of a new ir::Serializer + * @param[out] ir_view View of an IR buffer containing the serialized preamble + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::encode_preamble + */ +CLP_FFI_GO_METHOD int ir_serializer_new_eight_byte_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + void** ir_serializer_ptr, + ByteSpan* ir_view +); + +/** + * Given the fields of a CLP IR preamble, serialize them into an IR byte stream + * with four byte encoding. An ir::Serializer will be allocated to use as the + * backing storage for a Go ir.Serializer (i.e. subsequent calls to + * ir_serializer_serialize_*_log_event). All pointer parameters must be non-null + * (non-nil Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] ts_pattern Format string for the timestamp to be used when + * deserializing the IR + * @param[in] ts_pattern_syntax Type of the format string for understanding how + * to parse it + * @param[in] time_zone_id TZID timezone of the timestamps in the IR + * @param[out] ir_serializer_ptr Address of a new ir::Serializer + * @param[out] ir_view View of a IR buffer containing the serialized preamble + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::encode_preamble + */ +CLP_FFI_GO_METHOD int ir_serializer_new_four_byte_serializer_with_preamble( + StringView ts_pattern, + StringView ts_pattern_syntax, + StringView time_zone_id, + epoch_time_ms_t reference_ts, + void** ir_serializer_ptr, + ByteSpan* ir_view +); + +/** + * Given the fields of a log event, serialize them into an IR byte stream with + * eight byte encoding. An ir::Serializer must be provided to use as the backing + * storage for the corresponding Go ir.Serializer. All pointer parameters must + * be non-null (non-nil Cgo C. pointer or unsafe.Pointer from Go). + * @param[in] log_message Log message of the log event to serialize + * @param[in] timestamp Timestamp of the log event to serialize + * @param[in] ir_serializer ir::Serializer object to be used as storage + * @param[out] ir_view View of a IR buffer containing the serialized log event + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::eight_byte_encoding::encode_message + */ +CLP_FFI_GO_METHOD int ir_serializer_serialize_eight_byte_log_event( + StringView log_message, + epoch_time_ms_t timestamp, + void* ir_serializer, + ByteSpan* ir_view +); + +/** + * Given the fields of a log event, serialize them into an IR byte stream with + * four byte encoding. An ir::Serializer must be provided to use as the backing + * storage for the corresponding Go ir.Serializer. All pointer parameters must + * be non-null (non-nil Cgo C. pointer or unsafe.Pointer from Go). 
+ * @param[in] log_message Log message to serialize + * @param[in] timestamp_delta Timestamp delta to the previous log event in the + * IR stream + * @param[in] ir_serializer ir::Serializer object to be used as storage + * @param[out] ir_view View of a IR buffer containing the serialized log event + * @return ffi::ir_stream::IRErrorCode forwarded from + * ffi::ir_stream::four_byte_encoding::encode_message + */ +CLP_FFI_GO_METHOD int ir_serializer_serialize_four_byte_log_event( + StringView log_message, + epoch_time_ms_t timestamp_delta, + void* ir_serializer, + ByteSpan* ir_view +); + +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_SERIALIZER_H diff --git a/include/ffi_go/search/wildcard_query.h b/include/ffi_go/search/wildcard_query.h new file mode 100644 index 0000000..960beb1 --- /dev/null +++ b/include/ffi_go/search/wildcard_query.h @@ -0,0 +1,71 @@ +#ifndef FFI_GO_IR_WILDCARD_QUERY_H +#define FFI_GO_IR_WILDCARD_QUERY_H +// header must support C, making modernize checks inapplicable +// NOLINTBEGIN(modernize-deprecated-headers) +// NOLINTBEGIN(modernize-use-trailing-return-type) +// NOLINTBEGIN(modernize-use-using) + +#include + +#include +#include + +/** + * A timestamp interval of [m_lower, m_upper). + */ +typedef struct { + epoch_time_ms_t m_lower; + epoch_time_ms_t m_upper; +} TimestampInterval; + +/** + * A view of a wildcard query passed down from Go. The query string is assumed + * to have been cleaned using the CLP function `clean_up_wildcard_search_string` + * on construction. m_case_sensitive is 1 for a case sensitive query (0 for case + * insensitive). + */ +typedef struct { + StringView m_query; + bool m_case_sensitive; +} WildcardQueryView; + +/** + * A view of a Go search.MergedWildcardQuery passed down through Cgo. The + * string is a concatenation of all wildcard queries, while m_end_offsets stores + * the size of each query. 
+ */ +typedef struct { + StringView m_queries; + SizetSpan m_end_offsets; + BoolSpan m_case_sensitivity; +} MergedWildcardQueryView; + +/** + * Given a query string, allocate and return a clean string that is safe for + * matching. See `clean_up_wildcard_search_string` in CLP for more details. + * @param[in] query Query string to clean + * @param[out] ptr Address of a new std::string + * @return New string holding cleaned query + */ +CLP_FFI_GO_METHOD StringView wildcard_query_new(StringView query, void** ptr); + +/** + * Delete a std::string holding a wildcard query. + * @param[in] str Address of a std::string created and returned by + * wildcard_query_new + */ +CLP_FFI_GO_METHOD void wildcard_query_delete(void* str); + +/** + * Given a target string, perform CLP wildcard matching using query. See + * `wildcard_match_unsafe` in CLP src/string_utils.hpp. + * @param[in] target String to perform matching on + * @param[in] query Query to use for matching + * @return 1 if query matches target, 0 otherwise + */ +CLP_FFI_GO_METHOD int wildcard_query_match(StringView target, WildcardQueryView query); + +// NOLINTEND(modernize-use-using) +// NOLINTEND(modernize-use-trailing-return-type) +// NOLINTEND(modernize-deprecated-headers) +#endif // FFI_GO_IR_WILDCARD_QUERY_H diff --git a/ir/BUILD.bazel b/ir/BUILD.bazel index 41755be..b4a0d30 100644 --- a/ir/BUILD.bazel +++ b/ir/BUILD.bazel @@ -3,20 +3,26 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "ir", srcs = [ - "cgo_amd64.go", - "cgo_arm64.go", - "decoder.go", - "encoder.go", - "ir.go", - "irerror.go", - "irerror_string.go", - "reader.go", + "cgo_defs.go", + "decoder.go", + "deserializer.go", + "encoder.go", + "ir.go", + "irerror.go", + "irerror_string.go", + "reader.go", + "serializer.go", ], cgo = True, - cdeps = ["//:libclp_ffi"], + cdeps = [ + "//:libclp_ffi", + ], importpath = "github.com/y-scope/clp-ffi-go/ir", visibility = ["//visibility:public"], - deps = ["//ffi"], + deps = [ + 
"//ffi", + "//search", + ], ) alias( @@ -28,12 +34,12 @@ alias( go_test( name = "ir_test", srcs = [ - "encoder_test.go", + "ir_test.go", "reader_test.go", + "serder_test.go", ], embed = [":ir"], deps = [ - "//test", "@com_github_klauspost_compress//zstd", ], ) diff --git a/ir/cgo_amd64.go b/ir/cgo_amd64.go index d90f319..269cf2d 100644 --- a/ir/cgo_amd64.go +++ b/ir/cgo_amd64.go @@ -3,8 +3,8 @@ package ir /* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ -#cgo linux LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_linux_amd64 -Wl,-rpath=${SRCDIR}/../lib/ -#cgo darwin LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_darwin_amd64 -Wl,-rpath=${SRCDIR}/../lib/ +#cgo CPPFLAGS: -I${SRCDIR}/../include/ +#cgo linux LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_linux_amd64.a -lstdc++ +#cgo darwin LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_darwin_amd64.a -lstdc++ */ import "C" diff --git a/ir/cgo_arm64.go b/ir/cgo_arm64.go index d73aaab..da2a837 100644 --- a/ir/cgo_arm64.go +++ b/ir/cgo_arm64.go @@ -3,8 +3,8 @@ package ir /* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ -#cgo linux LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_linux_arm64 -Wl,-rpath=${SRCDIR}/../lib/ -#cgo darwin LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_darwin_arm64 -Wl,-rpath=${SRCDIR}/../lib/ +#cgo CPPFLAGS: -I${SRCDIR}/../include/ +#cgo linux LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_linux_arm64.a -lstdc++ +#cgo darwin LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_darwin_arm64.a -lstdc++ */ import "C" diff --git a/ir/cgo_defs.go b/ir/cgo_defs.go new file mode 100644 index 0000000..8ed33fc --- /dev/null +++ b/ir/cgo_defs.go @@ -0,0 +1,98 @@ +package ir + +/* +#include +#include +*/ +import "C" + +import ( + "unsafe" + + "github.com/y-scope/clp-ffi-go/search" +) + +// The follow functions are helpers to cleanup Cgo related code. The underlying +// Go type created from a 'C' type is not exported and recreated in each +// package. 
Therefore, these helpers must be redefined in any package wishing to +// use them, so that they reference the correct underlying Go type of the +// package (see: https://pkg.go.dev/cmd/cgo). This problem could be alleviated by +// using Go generate to create/add these helpers to a package as necessary. + +func newCByteSpan(s []byte) C.ByteSpan { + return C.ByteSpan{ + unsafe.Pointer(unsafe.SliceData(s)), + C.size_t(len(s)), + } +} + +func newCInt32tSpan(s []int32) C.Int32tSpan { + return C.Int32tSpan{ + (*C.int32_t)(unsafe.Pointer(unsafe.SliceData(s))), + C.size_t(len(s)), + } +} + +func newCInt64tSpan(s []int64) C.Int64tSpan { + return C.Int64tSpan{ + (*C.int64_t)(unsafe.Pointer(unsafe.SliceData(s))), + C.size_t(len(s)), + } +} + +func newCStringView(s string) C.StringView { + return C.StringView{ + (*C.char)(unsafe.Pointer(unsafe.StringData(s))), + C.size_t(len(s)), + } +} + +func newMergedWildcardQueryView(mergedQuery search.MergedWildcardQuery) C.MergedWildcardQueryView { + return C.MergedWildcardQueryView{ + newCStringView(mergedQuery.Queries()), + C.SizetSpan{ + (*C.size_t)(unsafe.Pointer(unsafe.SliceData(mergedQuery.EndOffsets()))), + C.size_t(len(mergedQuery.EndOffsets())), + }, + C.BoolSpan{ + (*C.bool)(unsafe.Pointer(unsafe.SliceData(mergedQuery.CaseSensitivity()))), + C.size_t(len(mergedQuery.CaseSensitivity())), + }, + } +} + +// newLogMessageView wraps the C views of an encoded log message (logtype, +// variables, dictionary variables, and dictionary variable end offsets) in a +// LogMessageView without copying the underlying memory. Empty or null C views +// leave the corresponding field at its zero value. It returns nil if Tgo is +// neither EightByteEncoding nor FourByteEncoding. 
+func newLogMessageView[Tgo EightByteEncoding | FourByteEncoding, Tc C.Int64tSpan | C.Int32tSpan]( + logtype C.StringView, + vars Tc, + dictVars C.StringView, + dictVarEndOffsets C.Int32tSpan, +) *LogMessageView[Tgo] { + var msgView LogMessageView[Tgo] + msgView.Logtype = unsafe.String((*byte)(unsafe.Pointer(logtype.m_data)), logtype.m_size) + switch any(msgView.Vars).(type) { + case []EightByteEncoding: + dst := any(&msgView.Vars).(*[]EightByteEncoding) + src := any(vars).(C.Int64tSpan) + if 0 < src.m_size && nil != src.m_data { + *dst = unsafe.Slice((*EightByteEncoding)(src.m_data), src.m_size) + } + case []FourByteEncoding: + dst := 
any(&msgView.Vars).(*[]FourByteEncoding) + src := any(vars).(C.Int32tSpan) + if 0 < src.m_size && nil != src.m_data { + *dst = unsafe.Slice((*FourByteEncoding)(src.m_data), src.m_size) + } + default: + return nil + } + if 0 < dictVars.m_size && nil != dictVars.m_data { + // Store the dictionary variables in DictVars; writing them to Logtype + // would clobber the logtype set above and leave DictVars empty. + msgView.DictVars = unsafe.String((*byte)(unsafe.Pointer(dictVars.m_data)), dictVars.m_size) + } + if 0 < dictVarEndOffsets.m_size && nil != dictVarEndOffsets.m_data { + msgView.DictVarEndOffsets = unsafe.Slice( + (*int32)(dictVarEndOffsets.m_data), + dictVarEndOffsets.m_size, + ) + } + return &msgView +} diff --git a/ir/cgo_external.go b/ir/cgo_external.go index 2556d0d..96db50a 100644 --- a/ir/cgo_external.go +++ b/ir/cgo_external.go @@ -4,7 +4,7 @@ package ir /* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ +#cgo CPPFLAGS: -I${SRCDIR}/../include/ #cgo external LDFLAGS: */ import "C" diff --git a/ir/decoder.go b/ir/decoder.go index 3893af7..165772d 100644 --- a/ir/decoder.go +++ b/ir/decoder.go @@ -1,179 +1,94 @@ package ir /* -#include -#include +#include */ import "C" import ( - "encoding/json" - "strconv" "unsafe" "github.com/y-scope/clp-ffi-go/ffi" ) -// TODO once we reach go >= 1.20 -// change &buf[:1][0] to unsafe.SliceData(buf) -// https://pkg.go.dev/unsafe#SliceData - -// IrDecoder exports functions to decode log events in an IR stream and also -// inspect the timnestamp information of the stream. An IrDecoder manages the -// internal state of the IR stream, such that the next log event in the stream -// can be decoded. The maintence of the buffer containing the IR stream is left -// to the caller. 
-type IrDecoder interface { - DecodeNextLogEvent(buf []byte) (ffi.LogEvent, int, error) - TimestampInfo() TimestampInfo +// A Decoder takes objects encoded in CLP IR as input and returns them in their +// natural state prior to encoding. Close must be called to free the underlying +// memory and failure to do so will result in a memory leak. 
+type Decoder[T EightByteEncoding | FourByteEncoding] interface { + DecodeLogMessage(irMessage LogMessage[T]) (*ffi.LogMessageView, error) + Close() error } -// DecodePreamble attempts to read an IR stream preamble from buf, returning an -// IrDecoder (of the correct stream encoding size), the offset read to in buf -// (the end of the preamble), and an error. Note the metadata stored in the -// preamble is sparse and certain fields in TimestampInfo may be 0 value. -// Return values: -// - nil == error: successful decode -// - nil != error: IrDecode will be nil, offset may be non-zero for debugging purposes -// - type [IRError]: CLP failed to successfully decode -// - type from [encoding/json]: unmarshalling the metadata failed -func DecodePreamble(buf []byte) (IrDecoder, int, error) { - var offset C.size_t - var ir_encoding C.int8_t - var metadata_type C.int8_t - var metadata_pos C.size_t - var metadata_size C.uint16_t - - if err := IRError(C.decode_preamble( - unsafe.Pointer(&buf[:1][0]), - C.size_t(len(buf)), - &offset, - &ir_encoding, - &metadata_type, - &metadata_pos, - &metadata_size)); Success != err { - return nil, int(offset), err - } - - if 1 != metadata_type { - return nil, int(offset), UnsupportedVersion - } - - var metadata map[string]interface{} - if err := json.Unmarshal(buf[metadata_pos:metadata_pos+C.size_t(metadata_size)], &metadata); nil != err { - return nil, int(offset), err - } - - var tsInfo TimestampInfo - if tsPat, ok := metadata["TIMESTAMP_PATTERN"].(string); ok { - tsInfo.Pattern = tsPat - } - if tsSyn, ok := metadata["TIMESTAMP_PATTERN_SYNTAX"].(string); ok { - tsInfo.PatternSyntax = tsSyn - } - if tzid, ok := metadata["TZ_ID"].(string); ok { - tsInfo.TimeZoneId = tzid - } - - var decoder IrDecoder - if 1 == ir_encoding { - var refTs ffi.EpochTimeMs = 0 - if tsStr, ok := metadata["REFERENCE_TIMESTAMP"].(string); ok { - if tsInt, err := strconv.ParseInt(tsStr, 10, 64); nil == err { - refTs = ffi.EpochTimeMs(tsInt) - } - } - decoder = 
&FourByteIrStream{ - irStream: irStream[FourByteEncodedVariable]{tsInfo: tsInfo, cPtr: nil}, - prevTimestamp: refTs, - } - } else { - decoder = &EightByteIrStream{irStream[EightByteEncodedVariable]{tsInfo, nil}} - } - - return decoder, int(offset), nil +// Return a new Decoder for IR using [EightByteEncoding]. +func EightByteDecoder() (Decoder[EightByteEncoding], error) { + return &eightByteDecoder{commonDecoder{C.ir_decoder_new()}}, nil } -// DecodeNextLogEvent attempts to read the next LogEvent from the IR stream in -// buf, returning the LogEvent, the offset read to in buf (the end of the -// LogEvent in buf), and an error. -// Return values: -// - nil == error: successful decode -// - nil != error: ffi.LogEvent will be nil, offset may be non-zero for debugging purposes -// - [Eof]: CLP found the IR stream EOF tag -// - [IRError]: CLP failed to successfully decode -func (self *EightByteIrStream) DecodeNextLogEvent(buf []byte) (ffi.LogEvent, int, error) { - return decodeNextLogEvent(self, buf) +// Return a new Decoder for IR using [FourByteEncoding]. +func FourByteDecoder() (Decoder[FourByteEncoding], error) { + return &fourByteDecoder{commonDecoder{C.ir_decoder_new()}}, nil } -// DecodeNextLogEvent attempts to read the next LogEvent from the IR stream in -// buf, returning the LogEvent, the offset read to in buf (the end of the -// LogEvent in buf), and an error. -// Return values: -// - nil == error: successful decode -// - nil != error: ffi.LogEvent will be nil, offset may be non-zero for debugging purposes -// - [Eof]: CLP found the IR stream EOF tag -// - [IRError]: CLP failed to successfully decode -func (self *FourByteIrStream) DecodeNextLogEvent(buf []byte) (ffi.LogEvent, int, error) { - return decodeNextLogEvent(self, buf) +type commonDecoder struct { + cptr unsafe.Pointer } -// decodeNextLogEvent performs the actual work for DecodeNextLogEvent in a -// generic way. 
-func decodeNextLogEvent[T EightByteIrStream | FourByteIrStream]( - irstream *T, - buf []byte, -) (ffi.LogEvent, int, error) { - if 0 >= len(buf) { - return ffi.LogEvent{}, 0, IncompleteIR +// Close will delete the underlying C++ allocated memory used by the +// decoder. Failure to call Close will result in a memory leak. +func (self *commonDecoder) Close() error { + if nil != self.cptr { + C.ir_decoder_close(self.cptr) + self.cptr = nil } - var offset C.size_t - var msgObj unsafe.Pointer - var msg *C.char - var msgSize C.size_t - var timestampOrDelta C.int64_t + return nil +} - var err error - switch any(irstream).(type) { - case *EightByteIrStream: - err = IRError(C.eight_byte_decode_next_log_event( - unsafe.Pointer(&buf[:1][0]), - C.size_t(len(buf)), - &offset, - &msgObj, - &msg, - &msgSize, - &timestampOrDelta)) - case *FourByteIrStream: - err = IRError(C.four_byte_decode_next_log_event( - unsafe.Pointer(&buf[:1][0]), - C.size_t(len(buf)), - &offset, - &msgObj, - &msg, - &msgSize, - &timestampOrDelta)) - default: - return ffi.LogEvent{}, 0, UnsupportedVersion - } +type eightByteDecoder struct { + commonDecoder +} + +// Decode an IR encoded log message, returning a view of the original +// (non-encoded) log message. 
+func (self *eightByteDecoder) DecodeLogMessage( + irMessage LogMessage[EightByteEncoding], +) (*ffi.LogMessageView, error) { + var msg C.StringView + err := IrError(C.ir_decoder_decode_eight_byte_log_message( + newCStringView(irMessage.Logtype), + newCInt64tSpan(irMessage.Vars), + newCStringView(irMessage.DictVars), + newCInt32tSpan(irMessage.DictVarEndOffsets), + self.cptr, + &msg, + )) if Success != err { - return ffi.LogEvent{}, int(offset), err + return nil, DecodeError } + view := unsafe.String((*byte)(unsafe.Pointer(msg.m_data)), msg.m_size) + return &view, nil +} - var ts ffi.EpochTimeMs - switch irs := any(irstream).(type) { - case *EightByteIrStream: - ts = ffi.EpochTimeMs(timestampOrDelta) - case *FourByteIrStream: - ts = irs.prevTimestamp + ffi.EpochTimeMs(timestampOrDelta) - irs.prevTimestamp = ts - default: - return ffi.LogEvent{}, 0, UnsupportedVersion - } +type fourByteDecoder struct { + commonDecoder +} - event := ffi.LogEvent{ - LogMessage: ffi.NewLogMessage(unsafe.Pointer(msg), uint64(msgSize), msgObj), - Timestamp: ts, +// Decode an IR encoded log message, returning a view of the original +// (non-encoded) log message. 
+func (self *fourByteDecoder) DecodeLogMessage( + irMessage LogMessage[FourByteEncoding], +) (*ffi.LogMessageView, error) { + var msg C.StringView + err := IrError(C.ir_decoder_decode_four_byte_log_message( + newCStringView(irMessage.Logtype), + newCInt32tSpan(irMessage.Vars), + newCStringView(irMessage.DictVars), + newCInt32tSpan(irMessage.DictVarEndOffsets), + self.cptr, + &msg, + )) + if Success != err { + return nil, DecodeError } - return event, int(offset), nil + view := unsafe.String((*byte)(unsafe.Pointer(msg.m_data)), msg.m_size) + return &view, nil } diff --git a/ir/deserializer.go b/ir/deserializer.go new file mode 100644 index 0000000..cd0dc8d --- /dev/null +++ b/ir/deserializer.go @@ -0,0 +1,314 @@ +package ir + +/* +#include +#include +#include +*/ +import "C" + +import ( + "encoding/json" + "strconv" + "unsafe" + + "github.com/y-scope/clp-ffi-go/ffi" + "github.com/y-scope/clp-ffi-go/search" +) + +const ( + metadata_reference_timestamp_key = "REFERENCE_TIMESTAMP" + metadata_timestamp_pattern_key = "TIMESTAMP_PATTERN" + metadata_timestamp_pattern_syntax_key = "TIMESTAMP_PATTERN_SYNTAX" + metadata_tz_id_key = "TZ_ID" +) + +// A Deserializer exports functions to deserialize log events from a CLP IR byte +// stream. Deserialization functions take an IR buffer as input, but how that +// buffer is materialized is left to the user. These functions return views +// (slices) of the log events extracted from the IR. Each Deserializer owns its +// own unique underlying memory for the views it produces/returns. This memory +// is reused for each view, so to persist the contents the memory must be copied +// into another object. Close must be called to free the underlying memory and +// failure to do so will result in a memory leak. 
+type Deserializer interface { + DeserializeLogEvent(irBuf []byte) (*ffi.LogEventView, int, error) + DeserializeWildcardMatchWithTimeInterval( + irBuf []byte, + mergedQuery search.MergedWildcardQuery, + timeInterval search.TimestampInterval, + ) (*ffi.LogEventView, int, int, error) + TimestampInfo() TimestampInfo + Close() error +} + +// DeserializePreamble attempts to read an IR stream preamble from irBuf, +// returning a Deserializer (of the correct stream encoding size), the position +// read to in irBuf (the end of the preamble), and an error. Note the metadata +// stored in the preamble is sparse and certain fields in TimestampInfo may be 0 +// value. On error returns: +// - nil Deserializer +// - 0 position +// - [IrError] error: CLP failed to successfully deserialize +// - [encoding/json] error: unmarshalling the metadata failed +func DeserializePreamble(irBuf []byte) (Deserializer, int, error) { + if 0 >= len(irBuf) { + return nil, 0, IncompleteIr + } + + // TODO: Add version validation in this method or ir_deserializer_new_deserializer_with_preamble + // after updating the clp version. 
+ + var pos C.size_t + var irEncoding C.int8_t + var metadataType C.int8_t + var metadataPos C.size_t + var metadataSize C.uint16_t + var deserializerCptr unsafe.Pointer + var timestampCptr unsafe.Pointer + if err := IrError(C.ir_deserializer_new_deserializer_with_preamble( + newCByteSpan(irBuf), + &pos, + &irEncoding, + &metadataType, + &metadataPos, + &metadataSize, + &deserializerCptr, + &timestampCptr, + )); Success != err { + return nil, int(pos), err + } + + if 1 != metadataType { + // The C++ deserializer has already been allocated and the caller gets no + // Deserializer to Close, so free it here to avoid a leak. + C.ir_deserializer_close(deserializerCptr) + return nil, 0, UnsupportedVersion + } + + var metadata map[string]interface{} + if err := json.Unmarshal( + irBuf[metadataPos:metadataPos+C.size_t(metadataSize)], + &metadata, + ); nil != err { + // Same as above: release the C++ deserializer before reporting the error. + C.ir_deserializer_close(deserializerCptr) + return nil, 0, err + } + + var tsInfo TimestampInfo + if tsPat, ok := metadata[metadata_timestamp_pattern_key].(string); ok { + tsInfo.Pattern = tsPat + } + if tsSyn, ok := metadata[metadata_timestamp_pattern_syntax_key].(string); ok { + tsInfo.PatternSyntax = tsSyn + } + if tzid, ok := metadata[metadata_tz_id_key].(string); ok { + tsInfo.TimeZoneId = tzid + } + + var deserializer Deserializer + if 1 == irEncoding { + var refTs ffi.EpochTimeMs = 0 + if tsStr, ok := metadata[metadata_reference_timestamp_key].(string); ok { + if tsInt, err := strconv.ParseInt(tsStr, 10, 64); nil == err { + refTs = ffi.EpochTimeMs(tsInt) + *(*ffi.EpochTimeMs)(timestampCptr) = refTs + } + } + deserializer = &fourByteDeserializer{commonDeserializer{tsInfo, deserializerCptr}, refTs} + } else { + deserializer = &eightByteDeserializer{commonDeserializer{tsInfo, deserializerCptr}} + } + + return deserializer, int(pos), nil +} + +// commonDeserializer contains fields common to all types of CLP IR encoding. 
+// TimestampInfo stores information common to all timestamps found in the IR. +// cptr holds a reference to the underlying C++ object used as backing storage +// for the Views returned by the deserializer. Close must be called to free this +// underlying memory and failure to do so will result in a memory leak. 
+type commonDeserializer struct { + tsInfo TimestampInfo + cptr unsafe.Pointer +} + +// Close will delete the underlying C++ allocated memory used by the +// deserializer. Failure to call Close will result in a memory leak. +func (self *commonDeserializer) Close() error { + if nil != self.cptr { + C.ir_deserializer_close(self.cptr) + self.cptr = nil + } + return nil +} + +// Returns the TimestampInfo used by the Deserializer. +func (self commonDeserializer) TimestampInfo() TimestampInfo { + return self.tsInfo +} + +type eightByteDeserializer struct { + commonDeserializer +} + +// DeserializeLogEvent attempts to read the next log event from the IR stream in +// irBuf, returning the deserialized [ffi.LogEventView], the position read to in +// irBuf (the end of the log event in irBuf), and an error. On error returns: +// - nil *ffi.LogEventView +// - 0 position +// - [IrError] error: CLP failed to successfully deserialize +// - [EndOfIr] error: CLP found the IR stream EOF tag +func (self *eightByteDeserializer) DeserializeLogEvent( + irBuf []byte, +) (*ffi.LogEventView, int, error) { + return deserializeLogEvent(self, irBuf) +} + +// DeserializeWildcardMatchWithTimeInterval attempts to read the next log event +// from the IR stream in irBuf that matches mergedQuery within timeInterval. It +// returns the deserialized [ffi.LogEventView], the position read to in irBuf +// (the end of the log event in irBuf), the index of the matched query in +// mergedQuery, and an error. 
On error returns: +// - nil *ffi.LogEventView +// - 0 position +// - -1 index +// - [IrError] error: CLP failed to successfully deserialize +// - [EndOfIr] error: CLP found the IR stream EOF tag +func (self *eightByteDeserializer) DeserializeWildcardMatchWithTimeInterval( + irBuf []byte, + mergedQuery search.MergedWildcardQuery, + timeInterval search.TimestampInterval, +) (*ffi.LogEventView, int, int, error) { + return deserializeWildcardMatch(self, irBuf, mergedQuery, timeInterval) +} + +// fourByteDeserializer contains both a common CLP IR deserializer and stores +// the previously seen log event's timestamp. The previous timestamp is +// necessary to calculate the current timestamp as four byte encoding only +// encodes the timestamp delta between the current log event and the previous. +type fourByteDeserializer struct { + commonDeserializer + prevTimestamp ffi.EpochTimeMs +} + +// DeserializeLogEvent attempts to read the next log event from the IR stream in +// irBuf, returning the deserialized [ffi.LogEventView], the position read to in +// irBuf (the end of the log event in irBuf), and an error. On error returns: +// - nil *ffi.LogEventView +// - 0 position +// - [IrError] error: CLP failed to successfully deserialize +// - [EndOfIr] error: CLP found the IR stream EOF tag +func (self *fourByteDeserializer) DeserializeLogEvent( + irBuf []byte, +) (*ffi.LogEventView, int, error) { + return deserializeLogEvent(self, irBuf) +} + +// DeserializeWildcardMatchWithTimeInterval attempts to read the next log event +// from the IR stream in irBuf that matches mergedQuery within timeInterval. It +// returns the deserialized [ffi.LogEventView], the position read to in irBuf +// (the end of the log event in irBuf), the index of the matched query in +// mergedQuery, and an error. 
On error returns: +// - nil *ffi.LogEventView +// - 0 position +// - -1 index +// - [IrError] error: CLP failed to successfully deserialize +// - [EndOfIr] error: CLP found the IR stream EOF tag +func (self *fourByteDeserializer) DeserializeWildcardMatchWithTimeInterval( + irBuf []byte, + mergedQuery search.MergedWildcardQuery, + timeInterval search.TimestampInterval, +) (*ffi.LogEventView, int, int, error) { + return deserializeWildcardMatch(self, irBuf, mergedQuery, timeInterval) +} + +func deserializeLogEvent( + deserializer Deserializer, + irBuf []byte, +) (*ffi.LogEventView, int, error) { + if 0 >= len(irBuf) { + return nil, 0, IncompleteIr + } + + var pos C.size_t + var event C.LogEventView + var err error = DecodeError // kept if deserializer is not a known type + switch irs := deserializer.(type) { + case *eightByteDeserializer: + err = IrError(C.ir_deserializer_deserialize_eight_byte_log_event( + newCByteSpan(irBuf), + irs.cptr, + &pos, + &event, + )) + case *fourByteDeserializer: + err = IrError(C.ir_deserializer_deserialize_four_byte_log_event( + newCByteSpan(irBuf), + irs.cptr, + &pos, + &event, + )) + } + if Success != err { + return nil, 0, err + } + + return &ffi.LogEventView{ + LogMessageView: unsafe.String( + (*byte)((unsafe.Pointer)(event.m_log_message.m_data)), + event.m_log_message.m_size, + ), + Timestamp: ffi.EpochTimeMs(event.m_timestamp), + }, + int(pos), + nil +} + +func deserializeWildcardMatch( + deserializer Deserializer, + irBuf []byte, + mergedQuery search.MergedWildcardQuery, + time search.TimestampInterval, +) (*ffi.LogEventView, int, int, error) { + if 0 >= len(irBuf) { + return nil, 0, -1, IncompleteIr + } + + var pos C.size_t + var event C.LogEventView + var match C.size_t + var err error = DecodeError // kept if deserializer is not a known type + switch irs := deserializer.(type) { + case *eightByteDeserializer: + err = IrError(C.ir_deserializer_deserialize_eight_byte_wildcard_match( + newCByteSpan(irBuf), + irs.cptr, + C.TimestampInterval{C.int64_t(time.Lower), C.int64_t(time.Upper)}, + newMergedWildcardQueryView(mergedQuery), + &pos, +
&event, + &match, + )) + case *fourByteDeserializer: + err = IrError(C.ir_deserializer_deserialize_four_byte_wildcard_match( + newCByteSpan(irBuf), + irs.cptr, + C.TimestampInterval{C.int64_t(time.Lower), C.int64_t(time.Upper)}, + newMergedWildcardQueryView(mergedQuery), + &pos, + &event, + &match, + )) + } + if Success != err { + return nil, 0, -1, err + } + + return &ffi.LogEventView{ + LogMessageView: unsafe.String( + (*byte)((unsafe.Pointer)(event.m_log_message.m_data)), + event.m_log_message.m_size, + ), + Timestamp: ffi.EpochTimeMs(event.m_timestamp), + }, + int(pos), + int(match), + nil +} diff --git a/ir/encoder.go b/ir/encoder.go index d16ef18..a6570bb 100644 --- a/ir/encoder.go +++ b/ir/encoder.go @@ -1,169 +1,102 @@ package ir /* -#include +#include */ import "C" import ( - "runtime" "unsafe" "github.com/y-scope/clp-ffi-go/ffi" ) -type IrEncoder interface { - EncodeMessage(ts ffi.EpochTimeMs, msg string) ([]byte, int) - EncodeMessageUnsafe(ts ffi.EpochTimeMs, msg string) ([]byte, int) - TimestampInfo() TimestampInfo +// An Encoder takes logging objects (commonly used/created by logging libraries) +// and encodes them as CLP IR. Close must be called to free the underlying +// memory and failure to do so will result in a memory leak. +type Encoder[T EightByteEncoding | FourByteEncoding] interface { + EncodeLogMessage(logMessage ffi.LogMessage) (*LogMessageView[T], error) + Close() error } -func EightByteEncodePreamble( - ts_pattern string, - ts_pattern_syntax string, - time_zone_id string, -) (EightByteIrStream, []byte, int) { - irs, preamble, ret := EightByteEncodePreambleUnsafe(ts_pattern, ts_pattern_syntax, - time_zone_id) - if 0 != ret { - return irs, nil, ret - } - safePreamble := make([]byte, len(preamble)) - copy(safePreamble, preamble) - return irs, safePreamble, 0 +// Return a new Encoder that produces IR using [EightByteEncoding]. 
+func EightByteEncoder() (Encoder[EightByteEncoding], error) { + return &eightByteEncoder{C.ir_encoder_eight_byte_new()}, nil } -func EightByteEncodePreambleUnsafe( - ts_pattern string, - ts_pattern_syntax string, - time_zone_id string, -) (EightByteIrStream, []byte, int) { - var bufPtr unsafe.Pointer - var bufSize uint64 - irs := EightByteIrStream{ - irStream[EightByteEncodedVariable]{ - TimestampInfo{ts_pattern, ts_pattern_syntax, time_zone_id}, nil, - }, - } - irs.cPtr = C.eight_byte_encode_preamble( - unsafe.Pointer(&[]byte(ts_pattern)[0]), C.size_t(len(ts_pattern)), - unsafe.Pointer(&[]byte(ts_pattern_syntax)[0]), C.size_t(len(ts_pattern_syntax)), - unsafe.Pointer(&[]byte(time_zone_id)[0]), C.size_t(len(time_zone_id)), - &bufPtr, unsafe.Pointer(&bufSize)) - buf := unsafe.Slice((*byte)(bufPtr), bufSize) - if nil == buf { - return irs, nil, -2 - } - runtime.SetFinalizer(&irs, - func(irs *EightByteIrStream) { C.delete_ir_stream_state(irs.cPtr) }) - return irs, buf, 0 +// Return a new Encoder that produces IR using [FourByteEncoding]. 
+func FourByteEncoder() (Encoder[FourByteEncoding], error) { + return &fourByteEncoder{C.ir_encoder_four_byte_new()}, nil } -func FourByteEncodePreamble( - ts_pattern string, - ts_pattern_syntax string, - time_zone_id string, - reference_ts ffi.EpochTimeMs, -) (FourByteIrStream, []byte, int) { - irs, preamble, ret := FourByteEncodePreambleUnsafe(ts_pattern, ts_pattern_syntax, - time_zone_id, reference_ts) - if 0 != ret { - return irs, nil, ret - } - safePreamble := make([]byte, len(preamble)) - copy(safePreamble, preamble) - return irs, safePreamble, 0 +type eightByteEncoder struct { + cptr unsafe.Pointer } -func FourByteEncodePreambleUnsafe( - ts_pattern string, - ts_pattern_syntax string, - time_zone_id string, - reference_ts ffi.EpochTimeMs, -) (FourByteIrStream, []byte, int) { - var bufPtr unsafe.Pointer - var bufSize uint64 - irs := FourByteIrStream{ - irStream[FourByteEncodedVariable]{ - TimestampInfo{ts_pattern, ts_pattern_syntax, time_zone_id}, nil, - }, - reference_ts, - } - irs.cPtr = C.four_byte_encode_preamble( - unsafe.Pointer(&[]byte(ts_pattern)[0]), C.size_t(len(ts_pattern)), - unsafe.Pointer(&[]byte(ts_pattern_syntax)[0]), C.size_t(len(ts_pattern_syntax)), - unsafe.Pointer(&[]byte(time_zone_id)[0]), C.size_t(len(time_zone_id)), - C.int64_t(reference_ts), &bufPtr, unsafe.Pointer(&bufSize)) - buf := unsafe.Slice((*byte)(bufPtr), bufSize) - if nil == buf { - return irs, nil, -2 +// Close will delete the underlying C++ allocated memory used by the +// encoder. Failure to call Close will result in a memory leak. 
+func (self *eightByteEncoder) Close() error { + if nil != self.cptr { + C.ir_encoder_eight_byte_close(self.cptr) + self.cptr = nil } - runtime.SetFinalizer(&irs, - func(irs *FourByteIrStream) { C.delete_ir_stream_state(irs.cPtr) }) - return irs, buf, 0 -} - -func (self *EightByteIrStream) EncodeMessage(ts ffi.EpochTimeMs, msg string) ([]byte, int) { - return encodeMessage(self, ts, msg) -} - -func (self *FourByteIrStream) EncodeMessage(ts ffi.EpochTimeMs, msg string) ([]byte, int) { - return encodeMessage(self, ts, msg) + return nil } -func encodeMessage(irEncoder IrEncoder, ts ffi.EpochTimeMs, msg string) ([]byte, int) { - buf, ret := irEncoder.EncodeMessageUnsafe(ts, msg) - if 0 != ret { - return nil, ret +// EncodeLogMessage encodes a log message into CLP IR and returns a view of it. +func (self *eightByteEncoder) EncodeLogMessage( + logMessage ffi.LogMessage, +) (*LogMessageView[EightByteEncoding], error) { + var logtype C.StringView + var vars C.Int64tSpan + var dictVars C.StringView + var dictVarEndOffsets C.Int32tSpan + err := IrError(C.ir_encoder_encode_eight_byte_log_message( + newCStringView(logMessage), + self.cptr, + &logtype, + &vars, + &dictVars, + &dictVarEndOffsets, + )) + if Success != err { + return nil, EncodeError } - safeBuf := make([]byte, len(buf)) - copy(safeBuf, buf) - return safeBuf, 0 + return newLogMessageView[EightByteEncoding](logtype, vars, dictVars, dictVarEndOffsets), nil } -func (self *EightByteIrStream) EncodeMessageUnsafe(ts ffi.EpochTimeMs, msg string) ([]byte, int) { - return encodeMessageUnsafe(self, ts, msg) +type fourByteEncoder struct { + cptr unsafe.Pointer } -func (self *FourByteIrStream) EncodeMessageUnsafe(ts ffi.EpochTimeMs, msg string) ([]byte, int) { - buf, ret := encodeMessageUnsafe(self, self.prevTimestamp-ts, msg) - if 0 != ret { - return nil, ret +// Close will delete the underlying C++ allocated memory used by the +// encoder. Failure to call Close will result in a memory leak. 
+func (self *fourByteEncoder) Close() error { + if nil != self.cptr { + C.ir_encoder_four_byte_close(self.cptr) + self.cptr = nil } - self.prevTimestamp = ts - return buf, ret + return nil } -// returns 0 on success, >0 on error, <0 on c error -// returned byte slice points to c memory and is only valid until the next call -// to encodeMessage (from either EncodeMessage or EncodeMessageUnsafe) -func encodeMessageUnsafe[T EightByteIrStream | FourByteIrStream]( - irstream *T, - timestampOrDelta ffi.EpochTimeMs, - msg string, -) ([]byte, int) { - var ret C.int - var bufPtr unsafe.Pointer - var bufSize uint64 - - switch irs := any(irstream).(type) { - case *EightByteIrStream: - ret = C.eight_byte_encode_message(irs.cPtr, C.int64_t(timestampOrDelta), - unsafe.Pointer(&[]byte(msg)[0]), C.size_t(len(msg)), - &bufPtr, unsafe.Pointer(&bufSize)) - case *FourByteIrStream: - ret = C.four_byte_encode_message(irs.cPtr, C.int64_t(timestampOrDelta), - unsafe.Pointer(&[]byte(msg)[0]), C.size_t(len(msg)), - &bufPtr, unsafe.Pointer(&bufSize)) - default: - return nil, 2 - } - if 0 > ret { - return nil, int(ret) - } - buf := unsafe.Slice((*byte)(bufPtr), bufSize) - if nil == buf { - return nil, 3 +// Encode a log message into CLP IR, returning a view of the encoded message. 
+func (self *fourByteEncoder) EncodeLogMessage( + logMessage ffi.LogMessage, +) (*LogMessageView[FourByteEncoding], error) { + var logtype C.StringView + var vars C.Int32tSpan + var dictVars C.StringView + var dictVarEndOffsets C.Int32tSpan + err := IrError(C.ir_encoder_encode_four_byte_log_message( + newCStringView(logMessage), + self.cptr, + &logtype, + &vars, + &dictVars, + &dictVarEndOffsets, + )) + if Success != err { + return nil, EncodeError } - return buf, 0 + return newLogMessageView[FourByteEncoding](logtype, vars, dictVars, dictVarEndOffsets), nil } diff --git a/ir/encoder_test.go b/ir/encoder_test.go deleted file mode 100644 index 3610df3..0000000 --- a/ir/encoder_test.go +++ /dev/null @@ -1,73 +0,0 @@ -package ir - -import ( - "fmt" - "io" - "os" - "testing" - "time" - - "github.com/y-scope/clp-ffi-go/ffi" -) - -type WriteCloser interface { - io.Writer - io.Closer -} - -func openIrEncoder(t *testing.T, eightByte bool) (WriteCloser, IrEncoder) { - f, err := os.Create(fmt.Sprintf("../testdata/%s.clp", t.Name())) - if err != nil { - t.Fatalf("os.Create: %v", err) - } - - timestampPattern := "yyyy-MM-dd HH:mm:ss,SSS" - timestampPatternSyntax := "java::SimpleDateFormat" - timeZoneId := "America/Toronto" - - var irEncoder IrEncoder - var preamble []byte - var ret int - if eightByte { - var ebIrs EightByteIrStream - ebIrs, preamble, ret = EightByteEncodePreambleUnsafe(timestampPattern, - timestampPatternSyntax, timeZoneId) - irEncoder = &ebIrs - } else { - var fbIrs FourByteIrStream - fbIrs, preamble, ret = FourByteEncodePreambleUnsafe(timestampPattern, - timestampPatternSyntax, timeZoneId, ffi.EpochTimeMs(time.Now().UnixMilli())) - irEncoder = &fbIrs - } - if 0 != ret { - t.Fatalf("*EncodePreamble failed: %v", ret) - } - n, err := f.Write(preamble) - if n != len(preamble) { - t.Fatalf("short write for preamble: %v/%v", n, len(preamble)) - } - if err != nil { - t.Fatalf("io.Writer.Write preamble: %v", err) - } - return f, irEncoder -} - -func writeIrEncoder(t 
*testing.T, writer io.Writer, irs IrEncoder) { - msg, ret := irs.EncodeMessageUnsafe(ffi.EpochTimeMs(time.Now().UnixMilli()), "log") - if 0 != ret { - t.Fatalf("EncodeMessageUnsafe failed: %v", ret) - } - n, err := writer.Write(msg) - if n != len(msg) { - t.Fatalf("short write for message: %v/%v", n, len(msg)) - } - if err != nil { - t.Fatalf("io.Writer.Write message: %v", err) - } -} - -func TestUnsafeFourByteIrEncoder(t *testing.T) { - writer, irEncoder := openIrEncoder(t, false) - defer writer.Close() - writeIrEncoder(t, writer, irEncoder) -} diff --git a/ir/ir.go b/ir/ir.go index 84ef775..edd31dd 100644 --- a/ir/ir.go +++ b/ir/ir.go @@ -1,63 +1,53 @@ -// Package ir implements interfaces for the encoding and decoding of [CLP] IR -// (intermediate representation) streams through CLP's FFI (foreign function -// interface). More details on CLP IR streams are described in this [Uber -// blog]. -// Log events compressed in IR format can be viewed in the [log viewer] or -// programmatically analyzed using APIs provided here. They can also be -// decompressed back into plain-text log files using CLP (in a future release). +// The ir package implements interfaces for the encoding, decoding, +// serialization, and deserialization of [CLP] IR (intermediate representation) +// streams through CLP's FFI (foreign function interface). More details on CLP +// IR streams are described in this [Uber blog]. +// Log events in IR format can be viewed in the [log viewer] or programmatically +// analyzed using APIs provided in this package. 
// // [CLP]: https://github.com/y-scope/clp // [Uber blog]: https://www.uber.com/blog/reducing-logging-cost-by-two-orders-of-magnitude-using-clp/ // [log viewer]: https://github.com/y-scope/yscope-log-viewer package ir -import ( - "unsafe" +/* +#include +*/ +import "C" - "github.com/y-scope/clp-ffi-go/ffi" +// Must match c++ equivalent types +type ( + EightByteEncoding = int64 + FourByteEncoding = int32 ) -// TimestampInfo contains information relevant to all timestamps in the IR -// stream. This information comes from the metadata in the IR preamble. +// TimestampInfo contains general information applying to all timestamps in +// contiguous IR. This information comes from the metadata in the IR preamble. type TimestampInfo struct { Pattern string PatternSyntax string TimeZoneId string } -// Empty types used to constrain irStream to ensure the correct encoding size -// is used during encoding and decoding. -type ( - EightByteEncodedVariable struct{} - FourByteEncodedVariable struct{} - EncodedVariable interface { - EightByteEncodedVariable | FourByteEncodedVariable - } -) - -// irStream is constrained by EncodedVariable to prevent mistaken usage of an -// incorrect sized stream. -type irStream[T EncodedVariable] struct { - tsInfo TimestampInfo - cPtr unsafe.Pointer // currently unused in the decoder path -} - -// Returns the TimestampInfo of an irStream. -func (self irStream[T]) TimestampInfo() TimestampInfo { - return self.tsInfo -} - -// Returns the TimestampInfo of an irStream. -type EightByteIrStream struct { - irStream[EightByteEncodedVariable] +// ir.BufView represents a slice of CLP IR, utilizing memory allocated by C++ +// instead of the Go heap. A BufView, denoted as x, is valid upon being returned +// and maintains its validity until the same object (e.g., an [ir.Serializer]) +// that issued x returns a new BufView. 
+type BufView = []byte + +// An ir.LogMessage contains all the different components of a log message +// ([ffi.LogMessage]) encoded/separated into fields. +type LogMessage[T EightByteEncoding | FourByteEncoding] struct { + Logtype string + Vars []T + DictVars string + DictVarEndOffsets []int32 } -// FourByteIrStream contains both a CLP IR stream (irStream) and keeps track of -// the previous timestamp seen in the stream. Four byte encoding encodes log -// event timestamps as time deltas from the previous log event. Therefore, we -// must track the previous timestamp to be able to calculate the full timestamp -// of a log event. -type FourByteIrStream struct { - irStream[FourByteEncodedVariable] - prevTimestamp ffi.EpochTimeMs +// ir.LogMessageView is an [ir.LogMessage] using memory allocated by C++ instead +// of the Go heap. A LogMessageView, denoted as x, is valid upon being returned +// and maintains its validity until the same object (e.g., an [ir.Encoder]) +// that issued x returns a new LogMessageView. 
+type LogMessageView[T EightByteEncoding | FourByteEncoding] struct { + LogMessage[T] } diff --git a/ir/ir_test.go b/ir/ir_test.go new file mode 100644 index 0000000..3b75b3f --- /dev/null +++ b/ir/ir_test.go @@ -0,0 +1,215 @@ +package ir + +import ( + "fmt" + "io" + "math" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/klauspost/compress/zstd" + "github.com/y-scope/clp-ffi-go/ffi" +) + +const ( + defaultTimestampPattern string = "yyyy-MM-dd HH:mm:ss,SSS" + defaultTimestampPatternSyntax string = "java::SimpleDateFormat" + defaultTimeZoneId string = "America/Toronto" +) + +type testArg int + +const ( + eightByteEncoding testArg = iota + fourByteEncoding + noCompression + zstdCompression +) + +var testArgStr = []string{ + "eightByteEncoding", + "fourByteEncoding", + "noCompression", + "zstdCompression", +} + +type testArgs struct { + encoding testArg + compression testArg + name string + filePath string +} + +type preambleFields struct { + TimestampInfo + prevTimestamp ffi.EpochTimeMs +} + +func TestLogMessagesCombo(t *testing.T) { + messages := []ffi.LogMessage{ + "static text dict=var notint123 -1.234 4321.", + "static123 text321 dict=var0123 321.1234 -3210.", + } + testLogMessages(t, messages) +} + +func TestLogMessagesDict(t *testing.T) { + messages := []ffi.LogMessage{ + "textint1234 textequal=variable", + fmt.Sprintf("test=bigint %v", math.MaxInt32+1), + } + testLogMessages(t, messages) +} + +func TestLogMessagesFloat(t *testing.T) { + messages := []ffi.LogMessage{ + "float 1.0 1.2 1.23 1.234", + "-float -1.0 -1.2 -1.23 -1.234", + } + testLogMessages(t, messages) +} + +func TestLogMessagesInt(t *testing.T) { + messages := []ffi.LogMessage{ + "int 1 12 123 1234", + "-int -1 -12 -123 -1234", + } + testLogMessages(t, messages) +} + +func TestLogMessagesStatic(t *testing.T) { + messages := []ffi.LogMessage{ + "static text log zero.", + "static text log one.", + } + testLogMessages(t, messages) +} + +func TestLogMessagesLongLogs(t *testing.T) { + 
const eightMB int = 8 * 1024 * 1024 + messages := []ffi.LogMessage{ + strings.Repeat("x", eightMB), + strings.Repeat("x", eightMB-1), + } + testLogMessages(t, messages) +} + +func assertEndOfIr( + t *testing.T, + reader io.Reader, + irreader *Reader, +) { + _, err := irreader.Read() + if EndOfIr != err { + t.Fatalf("assertEndOfIr failed got: %v", err) + } +} + +func assertIrLogEvent( + t *testing.T, + reader io.Reader, + irreader *Reader, + event ffi.LogEvent, +) { + log, err := irreader.Read() + if nil != err { + t.Fatalf("Reader.Read failed: %v", err) + } + if event.Timestamp != log.Timestamp { + t.Fatalf("Reader.Read wrong timestamp: '%v' != '%v'", log.Timestamp, event.Timestamp) + } + if event.LogMessage != log.LogMessageView { + t.Fatalf("Reader.Read wrong message: '%v' != '%v'", log.LogMessageView, event.LogMessage) + } + t.Logf("'%v' : '%.128v'\n", log.Timestamp, log.LogMessageView) +} + +func generateTestArgs(t *testing.T, prefix string) []testArgs { + var tests []testArgs + tmpdir := t.TempDir() + for _, encoding := range []testArg{eightByteEncoding, fourByteEncoding} { + for _, compression := range []testArg{noCompression, zstdCompression} { + testName := prefix + "-" + testArgStr[encoding] + "-" + testArgStr[compression] + fileName := testName + ".clp" + if zstdCompression == compression { + fileName += ".zst" + } + filePath := filepath.Join(tmpdir, fileName) + tests = append(tests, testArgs{encoding, compression, testName, filePath}) + } + } + return tests +} + +func testLogMessages(t *testing.T, messages []ffi.LogMessage) { + for _, args := range generateTestArgs(t, t.Name()+"-SerDer") { + args := args // capture range variable for func literal + t.Run( + args.name, + func(t *testing.T) { t.Parallel(); testSerDerLogMessages(t, args, messages) }, + ) + } + for _, args := range generateTestArgs(t, t.Name()+"-WriteRead") { + args := args // capture range variable for func literal + t.Run( + args.name, + func(t *testing.T) { t.Parallel(); 
testWriteReadLogMessages(t, args, messages) }, + ) + } +} + +func openIoReader(t *testing.T, args testArgs) io.ReadCloser { + file, err := os.Open(args.filePath) + if nil != err { + t.Fatalf("os.Open: %v", err) + } + var reader io.ReadCloser + switch args.compression { + case noCompression: + reader = file + case zstdCompression: + reader, err = newZstdReader(file) + if nil != err { + t.Fatalf("zstd.NewReader failed: %v", err) + } + default: + t.Fatalf("unsupported compression: %v", args.compression) + } + return reader +} + +func openIoWriter(t *testing.T, args testArgs) io.WriteCloser { + file, err := os.Create(args.filePath) + if nil != err { + t.Fatalf("os.Create: %v", err) + } + var writer io.WriteCloser + switch args.compression { + case noCompression: + writer = file + case zstdCompression: + writer, err = zstd.NewWriter(file) + if nil != err { + t.Fatalf("zstd.NewWriter failed: %v", err) + } + default: + t.Fatalf("unsupported compression: %v", args.compression) + } + return writer +} + +type zstdReader struct { + *zstd.Decoder +} + +func newZstdReader(reader io.Reader) (*zstdReader, error) { + zreader, err := zstd.NewReader(reader) + return &zstdReader{zreader}, err +} + +func (self *zstdReader) Close() error { + self.Decoder.Close() + return nil +} diff --git a/ir/irerror.go b/ir/irerror.go index d29044a..e1b83e9 100644 --- a/ir/irerror.go +++ b/ir/irerror.go @@ -1,20 +1,22 @@ package ir -// IRError mirrors cpp type IRErrorCode defined in: +// IrError mirrors cpp type IRErrorCode defined in: // clp/components/core/src/ffi/ir_stream/decoding_methods.hpp -//go:generate stringer -type=IRError -type IRError int +// +//go:generate stringer -type=IrError +type IrError int const ( - Success IRError = iota + Success IrError = iota DecodeError - Eof - CorruptedIR - CorruptedMetadata - IncompleteIR - UnsupportedVersion + EndOfIr + CorruptedIr + IncompleteIr + QueryNotFound // must be IncompleteIr + 1 + EncodeError // not from clp + UnsupportedVersion // not from clp 
) -func (self IRError) Error() string { +func (self IrError) Error() string { return self.String() } diff --git a/ir/irerror_string.go b/ir/irerror_string.go index cf2585e..39133ad 100644 --- a/ir/irerror_string.go +++ b/ir/irerror_string.go @@ -1,4 +1,4 @@ -// Code generated by "stringer -type=IRError"; DO NOT EDIT. +// Code generated by "stringer -type=IrError"; DO NOT EDIT. package ir @@ -10,20 +10,21 @@ func _() { var x [1]struct{} _ = x[Success-0] _ = x[DecodeError-1] - _ = x[Eof-2] - _ = x[CorruptedIR-3] - _ = x[CorruptedMetadata-4] - _ = x[IncompleteIR-5] - _ = x[UnsupportedVersion-6] + _ = x[EndOfIr-2] + _ = x[CorruptedIr-3] + _ = x[IncompleteIr-4] + _ = x[QueryNotFound-5] + _ = x[EncodeError-6] + _ = x[UnsupportedVersion-7] } -const _IRError_name = "SuccessDecodeErrorEofCorruptedIRCorruptedMetadataIncompleteIRUnsupportedVersion" +const _IrError_name = "SuccessDecodeErrorEndOfIrCorruptedIrIncompleteIrQueryNotFoundEncodeErrorUnsupportedVersion" -var _IRError_index = [...]uint8{0, 7, 18, 21, 32, 49, 61, 79} +var _IrError_index = [...]uint8{0, 7, 18, 25, 36, 48, 61, 72, 90} -func (i IRError) String() string { - if i < 0 || i >= IRError(len(_IRError_index)-1) { - return "IRError(" + strconv.FormatInt(int64(i), 10) + ")" +func (i IrError) String() string { + if i < 0 || i >= IrError(len(_IrError_index)-1) { + return "IrError(" + strconv.FormatInt(int64(i), 10) + ")" } - return _IRError_name[_IRError_index[i]:_IRError_index[i+1]] + return _IrError_name[_IrError_index[i]:_IrError_index[i+1]] } diff --git a/ir/reader.go b/ir/reader.go index 5511cd8..816c8d4 100644 --- a/ir/reader.go +++ b/ir/reader.go @@ -1,118 +1,148 @@ package ir import ( - "bytes" "io" + "math" + "strings" "github.com/y-scope/clp-ffi-go/ffi" + "github.com/y-scope/clp-ffi-go/search" ) -// IrReader abstracts maintenance of a buffer containing an IR stream. It keeps -// track of the range in the buffer containing valid, unconsumed IR. 
It does -// not store a Reader to allow callers to mutate the Reader as necessary. -type IrReader struct { - IrDecoder - buf []byte - start int - end int +// Reader abstracts maintenance of a buffer containing a [Deserializer]. It +// keeps track of the range [start, end) in the buffer containing valid, +// unconsumed CLP IR. [NewReader] will construct a Reader with the appropriate +// Deserializer based on the consumed CLP IR preamble. The buffer will grow if +// it is not large enough to service a read call (e.g. it cannot hold the next +// log event in the IR). Close must be called to free the underlying memory and +// failure to do so will result in a memory leak. +type Reader struct { + Deserializer + ioReader io.Reader + buf []byte + start int + end int } -// adjust_buf mutates the IrReader.buf so that the next read call has space to -// fill. If the start of IrReader.buf is not 0 the contents of buf will be -// shifted back, so that end -= start and start = 0. If start is already 0 the -// buffer is grown. -func (self *IrReader) adjust_buf() int { - if 0 == self.start { - buf := make([]byte, len(self.buf)*2) - copy(buf, self.buf[self.start:self.end]) - self.buf = buf - } else { - copy(self.buf, self.buf[self.start:self.end]) +// NewReaderSize creates a new [Reader] and uses [DeserializePreamble] to read a +// CLP IR preamble from the [io.Reader], r. size denotes the initial size to use +// for the Reader's buffer that the io.Reader is read into. This buffer will +// grow if it is too small to contain the preamble or next log event. 
Returns: +// - success: valid [*Reader], nil +// - error: nil [*Reader], error propagated from [DeserializePreamble] or +// [io.Reader.Read] +func NewReaderSize(r io.Reader, size int) (*Reader, error) { + irr := &Reader{nil, r, make([]byte, size), 0, 0} + var err error + if _, err = irr.read(); nil != err { + return nil, err } - self.end -= self.start - self.start = 0 - return len(self.buf) + for { + irr.Deserializer, irr.start, err = DeserializePreamble(irr.buf[irr.start:irr.end]) + if IncompleteIr != err { + break + } + if _, err = irr.fillBuf(); nil != err { + break + } + } + if nil != err { + return nil, err + } + return irr, nil } -// read is a wrapper around the Read call to the io.Reader. It uses the correct -// range in buf and adjusts the range accordingly. On success nil is returned. -// On failure an error whose type depends on the io.Reader is returned. -// Note we do not return io.EOF if n > 0 as we have not yet consumed the IR. -func (self *IrReader) read(r io.Reader) error { - n, err := r.Read(self.buf[self.end:]) - if nil != err && io.EOF != err { - return err - } - self.end += n - return nil +// Returns [NewReaderSize] with a default buffer size of 1MB. +func NewReader(r io.Reader) (*Reader, error) { + return NewReaderSize(r, 1024*1024) } -// ReadPreamble uses [DecodePreamble] to read an IR stream preamble from r. -// bufSize denotes the initial size to use for the underlying buffer io.Reader -// is read into. This buffer will grow if it is too small to contain the -// preamble or next log event. -// Return values: -// - nil == error: success -// - [IRError] or [encoding/json]: error propagated from [DecodePreamble] -// - [io] error type or underlying reader type: io.Reader.Read failed -func ReadPreamble(r io.Reader, bufSize int) (IrReader, error) { - irr := IrReader{nil, make([]byte, bufSize), 0, 0} - - if err := irr.read(r); nil != err { - return irr, err - } +// Close will delete the underlying C++ allocated memory used by the +// deserializer. 
Failure to call Close will result in a memory leak. +func (self *Reader) Close() error { + return self.Deserializer.Close() +} +// Read uses [Deserializer.DeserializeLogEvent] to read from the CLP IR byte stream. The underlying +// buffer will grow if it is too small to contain the next log event. On error returns: +// - nil *ffi.LogEventView +// - error propagated from [Deserializer.DeserializeLogEvent] or [io.Reader.Read] +func (self *Reader) Read() (*ffi.LogEventView, error) { + var event *ffi.LogEventView + var pos int + var err error for { - var err error - irr.IrDecoder, irr.start, err = DecodePreamble(irr.buf[irr.start:irr.end]) - if nil == err { - return irr, nil - } else if IncompleteIR == err { - irr.adjust_buf() - if err := irr.read(r); nil != err { - return irr, err - } - } else { - return irr, err + event, pos, err = self.DeserializeLogEvent(self.buf[self.start:self.end]) + if IncompleteIr != err { + break + } + if _, err = self.fillBuf(); nil != err { + break } } + if nil != err { + return nil, err + } + self.start += pos + return event, nil } +// ReadToWildcardMatch wraps ReadToWildcardMatchWithTimeInterval, attempting to +// read the next log event that matches any query in queries, within the entire +// IR. It forwards the result of ReadToWildcardMatchWithTimeInterval. +func (self *Reader) ReadToWildcardMatch( + queries []search.WildcardQuery, +) (*ffi.LogEventView, int, error) { + return self.ReadToWildcardMatchWithTimeInterval( + queries, + search.TimestampInterval{0, math.MaxInt64}, + ) +} -// ReadNextLogEvent uses [DecodeNextLogEvent] to read from the IR stream in r. -// bufSize denotes the initial size to use for the underlying buffer io.Reader -// is read into. This buffer will grow if it is too small to contain the -// preamble or next log event. 
-// Return values: -// - nil == error: success -// - IRError.Eof: CLP found the IR stream EOF tag -// - io.EOF: io.Reader.Read got EOF -// - else: -// - type [IRError]: error propagated from [DecodeNextLogEvent] -// - type from io.Reader: io.Reader.Read failed -func (self *IrReader) ReadNextLogEvent(r io.Reader) (ffi.LogEvent, error) { +// ReadToWildcardMatchWithTimeInterval attempts to read the next log event that +// matches any query in queries, within timeInterval. It returns the +// deserialized [ffi.LogEventView], the index of the matched query in queries, +// and an error. On error returns: +// - nil *ffi.LogEventView +// - -1 index +// - [IrError] error: CLP failed to successfully deserialize +// - [EndOfIr] error: CLP found the IR stream EOF tag +func (self *Reader) ReadToWildcardMatchWithTimeInterval( + queries []search.WildcardQuery, + timeInterval search.TimestampInterval, +) (*ffi.LogEventView, int, error) { + var event *ffi.LogEventView + var pos int + var matchingQuery int + var err error + mergedQuery := search.MergeWildcardQueries(queries) for { - event, offset, err := self.DecodeNextLogEvent(self.buf[self.start:self.end]) - if nil == err { - self.start += offset - return event, nil - } else if IncompleteIR == err { - self.adjust_buf() - if err := self.read(r); nil != err { - return event, err - } - } else { - return event, err + event, pos, matchingQuery, err = self.DeserializeWildcardMatchWithTimeInterval( + self.buf[self.start:self.end], + mergedQuery, + timeInterval, + ) + if IncompleteIr != err { + break + } + if _, err = self.fillBuf(); nil != err { + break } } + if nil != err { + return nil, -1, err + } + self.start += pos + return event, matchingQuery, nil } -// Read the IR stream using the io.Reader until f returns true for a -// [ffi.LogEvent]. The succeeding LogEvent is returned. Errors are propagated -// from ReadNextLogEvent. 
-func (self *IrReader) ReadToFunc(r io.Reader, f func(ffi.LogEvent) bool) (ffi.LogEvent, error) { +// Read the CLP IR byte stream until f returns true for a [ffi.LogEventView]. +// The successful LogEvent is returned. Errors are propagated from [Reader.Read]. +func (self *Reader) ReadToFunc( + f func(*ffi.LogEventView) bool, +) (*ffi.LogEventView, error) { for { - event, err := self.ReadNextLogEvent(r) + event, err := self.Read() if nil != err { return event, err } @@ -122,26 +152,72 @@ func (self *IrReader) ReadToFunc(r io.Reader, f func(ffi.LogEvent) bool) (ffi.Lo } } -// Read the IR stream using the io.Reader until [ffi.LogEvent.Timestamp] >= -// time. Errors are propagated from ReadNextLogEvent. -func (self *IrReader) ReadToEpochTime(r io.Reader, time ffi.EpochTimeMs) (ffi.LogEvent, error) { - return self.ReadToFunc(r, func(e ffi.LogEvent) bool { return e.Timestamp >= time }) +// Read the CLP IR stream until a [ffi.LogEventView] is greater than or equal to +// the given timestamp. Errors are propagated from [Reader.ReadToFunc]. +func (self *Reader) ReadToEpochTime( + time ffi.EpochTimeMs, +) (*ffi.LogEventView, error) { + return self.ReadToFunc(func(event *ffi.LogEventView) bool { return event.Timestamp >= time }) } -// Read the IR stream using the io.Reader until [bytes/Contains] returns true -// for [ffi.LogEvent.Msg] and subslice. Errors are propagated from ReadNextLogEvent. -func (self *IrReader) ReadToContains(r io.Reader, subslice []byte) (ffi.LogEvent, error) { - return self.ReadToFunc(r, func(e ffi.LogEvent) bool { return bytes.Contains(e.Msg, subslice) }) +// Read the CLP IR stream until [strings.Contains] returns true for a +// [ffi.LogEventView] and the given substring. Errors are propagated from +// [Reader.ReadToFunc]. 
+func (self *Reader) ReadToContains(substr string) (*ffi.LogEventView, error) { + fn := func(event *ffi.LogEventView) bool { + return strings.Contains(event.LogMessageView, substr) + } + return self.ReadToFunc(fn) } -// Read the IR stream using the io.Reader until [bytes/HasPrefix] returns true -// for [ffi.LogEvent.Msg] and prefix. Errors are propagated from ReadNextLogEvent. -func (self *IrReader) ReadToPrefix(r io.Reader, prefix []byte) (ffi.LogEvent, error) { - return self.ReadToFunc(r, func(e ffi.LogEvent) bool { return bytes.HasPrefix(e.Msg, prefix) }) +// Read the CLP IR stream until [strings.HasPrefix] returns true for the log +// message of a [ffi.LogEventView] and the given prefix. Errors are propagated from +// [Reader.ReadToFunc]. +func (self *Reader) ReadToPrefix(prefix string) (*ffi.LogEventView, error) { + fn := func(event *ffi.LogEventView) bool { + return strings.HasPrefix(event.LogMessageView, prefix) + } + return self.ReadToFunc(fn) } -// Read the IR stream using the io.Reader until [bytes/HasSuffix] returns true -// for [ffi.LogEvent.Msg] field and suffix. Errors are propagated from ReadNextLogEvent. -func (self *IrReader) ReadToSuffix(r io.Reader, suffix []byte) (ffi.LogEvent, error) { - return self.ReadToFunc(r, func(e ffi.LogEvent) bool { return bytes.HasSuffix(e.Msg, suffix) }) +// Read the CLP IR stream until [strings.HasSuffix] returns true for the log +// message of a [ffi.LogEventView] and the given suffix. Errors are propagated from +// [Reader.ReadToFunc]. +func (self *Reader) ReadToSuffix(suffix string) (*ffi.LogEventView, error) { + fn := func(event *ffi.LogEventView) bool { + return strings.HasSuffix(event.LogMessageView, suffix) + } + return self.ReadToFunc(fn) +} + +// fillBuf shifts the remaining valid IR in [Reader.buf] to the front and then +// calls [Reader.read] to fill the remainder with more IR. Before reading into +// the buffer, the buffer is doubled in size if more than half of it is unconsumed IR. +// Forwards the return of [Reader.read]. 
+func (self *Reader) fillBuf() (int, error) { + if (self.end - self.start) > len(self.buf)/2 { + buf := make([]byte, len(self.buf)*2) + copy(buf, self.buf[self.start:self.end]) + self.buf = buf + } else { + copy(self.buf, self.buf[self.start:self.end]) + } + self.end -= self.start + self.start = 0 + n, err := self.read() + return n, err +} + +// read is a wrapper around an io.Reader.Read call. It uses the correct range in +// buf and adjusts the range accordingly. Always returns the number of bytes +// read. On success nil is returned. On failure an error is forwarded from +// [io.Reader], unless io.EOF == err: io.EOF is always replaced by nil (even if +// 0 == n) as we may not have consumed all of the CLP IR yet. +func (self *Reader) read() (int, error) { + n, err := self.ioReader.Read(self.buf[self.end:]) + self.end += n + if nil != err && io.EOF != err { + return n, err + } + return n, nil } diff --git a/ir/reader_test.go b/ir/reader_test.go index 796f6d4..4d7a439 100644 --- a/ir/reader_test.go +++ b/ir/reader_test.go @@ -1,24 +1,24 @@ package ir import ( - "fmt" - "io" + "math" "os" "testing" "time" - "runtime" "github.com/klauspost/compress/zstd" - "github.com/y-scope/clp-ffi-go/test" + "github.com/y-scope/clp-ffi-go/ffi" + "github.com/y-scope/clp-ffi-go/search" ) -func TestFourByteIrReader(t *testing.T) { - if 0 == len(os.Args) { - t.Fatalf("This test requires an input ir stream from -args: %v", os.Args) +func TestIrReader(t *testing.T) { + var fpath string = os.Getenv("go_test_ir") + if "" == fpath { + t.Skip("Set an input ir stream using the env variable: go_test_ir") } var err error var file *os.File - if file, err = os.Open(os.Args[len(os.Args)-1]); nil != err { + if file, err = os.Open(fpath); nil != err { t.Fatalf("os.Open failed: %v", err) } defer file.Close() @@ -26,25 +26,32 @@ func TestFourByteIrReader(t *testing.T) { reader, _ := zstd.NewReader(file) defer reader.Close() - var irr IrReader - if irr, err = ReadPreamble(reader, 4096); nil != err { - t.Fatalf("ReadPreamble failed: %v", err) + var irr 
*Reader + if irr, err = NewReaderSize(reader, 512*1024*1024); nil != err { + t.Fatalf("NewReader failed: %v", err) } + defer irr.Close() - fins := []test.Finalizer{} + interval := search.TimestampInterval{Lower: 0, Upper: math.MaxInt64} + queries := []search.WildcardQuery{ + search.NewWildcardQuery("*ERROR*", true), + search.NewWildcardQuery("*WARN*", true), + } for { - // log, err := irr.ReadNextLogEvent(reader) - log, err := irr.ReadToContains(reader, []byte("ERROR")) - // run GC to try and test that log.Msg isn't freed by finalizer - runtime.GC() - if nil == err { - fmt.Printf("msg: %v | %v", time.UnixMilli(int64(log.Timestamp)), string(log.Msg)) - } else if Eof == err || io.EOF == err { + var log *ffi.LogEventView + // log, err = irr.Read() + // log, err = irr.ReadToContains("ERROR") + // var _ search.WildcardQuery + log, _, err = irr.ReadToWildcardMatchWithTimeInterval( + queries, + interval, + ) + if nil != err { break - } else { - t.Fatalf("ReadNextLogEvent failed: %v", err) } - fins = append(fins, test.NewFinalizer(&log)) + t.Logf("msg: %v | %v", time.UnixMilli(int64(log.Timestamp)), log.LogMessageView) + } + if EndOfIr != err { + t.Fatalf("Reader.Read failed: %v", err) } - test.AssertFinalizers(t, fins...) 
} diff --git a/ir/serder_test.go b/ir/serder_test.go new file mode 100644 index 0000000..f75fb31 --- /dev/null +++ b/ir/serder_test.go @@ -0,0 +1,161 @@ +package ir + +import ( + "io" + "testing" + "time" + + "github.com/y-scope/clp-ffi-go/ffi" +) + +func TestPreamble(t *testing.T) { + preamble := preambleFields{ + TimestampInfo{defaultTimestampPattern, defaultTimestampPatternSyntax, defaultTimeZoneId}, + ffi.EpochTimeMs(time.Now().UnixMilli()), + } + for _, args := range generateTestArgs(t, t.Name()) { + args := args // capture range variable for func literal + t.Run(args.name, func(t *testing.T) { t.Parallel(); testPreamble(t, args, preamble) }) + } +} + +func testPreamble(t *testing.T, args testArgs, preamble preambleFields) { + writer := openIoWriter(t, args) + irSerializer := serializeIrPreamble(t, args, preamble, writer) + + writer.Close() + irSerializer.Close() + + reader := openIoReader(t, args) + assertIrPreamble(t, args, reader, preamble) +} + +func testSerDerLogMessages( + t *testing.T, + args testArgs, + logMessages []ffi.LogMessage, +) { + ioWriter := openIoWriter(t, args) + + preamble := preambleFields{ + TimestampInfo{defaultTimestampPattern, defaultTimestampPatternSyntax, defaultTimeZoneId}, + ffi.EpochTimeMs(time.Now().UnixMilli()), + } + irSerializer := serializeIrPreamble(t, args, preamble, ioWriter) + + var events []ffi.LogEvent + for _, msg := range logMessages { + event := ffi.LogEvent{ + LogMessage: msg, + Timestamp: ffi.EpochTimeMs(time.Now().UnixMilli()), + } + irView, err := irSerializer.SerializeLogEvent(event) + if nil != err { + t.Fatalf("SerializeLogEvent failed: %v", err) + } + _, err = ioWriter.Write(irView) + if nil != err { + t.Fatalf("io.Writer.Write message: %v", err) + } + events = append(events, event) + } + irSerializer.Close() + ioWriter.Write([]byte{0x0}) + ioWriter.Close() + + ioReader := openIoReader(t, args) + defer ioReader.Close() + irReader := assertIrPreamble(t, args, ioReader, preamble) + defer irReader.Close() + + 
for _, event := range events { + assertIrLogEvent(t, ioReader, irReader, event) + } + assertEndOfIr(t, ioReader, irReader) +} + +func serializeIrPreamble( + t *testing.T, + args testArgs, + preamble preambleFields, + writer io.Writer, +) Serializer { + var err error + var serializer Serializer + var preambleIr BufView + switch args.encoding { + case eightByteEncoding: + serializer, preambleIr, err = EightByteSerializer( + preamble.Pattern, + preamble.PatternSyntax, + preamble.TimeZoneId, + ) + case fourByteEncoding: + serializer, preambleIr, err = FourByteSerializer( + preamble.Pattern, + preamble.PatternSyntax, + preamble.TimeZoneId, + preamble.prevTimestamp, + ) + default: + t.Fatalf("unsupported encoding: %v", args.encoding) + } + if nil != err { + t.Fatalf("constructor failed: %v", err) + } + n, err := writer.Write(preambleIr) + if n != len(preambleIr) { + t.Fatalf("short write for preamble: %v/%v", n, len(preambleIr)) + } + if nil != err { + t.Fatalf("io.Writer.Write preamble: %v", err) + } + return serializer +} + +func assertIrPreamble( + t *testing.T, + args testArgs, + reader io.Reader, + preamble preambleFields, +) *Reader { + irreader, err := NewReaderSize(reader, 4096) + if nil != err { + t.Fatalf("NewReader failed: %v", err) + } + if irreader.TimestampInfo().Pattern != preamble.Pattern { + t.Fatalf( + "NewReader wrong pattern: '%v' != '%v'", + irreader.TimestampInfo().Pattern, + preamble.Pattern, + ) + } + if irreader.TimestampInfo().PatternSyntax != preamble.PatternSyntax { + t.Fatalf( + "NewReader wrong pattern syntax: '%v' != '%v'", + irreader.TimestampInfo().PatternSyntax, + preamble.PatternSyntax, + ) + } + if irreader.TimestampInfo().TimeZoneId != preamble.TimeZoneId { + t.Fatalf( + "NewReader wrong time zone id: '%v' != '%v'", + irreader.TimestampInfo().TimeZoneId, + preamble.TimeZoneId, + ) + } + if fourByteEncoding == args.encoding { + deserializer, ok := irreader.Deserializer.(*fourByteDeserializer) + if false == ok { + t.Fatalf("casting 
Deserializer to *fourByteDeserializer failed for fourByteEncoding.") + } + if deserializer.prevTimestamp != preamble.prevTimestamp { + t.Fatalf( + "NewReader wrong reference timestamp: '%v' != '%v'", + deserializer.prevTimestamp, + preamble.prevTimestamp, + ) + } + } + return irreader +} diff --git a/ir/serializer.go b/ir/serializer.go new file mode 100644 index 0000000..67a80b6 --- /dev/null +++ b/ir/serializer.go @@ -0,0 +1,172 @@ +package ir + +/* +#include +#include +*/ +import "C" + +import ( + "unsafe" + + "github.com/y-scope/clp-ffi-go/ffi" +) + +// A Serializer exports functions to serialize log events into a CLP IR byte +// stream. Serialization functions only return views (slices) of IR bytes, +// leaving their use to the user. Each Serializer owns its own unique underlying +// memory for the views it produces/returns. This memory is reused for each +// view, so to persist the contents the memory must be copied into another +// object. Close must be called to free the underlying memory and failure to do +// so will result in a memory leak. +type Serializer interface { + SerializeLogEvent(event ffi.LogEvent) (BufView, error) + TimestampInfo() TimestampInfo + Close() error +} + +// EightByteSerializer creates and returns a new Serializer that writes eight +// byte encoded CLP IR and serializes an IR preamble into a BufView using it. 
On +// error returns: +// - nil Serializer +// - nil BufView +// - [IrError] error: CLP failed to successfully serialize +func EightByteSerializer( + tsPattern string, + tsPatternSyntax string, + timeZoneId string, +) (Serializer, BufView, error) { + var irView C.ByteSpan + irs := eightByteSerializer{ + commonSerializer{TimestampInfo{tsPattern, tsPatternSyntax, timeZoneId}, nil}, + } + if err := IrError(C.ir_serializer_new_eight_byte_serializer_with_preamble( + newCStringView(tsPattern), + newCStringView(tsPatternSyntax), + newCStringView(timeZoneId), + &irs.cptr, + &irView, + )); Success != err { + return nil, nil, err + } + return &irs, unsafe.Slice((*byte)(irView.m_data), irView.m_size), nil +} + +// FourByteSerializer creates and returns a new Serializer that writes four byte +// encoded CLP IR and serializes an IR preamble into a BufView using it. On error +// returns: +// - nil Serializer +// - nil BufView +// - [IrError] error: CLP failed to successfully serialize +func FourByteSerializer( + tsPattern string, + tsPatternSyntax string, + timeZoneId string, + referenceTs ffi.EpochTimeMs, +) (Serializer, BufView, error) { + var irView C.ByteSpan + irs := fourByteSerializer{ + commonSerializer{TimestampInfo{tsPattern, tsPatternSyntax, timeZoneId}, nil}, + referenceTs, + } + if err := IrError(C.ir_serializer_new_four_byte_serializer_with_preamble( + newCStringView(tsPattern), + newCStringView(tsPatternSyntax), + newCStringView(timeZoneId), + C.int64_t(referenceTs), + &irs.cptr, + &irView, + )); Success != err { + return nil, nil, err + } + return &irs, unsafe.Slice((*byte)(irView.m_data), irView.m_size), nil +} + +// commonSerializer contains fields common to all types of CLP IR encoding. +// tsInfo stores information common to all timestamps found in the IR. +// cptr holds a reference to the underlying C++ object used as backing storage +// for the Views returned by the serializer. 
Close must be called to free this +// underlying memory and failure to do so will result in a memory leak. +type commonSerializer struct { + tsInfo TimestampInfo + cptr unsafe.Pointer +} + +// Closes the serializer by releasing the underlying C++ allocated memory. +// Failure to call Close will result in a memory leak. +func (self *commonSerializer) Close() error { + if nil != self.cptr { + C.ir_serializer_close(self.cptr) + self.cptr = nil + } + return nil +} + +// Returns the TimestampInfo of the Serializer. +func (self commonSerializer) TimestampInfo() TimestampInfo { + return self.tsInfo +} + +type eightByteSerializer struct { + commonSerializer +} + +// SerializeLogEvent attempts to serialize the log event, event, into an eight +// byte encoded CLP IR byte stream. On error returns: +// - a nil BufView +// - [IrError] based on the failure of the Cgo call +func (self *eightByteSerializer) SerializeLogEvent( + event ffi.LogEvent, +) (BufView, error) { + return serializeLogEvent(self, event) +} + +// fourByteSerializer contains both a common CLP IR serializer and stores the +// previously seen log event's timestamp. The previous timestamp is necessary to +// calculate the current timestamp as four byte encoding only encodes the +// timestamp delta between the current log event and the previous. +type fourByteSerializer struct { + commonSerializer + prevTimestamp ffi.EpochTimeMs +} + +// SerializeLogEvent attempts to serialize the log event, event, into a four +// byte encoded CLP IR byte stream. 
On error returns: +// - nil BufView +// - [IrError] based on the failure of the Cgo call +func (self *fourByteSerializer) SerializeLogEvent( + event ffi.LogEvent, +) (BufView, error) { + return serializeLogEvent(self, event) +} + +func serializeLogEvent( + serializer Serializer, + event ffi.LogEvent, +) (BufView, error) { + var irView C.ByteSpan + var err error + switch irs := serializer.(type) { + case *eightByteSerializer: + err = IrError(C.ir_serializer_serialize_eight_byte_log_event( + newCStringView(event.LogMessage), + C.int64_t(event.Timestamp), + irs.cptr, + &irView, + )) + case *fourByteSerializer: + err = IrError(C.ir_serializer_serialize_four_byte_log_event( + newCStringView(event.LogMessage), + C.int64_t(event.Timestamp-irs.prevTimestamp), + irs.cptr, + &irView, + )) + if Success == err { + irs.prevTimestamp = event.Timestamp + } + } + if Success != err { + return nil, err + } + return unsafe.Slice((*byte)(irView.m_data), irView.m_size), nil +} diff --git a/ir/writer.go b/ir/writer.go new file mode 100644 index 0000000..49d48b4 --- /dev/null +++ b/ir/writer.go @@ -0,0 +1,141 @@ +package ir + +import ( + "bytes" + "fmt" + "io" + "time" + + "github.com/y-scope/clp-ffi-go/ffi" +) + +// Writer builds up a buffer of serialized CLP IR using a [Serializer]. +// [NewWriter] will construct a Writer with the appropriate Serializer based on +// the arguments used. Close must be called to free the underlying memory and +// failure to do so will result in a memory leak. To write a complete IR stream +// Close must be called before the final WriteTo call. +type Writer struct { + Serializer + buf bytes.Buffer +} + +// Returns [NewWriterSize] with a FourByteEncoding Serializer using the local +// time zone, and a buffer size of 1MB. +func NewWriter() (*Writer, error) { + return NewWriterSize[FourByteEncoding](1024*1024, time.Local.String()) +} + +// NewWriterSize creates a new [Writer] with a [Serializer] based on T, and +// writes a CLP IR preamble. 
The preamble is stored inside the Writer's internal +// buffer to be written out later. The size parameter denotes the initial buffer +// size to use and timeZoneId denotes the time zone of the source producing the +// log events, so that local times (any time that is not a unix timestamp) are +// handled correctly. +// - success: valid [*Writer], nil +// - error: nil [*Writer], invalid type error or an error propagated from +// [FourByteSerializer], [EightByteSerializer], or [bytes.Buffer.Write] +func NewWriterSize[T EightByteEncoding | FourByteEncoding]( + size int, + timeZoneId string, +) (*Writer, error) { + var irw Writer + irw.buf.Grow(size) + + var irView BufView + var err error + var t T + switch any(t).(type) { + case EightByteEncoding: + irw.Serializer, irView, err = EightByteSerializer( + "", + "", + timeZoneId, + ) + case FourByteEncoding: + irw.Serializer, irView, err = FourByteSerializer( + "", + "", + timeZoneId, + ffi.EpochTimeMs(time.Now().UnixMilli()), + ) + default: + err = fmt.Errorf("Invalid type: %T", t) + } + if nil != err { + return nil, err + } + _, err = irw.buf.Write(irView) + if nil != err { + return nil, err + } + return &irw, nil +} + +// Close will write a null byte denoting the end of the IR stream and delete the +// underlying C++ allocated memory used by the serializer. Failure to call Close +// will result in a memory leak. +func (self *Writer) Close() error { + self.buf.WriteByte(0x0) + return self.Serializer.Close() +} + +// CloseTo is a combination of [Close] and [WriteTo]. It will completely close +// the Writer (and underlying serializer) and write the data out to the +// io.Writer. +// Returns: +// - success: number of bytes written, nil +// - error: number of bytes written, error propagated from [WriteTo] +func (self *Writer) CloseTo(w io.Writer) (int64, error) { + self.Close() + return self.WriteTo(w) +} + +// Bytes returns a slice of the Writer's internal buffer. 
The slice is valid for +// use only until the next buffer modification (that is, only until the next +// call to Write, WriteTo, or Reset). +func (self *Writer) Bytes() []byte { + return self.buf.Bytes() +} + +// Reset resets the buffer to be empty, but it retains the underlying storage +// for use by future writes. +func (self *Writer) Reset() { + self.buf.Reset() +} + +// Write uses [SerializeLogEvent] to serialize the provided log event to CLP IR +// and then stores it in the internal buffer. Returns: +// - success: number of bytes written, nil +// - error: number of bytes written (can be 0), error propagated from +// [SerializeLogEvent] or [bytes.Buffer.Write] +func (self *Writer) Write(event ffi.LogEvent) (int, error) { + irView, err := self.SerializeLogEvent(event) + if nil != err { + return 0, err + } + // bytes.Buffer.Write will always return nil for err (https://pkg.go.dev/bytes#Buffer.Write) + // However, err is still propagated to correctly alert the user in case this ever changes. If + // Write can fail in the future, we should either: + // 1. fix the issue and retry the write + // 2. store irView and provide a retry API (allowing the user to fix the issue and retry) + n, err := self.buf.Write(irView) + if nil != err { + return n, err + } + return n, nil +} + +// WriteTo writes data to w until the buffer is drained or an error occurs. If +// no error occurs the buffer is reset. On an error the user is expected to use +// [self.Bytes] and [self.Reset] to manually handle the buffer's contents before +// continuing. 
Returns: +// - success: number of bytes written, nil +// - error: number of bytes written, error propagated from +// [bytes.Buffer.WriteTo] +func (self *Writer) WriteTo(w io.Writer) (int64, error) { + n, err := self.buf.WriteTo(w) + if nil == err { + self.buf.Reset() + } + return n, err +} diff --git a/ir/writeread_test.go b/ir/writeread_test.go new file mode 100644 index 0000000..d529898 --- /dev/null +++ b/ir/writeread_test.go @@ -0,0 +1,70 @@ +package ir + +import ( + "io" + "testing" + "time" + + "github.com/y-scope/clp-ffi-go/ffi" +) + +func testWriteReadLogMessages( + t *testing.T, + args testArgs, + messages []ffi.LogMessage, +) { + ioWriter := openIoWriter(t, args) + irWriter := openIrWriter(t, args, ioWriter) + + var events []ffi.LogEvent + for _, msg := range messages { + event := ffi.LogEvent{ + LogMessage: msg, + Timestamp: ffi.EpochTimeMs(time.Now().UnixMilli()), + } + _, err := irWriter.Write(event) + if nil != err { + t.Fatalf("ir.Writer.Write failed: %v", err) + } + events = append(events, event) + } + _, err := irWriter.CloseTo(ioWriter) + if nil != err { + t.Fatalf("ir.Writer.CloseTo failed: %v", err) + } + ioWriter.Close() + + ioReader := openIoReader(t, args) + defer ioReader.Close() + irReader, err := NewReader(ioReader) + if nil != err { + t.Fatalf("NewReader failed: %v", err) + } + defer irReader.Close() + + for _, event := range events { + assertIrLogEvent(t, ioReader, irReader, event) + } + assertEndOfIr(t, ioReader, irReader) +} + +func openIrWriter( + t *testing.T, + args testArgs, + writer io.Writer, +) *Writer { + var irWriter *Writer + var err error + switch args.encoding { + case eightByteEncoding: + irWriter, err = NewWriterSize[EightByteEncoding](1024*1024, defaultTimeZoneId) + case fourByteEncoding: + irWriter, err = NewWriterSize[FourByteEncoding](1024*1024, defaultTimeZoneId) + default: + t.Fatalf("unsupported encoding: %v", args.encoding) + } + if nil != err { + t.Fatalf("NewWriterSize failed: %v", err) + } + return irWriter +} 
diff --git a/lib/libclp_ffi_darwin_arm64.a b/lib/libclp_ffi_darwin_arm64.a new file mode 100644 index 0000000..56264af Binary files /dev/null and b/lib/libclp_ffi_darwin_arm64.a differ diff --git a/lib/libclp_ffi_linux_amd64.a b/lib/libclp_ffi_linux_amd64.a new file mode 100644 index 0000000..82aca71 Binary files /dev/null and b/lib/libclp_ffi_linux_amd64.a differ diff --git a/lib/libclp_ffi_linux_amd64.so b/lib/libclp_ffi_linux_amd64.so deleted file mode 100644 index 4e5a5ed..0000000 Binary files a/lib/libclp_ffi_linux_amd64.so and /dev/null differ diff --git a/message/BUILD.bazel b/message/BUILD.bazel deleted file mode 100644 index e7a8293..0000000 --- a/message/BUILD.bazel +++ /dev/null @@ -1,30 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") - -go_library( - name = "message", - srcs = [ - "cgo_amd64.go", - "cgo_arm64.go", - "encoding.go", - "msgerror.go", - "msgerror_string.go", - ], - cgo = True, - cdeps = ["//:libclp_ffi"], - importpath = "github.com/y-scope/clp-ffi-go/message", - visibility = ["//visibility:public"], - deps = ["//ffi"], -) - -alias( - name = "go_default_library", - actual = ":message", - visibility = ["//visibility:public"], -) - -go_test( - name = "message_test", - srcs = ["encoding_test.go"], - embed = [":message"], - deps = ["//test"], -) diff --git a/message/cgo_amd64.go b/message/cgo_amd64.go deleted file mode 100644 index 1f2bb1a..0000000 --- a/message/cgo_amd64.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !external && amd64 - -package message - -/* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ -#cgo linux LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_linux_amd64 -Wl,-rpath=${SRCDIR}/../lib/ -#cgo darwin LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_darwin_amd64 -Wl,-rpath=${SRCDIR}/../lib/ -*/ -import "C" diff --git a/message/cgo_arm64.go b/message/cgo_arm64.go deleted file mode 100644 index 50838a9..0000000 --- a/message/cgo_arm64.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !external && arm64 - -package message - -/* -#cgo 
CFLAGS: -I${SRCDIR}/../cpp/src/ -#cgo linux LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_linux_arm64 -Wl,-rpath=${SRCDIR}/../lib/ -#cgo darwin LDFLAGS: -L${SRCDIR}/../lib/ -lclp_ffi_darwin_arm64 -Wl,-rpath=${SRCDIR}/../lib/ -*/ -import "C" diff --git a/message/encoding.go b/message/encoding.go deleted file mode 100644 index 22d4e88..0000000 --- a/message/encoding.go +++ /dev/null @@ -1,145 +0,0 @@ -package message - -/* -#include -#include -*/ -import "C" - -import ( - "runtime" - "unsafe" - - "github.com/y-scope/clp-ffi-go/ffi" -) - -/* TODO outdated -There are two sets of structs exposed: -1. DecodedMessage, EncodedMessage -The fields of these structs point to regular go memory. This memory was -populated by copying the data returned by the native calls. The behaviour of -these structs is the same as any normal go struct, at the expense of extra -copying. -To greatly simplify decoding an EncodedMessage uses a private reference to the -DecodedMessageUnsafe that created it. Holding this reference prevents the c -memory from being freed and will increase memory usage. ReleaseRef can be used -to drop this reference, but DecodeMessage will always return an error -afterwards. - -2. DecodedMessageUnsafe, EncodedMessageUnsafe -The fields of these structs point to c memory. Slices are created to wrap the -native memory, with no copying performed. The underlying c memory will be freed -by a finalizer set on the creation of these objects. This means once the -original object becomes unreachable any access to the underlying memory is -undefined. Any reference created to this memory (e.g. making a copy of the -object or fields, new slices of the fields, etc) is only valid as long as the -original object is reachable. -With that said, if all usage is made through the original object, practical -usage will behave as expected. - -There is no common interface to abstract/generalize the use of these structs. 
-It would be fairly easy for an unsuspecting user to pass an unsafe structure to -a function that will not handle it properly. We encourage the user to be -explicit about the usage of unsafe structs. - -*/ - -type EncodedMessage struct { - Logtype []byte - Vars []byte - DictVars []byte - DictVarEndOffsets []int32 - unsafeRef *EncodedMessageUnsafe -} - -type EncodedMessageUnsafe struct { - Logtype []byte - Vars []byte - DictVars []byte - DictVarEndOffsets []int32 - cPtr unsafe.Pointer -} - -func (self *EncodedMessage) ReleaseRef() { - self.unsafeRef = nil -} - -func (self *EncodedMessageUnsafe) MakeSafe() EncodedMessage { - var em EncodedMessage - em.unsafeRef = self - em.Logtype = make([]byte, len(self.Logtype)) - em.Vars = make([]byte, len(self.Vars)) - em.DictVars = make([]byte, len(self.DictVars)) - em.DictVarEndOffsets = make([]int32, len(self.DictVarEndOffsets)) - copy(em.Logtype, self.Logtype) - copy(em.Vars, self.Vars) - copy(em.DictVars, self.DictVars) - copy(em.DictVarEndOffsets, self.DictVarEndOffsets) - return em -} - -func (self *EncodedMessage) DecodeMessage() (ffi.LogMessage, error) { - if nil == self.unsafeRef { - return ffi.LogMessage{}, NilRef - } - msg, ret := self.unsafeRef.DecodeMessage() - return msg, ret -} - -func (self *EncodedMessageUnsafe) DecodeMessage() (ffi.LogMessage, error) { - var msgClass unsafe.Pointer - var msg *C.char - var msgSize C.size_t - C.decode_message(self.cPtr, &msgClass, &msg, &msgSize) - - if nil == msgClass || nil == msg { - return ffi.LogMessage{}, DecodeError - } - - logmsg := ffi.NewLogMessage(unsafe.Pointer(msg), uint64(msgSize), msgClass) - return logmsg, nil -} - -func EncodeMessage(msg string) (EncodedMessage, int) { - em, ret := EncodeMessageUnsafe(msg) - return em.MakeSafe(), ret -} - -func EncodeMessageUnsafe(msg string) (EncodedMessageUnsafe, int) { - var logtypePtr, varsPtr, dictVarsPtr, dictVarEndOffsetsPtr unsafe.Pointer - var logtypeSize, varsSize, dictVarsSize, dictVarEndOffsetsSize uint64 - var em 
EncodedMessageUnsafe - em.cPtr = C.encode_message(unsafe.Pointer(&[]byte(msg)[0]), C.size_t(len(msg)), - &logtypePtr, unsafe.Pointer(&logtypeSize), - &varsPtr, unsafe.Pointer(&varsSize), - &dictVarsPtr, unsafe.Pointer(&dictVarsSize), - &dictVarEndOffsetsPtr, unsafe.Pointer(&dictVarEndOffsetsSize)) - if nil == em.cPtr { - return em, -1 - } - em.Logtype = unsafe.Slice((*byte)(logtypePtr), logtypeSize) - if nil == em.Logtype { - return em, -2 - } - if 0 != varsSize { - em.Vars = unsafe.Slice((*byte)(varsPtr), varsSize) - if nil == em.Vars { - return em, -3 - } - } - if 0 != dictVarsSize { - em.DictVars = unsafe.Slice((*byte)(dictVarsPtr), dictVarsSize) - if nil == em.DictVars { - return em, -4 - } - } - if 0 != dictVarEndOffsetsSize { - em.DictVarEndOffsets = unsafe.Slice((*int32)(dictVarEndOffsetsPtr), dictVarEndOffsetsSize) - if nil == em.DictVarEndOffsets { - return em, -5 - } - } - runtime.SetFinalizer(&em, - func(em *EncodedMessageUnsafe) { C.delete_encoded_message(em.cPtr) }) - return em, 0 -} diff --git a/message/encoding_test.go b/message/encoding_test.go deleted file mode 100644 index 68953dc..0000000 --- a/message/encoding_test.go +++ /dev/null @@ -1,100 +0,0 @@ -package message - -import ( - "encoding/binary" - _ "fmt" - "math" - "runtime" - "testing" - - "github.com/y-scope/clp-ffi-go/test" -) - -type testLog struct { - name string - msg string - // vars []byte - // dictVars []string - // dictVarEndOffsets []int32 -} - -var testlogs []testLog = []testLog{ - {name: "static", msg: "static text static text static text"}, - {name: "int", msg: "0 1 2 3 0123"}, - {name: "float", msg: "0.0 1.1 2.2 3.3 01234.0123"}, - {name: "dict", msg: "dictVar0 dictVar1 dictVar=dictVar2"}, - {name: "combo", msg: "Static text, dictVar1, 123, 456.7, dictVar2, 987, 654.3"}, -} - -func assertDecodedMessage(t *testing.T, log testLog, err error, msg string) { - t.Helper() - if nil != err { - t.Fatalf("DecodeMessage: %v", err) - } - if log.msg != msg { - t.Fatalf("Test msg does not 
match LogMessage.Msg:\nwant| %v\ngot| %v", log.msg, msg) - } -} - -func assertEncodedMessage(t *testing.T, log testLog, ret int, logtype []byte, vars []byte) { - t.Helper() - if 0 != ret { - t.Fatalf("EncodeMessage: %v", ret) - } - // TODO: test other fields...? -} - -func testDecodeMessage(t *testing.T, testlog testLog) { - uem, ret := EncodeMessageUnsafe(testlog.msg) - var em EncodedMessage = uem.MakeSafe() - assertEncodedMessage(t, testlog, ret, em.Logtype, em.Vars) - - log, err := em.unsafeRef.DecodeMessage() - runtime.GC() - - // calling ReleaseRef allows uem to be collected despite em and msg being - // still reachable - em.ReleaseRef() - test.AssertFinalizers(t, test.NewFinalizer(&uem)) - runtime.GC() - - assertDecodedMessage(t, testlog, err, string(log.Msg)) - test.AssertFinalizers(t, test.NewFinalizer(&em), test.NewFinalizer(&log)) -} - -func testUnsafeDecodeMessage(t *testing.T, testlog testLog) { - em, ret := EncodeMessageUnsafe(testlog.msg) - assertEncodedMessage(t, testlog, ret, em.Logtype, em.Vars) - - log, err := em.DecodeMessage() - runtime.GC() - assertDecodedMessage(t, testlog, err, string(log.Msg)) - test.AssertFinalizers(t, test.NewFinalizer(&em), test.NewFinalizer(&log)) -} - -func TestSafeEncodeDecodeMessage(t *testing.T) { - for _, testlog := range testlogs { - test := testlog - t.Run(test.name, func(t *testing.T) { testDecodeMessage(t, test) }) - } -} - -func TestUnsafeEncodeDecodeMessage(t *testing.T) { - for _, testlog := range testlogs { - test := testlog - t.Run(test.name, func(t *testing.T) { testUnsafeDecodeMessage(t, test) }) - } -} - -func Float64frombytes(bytes []byte) float64 { - bits := binary.LittleEndian.Uint64(bytes) - float := math.Float64frombits(bits) - return float -} - -func Float64bytes(float float64) []byte { - bits := math.Float64bits(float) - bytes := make([]byte, 8) - binary.LittleEndian.PutUint64(bytes, bits) - return bytes -} diff --git a/message/msgerror.go b/message/msgerror.go deleted file mode 100644 index 
ef86020..0000000 --- a/message/msgerror.go +++ /dev/null @@ -1,16 +0,0 @@ -package message - -// MsgError defines errors created in the go FFI code, but may need to mirror a -// cpp type in the future. -//go:generate stringer -type=MsgError -type MsgError int - -const ( - _ MsgError = iota - DecodeError - NilRef -) - -func (self MsgError) Error() string { - return self.String() -} diff --git a/message/msgerror_string.go b/message/msgerror_string.go deleted file mode 100644 index ee35ac9..0000000 --- a/message/msgerror_string.go +++ /dev/null @@ -1,25 +0,0 @@ -// Code generated by "stringer -type=MsgError"; DO NOT EDIT. - -package message - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[DecodeError-1] - _ = x[NilRef-2] -} - -const _MsgError_name = "DecodeErrorNilRef" - -var _MsgError_index = [...]uint8{0, 11, 17} - -func (i MsgError) String() string { - i -= 1 - if i < 0 || i >= MsgError(len(_MsgError_index)-1) { - return "MsgError(" + strconv.FormatInt(int64(i+1), 10) + ")" - } - return _MsgError_name[_MsgError_index[i]:_MsgError_index[i+1]] -} diff --git a/search/BUILD.bazel b/search/BUILD.bazel new file mode 100644 index 0000000..0c48f14 --- /dev/null +++ b/search/BUILD.bazel @@ -0,0 +1,23 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "search", + srcs = [ + "wildcard_query.go", + ], + cgo = True, + cdeps = [ + "//:libclp_ffi", + ], + importpath = "github.com/y-scope/clp-ffi-go/search", + visibility = ["//visibility:public"], + deps = [ + "//ffi", + ], +) + +alias( + name = "go_default_library", + actual = ":search", + visibility = ["//visibility:public"], +) diff --git a/search/cgo_amd64.go b/search/cgo_amd64.go new file mode 100644 index 0000000..2c6891a --- /dev/null +++ b/search/cgo_amd64.go @@ -0,0 +1,10 @@ +//go:build !external && amd64 + +package search + +/* 
+#cgo CPPFLAGS: -I${SRCDIR}/../include/ +#cgo linux LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_linux_amd64.a -lstdc++ +#cgo darwin LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_darwin_amd64.a -lstdc++ +*/ +import "C" diff --git a/search/cgo_arm64.go b/search/cgo_arm64.go new file mode 100644 index 0000000..9c7993a --- /dev/null +++ b/search/cgo_arm64.go @@ -0,0 +1,10 @@ +//go:build !external && arm64 + +package search + +/* +#cgo CPPFLAGS: -I${SRCDIR}/../include/ +#cgo linux LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_linux_arm64.a -lstdc++ +#cgo darwin LDFLAGS: ${SRCDIR}/../lib/libclp_ffi_darwin_arm64.a -lstdc++ +*/ +import "C" diff --git a/message/cgo_external.go b/search/cgo_external.go similarity index 71% rename from message/cgo_external.go rename to search/cgo_external.go index ff49970..ee54523 100644 --- a/message/cgo_external.go +++ b/search/cgo_external.go @@ -1,10 +1,10 @@ //go:build external // When using `external` build manually set linkage with `CGO_LDFLAGS`. -package message +package search /* -#cgo CFLAGS: -I${SRCDIR}/../cpp/src/ +#cgo CPPFLAGS: -I${SRCDIR}/../include/ #cgo external LDFLAGS: */ import "C" diff --git a/search/wildcard_query.go b/search/wildcard_query.go new file mode 100644 index 0000000..7913253 --- /dev/null +++ b/search/wildcard_query.go @@ -0,0 +1,88 @@ +package search + +/* +#include +#include +*/ +import "C" + +import ( + "strings" + "unsafe" + + "github.com/y-scope/clp-ffi-go/ffi" +) + +// A CLP wildcard query containing a query string and a bool for whether the +// query is case sensitive or not. The fields must be accessed through getters +// to ensure that the query string remains clean/safe after creation by +// NewWildcardQuery. +// Two wildcards are currently supported: '*' to match 0 or more characters, and +// '?' to match any single character. Each can be escaped using a preceding '\'. +// Other characters which are escaped are treated as normal characters. 
+type WildcardQuery struct { + query string + caseSensitive bool +} + +// Create a new WildcardQuery that is cleaned to contain a safe wildcard query +// string. A wildcard query string must follow 2 rules: +// 1. The wildcard string should not contain consecutive '*'. +// 2. The wildcard string should not contain an escape character without a +// character following it. +// +// NewWildcardQuery will sanitize the provided query and store the safe version. +func NewWildcardQuery(query string, caseSensitive bool) WildcardQuery { + var cptr unsafe.Pointer + cleanQuery := C.wildcard_query_new( + C.StringView{ + (*C.char)(unsafe.Pointer(unsafe.StringData(query))), + C.size_t(len(query)), + }, + &cptr, + ) + defer C.wildcard_query_delete(cptr) + return WildcardQuery{ + strings.Clone(unsafe.String( + (*byte)((unsafe.Pointer)(cleanQuery.m_data)), + cleanQuery.m_size, + )), + caseSensitive, + } +} + +func (self WildcardQuery) Query() string { return self.query } +func (self WildcardQuery) CaseSensitive() bool { return self.caseSensitive } + +// A MergedWildcardQuery represents the union of multiple wildcard queries +// (multiple WildcardQuery instances each with their own query string and case +// sensitivity). +type MergedWildcardQuery struct { + queries string + endOffsets []int + caseSensitivity []bool +} + +func (self MergedWildcardQuery) Queries() string { return self.queries } +func (self MergedWildcardQuery) EndOffsets() []int { return self.endOffsets } +func (self MergedWildcardQuery) CaseSensitivity() []bool { return self.caseSensitivity } + +// Merge multiple WildcardQuery objects together by concatenating their query +// strings, storing their end/length offsets, and recording their case +// sensitivity. 
+func MergeWildcardQueries(queries []WildcardQuery) MergedWildcardQuery { + var sb strings.Builder + offsets := make([]int, len(queries)) + caseSensitivity := make([]bool, len(queries)) + for i, q := range queries { + offsets[i], _ = sb.WriteString(q.query) // err always nil + caseSensitivity[i] = queries[i].caseSensitive + } + return MergedWildcardQuery{sb.String(), offsets, caseSensitivity} +} + +// TimestampInterval is a half-open timestamp interval [Lower, Upper). +type TimestampInterval struct { + Lower ffi.EpochTimeMs + Upper ffi.EpochTimeMs +} diff --git a/test/BUILD.bazel b/test/BUILD.bazel deleted file mode 100644 index a4a5f5b..0000000 --- a/test/BUILD.bazel +++ /dev/null @@ -1,14 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "test", - srcs = ["finalizers.go"], - importpath = "github.com/y-scope/clp-ffi-go/test", - visibility = ["//visibility:public"], -) - -alias( - name = "go_default_library", - actual = ":test", - visibility = ["//visibility:public"], -) diff --git a/test/finalizers.go b/test/finalizers.go deleted file mode 100644 index 9ca02a8..0000000 --- a/test/finalizers.go +++ /dev/null @@ -1,44 +0,0 @@ -package test - -import ( - "fmt" - "runtime" - "testing" - "time" -) - -type Finalizer struct { - ch chan bool - msg string -} - -// We must split NewFinalizers and AssertFinalizers into two functions to -// ensure that the GC call in assertFinalizers will find our pointers -// unreachable. If we combine these functions the runtime will assume the -// pointers in ptrs are still reachable in the caller (even if the caller -// returns immediately after the function call) and will not GC them (so the -// finalizers will not run).
-func NewFinalizer[T any](ptr *T) Finalizer { - fin := Finalizer{ - make(chan bool, 1), - fmt.Sprintf("%T", ptr), - } - // must capture fin.ch for SetFinalizer lambda - ch := fin.ch - runtime.SetFinalizer(ptr, func(_ any) { ch <- true }) - return fin -} - -// AssertFinalizers checks that each Finalizer channel has been signalled after -// running a GC cycle. -func AssertFinalizers(t *testing.T, fins ...Finalizer) { - t.Helper() - runtime.GC() - for _, fin := range fins { - select { - case <-fin.ch: - case <-time.After(4 * time.Second): - t.Fatalf("finalizer did not run for: %s", fin.msg) - } - } -}