Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Split Github comments instead of trimming #301

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
153 changes: 134 additions & 19 deletions pkg/vcs/github_client/message.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,75 @@ import (
"github.com/zapier/kubechecks/telemetry"
)

const MaxCommentLength = 64 * 1024
const (
MaxCommentLength = 64 * 1024

// Using string concatenation for better code readability.
// Cannot use raw string literal (backticks) here as the separators themselves contain backticks.
sepStart = "Continued from previous comment.\n" +
"<details><summary>Show Output</summary>\n\n" +
"```diff\n"

sepEnd = "\n```\n" +
"</details>\n" +
"<br>\n\n" +
"**Warning**: Output length greater than maximum allowed comment size. Continued in next comment."
)

// splitComment splits the given comment into chunks from the beginning,
// ensuring that each decorated chunk does not exceed maxSize.
// - The first chunk has no prefix but, if not the only chunk, is suffixed with sepEnd.
// - Subsequent chunks are prefixed with sepStart. If they’re not final, they are also suffixed with sepEnd.
// This forward‐splitting approach preserves the beginning of the comment.
func splitComment(comment string, maxSize int, sepEnd string, sepStart string) []string {
// Guard: If the comment fits in one chunk, return it unsplit.
if len(comment) <= maxSize {
return []string{comment}
}
// Guard: if maxSize is too small to accommodate even one raw character with decorations,
// return the unsplit comment.
if maxSize < len(sepEnd)+1 || maxSize < len(sepStart)+1 {
return []string{comment}
}

// Check if we have capacity for subsequent chunks
if maxSize-len(sepStart)-len(sepEnd) <= 0 {
// No room for raw text if we try to use both prefix and suffix
// => fallback to unsplit
return []string{comment}
}

var parts []string

// Process the first chunk.
// For the first chunk (if a split is needed) we reserve space for sepEnd only.
firstRawCapacity := maxSize - len(sepEnd)
firstChunkRaw := comment[0:firstRawCapacity]
parts = append(parts, firstChunkRaw+sepEnd)
i := firstRawCapacity

// Process subsequent chunks.
for i < len(comment) {
remaining := len(comment) - i

// If the remaining text fits in one final chunk (with only the sepStart prefix),
// then create that final chunk without a trailing sepEnd.
if remaining <= maxSize-len(sepStart) {
parts = append(parts, sepStart+comment[i:])
break
} else {
// Otherwise, for a non-final chunk, reserve space for both prefix and suffix.
rawCapacity := maxSize - len(sepStart) - len(sepEnd)
// The following slice is guaranteed to be in range because we only land here
// if remaining > maxSize - len(sepStart). Consequently, rawCapacity <= remaining,
// ensuring i+rawCapacity is within the comment's length.
chunk := sepStart + comment[i:i+rawCapacity] + sepEnd
parts = append(parts, chunk)
i += rawCapacity
}
}
return parts
}

func (c *Client) PostMessage(ctx context.Context, pr vcs.PullRequest, message string) (*msg.Message, error) {
_, span := tracer.Start(ctx, "PostMessageToMergeRequest")
Expand All @@ -27,6 +95,11 @@ func (c *Client) PostMessage(ctx context.Context, pr vcs.PullRequest, message st
message = message[:MaxCommentLength]
}

if err := c.deleteLatestRunningComment(ctx, pr); err != nil {
log.Error().Err(err).Msg("failed to delete latest 'kubechecks running' comment")
return nil, err
}

log.Debug().Msgf("Posting message to PR %d in repo %s", pr.CheckID, pr.FullName)
comment, _, err := c.googleClient.Issues.CreateComment(
ctx,
Expand All @@ -48,30 +121,72 @@ func (c *Client) UpdateMessage(ctx context.Context, m *msg.Message, msg string)
_, span := tracer.Start(ctx, "UpdateMessage")
defer span.End()

if len(msg) > MaxCommentLength {
log.Warn().Int("original_length", len(msg)).Msg("trimming the comment size")
msg = msg[:MaxCommentLength]
comments := splitComment(msg, MaxCommentLength, sepEnd, sepStart)

owner, repo, ok := strings.Cut(m.Name, "/")
if !ok {
e := fmt.Errorf("invalid GitHub repository name: no '/' in %q", m.Name)
telemetry.SetError(span, e, "Invalid GitHub full repository name")
log.Error().Err(e).Msg("invalid GitHub repository name")
return e
}

log.Info().Msgf("Updating message for PR %d in repo %s", m.CheckID, m.Name)
pr := vcs.PullRequest{
Owner: owner,
Name: repo,
CheckID: m.CheckID,
FullName: m.Name,
}

repoNameComponents := strings.Split(m.Name, "/")
comment, resp, err := c.googleClient.Issues.EditComment(
ctx,
repoNameComponents[0],
repoNameComponents[1],
int64(m.NoteID),
&github.IssueComment{Body: &msg},
)
log.Debug().Msgf("Updating message in PR %d in repo %s", pr.CheckID, pr.FullName)

if err != nil {
telemetry.SetError(span, err, "Update Pull Request comment")
log.Error().Err(err).Msgf("could not update message to PR, msg: %s, response: %+v", msg, resp)
if err := c.deleteLatestRunningComment(ctx, pr); err != nil {
return err
}

// update note id just in case it changed
m.NoteID = int(*comment.ID)
for _, comment := range comments {
cc, _, err := c.googleClient.Issues.CreateComment(
ctx, owner, repo, m.CheckID, &github.IssueComment{Body: &comment},
)
if err != nil {
telemetry.SetError(span, err, "Update Pull Request comment")
log.Error().Err(err).Msg("could not post updated message comment to PR")
return err
}
m.NoteID = int(*cc.ID)
}

return nil
}

func (c *Client) deleteLatestRunningComment(ctx context.Context, pr vcs.PullRequest) error {
_, span := tracer.Start(ctx, "deleteLatestRunningComment")
defer span.End()

existingComments, resp, err := c.googleClient.Issues.ListComments(
ctx, pr.Owner, pr.Name, pr.CheckID, &github.IssueListCommentsOptions{
Sort: pkg.Pointer("created"),
Direction: pkg.Pointer("asc"),
},
)
if err != nil {
telemetry.SetError(span, err, "List Pull Request comments")
log.Error().Err(err).Msgf("could not retrieve existing PR comments, response: %+v", resp)
return fmt.Errorf("failed to list comments: %w", err)
}

// Find and delete the first running comment.
for _, existingComment := range existingComments {
if existingComment.Body != nil && strings.Contains(*existingComment.Body, ":hourglass: kubechecks running ... ") {
log.Debug().Msgf("Deleting 'kubechecks running' comment with ID %d", *existingComment.ID)
if r, e := c.googleClient.Issues.DeleteComment(ctx, pr.Owner, pr.Name, *existingComment.ID); e != nil {
telemetry.SetError(span, e, "Delete Pull Request comment")
log.Error().Err(e).Msgf("failed to delete 'kubechecks running' comment, response: %+v", r)
return fmt.Errorf("failed to delete 'kubechecks running' comment: %w", e)
}
break
}
}

return nil
}
Expand Down Expand Up @@ -105,7 +220,7 @@ func (c *Client) hideOutdatedMessages(ctx context.Context, pr vcs.PullRequest, c

for _, comment := range comments {
if strings.EqualFold(comment.GetUser().GetLogin(), c.username) {
// Github API does not expose minimizeComment API. IT's only available from the GraphQL API
// GitHub API does not expose minimizeComment API. It's only available from the GraphQL API
// https://docs.github.com/en/graphql/reference/mutations#minimizecomment
var m struct {
MinimizeComment struct {
Expand Down
163 changes: 163 additions & 0 deletions pkg/vcs/github_client/nessage_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package github_client

import (
"reflect"
"strings"
"testing"
)

func TestSplitComment(t *testing.T) {
t.Parallel()

thousandA := strings.Repeat("a", 1000)

tests := []struct {
name string
comment string
maxSize int
sepEnd string
sepStart string
want []string
}{
{
name: "EmptyComment",
comment: "",
maxSize: 10,
sepEnd: "-E",
sepStart: "-S",
want: []string{""},
},
{
name: "ExactFit_NoSplit",
comment: "exact_fit",
maxSize: len("exact_fit"),
sepEnd: "-E",
sepStart: "-S",
want: []string{"exact_fit"},
},
{
name: "UnderMax_NoSplit",
comment: "comment under max size",
maxSize: len("comment under max size") + 1,
sepEnd: "sepEnd",
sepStart: "sepStart",
want: []string{"comment under max size"},
},
{
name: "TwoComments",
comment: thousandA,
// Force a split by choosing maxSize just under the full length.
// Calculation:
// For a 1000-character comment and maxSize = 1000 - 1 = 999:
// - The first chunk raw capacity = 999 - len(sepEnd).
// With sepEnd = "-sepEnd" (length 7), firstRawCapacity = 999 - 7 = 992.
// Thus, first chunk = thousandA[0:992] + "-sepEnd".
// - The remaining raw text is thousandA[992:1000] (8 characters).
// For the final chunk, we only need to add the prefix.
// Final chunk = sepStart + thousandA[992:].
maxSize: len(thousandA) - 1, // 999
sepEnd: "-sepEnd",
sepStart: "-sepStart",
want: func() []string {
firstRawCapacity := 999 - len("-sepEnd") // 999 - 7 = 992
firstChunk := thousandA[:firstRawCapacity] + "-sepEnd"
secondChunk := "-sepStart" + thousandA[firstRawCapacity:]
return []string{firstChunk, secondChunk}
}(),
},
{
name: "FourComments",
comment: thousandA,
sepEnd: "-sepEnd",
sepStart: "-sepStart",
// For splitting into four chunks:
// Set maxSize = (len(thousandA)/4) + len(sepEnd) + len(sepStart).
// For thousandA of length 1000, with sepEnd length 7 and sepStart length 9:
// maxSize = (1000/4) + 7 + 9 = 250 + 16 = 266.
// Then:
// - First chunk raw capacity = 266 - len(sepEnd) = 266 - 7 = 259.
// First chunk = thousandA[0:259] + "-sepEnd".
// - Subsequent non-final chunks have raw capacity = 266 - 9 - 7 = 250.
// - Second chunk = "-sepStart" + thousandA[259:259+250] + "-sepEnd".
// - Third chunk = "-sepStart" + thousandA[509:509+250] + "-sepEnd".
// - Final chunk = "-sepStart" + thousandA[759:].
maxSize: (1000 / 4) + 7 + 9, // 266
want: func() []string {
firstRawCapacity := 266 - 7 // 259
chunk1 := thousandA[:firstRawCapacity] + "-sepEnd"
// For subsequent chunks, raw capacity = 266 - 9 - 7 = 250.
rawCapacity := 266 - 9 - 7 // 250
chunk2 := "-sepStart" + thousandA[firstRawCapacity:firstRawCapacity+rawCapacity] + "-sepEnd"
chunk3 := "-sepStart" + thousandA[firstRawCapacity+rawCapacity:firstRawCapacity+2*rawCapacity] + "-sepEnd"
chunk4 := "-sepStart" + thousandA[firstRawCapacity+2*rawCapacity:]
return []string{chunk1, chunk2, chunk3, chunk4}
}(),
},
{
name: "MaxSizeTooSmall_ReturnUnsplit",
comment: "Hello, world!",
// When maxSize is too small to fit even one raw character plus the decorations,
// the function should return the original unsplit comment.
// For example, if sepEnd = "ZZ" (length 2) and sepStart = "TOP" (length 3),
// then we require at least maxSize >= 2+1 = 3 for first chunk
// and maxSize >= 3+1 = 4 for subsequent chunks.
// Here we set maxSize to 5 (which is borderline) and expect unsplit output.
maxSize: 5,
sepEnd: "ZZ",
sepStart: "TOP",
want: []string{"Hello, world!"},
},
{
name: "MaxSizeTooSmall_UnsplitFallback",
comment: "abc",
// sepEnd="YYZ" => length=3 => we need at least 4 to store 1 raw char + suffix
// maxSize=2 => triggers the fallback to unsplit.
maxSize: 2,
sepEnd: "YYZ",
sepStart: "S",
want: []string{"abc"},
},
{
name: "NewlinesInComment", // Test with newlines to verify they are preserved.
comment: "line1\nline2\nline3",
maxSize: 20, // Comment fits unsplit.
sepEnd: "--E--",
sepStart: "--S--",
want: []string{"line1\nline2\nline3"},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := splitComment(tt.comment, tt.maxSize, tt.sepEnd, tt.sepStart)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("%s:\n got: %#v\nwant: %#v", tt.name, got, tt.want)
}
})
}
}

// TestSplitComment_RealSeparators uses real production separator values and maximum comment length.
// This integration-style test verifies that with the actual production values:
// - No chunk exceeds MaxCommentLength.
// - The first chunk does not have the prefix (sepStart).
// - The final chunk does not have the suffix (sepEnd).
func TestSplitComment_RealSeparators(t *testing.T) {
// Create a long comment that exceeds MaxCommentLength.
comment := strings.Repeat("a", MaxCommentLength+100)
got := splitComment(comment, MaxCommentLength, sepEnd, sepStart)
// Verify that none of the chunks exceed MaxCommentLength.
for i, part := range got {
if len(part) > MaxCommentLength {
t.Errorf("Chunk %d exceeds MaxCommentLength: len=%d", i, len(part))
}
}
// Verify that the first chunk does not have the sepStart prefix.
if len(got) > 0 && strings.HasPrefix(got[0], sepStart) {
t.Errorf("First chunk should not start with sepStart")
}
// Verify that the final chunk does not have the sepEnd suffix.
if len(got) > 0 && strings.HasSuffix(got[len(got)-1], sepEnd) {
t.Errorf("Final chunk should not end with sepEnd")
}
}