zapier · andridzi · Nov 7, 2024 · Nov 13, 2024 · Nov 14, 2024 · Nov 14, 2024
diff --git a/pkg/vcs/github_client/message.go b/pkg/vcs/github_client/message.go
@@ -16,7 +16,75 @@ import (
 	"github.com/zapier/kubechecks/telemetry"
 )
 
-const MaxCommentLength = 64 * 1024
+const (
+	MaxCommentLength = 64 * 1024
+
+	// Using string concatenation for better code readability.
+	// Cannot use raw string literal (backticks) here as the separators themselves contain backticks.
+	sepStart = "Continued from previous comment.\n" +
+		"<details><summary>Show Output</summary>\n\n" +
+		"```diff\n"
+
+	sepEnd = "\n```\n" +
+		"</details>\n" +
+		"<br>\n\n" +
+		"**Warning**: Output length greater than maximum allowed comment size. Continued in next comment."
+)
+
+// splitComment splits the given comment into chunks from the beginning,
+// ensuring that each decorated chunk does not exceed maxSize.
+// - The first chunk has no prefix but, if not the only chunk, is suffixed with sepEnd.
+// - Subsequent chunks are prefixed with sepStart. If they’re not final, they are also suffixed with sepEnd.
+// This forward‐splitting approach preserves the beginning of the comment.
+func splitComment(comment string, maxSize int, sepEnd string, sepStart string) []string {
+	// Guard: If the comment fits in one chunk, return it unsplit.
+	if len(comment) <= maxSize {
+		return []string{comment}
+	}
+	// Guard: if maxSize is too small to accommodate even one raw character with decorations,
+	// return the unsplit comment.
+	if maxSize < len(sepEnd)+1 || maxSize < len(sepStart)+1 {
+		return []string{comment}
+	}
+
+	// Check if we have capacity for subsequent chunks
+	if maxSize-len(sepStart)-len(sepEnd) <= 0 {
+		// No room for raw text if we try to use both prefix and suffix
+		// => fallback to unsplit
+		return []string{comment}
+	}
+
+	var parts []string
+
+	// Process the first chunk.
+	// For the first chunk (if a split is needed) we reserve space for sepEnd only.
+	firstRawCapacity := maxSize - len(sepEnd)
+	firstChunkRaw := comment[0:firstRawCapacity]
+	parts = append(parts, firstChunkRaw+sepEnd)
+	i := firstRawCapacity
+
+	// Process subsequent chunks.
+	for i < len(comment) {
+		remaining := len(comment) - i
+
+		// If the remaining text fits in one final chunk (with only the sepStart prefix),
+		// then create that final chunk without a trailing sepEnd.
+		if remaining <= maxSize-len(sepStart) {
+			parts = append(parts, sepStart+comment[i:])
+			break
+		} else {
+			// Otherwise, for a non-final chunk, reserve space for both prefix and suffix.
+			rawCapacity := maxSize - len(sepStart) - len(sepEnd)
+			// The following slice is guaranteed to be in range because we only land here
+			// if remaining > maxSize - len(sepStart). Consequently, rawCapacity <= remaining,
+			// ensuring i+rawCapacity is within the comment's length.
+			chunk := sepStart + comment[i:i+rawCapacity] + sepEnd
+			parts = append(parts, chunk)
+			i += rawCapacity
+		}
+	}
+	return parts
+}
 
 func (c *Client) PostMessage(ctx context.Context, pr vcs.PullRequest, message string) (*msg.Message, error) {
 	_, span := tracer.Start(ctx, "PostMessageToMergeRequest")
@@ -27,6 +95,11 @@ func (c *Client) PostMessage(ctx context.Context, pr vcs.PullRequest, message st
 		message = message[:MaxCommentLength]
 	}
 
+	if err := c.deleteLatestRunningComment(ctx, pr); err != nil {
+		log.Error().Err(err).Msg("failed to delete latest 'kubechecks running' comment")
+		return nil, err
+	}
+
 	log.Debug().Msgf("Posting message to PR %d in repo %s", pr.CheckID, pr.FullName)
 	comment, _, err := c.googleClient.Issues.CreateComment(
 		ctx,
@@ -48,30 +121,72 @@ func (c *Client) UpdateMessage(ctx context.Context, m *msg.Message, msg string)
 	_, span := tracer.Start(ctx, "UpdateMessage")
 	defer span.End()
 
-	if len(msg) > MaxCommentLength {
-		log.Warn().Int("original_length", len(msg)).Msg("trimming the comment size")
-		msg = msg[:MaxCommentLength]
+	comments := splitComment(msg, MaxCommentLength, sepEnd, sepStart)
+
+	owner, repo, ok := strings.Cut(m.Name, "/")
+	if !ok {
+		e := fmt.Errorf("invalid GitHub repository name: no '/' in %q", m.Name)
+		telemetry.SetError(span, e, "Invalid GitHub full repository name")
+		log.Error().Err(e).Msg("invalid GitHub repository name")
+		return e
 	}
 
-	log.Info().Msgf("Updating message for PR %d in repo %s", m.CheckID, m.Name)
+	pr := vcs.PullRequest{
+		Owner:    owner,
+		Name:     repo,
+		CheckID:  m.CheckID,
+		FullName: m.Name,
+	}
 
-	repoNameComponents := strings.Split(m.Name, "/")
-	comment, resp, err := c.googleClient.Issues.EditComment(
-		ctx,
-		repoNameComponents[0],
-		repoNameComponents[1],
-		int64(m.NoteID),
-		&github.IssueComment{Body: &msg},
-	)
+	log.Debug().Msgf("Updating message in PR %d in repo %s", pr.CheckID, pr.FullName)
 
-	if err != nil {
-		telemetry.SetError(span, err, "Update Pull Request comment")
-		log.Error().Err(err).Msgf("could not update message to PR, msg: %s, response: %+v", msg, resp)
+	if err := c.deleteLatestRunningComment(ctx, pr); err != nil {
 		return err
 	}
 
-	// update note id just in case it changed
-	m.NoteID = int(*comment.ID)
+	for _, comment := range comments {
+		cc, _, err := c.googleClient.Issues.CreateComment(
+			ctx, owner, repo, m.CheckID, &github.IssueComment{Body: &comment},
+		)
+		if err != nil {
+			telemetry.SetError(span, err, "Update Pull Request comment")
+			log.Error().Err(err).Msg("could not post updated message comment to PR")
+			return err
+		}
+		m.NoteID = int(*cc.ID)
+	}
+
+	return nil
+}
+
+func (c *Client) deleteLatestRunningComment(ctx context.Context, pr vcs.PullRequest) error {
+	_, span := tracer.Start(ctx, "deleteLatestRunningComment")
+	defer span.End()
+
+	existingComments, resp, err := c.googleClient.Issues.ListComments(
+		ctx, pr.Owner, pr.Name, pr.CheckID, &github.IssueListCommentsOptions{
+			Sort:      pkg.Pointer("created"),
+			Direction: pkg.Pointer("asc"),
+		},
+	)
+	if err != nil {
+		telemetry.SetError(span, err, "List Pull Request comments")
+		log.Error().Err(err).Msgf("could not retrieve existing PR comments, response: %+v", resp)
+		return fmt.Errorf("failed to list comments: %w", err)
+	}
+
+	// Find and delete the first running comment.
+	for _, existingComment := range existingComments {
+		if existingComment.Body != nil && strings.Contains(*existingComment.Body, ":hourglass: kubechecks running ... ") {
+			log.Debug().Msgf("Deleting 'kubechecks running' comment with ID %d", *existingComment.ID)
+			if r, e := c.googleClient.Issues.DeleteComment(ctx, pr.Owner, pr.Name, *existingComment.ID); e != nil {
+				telemetry.SetError(span, e, "Delete Pull Request comment")
+				log.Error().Err(e).Msgf("failed to delete 'kubechecks running' comment, response: %+v", r)
+				return fmt.Errorf("failed to delete 'kubechecks running' comment: %w", e)
+			}
+			break
+		}
+	}
 
 	return nil
 }
@@ -105,7 +220,7 @@ func (c *Client) hideOutdatedMessages(ctx context.Context, pr vcs.PullRequest, c
 
 	for _, comment := range comments {
 		if strings.EqualFold(comment.GetUser().GetLogin(), c.username) {
-			// Github API does not expose minimizeComment API. IT's only available from the GraphQL API
+			// GitHub API does not expose minimizeComment API. It's only available from the GraphQL API
 			// https://docs.github.com/en/graphql/reference/mutations#minimizecomment
 			var m struct {
 				MinimizeComment struct {

diff --git a/pkg/vcs/github_client/nessage_test.go b/pkg/vcs/github_client/nessage_test.go
@@ -0,0 +1,163 @@
+package github_client
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+)
+
+func TestSplitComment(t *testing.T) {
+	t.Parallel()
+
+	thousandA := strings.Repeat("a", 1000)
+
+	tests := []struct {
+		name     string
+		comment  string
+		maxSize  int
+		sepEnd   string
+		sepStart string
+		want     []string
+	}{
+		{
+			name:     "EmptyComment",
+			comment:  "",
+			maxSize:  10,
+			sepEnd:   "-E",
+			sepStart: "-S",
+			want:     []string{""},
+		},
+		{
+			name:     "ExactFit_NoSplit",
+			comment:  "exact_fit",
+			maxSize:  len("exact_fit"),
+			sepEnd:   "-E",
+			sepStart: "-S",
+			want:     []string{"exact_fit"},
+		},
+		{
+			name:     "UnderMax_NoSplit",
+			comment:  "comment under max size",
+			maxSize:  len("comment under max size") + 1,
+			sepEnd:   "sepEnd",
+			sepStart: "sepStart",
+			want:     []string{"comment under max size"},
+		},
+		{
+			name:    "TwoComments",
+			comment: thousandA,
+			// Force a split by choosing maxSize just under the full length.
+			// Calculation:
+			//   For a 1000-character comment and maxSize = 1000 - 1 = 999:
+			//     - The first chunk raw capacity = 999 - len(sepEnd).
+			//       With sepEnd = "-sepEnd" (length 7), firstRawCapacity = 999 - 7 = 992.
+			//       Thus, first chunk = thousandA[0:992] + "-sepEnd".
+			//     - The remaining raw text is thousandA[992:1000] (8 characters).
+			//       For the final chunk, we only need to add the prefix.
+			//       Final chunk = sepStart + thousandA[992:].
+			maxSize:  len(thousandA) - 1, // 999
+			sepEnd:   "-sepEnd",
+			sepStart: "-sepStart",
+			want: func() []string {
+				firstRawCapacity := 999 - len("-sepEnd") // 999 - 7 = 992
+				firstChunk := thousandA[:firstRawCapacity] + "-sepEnd"
+				secondChunk := "-sepStart" + thousandA[firstRawCapacity:]
+				return []string{firstChunk, secondChunk}
+			}(),
+		},
+		{
+			name:     "FourComments",
+			comment:  thousandA,
+			sepEnd:   "-sepEnd",
+			sepStart: "-sepStart",
+			// For splitting into four chunks:
+			//   Set maxSize = (len(thousandA)/4) + len(sepEnd) + len(sepStart).
+			//   For thousandA of length 1000, with sepEnd length 7 and sepStart length 9:
+			//     maxSize = (1000/4) + 7 + 9 = 250 + 16 = 266.
+			//   Then:
+			//     - First chunk raw capacity = 266 - len(sepEnd) = 266 - 7 = 259.
+			//       First chunk = thousandA[0:259] + "-sepEnd".
+			//     - Subsequent non-final chunks have raw capacity = 266 - 9 - 7 = 250.
+			//     - Second chunk = "-sepStart" + thousandA[259:259+250] + "-sepEnd".
+			//     - Third chunk = "-sepStart" + thousandA[509:509+250] + "-sepEnd".
+			//     - Final chunk = "-sepStart" + thousandA[759:].
+			maxSize: (1000 / 4) + 7 + 9, // 266
+			want: func() []string {
+				firstRawCapacity := 266 - 7 // 259
+				chunk1 := thousandA[:firstRawCapacity] + "-sepEnd"
+				// For subsequent chunks, raw capacity = 266 - 9 - 7 = 250.
+				rawCapacity := 266 - 9 - 7 // 250
+				chunk2 := "-sepStart" + thousandA[firstRawCapacity:firstRawCapacity+rawCapacity] + "-sepEnd"
+				chunk3 := "-sepStart" + thousandA[firstRawCapacity+rawCapacity:firstRawCapacity+2*rawCapacity] + "-sepEnd"
+				chunk4 := "-sepStart" + thousandA[firstRawCapacity+2*rawCapacity:]
+				return []string{chunk1, chunk2, chunk3, chunk4}
+			}(),
+		},
+		{
+			name:    "MaxSizeTooSmall_ReturnUnsplit",
+			comment: "Hello, world!",
+			// When maxSize is too small to fit even one raw character plus the decorations,
+			// the function should return the original unsplit comment.
+			// For example, if sepEnd = "ZZ" (length 2) and sepStart = "TOP" (length 3),
+			// then we require at least maxSize >= 2+1 = 3 for first chunk
+			// and maxSize >= 3+1 = 4 for subsequent chunks.
+			// Here we set maxSize to 5 (which is borderline) and expect unsplit output.
+			maxSize:  5,
+			sepEnd:   "ZZ",
+			sepStart: "TOP",
+			want:     []string{"Hello, world!"},
+		},
+		{
+			name:    "MaxSizeTooSmall_UnsplitFallback",
+			comment: "abc",
+			// sepEnd="YYZ" => length=3 => we need at least 4 to store 1 raw char + suffix
+			// maxSize=2 => triggers the fallback to unsplit.
+			maxSize:  2,
+			sepEnd:   "YYZ",
+			sepStart: "S",
+			want:     []string{"abc"},
+		},
+		{
+			name:     "NewlinesInComment", // Test with newlines to verify they are preserved.
+			comment:  "line1\nline2\nline3",
+			maxSize:  20, // Comment fits unsplit.
+			sepEnd:   "--E--",
+			sepStart: "--S--",
+			want:     []string{"line1\nline2\nline3"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := splitComment(tt.comment, tt.maxSize, tt.sepEnd, tt.sepStart)
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("%s:\n got: %#v\nwant: %#v", tt.name, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestSplitComment_RealSeparators uses real production separator values and maximum comment length.
+// This integration-style test verifies that with the actual production values:
+// - No chunk exceeds MaxCommentLength.
+// - The first chunk does not have the prefix (sepStart).
+// - The final chunk does not have the suffix (sepEnd).
+func TestSplitComment_RealSeparators(t *testing.T) {
+	// Create a long comment that exceeds MaxCommentLength.
+	comment := strings.Repeat("a", MaxCommentLength+100)
+	got := splitComment(comment, MaxCommentLength, sepEnd, sepStart)
+	// Verify that none of the chunks exceed MaxCommentLength.
+	for i, part := range got {
+		if len(part) > MaxCommentLength {
+			t.Errorf("Chunk %d exceeds MaxCommentLength: len=%d", i, len(part))
+		}
+	}
+	// Verify that the first chunk does not have the sepStart prefix.
+	if len(got) > 0 && strings.HasPrefix(got[0], sepStart) {
+		t.Errorf("First chunk should not start with sepStart")
+	}
+	// Verify that the final chunk does not have the sepEnd suffix.
+	if len(got) > 0 && strings.HasSuffix(got[len(got)-1], sepEnd) {
+		t.Errorf("Final chunk should not end with sepEnd")
+	}
+}