Skip to content

Commit

Permalink
Merge branch 'sj/ref-contents-check' into seen
Browse files Browse the repository at this point in the history
"git fsck" learned to issue warnings on "curiously formatted" ref
contents that have always been taken valid but something Git
wouldn't have written itself (e.g., missing terminating end-of-line
after the full object name).

Ready?

* sj/ref-contents-check:
  ref: add symlink ref content check for files backend
  ref: add symref content check for files backend
  ref: add regular ref content check for files backend
  ref: initialize "fsck_ref_report" with zero
  • Loading branch information
gitster committed Sep 8, 2024
2 parents b3550dd + 65aea4f commit 6a6050a
Show file tree
Hide file tree
Showing 6 changed files with 548 additions and 5 deletions.
20 changes: 20 additions & 0 deletions Documentation/fsck-msgids.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,19 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.

`badRefContent`::
(ERROR) A ref has a bad content.

`badRefFiletype`::
(ERROR) A ref has a bad file type.

`badRefName`::
(ERROR) A ref has an invalid format.

`badSymrefTarget`::
(ERROR) The symref target points outside the ref directory or
the name of the symref target is invalid.

`badTagName`::
(INFO) A tag has an invalid format.

Expand Down Expand Up @@ -170,6 +177,19 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.

`refMissingNewline`::
(INFO) A ref does not end with newline. This kind of ref may
be considered ERROR in the future.

`symlinkRef`::
(INFO) A symref uses the symbolic link. This kind of symref may
be considered ERROR in the future when totally dropping the
symlink support.

`trailingRefContent`::
(INFO) A ref has trailing contents. This kind of ref may be
considered ERROR in the future.

`treeNotSorted`::
(ERROR) A tree is not properly sorted.

Expand Down
5 changes: 5 additions & 0 deletions fsck.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_CONTENT, ERROR) \
FUNC(BAD_REF_FILETYPE, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_SYMREF_TARGET, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
Expand Down Expand Up @@ -84,6 +86,9 @@ enum fsck_msg_type {
FUNC(MAILMAP_SYMLINK, INFO) \
FUNC(BAD_TAG_NAME, INFO) \
FUNC(MISSING_TAGGER_ENTRY, INFO) \
FUNC(REF_MISSING_NEWLINE, INFO) \
FUNC(SYMLINK_REF, INFO) \
FUNC(TRAILING_REF_CONTENT, INFO) \
/* ignored (elevated when requested) */ \
FUNC(EXTRA_HEADER_ENTRY, IGNORE)

Expand Down
2 changes: 1 addition & 1 deletion refs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1761,7 +1761,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}

result = parse_loose_ref_contents(ref_store->repo->hash_algo, content.buf,
oid, referent, type, failure_errno);
oid, referent, type, NULL, failure_errno);

done:
strbuf_release(&full_path);
Expand Down
190 changes: 187 additions & 3 deletions refs/files-backend.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE

#include "../git-compat-util.h"
#include "../abspath.h"
#include "../config.h"
#include "../copy.h"
#include "../environment.h"
Expand Down Expand Up @@ -568,7 +569,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
buf = sb_contents.buf;

ret = parse_loose_ref_contents(ref_store->repo->hash_algo, buf,
oid, referent, type, &myerr);
oid, referent, type, NULL, &myerr);

out:
if (ret && !myerr)
Expand Down Expand Up @@ -605,7 +606,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const struct git_hash_algo *algop,
const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
int *failure_errno)
const char **trailing, int *failure_errno)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
Expand All @@ -627,6 +628,10 @@ int parse_loose_ref_contents(const struct git_hash_algo *algop,
*failure_errno = EINVAL;
return -1;
}

if (trailing)
*trailing = p;

return 0;
}

Expand Down Expand Up @@ -3504,6 +3509,184 @@ typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
const char *refs_check_dir,
struct dir_iterator *iter);

/*
* Check the symref "referent" and "referent_path". For textual symref,
* "referent" would be the content after "refs:". For symlink ref,
* "referent" would be the relative path agaignst "gitdir" which should
* be the same as the textual symref literally.
*/
static int files_fsck_symref_target(struct fsck_options *o,
struct fsck_ref_report *report,
struct strbuf *referent,
struct strbuf *referent_path,
unsigned int symbolic_link)
{
size_t len = referent->len - 1;
const char *p = NULL;
struct stat st;
int ret = 0;

if (!skip_prefix(referent->buf, "refs/", &p)) {

ret = fsck_report_ref(o, report,
FSCK_MSG_BAD_SYMREF_TARGET,
"points to ref outside the refs directory");
goto out;
}

if (!symbolic_link && referent->buf[referent->len - 1] != '\n') {
ret = fsck_report_ref(o, report,
FSCK_MSG_REF_MISSING_NEWLINE,
"missing newline");
len++;
}

if (!symbolic_link)
strbuf_rtrim(referent);

if (check_refname_format(referent->buf, 0)) {
ret = fsck_report_ref(o, report,
FSCK_MSG_BAD_SYMREF_TARGET,
"points to refname with invalid format");
goto out;
}

if (!symbolic_link && len != referent->len) {
ret = fsck_report_ref(o, report,
FSCK_MSG_TRAILING_REF_CONTENT,
"trailing garbage in ref");
}

/*
* Missing target should not be treated as any error worthy event and
* not even warn. It is a common case that a symbolic ref points to a
* ref that does not exist yet. If the target ref does not exist, just
* skip the check for the file type.
*/
if (lstat(referent_path->buf, &st))
goto out;

/*
* We cannot distinguish whether "refs/heads/a" is directory or nots by
* using "check_refname_format(referent->buf, 0)". Instead, we need to
* check the file type of the target.
*/
if (S_ISDIR(st.st_mode)) {
ret = fsck_report_ref(o, report,
FSCK_MSG_BAD_SYMREF_TARGET,
"points to the directory");
goto out;
}

out:
return ret;
}

static int files_fsck_refs_content(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter)
{
struct strbuf referent_path = STRBUF_INIT;
struct strbuf ref_content = STRBUF_INIT;
struct strbuf abs_gitdir = STRBUF_INIT;
struct strbuf referent = STRBUF_INIT;
struct strbuf refname = STRBUF_INIT;
struct fsck_ref_report report = {0};
unsigned int symbolic_link = 0;
const char *trailing = NULL;
unsigned int type = 0;
int failure_errno = 0;
struct object_id oid;
int ret = 0;

strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
report.path = refname.buf;

if (S_ISLNK(iter->st.st_mode)) {
const char* relative_referent_path;

symbolic_link = 1;
ret = fsck_report_ref(o, &report,
FSCK_MSG_SYMLINK_REF,
"use deprecated symbolic link for symref");

strbuf_add_absolute_path(&abs_gitdir, ref_store->gitdir);
strbuf_normalize_path(&abs_gitdir);
if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
strbuf_addch(&abs_gitdir, '/');

strbuf_add_real_path(&referent_path, iter->path.buf);

if (!skip_prefix(referent_path.buf,
abs_gitdir.buf,
&relative_referent_path)) {
ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_SYMREF_TARGET,
"point to target outside gitdir");
goto cleanup;
}

strbuf_addstr(&referent, relative_referent_path);
ret = files_fsck_symref_target(o, &report,
&referent, &referent_path,
symbolic_link);

goto cleanup;
}

if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
ret = error_errno(_("%s/%s: unable to read the ref"),
refs_check_dir, iter->relative_path);
goto cleanup;
}

if (parse_loose_ref_contents(ref_store->repo->hash_algo,
ref_content.buf, &oid, &referent,
&type, &trailing, &failure_errno)) {
ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_REF_CONTENT,
"invalid ref content");
goto cleanup;
}

if (!(type & REF_ISSYMREF)) {
if (*trailing == '\0') {
ret = fsck_report_ref(o, &report,
FSCK_MSG_REF_MISSING_NEWLINE,
"missing newline");
goto cleanup;
}

if (*trailing != '\n' || (*(trailing + 1) != '\0')) {
ret = fsck_report_ref(o, &report,
FSCK_MSG_TRAILING_REF_CONTENT,
"trailing garbage in ref");
goto cleanup;
}
} else {
strbuf_addf(&referent_path, "%s/%s",
ref_store->gitdir, referent.buf);
/*
* the referent may contain the spaces and the newline, need to
* trim for path.
*/
strbuf_rtrim(&referent_path);
ret = files_fsck_symref_target(o, &report,
&referent,
&referent_path,
symbolic_link);
}

cleanup:
strbuf_release(&refname);
strbuf_release(&ref_content);
strbuf_release(&referent);
strbuf_release(&referent_path);
strbuf_release(&abs_gitdir);
return ret;
}

static int files_fsck_refs_name(struct ref_store *ref_store UNUSED,
struct fsck_options *o,
const char *refs_check_dir,
Expand All @@ -3520,7 +3703,7 @@ static int files_fsck_refs_name(struct ref_store *ref_store UNUSED,
goto cleanup;

if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
struct fsck_ref_report report = { .path = NULL };
struct fsck_ref_report report = { 0 };

strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
report.path = sb.buf;
Expand Down Expand Up @@ -3586,6 +3769,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
files_fsck_refs_fn fsck_refs_fn[]= {
files_fsck_refs_name,
files_fsck_refs_content,
NULL,
};

Expand Down
2 changes: 1 addition & 1 deletion refs/refs-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ struct ref_store {
int parse_loose_ref_contents(const struct git_hash_algo *algop,
const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
int *failure_errno);
const char **trailing, int *failure_errno);

/*
* Fill in the generic part of refs and add it to our collection of
Expand Down
Loading

0 comments on commit 6a6050a

Please sign in to comment.