From d6cdd409572641917c3b6f52980961ca44328f54 Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Sat, 21 Dec 2024 11:10:33 +0000 Subject: [PATCH] sync.getRecord exclusion-proof tests --- src/millipds/atproto_sync.py | 8 +++-- src/millipds/repo_ops.py | 14 ++++----- src/millipds/util.py | 16 ---------- tests/integration_test.py | 58 +++++++++++++++++++++++++++++++++--- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/src/millipds/atproto_sync.py b/src/millipds/atproto_sync.py index e06b754..84dc587 100644 --- a/src/millipds/atproto_sync.py +++ b/src/millipds/atproto_sync.py @@ -69,15 +69,17 @@ async def sync_get_blocks(request: web.Request): await res.write(util.serialize_car_header()) for cid in cids: # we don't use executemany so as not to hog the db - for value, *_ in db.con.execute( + row = db.con.execute( """ SELECT commit_bytes FROM user WHERE head=? AND id=? UNION SELECT value FROM mst WHERE cid=? AND repo=? UNION SELECT value FROM record WHERE cid=? AND repo=? """, (cid, user_id) * 3, - ): - await res.write(util.serialize_car_entry(cid, value)) + ).fetchone() + if row is None: + continue # hmm, we can't 404 because we already send the response headers + await res.write(util.serialize_car_entry(cid, row[0])) await res.write_eof() return res diff --git a/src/millipds/repo_ops.py b/src/millipds/repo_ops.py index 2cc224e..13a354f 100644 --- a/src/millipds/repo_ops.py +++ b/src/millipds/repo_ops.py @@ -378,15 +378,11 @@ def blob_incref(con: apsw.Connection, user_id: int, ref: cbrrr.CID, tid: str): if changes == 1: return # happy path - if changes == 0: - raise ValueError( - "tried to incref a blob that doesn't exist" - ) # could happen if e.g. user didn't upload blob first - - # changes > 1 - raise ValueError( - "welp, that's not supposed to happen" - ) # should be impossible given UNIQUE constraints + if changes == 0: # could happen if e.g. user didn't upload blob first + raise ValueError("tried to incref a blob that doesn't exist") + + # changes > 1 (should be impossible given UNIQUE constraints) + raise ValueError("welp, that's not supposed to happen") def blob_decref(con: apsw.Connection, user_id: int, ref: cbrrr.CID): diff --git a/src/millipds/util.py b/src/millipds/util.py index ac1efc8..0a764a4 100644 --- a/src/millipds/util.py +++ b/src/millipds/util.py @@ -20,22 +20,6 @@ def mkdirs_for_file(path: str) -> None: ) -def did_to_safe_filename(did: str) -> str: - """ - The format is _ - The former guarantees uniqueness, and the latter makes it human-recognizeable (ish) - """ - - hexdigest = hashlib.sha256(did.encode()).hexdigest() - filtered = "".join( - char for char in did.replace(":", "-") if char in FILANEME_SAFE_CHARS - ) - - # Truncate to make sure we're staying within PATH_MAX - # (with room to spare, in case the caller appends a file extension) - return f"{hexdigest}_{filtered}"[:200] - - B32_CHARSET = "234567abcdefghijklmnopqrstuvwxyz" diff --git a/tests/integration_test.py b/tests/integration_test.py index cc19c63..8a91064 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -224,7 +224,34 @@ async def authn(s, pds_host): return {"Authorization": "Bearer " + token} +@pytest.fixture +async def populated_pds_host(s, pds_host, authn): + # same thing as test_repo_applyWrites, for now + for i in range(10): + async with s.post( + pds_host + "/xrpc/com.atproto.repo.applyWrites", + headers=authn, + json={ + "repo": TEST_DID, + "writes": [ + { + "$type": "com.atproto.repo.applyWrites#create", + "action": "create", + "collection": "app.bsky.feed.like", + "rkey": f"{i}-{j}", + "value": {"blah": "test record"}, + } + for j in range(30) + ], + }, + ) as r: + print(await r.json()) + assert r.status == 200 + return pds_host + + async def test_repo_applyWrites(s, pds_host, authn): + # TODO: test more than just "create"! for i in range(10): async with s.post( pds_host + "/xrpc/com.atproto.repo.applyWrites", @@ -260,6 +287,13 @@ async def test_repo_uploadBlob(s, pds_host, authn): print(res) assert r.status == 200 + # getBlob should still 404 because refcount==0 + async with s.get( + pds_host + "/xrpc/com.atproto.sync.getBlob", + params={"did": TEST_DID, "cid": res["blob"]["ref"]["$link"]}, + ) as r: + assert r.status == 404 + # get the blob refcount >0 async with s.post( pds_host + "/xrpc/com.atproto.repo.createRecord", @@ -296,11 +330,10 @@ async def test_sync_getRepo_not_found(s, pds_host): assert r.status == 404 -@pytest.mark.depends(on=["test_repo_applyWrites"]) -async def test_sync_getRecord_nonexistent(s, pds_host): +async def test_sync_getRecord_nonexistent(s, populated_pds_host): # nonexistent DID should still 404 async with s.get( - pds_host + "/xrpc/com.atproto.sync.getRecord", + populated_pds_host + "/xrpc/com.atproto.sync.getRecord", params={ "did": "did:web:nonexistent.invalid", "collection": "app.bsky.feed.post", @@ -311,7 +344,7 @@ async def test_sync_getRecord_nonexistent(s, pds_host): # but extant DID with nonexistent record should 200, with exclusion proof CAR async with s.get( - pds_host + "/xrpc/com.atproto.sync.getRecord", + populated_pds_host + "/xrpc/com.atproto.sync.getRecord", params={ "did": TEST_DID, "collection": "app.bsky.feed.post", @@ -323,3 +356,20 @@ async def test_sync_getRecord_nonexistent(s, pds_host): proof_car = await r.read() assert proof_car # nonempty # TODO: make sure the proof is valid + + +async def test_sync_getRecord_existent(s, populated_pds_host): + async with s.get( + populated_pds_host + "/xrpc/com.atproto.sync.getRecord", + params={ + "did": TEST_DID, + "collection": "app.bsky.feed.like", + "rkey": "1-1", + }, + ) as r: + assert r.status == 200 + assert r.content_type == "application/vnd.ipld.car" + proof_car = await r.read() + assert proof_car # nonempty + # TODO: make sure the proof is valid, and contains the record + assert b"test record" in proof_car