Skip to content

Commit

Permalink
Add documentation for string formats; fix at-identifier format (#491)
Browse files Browse the repository at this point in the history
Co-authored-by: Ilya (Marshal) <[email protected]>
  • Loading branch information
zzstoatzz and MarshalX authored Dec 17, 2024
1 parent d85a59f commit c2a51c7
Show file tree
Hide file tree
Showing 32 changed files with 489 additions and 80 deletions.
25 changes: 23 additions & 2 deletions docs/source/alias_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,21 @@
# FIXME(MarshalX): I didn't find a fast way to fix aliases resolving after migration to Pydantic.
# I hope this is temporary and resolving will be on the Sphinx side.


def get_alias_from_db(alias: str) -> t.Optional[str]:
# Short documentation aliases mapped to their fully qualified object paths.
# Consulted by `get_alias_from_db` as a fallback when `_get_model_alias`
# does not resolve the alias.
_GLOBAL_ALIASES_DB = {
'string_formats.validate_at_uri': 'atproto_client.models.string_formats.validate_at_uri',
'string_formats.validate_cid': 'atproto_client.models.string_formats.validate_cid',
'string_formats.validate_datetime': 'atproto_client.models.string_formats.validate_datetime',
'string_formats.validate_did': 'atproto_client.models.string_formats.validate_did',
'string_formats.validate_handle': 'atproto_client.models.string_formats.validate_handle',
'string_formats.validate_language': 'atproto_client.models.string_formats.validate_language',
'string_formats.validate_nsid': 'atproto_client.models.string_formats.validate_nsid',
'string_formats.validate_record_key': 'atproto_client.models.string_formats.validate_record_key',
'string_formats.validate_tid': 'atproto_client.models.string_formats.validate_tid',
'string_formats.validate_uri': 'atproto_client.models.string_formats.validate_uri',
}


def _get_model_alias(alias: str) -> t.Optional[str]:
# FIXME(MarshalX): Resolving of models.AppBskyGraphDefs ListPurpose is not working.
alias_split = alias.rsplit('.', maxsplit=1)
if len(alias_split) < 2:
Expand All @@ -28,6 +41,14 @@ def get_alias_from_db(alias: str) -> t.Optional[str]:
return f'{ALIASES_DB[alias_prefix]}.{alias_suffix}'


def get_alias_from_db(alias: str) -> t.Optional[str]:
    """Resolve a documentation alias to its fully qualified path.

    The model alias lookup takes precedence. The global alias table is only
    consulted when that lookup yields a falsy result; ``None`` is returned
    when the alias is not present there either.
    """
    return _get_model_alias(alias) or _GLOBAL_ALIASES_DB.get(alias)


# annotate
def resolve_intersphinx_aliases(
app: 'Sphinx', env: 'BuildEnvironment', node: pending_xref, contnode: 'TextElement'
Expand Down
1 change: 1 addition & 0 deletions docs/source/atproto_client/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ Submodules
namespace
models
auth
string_formats
utils/index
1 change: 1 addition & 0 deletions docs/source/atproto_client/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ Submodules
../atproto/atproto_client.models.blob_ref
../atproto/atproto_client.models.dot_dict
../atproto/atproto_client.models.utils
../atproto/atproto_client.models.string_formats
128 changes: 128 additions & 0 deletions docs/source/atproto_client/string_formats.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
## String Formats

The AT Protocol defines several string formats that are used throughout the protocol. This page describes these formats and how to validate them in your code.

### Overview

The SDK provides optional strict validation for AT Protocol string formats. By default, validation is disabled for performance reasons, but you can enable it when needed.

### Supported String Formats

The SDK supports validation of the following string formats:

:::{attention}
The validation rules described below reflect a working, empirical understanding of the required formats, based on the following resources:

- [AT Protocol Lexicon](https://atproto.com/specs/lexicon)

- [AT Protocol Interoperability Test Files](https://github.com/bluesky-social/atproto/tree/main/interop-test-files/syntax)

:::


#### Handle
A handle must be a valid domain name (e.g., `user.bsky.social`):
- 2+ segments separated by dots
- ASCII alphanumeric characters and hyphens only
- 1-63 chars per segment
- Max 253 chars total
- Last segment cannot start with a digit

#### DID (Decentralized Identifier)
A DID follows the pattern `did:method:identifier`:
- Method must be lowercase letters
- Identifier allows alphanumeric chars, dots, underscores, colons, hyphens, and percent signs
- Max 2KB length
- None of the characters `/?#[]@` are allowed

#### NSID (Namespaced Identifier)
An NSID must have:
- 3+ segments separated by dots
- Reversed domain name (lowercase alphanumeric + hyphen)
- Name segment (letters only)
- Max 317 chars total
- No segments ending in numbers
- None of the special characters `@_*#!` are allowed
- Max 63 chars per segment

#### AT-URI
An AT-URI follows the pattern `at://authority/collection/record-key`, where the path after the authority is optional:
- Starts with `at://`
- Contains handle or DID
- Optional /collection/record-key path
- Max 8KB length
- No query parameters or fragments

#### CID (Content Identifier)
A CID must:
- Be at least 8 characters long
- Contain only alphanumeric characters and plus signs

#### DateTime
Requirements:
- Must use uppercase T as time separator
- Must include seconds (HH:MM:SS)
- Must have timezone (Z or ±HH:MM)
- No -00:00 timezone allowed
- Valid fractional seconds format if used
- No whitespace allowed

#### TID (Timestamp Identifier)
A TID must:
- Be exactly 13 characters long
- Contain only lowercase letters and the digits 2-7
- Have the high bit (0x40) of its first byte set to 0

#### Record Key (rkey)
A record key must:
- Be 1-512 characters
- Contain only alphanumeric chars, dots, underscores, colons, tildes, or hyphens
- Not be "." or ".."

#### URI
Requirements:
- Must have a scheme starting with a letter
- Must have authority (netloc) or path/query/fragment
- Max 8KB length
- No spaces allowed
- Must follow RFC-3986 format

#### Language
Must match pattern:
- 2-3 letter language code or 'i'
- Optional subtag with alphanumeric chars and hyphens

### Using Validation in Your Code

There are two ways to enable validation:

1. Using `get_or_create` with `strict_string_format=True`:

```python
from atproto_client.models.utils import get_or_create
from atproto_client.models.string_formats import Handle
from pydantic import BaseModel

class MyModel(BaseModel):
    handle: Handle

data = {"handle": "alice.bsky.social"}
model_instance = get_or_create(data, MyModel, strict_string_format=True)
```

2. Using Pydantic's validation context directly:

```python
from pydantic import BaseModel
from atproto_client.models.string_formats import Handle

class MyModel(BaseModel):
    handle: Handle

model_instance = MyModel.model_validate(
    {"handle": "alice.bsky.social"},
    context={"strict_string_format": True}
)
```

When validation is disabled (the default), any string value will be accepted for any format. When enabled, the values must conform to the above validation rules, or else a `ValidationError` will be raised.
4 changes: 2 additions & 2 deletions packages/atproto_client/models/app/bsky/actor/get_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.actor.getProfile`."""

actor: string_formats.Handle #: Handle or DID of account to fetch profile of.
actor: string_formats.AtIdentifier #: Handle or DID of account to fetch profile of.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Handle or DID of account to fetch profile of.
actor: string_formats.AtIdentifier #: Handle or DID of account to fetch profile of.
4 changes: 2 additions & 2 deletions packages/atproto_client/models/app/bsky/actor/get_profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.actor.getProfiles`."""

actors: t.List[string_formats.Handle] = Field(max_length=25) #: Actors.
actors: t.List[string_formats.AtIdentifier] = Field(max_length=25) #: Actors.


class ParamsDict(t.TypedDict):
actors: t.List[string_formats.Handle] #: Actors.
actors: t.List[string_formats.AtIdentifier] #: Actors.


class Response(base.ResponseModelBase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.feed.getActorFeeds`."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: t.Optional[str] = None #: Cursor.
limit: t.Optional[int] = Field(default=50, ge=1, le=100) #: Limit.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
limit: te.NotRequired[t.Optional[int]] #: Limit.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.feed.getActorLikes`."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: t.Optional[str] = None #: Cursor.
limit: t.Optional[int] = Field(default=50, ge=1, le=100) #: Limit.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
limit: te.NotRequired[t.Optional[int]] #: Limit.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.feed.getAuthorFeed`."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: t.Optional[str] = None #: Cursor.
filter: t.Optional[
t.Union[
Expand All @@ -36,7 +36,7 @@ class Params(base.ParamsModelBase):


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
filter: te.NotRequired[
t.Optional[
Expand Down
8 changes: 4 additions & 4 deletions packages/atproto_client/models/app/bsky/feed/search_posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.feed.searchPosts`."""

q: str #: Search query string; syntax, phrase, boolean, and faceting is unspecified, but Lucene query syntax is recommended.
author: t.Optional[string_formats.Handle] = (
author: t.Optional[string_formats.AtIdentifier] = (
None #: Filter to posts by the given account. Handles are resolved to DID before query-time.
)
cursor: t.Optional[str] = (
Expand All @@ -34,7 +34,7 @@ class Params(base.ParamsModelBase):
None #: Filter to posts in the given language. Expected to be based on post language field, though server may override language detection.
)
limit: t.Optional[int] = Field(default=25, ge=1, le=100) #: Limit.
mentions: t.Optional[string_formats.Handle] = (
mentions: t.Optional[string_formats.AtIdentifier] = (
None #: Filter to posts which mention the given account. Handles are resolved to DID before query-time. Only matches rich-text facet mentions.
)
since: t.Optional[str] = (
Expand All @@ -57,7 +57,7 @@ class Params(base.ParamsModelBase):
class ParamsDict(t.TypedDict):
q: str #: Search query string; syntax, phrase, boolean, and faceting is unspecified, but Lucene query syntax is recommended.
author: te.NotRequired[
t.Optional[string_formats.Handle]
t.Optional[string_formats.AtIdentifier]
] #: Filter to posts by the given account. Handles are resolved to DID before query-time.
cursor: te.NotRequired[
t.Optional[str]
Expand All @@ -70,7 +70,7 @@ class ParamsDict(t.TypedDict):
] #: Filter to posts in the given language. Expected to be based on post language field, though server may override language detection.
limit: te.NotRequired[t.Optional[int]] #: Limit.
mentions: te.NotRequired[
t.Optional[string_formats.Handle]
t.Optional[string_formats.AtIdentifier]
] #: Filter to posts which mention the given account. Handles are resolved to DID before query-time. Only matches rich-text facet mentions.
since: te.NotRequired[
t.Optional[str]
Expand Down
2 changes: 1 addition & 1 deletion packages/atproto_client/models/app/bsky/graph/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ class ListViewerState(base.ModelBase):
class NotFoundActor(base.ModelBase):
"""Definition model for :obj:`app.bsky.graph.defs`. indicates that a handle or DID could not be resolved."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
not_found: bool = Field(frozen=True) #: Not found.

py_type: t.Literal['app.bsky.graph.defs#notFoundActor'] = Field(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.graph.getActorStarterPacks`."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: t.Optional[str] = None #: Cursor.
limit: t.Optional[int] = Field(default=50, ge=1, le=100) #: Limit.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
limit: te.NotRequired[t.Optional[int]] #: Limit.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.graph.getFollowers`."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: t.Optional[str] = None #: Cursor.
limit: t.Optional[int] = Field(default=50, ge=1, le=100) #: Limit.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
limit: te.NotRequired[t.Optional[int]] #: Limit.

Expand Down
4 changes: 2 additions & 2 deletions packages/atproto_client/models/app/bsky/graph/get_follows.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.graph.getFollows`."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: t.Optional[str] = None #: Cursor.
limit: t.Optional[int] = Field(default=50, ge=1, le=100) #: Limit.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
limit: te.NotRequired[t.Optional[int]] #: Limit.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.graph.getKnownFollowers`."""

actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: t.Optional[str] = None #: Cursor.
limit: t.Optional[int] = Field(default=50, ge=1, le=100) #: Limit.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Actor.
actor: string_formats.AtIdentifier #: Actor.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
limit: te.NotRequired[t.Optional[int]] #: Limit.

Expand Down
4 changes: 2 additions & 2 deletions packages/atproto_client/models/app/bsky/graph/get_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.graph.getLists`."""

actor: string_formats.Handle #: The account (actor) to enumerate lists from.
actor: string_formats.AtIdentifier #: The account (actor) to enumerate lists from.
cursor: t.Optional[str] = None #: Cursor.
limit: t.Optional[int] = Field(default=50, ge=1, le=100) #: Limit.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: The account (actor) to enumerate lists from.
actor: string_formats.AtIdentifier #: The account (actor) to enumerate lists from.
cursor: te.NotRequired[t.Optional[str]] #: Cursor.
limit: te.NotRequired[t.Optional[int]] #: Limit.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@
class Params(base.ParamsModelBase):
"""Parameters model for :obj:`app.bsky.graph.getRelationships`."""

actor: string_formats.Handle #: Primary account requesting relationships for.
others: t.Optional[t.List[string_formats.Handle]] = Field(
actor: string_formats.AtIdentifier #: Primary account requesting relationships for.
others: t.Optional[t.List[string_formats.AtIdentifier]] = Field(
default=None, max_length=30
) #: List of 'other' accounts to be related back to the primary.


class ParamsDict(t.TypedDict):
actor: string_formats.Handle #: Primary account requesting relationships for.
actor: string_formats.AtIdentifier #: Primary account requesting relationships for.
others: te.NotRequired[
t.Optional[t.List[string_formats.Handle]]
t.Optional[t.List[string_formats.AtIdentifier]]
] #: List of 'other' accounts to be related back to the primary.


Expand Down
Loading

0 comments on commit c2a51c7

Please sign in to comment.