Skip to content

Commit

Permalink
Use preferred fsspec abstraction
Browse files Browse the repository at this point in the history
  • Loading branch information
edgarrmondragon committed Sep 18, 2024
1 parent 742524c commit 83c6754
Show file tree
Hide file tree
Showing 10 changed files with 607 additions and 867 deletions.
1,061 changes: 590 additions & 471 deletions poetry.lock

Large diffs are not rendered by default.

12 changes: 3 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,9 @@ sphinx-inline-tabs = {version = ">=2023.4.21", python = ">=3.9", optional = true
sphinx-notfound-page = {version = ">=1.0.0", python = ">=3.9", optional = true}
sphinx-reredirects = {version = ">=0.1.5", python = ">=3.9", optional = true}

# AWS-specific dependencies
boto3 = { version = ">=1.35", optional = true }

# File storage dependencies installed as optional 'filesystem' extras
fs-s3fs = {version = ">=1.1.1", optional = true}
s3fs = { version = ">=2024.9.0", optional = true }

# Parquet file dependencies installed as optional 'parquet' extras
# We add Python constraints to force Poetry to add the latest supported Numpy version
Expand Down Expand Up @@ -118,7 +116,7 @@ docs = [
"sphinx-notfound-page",
"sphinx-reredirects",
]
s3 = ["boto3", "fs-s3fs"]
s3 = ["fs-s3fs", "s3fs"]
testing = [
"pytest",
]
Expand All @@ -145,7 +143,6 @@ time-machine = ">=2.10.0"
xdoctest = ">=1.1.1"

[tool.poetry.group.typing.dependencies]
boto3-stubs = {extras = ["s3"], version = ">=1.35.16"}
mypy = ">=1.9"
types-jsonschema = ">=4.17.0.6"
types-pytz = ">=2022.7.1.2"
Expand Down Expand Up @@ -186,7 +183,7 @@ filterwarnings = [
# https://github.com/joblib/joblib/pull/1518
"ignore:Attribute n is deprecated:DeprecationWarning:joblib._utils",
# https://github.com/boto/boto3/issues/3889
"ignore:datetime\\.datetime\\.utcnow\\(\\) is deprecated:DeprecationWarning:botocore",
# "ignore:datetime\\.datetime\\.utcnow\\(\\) is deprecated:DeprecationWarning:botocore",
]
log_cli_level = "INFO"
markers = [
Expand Down Expand Up @@ -262,9 +259,6 @@ types-PyYAML = "yaml"
types-requests = "requests"

[tool.deptry.per_rule_ignores]
DEP001 = [
"mypy_boto3_s3",
]
DEP002 = [
# Transitive constraints
"numpy",
Expand Down
2 changes: 1 addition & 1 deletion samples/sample_tap_csv/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import csv
import typing as t

from singer_sdk.contrib.filesystem.stream import FileStream
from singer_sdk.contrib.filesystem import FileStream

if t.TYPE_CHECKING:
from singer_sdk.helpers.types import Record
Expand Down
4 changes: 2 additions & 2 deletions singer_sdk/contrib/filesystem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

from __future__ import annotations

from singer_sdk.contrib.filesystem import local, s3
from singer_sdk.contrib.filesystem.stream import FileStream

__all__ = ["local", "s3"]
__all__ = ["FileStream"]
123 changes: 0 additions & 123 deletions singer_sdk/contrib/filesystem/base.py

This file was deleted.

61 changes: 0 additions & 61 deletions singer_sdk/contrib/filesystem/local.py

This file was deleted.

99 changes: 0 additions & 99 deletions singer_sdk/contrib/filesystem/s3.py

This file was deleted.

21 changes: 11 additions & 10 deletions singer_sdk/contrib/filesystem/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@
class FileStream(Stream, metaclass=abc.ABCMeta):
"""Abstract base class for file streams."""

BASE_SCHEMA: t.ClassVar[dict[str, t.Any]] = {
"type": ["object"],
"properties": {
SDC_META_FILEPATH: {"type": "string"},
SDC_META_MODIFIED_AT: {"type": ["string", "null"], "format": "date-time"},
},
"required": [],
"additionalProperties": {"type": "string"},
}

def __init__(
self,
tap: Tap,
Expand All @@ -40,16 +50,7 @@ def __init__(
partitions: List of partitions for this stream.
"""
# TODO(edgarrmondragon): Build schema from file.
schema = {
"type": ["object"],
"properties": {
SDC_META_FILEPATH: {"type": "string"},
SDC_META_MODIFIED_AT: {"type": "string", "format": "date-time"},
},
"required": [],
"additionalProperties": {"type": "string"},
}
super().__init__(tap, schema, name)
super().__init__(tap, self.BASE_SCHEMA, name)

# TODO(edgarrmondragon): Make this None if the filesystem does not support it.
self.replication_key = SDC_META_MODIFIED_AT
Expand Down
Loading

0 comments on commit 83c6754

Please sign in to comment.