-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from bruin-data/feature/introduce-stripe-source
Feature/introduce stripe source
- Loading branch information
Showing
7 changed files
with
243 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
"""This source uses Stripe API and dlt to load data such as Customer, Subscription, Event etc. to the database and to calculate the MRR and churn rate.""" | ||
|
||
from typing import Any, Dict, Generator, Iterable, Optional, Tuple | ||
|
||
import dlt | ||
import stripe | ||
from dlt.sources import DltResource | ||
from pendulum import DateTime | ||
|
||
from .helpers import pagination, transform_date | ||
from .settings import ENDPOINTS, INCREMENTAL_ENDPOINTS | ||
|
||
|
||
@dlt.source
def stripe_source(
    endpoints: Tuple[str, ...] = ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    Build one full-refresh dlt resource per requested Stripe endpoint.

    Stripe API responses do not carry an "updated" key, so in most cases the
    data has to be loaded in 'replace' mode. This source works for every kind
    of endpoint, including 'Event', 'Invoice', etc.; those can alternatively be
    loaded incrementally (see `incremental_stripe_source`).

    Args:
        endpoints (Tuple[str, ...]): Names of the endpoints to retrieve data from.
            Defaults to the most popular Stripe API endpoints.
        stripe_secret_key (str): The API key used for authentication.
            Defaults to the value in the `dlt.secrets` object.
        start_date (Optional[DateTime]): Optional lower bound on the creation
            date of the retrieved objects. Defaults to None.
        end_date (Optional[DateTime]): Optional upper bound on the creation
            date of the retrieved objects. Defaults to None.

    Returns:
        Iterable[DltResource]: Resources with data created on or after
            'start_date' and before 'end_date'.
    """
    # The stripe client is configured through module-level attributes.
    stripe.api_version = "2022-11-15"
    stripe.api_key = stripe_secret_key

    def stripe_resource(
        endpoint: str,
    ) -> Generator[Dict[Any, Any], Any, None]:
        # Stream every page of the endpoint within the requested date window.
        yield from pagination(endpoint, start_date=start_date, end_date=end_date)

    for endpoint_name in endpoints:
        # The resource is named after its endpoint and fully replaced on each run.
        build_resource = dlt.resource(
            stripe_resource,
            name=endpoint_name,
            write_disposition="replace",
        )
        yield build_resource(endpoint_name)
|
||
|
||
@dlt.source
def incremental_stripe_source(
    endpoints: Tuple[str, ...] = INCREMENTAL_ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    initial_start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    Build one append-only, incrementally loaded dlt resource per Stripe endpoint.

    Because the Stripe API does not include an "updated" key in its responses,
    incremental loading is only possible for endpoints whose objects are never
    edited. The resources are loaded in "append" mode and tracked on the
    "created" field, so each run only requests data starting from the last
    seen "created" value.

    Args:
        endpoints (tuple): Names of the endpoints to retrieve data from.
            Defaults to Stripe API endpoints with uneditable data.
        stripe_secret_key (str): The API key used for authentication.
            Defaults to the value in the `dlt.secrets` object.
        initial_start_date (Optional[DateTime]): Optional initial value for
            `dlt.sources.incremental`. When given, the first run only loads
            data created from this date on. Defaults to None.
        end_date (Optional[DateTime]): Optional upper bound on the creation
            date of the retrieved objects. Defaults to None.

    Returns:
        Iterable[DltResource]: Resources containing only data that has not
            been loaded yet.
    """
    # The stripe client is configured through module-level attributes.
    stripe.api_version = "2022-11-15"
    stripe.api_key = stripe_secret_key

    # Without an explicit start date the incremental cursor is seeded with -1.
    if initial_start_date is None:
        start_date_unix = -1
    else:
        start_date_unix = transform_date(initial_start_date)

    def incremental_resource(
        endpoint: str,
        created: Optional[Any] = dlt.sources.incremental(
            "created", initial_value=start_date_unix
        ),
    ) -> Generator[Dict[Any, Any], Any, None]:
        # Resume from the last "created" value tracked by the incremental cursor.
        yield from pagination(
            endpoint, start_date=created.last_value, end_date=end_date
        )

    for endpoint_name in endpoints:
        build_resource = dlt.resource(
            incremental_resource,
            name=endpoint_name,
            write_disposition="append",
            primary_key="id",
        )
        yield build_resource(endpoint_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""Stripe analytics source helpers""" | ||
|
||
from typing import Any, Dict, Iterable, Optional, Union | ||
|
||
import stripe | ||
from dlt.common import pendulum | ||
from dlt.common.typing import TDataItem | ||
from pendulum import DateTime | ||
|
||
|
||
def pagination(
    endpoint: str, start_date: Optional[Any] = None, end_date: Optional[Any] = None
) -> Iterable[TDataItem]:
    """
    Page through a Stripe endpoint, yielding one page of items at a time.

    Args:
        endpoint (str): The endpoint to retrieve data from.
        start_date (Optional[Any]): Optional lower bound forwarded to
            `stripe_get_data`. Defaults to None.
        end_date (Optional[Any]): Optional upper bound forwarded to
            `stripe_get_data`. Defaults to None.

    Returns:
        Iterable[TDataItem]: The "data" list of each response, in request order.
    """
    cursor = None
    more_pages = True
    while more_pages:
        page = stripe_get_data(
            endpoint,
            start_date=start_date,
            end_date=end_date,
            starting_after=cursor,
        )
        items = page["data"]

        # The id of the last item on this page is the cursor for the next request.
        if items:
            cursor = items[-1]["id"]
        yield items

        more_pages = bool(page["has_more"])
|
||
|
||
def transform_date(date: Union[str, DateTime, int]) -> int:
    """
    Convert a date given as a string, a pendulum DateTime or an int into a unix timestamp.

    Args:
        date (Union[str, DateTime, int]): An ISO 8601 string such as
            "2023-01-01T00:00:00Z", a pendulum `DateTime`, or a value that is
            already a unix timestamp.

    Returns:
        int: The unix timestamp in whole seconds. Values that are neither a
            string nor a `DateTime` are returned unchanged.
    """
    if isinstance(date, str):
        # pendulum.from_format expects pendulum tokens (YYYY, MM, DD, ...), not
        # strptime-style "%Y-%m-%d" directives, so the previous pattern could not
        # match ISO strings. pendulum.parse handles ISO 8601 input directly.
        date = pendulum.parse(date)
    if isinstance(date, DateTime):
        # convert to unix timestamp (fractional seconds are truncated)
        date = int(date.timestamp())
    return date
|
||
|
||
def stripe_get_data(
    resource: str,
    start_date: Optional[Any] = None,
    end_date: Optional[Any] = None,
    **kwargs: Any,
) -> Dict[Any, Any]:
    """
    Request a single page of up to 100 objects from a Stripe resource.

    Args:
        resource (str): Name of the attribute on the `stripe` module to list,
            e.g. "Customer" or "Subscription".
        start_date (Optional[Any]): Optional inclusive lower bound ("gte") on
            the "created" field. Defaults to None.
        end_date (Optional[Any]): Optional exclusive upper bound ("lt") on
            the "created" field. Defaults to None.
        **kwargs (Any): Extra parameters forwarded to the `list` call,
            e.g. `starting_after`.

    Returns:
        Dict[Any, Any]: The list response converted to a plain dict.
    """
    # Only truthy bounds are normalized; falsy ones are passed through untouched.
    created_filter = {
        "gte": transform_date(start_date) if start_date else start_date,
        "lt": transform_date(end_date) if end_date else end_date,
    }

    # Subscriptions are always requested with status "all".
    if resource == "Subscription":
        kwargs["status"] = "all"

    stripe_object = getattr(stripe, resource)
    listing = stripe_object.list(created=created_filter, limit=100, **kwargs)
    return dict(listing)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
"""Stripe analytics source settings and constants""" | ||
|
||
# The most popular endpoints.
# The full list of Stripe API endpoints is available at https://stripe.com/docs/api.
ENDPOINTS = (
    "Subscription",
    "Account",
    "Coupon",
    "Customer",
    "Product",
    "Price",
)

# Endpoints that can be loaded incrementally.
INCREMENTAL_ENDPOINTS = (
    "Event",
    "Invoice",
    "BalanceTransaction",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,3 +24,4 @@ SQLAlchemy==1.4.52 | |
sqlalchemy2-stubs==0.0.2a38 | ||
tqdm==4.66.2 | ||
typer==0.12.3 | ||
stripe==10.7.0 |