From afafcf594b80f47ce533cb458c1c71889d9617ce Mon Sep 17 00:00:00 2001 From: Derek Melchin Date: Fri, 13 Oct 2023 17:24:09 -0600 Subject: [PATCH] Update Object Store streaming docs --- ... Providers.html => 02 File Providers.html} | 5 ++++- ...tom Data.php => 03 Stream Custom Data.php} | 0 ...ulk Data.php => 04 Download Bulk Data.php} | 0 .../01 Key Concepts/97 File Quotas.html | 8 ++++---- .../01 Key Concepts/98 Rate Limits.html | 2 +- .../01 Key Concepts/01 Introduction.html | 2 +- .../01 Key Concepts/02 Data Formats.html | 4 ++-- .../90 Live Trading Considerations.html | 1 + .../04 Define Custom Types.html | 3 +++ .../04 Define Custom Types.html | 2 ++ .../04 Define Custom Types.html | 2 ++ .../03 Bulk Downloads/01 Introduction.html | 2 ++ .../02 Recommended Use Cases.php | 2 +- .../03 Bulk Downloads/04 Save Files.html | 1 + ...ata.html => 05 Transport Binary Data.html} | 9 ++++++--- .../custom-data/download-use-cases.html | 1 + Resources/object-store/read-data.php | 18 ++++++++--------- Resources/object-store/save-data.php | 20 ++++++++++++------- 18 files changed, 53 insertions(+), 29 deletions(-) rename 03 Writing Algorithms/16 Importing Data/01 Key Concepts/{04 Remote File Providers.html => 02 File Providers.html} (82%) rename 03 Writing Algorithms/16 Importing Data/01 Key Concepts/{02 Stream Custom Data.php => 03 Stream Custom Data.php} (100%) rename 03 Writing Algorithms/16 Importing Data/01 Key Concepts/{03 Download Bulk Data.php => 04 Download Bulk Data.php} (100%) create mode 100644 03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/90 Live Trading Considerations.html create mode 100644 03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/04 Save Files.html rename 03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/{04 Transport Binary Data.html => 05 Transport Binary Data.html} (54%) diff --git a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/04 Remote File Providers.html b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/02 File Providers.html similarity index 82% rename from 03 Writing Algorithms/16 Importing Data/01 Key Concepts/04 Remote File Providers.html rename to 03 Writing Algorithms/16 Importing Data/01 Key Concepts/02 File Providers.html index abd6a7335a..b6de9071d4 100644 --- a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/04 Remote File Providers.html +++ b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/02 File Providers.html @@ -1,4 +1,7 @@ -

The most common remote file providers to use are Dropbox, GitHub, and Google Sheets.

+

The most common file providers to use are the Object Store, Dropbox, GitHub, and Google Sheets.

+ +

Object Store

+

The Object Store is the fastest file provider. If you import files from remote providers, you will be restricted by their rate limits and your download speed.
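For example, here is a minimal sketch of loading a file from the Object Store in an algorithm, assuming a CSV file was previously saved under the hypothetical key my_custom_data.csv:

from AlgorithmImports import *

class ObjectStoreImportAlgorithm(QCAlgorithm):
    def Initialize(self) -> None:
        self.SetStartDate(2023, 1, 1)
        # Read the whole file from the Object Store; no remote request is made.
        csv = self.ObjectStore.Read("my_custom_data.csv")
        self.Debug(f"Loaded {len(csv.splitlines())} lines")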

Dropbox

If you store your custom data in Dropbox, you need to create a link to the file and add ?dl=1 to the end of the file URL. To create file links, see How to share files or folders in the Dropbox documentation. The ?dl=1 parameter makes the link serve the file itself instead of the file's HTML preview page.
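For example, a hypothetical share link like https://www.dropbox.com/s/abc123/data.csv?dl=0 becomes https://www.dropbox.com/s/abc123/data.csv?dl=1.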

diff --git a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/02 Stream Custom Data.php b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/03 Stream Custom Data.php similarity index 100% rename from 03 Writing Algorithms/16 Importing Data/01 Key Concepts/02 Stream Custom Data.php rename to 03 Writing Algorithms/16 Importing Data/01 Key Concepts/03 Stream Custom Data.php diff --git a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/03 Download Bulk Data.php b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/04 Download Bulk Data.php similarity index 100% rename from 03 Writing Algorithms/16 Importing Data/01 Key Concepts/03 Download Bulk Data.php rename to 03 Writing Algorithms/16 Importing Data/01 Key Concepts/04 Download Bulk Data.php diff --git a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/97 File Quotas.html b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/97 File Quotas.html index 4464b6a40d..c5d567b97f 100644 --- a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/97 File Quotas.html +++ b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/97 File Quotas.html @@ -1,4 +1,6 @@ -

The following table shows the number of files you can download during a single backtest or Research Environment session in QuantConnect Cloud:

+

There are no limits to the number of files you can load from the Object Store during a single backtest or Research Environment session in QuantConnect Cloud.

+ +

The following table shows the number of remote files you can download during a single backtest or Research Environment session in QuantConnect Cloud:

@@ -38,6 +40,4 @@ } -

Each file can be up to 200 MB in size and have a file name up to 200 characters long.

- -

If you need to import more files than your quota allows, save your custom data files in the Object Store and load them from there.

+

Remote files can be up to 200 MB in size and can have names up to 200 characters long.

\ No newline at end of file diff --git a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/98 Rate Limits.html b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/98 Rate Limits.html index 0379584335..8ebb5f2b02 100644 --- a/03 Writing Algorithms/16 Importing Data/01 Key Concepts/98 Rate Limits.html +++ b/03 Writing Algorithms/16 Importing Data/01 Key Concepts/98 Rate Limits.html @@ -1 +1 @@ -

We do not impose a rate limit on file downloads but often external providers do. Dropbox caps download speeds to 10 kb/s after 3-4 download requests. To ensure your algorithms run fast, only use a small number of small custom data files.

+

We do not impose a rate limit on file downloads, but external providers often do. Dropbox caps download speeds to 10 kb/s after 3-4 download requests. To ensure your algorithms run fast, only use a small number of small custom data files or use the Object Store.

diff --git a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/01 Introduction.html b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/01 Introduction.html index a18c3f6edc..253764d6f2 100644 --- a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/01 Introduction.html +++ b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/01 Introduction.html @@ -1 +1 @@ -

There are two techniques to import data into your algorithm. You can either manually import the entire file or stream the file line-by-line into your algorithm's OnData event. This page explores streaming a file's contents into your algorithm line-by-line.

+

There are two techniques to import data into your algorithm. You can either manually import the entire file or stream the file line-by-line into your algorithm's OnData event. This page explores streaming a file's contents into your algorithm line-by-line. The data you import can come from a remote server or the Object Store.

diff --git a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/02 Data Formats.html b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/02 Data Formats.html index db7ef92f39..5420b72c30 100644 --- a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/02 Data Formats.html +++ b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/02 Data Formats.html @@ -1,3 +1,3 @@ -

Common data formats are CSV, JSON, and XML, but you can use any file type that can be read over the internet. Each request has a one-second overhead, so you should package your custom data to minimize requests. Bundle dates together where possible to speed up execution. Just ensure the data in the file is in chronological order.

+

Common data formats are CSV, JSON, and XML, but you can use any file type that can be read over the internet. For Excel files, double check the raw data format that the data reader will parse, since Excel formats data for convenient visualization in the application view. To avoid confusion about the data format, save the spreadsheet as a CSV file and open it in a text editor to confirm the raw data format.

-

For Excel files, please double check the raw data format for parsing in the data reader, since data will be formatted for convenient visualization in Excel application view. To avoid confusion of data format, save the spreadsheet as a CSV file and open it in a text editor to confirm the raw data format.

+

The data in the file must be in chronological order. If you import from a remote file provider, each request has a one-second overhead, so package your custom data to minimize requests. Bundle dates together where possible to speed up execution. The Object Store file provider gives you the fastest execution because you don't need to download the files on every run.
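For example, a single hypothetical CSV file that bundles several dates into one request, in chronological order:

2023-10-02,100.1
2023-10-03,101.4
2023-10-04,99.8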

\ No newline at end of file diff --git a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/90 Live Trading Considerations.html b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/90 Live Trading Considerations.html new file mode 100644 index 0000000000..41bca1576c --- /dev/null +++ b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/01 Key Concepts/90 Live Trading Considerations.html @@ -0,0 +1 @@ +

In live trading, we pass custom data to your algorithm as soon as it arrives. The time it arrives may not align with the time of other slices. Design your algorithm to handle unsynchronized data so that you don't run into issues.
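A minimal sketch of a defensive OnData handler, assuming the custom data subscription's Symbol was stored in self.custom_symbol during Initialize:

def OnData(self, slice: Slice) -> None:
    # Custom data can arrive at times that don't align with other slices,
    # so only act when this slice actually contains a custom data point.
    if not slice.ContainsKey(self.custom_symbol):
        return
    point = slice[self.custom_symbol]
    self.Debug(f"Custom point at {point.EndTime}: {point.Value}")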

\ No newline at end of file diff --git a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/02 CSV Format Example/04 Define Custom Types.html b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/02 CSV Format Example/04 Define Custom Types.html index 27224334f2..8c3f6439ee 100644 --- a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/02 CSV Format Example/04 Define Custom Types.html +++ b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/02 CSV Format Example/04 Define Custom Types.html @@ -53,6 +53,9 @@ config: SubscriptionDataConfig, date: datetime, isLive: bool) -> SubscriptionDataSource: + + if not isLive: + return SubscriptionDataSource("<custom_data_key>", SubscriptionTransportMedium.ObjectStore, FileFormat.Csv) return SubscriptionDataSource("https://www.dropbox.com/s/rsmg44jr6wexn2h/CNXNIFTY.csv?dl=1", SubscriptionTransportMedium.RemoteFile) def Reader(self, diff --git a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/03 JSON Format Example/04 Define Custom Types.html b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/03 JSON Format Example/04 Define Custom Types.html index 5ffab0e4b2..3c64e56454 100644 --- a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/03 JSON Format Example/04 Define Custom Types.html +++ b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/02 Custom Securities/03 JSON Format Example/04 Define Custom Types.html @@ -60,6 +60,8 @@ config: SubscriptionDataConfig, date: datetime, isLive: bool) -> SubscriptionDataSource: + if not isLive: + return SubscriptionDataSource("<custom_data_key>", SubscriptionTransportMedium.ObjectStore, FileFormat.UnfoldingCollection) return SubscriptionDataSource("https://raw.githubusercontent.com/QuantConnect/Documentation/master/Resources/datasets/custom-data/unfolding-collection-example.json", SubscriptionTransportMedium.RemoteFile, FileFormat.UnfoldingCollection) def Reader(self, diff --git a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/03 Custom Universes/03 JSON Format Example/04 Define Custom Types.html b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/03 Custom Universes/03 JSON Format Example/04 Define Custom Types.html index ff98a924e7..8c3cbaf759 100644 --- a/03 Writing Algorithms/16 Importing Data/02 Streaming Data/03 Custom Universes/03 JSON Format Example/04 Define Custom Types.html +++ b/03 Writing Algorithms/16 Importing Data/02 Streaming Data/03 Custom Universes/03 JSON Format Example/04 Define Custom Types.html @@ -50,6 +50,8 @@ config: SubscriptionDataConfig, date: datetime, isLive: bool) -> SubscriptionDataSource: + if not isLive: + return SubscriptionDataSource("<custom_data_key>", SubscriptionTransportMedium.ObjectStore, FileFormat.UnfoldingCollection) return SubscriptionDataSource("https://www.dropbox.com/s/7xe7lfac52mdfpe/custom-universe.json?dl=1", SubscriptionTransportMedium.RemoteFile, FileFormat.UnfoldingCollection) diff --git a/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/01 Introduction.html b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/01 Introduction.html index 7d7da07220..12783dc1e7 100644 --- a/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/01 Introduction.html +++ b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/01 Introduction.html @@ -1 +1,3 @@

There are two techniques to import data into your algorithm. You can either manually import the entire file or stream the file line-by-line into your algorithm's OnData event. This page explores importing an entire file for manual use.

+ +

Instead of downloading the file from a remote file provider, you can upload the file to the Object Store (with the Algorithm Lab or with the CLI) for faster execution.

diff --git a/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/02 Recommended Use Cases.php b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/02 Recommended Use Cases.php index eb0de5479e..dada445975 100644 --- a/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/02 Recommended Use Cases.php +++ b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/02 Recommended Use Cases.php @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/04 Save Files.html b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/04 Save Files.html new file mode 100644 index 0000000000..54f1a841b0 --- /dev/null +++ b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/04 Save Files.html @@ -0,0 +1 @@ +

When you download a remote file, save it in the Object Store so that you don't have to download it again. If you need to import the file multiple times, loading it from the Object Store is faster than repeatedly downloading it from the remote file provider.
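A minimal sketch of this caching pattern, assuming a hypothetical Object Store key; <fileURL> is a placeholder for your remote file:

key = "my_data.csv"  # hypothetical Object Store key
if self.ObjectStore.ContainsKey(key):
    content = self.ObjectStore.Read(key)  # cached by an earlier run
else:
    content = self.Download("<fileURL>")  # remote download
    self.ObjectStore.Save(key, content)   # cache for future runs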

diff --git a/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/04 Transport Binary Data.html b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/05 Transport Binary Data.html similarity index 54% rename from 03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/04 Transport Binary Data.html rename to 03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/05 Transport Binary Data.html index 44507245e7..35f8f7c405 100644 --- a/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/04 Transport Binary Data.html +++ b/03 Writing Algorithms/16 Importing Data/03 Bulk Downloads/05 Transport Binary Data.html @@ -1,7 +1,7 @@

Follow these steps to transport binary files:

    -
  1. Add the following imports to your program:
  2. +
  3. Add the following imports to your local program:
  4. import pickle
     import base64
    @@ -13,9 +13,12 @@ base64_str = base64.b64encode(pickle_bytes).decode('ascii')
    -
  5. Save the string representation of your object into the Object Store or one of the supported external sources.
  6. +
  7. Save the string representation of your object to one of the supported external sources.
  8. -
  9. Load the string representation of your object into your trading algorithm.
  10. +
  11. Download the remote file into your project.
  12. +
    +
    base64_str = self.Download("<fileURL>")
    +
  13. Restore the object.
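A minimal sketch of the full round trip, assuming a hypothetical picklable object named model; <fileURL> is a placeholder for wherever you host the string:

import pickle
import base64

# Locally: serialize the object and encode it as an ASCII string.
pickle_bytes = pickle.dumps(model)
base64_str = base64.b64encode(pickle_bytes).decode('ascii')
# ...upload base64_str to your file provider...

# In the algorithm: download the string and restore the object.
base64_str = self.Download("<fileURL>")
restored_model = pickle.loads(base64.b64decode(base64_str))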
  14. diff --git a/Resources/datasets/custom-data/download-use-cases.html b/Resources/datasets/custom-data/download-use-cases.html index 2d1a4569a9..ef4fa03139 100644 --- a/Resources/datasets/custom-data/download-use-cases.html +++ b/Resources/datasets/custom-data/download-use-cases.html @@ -1,5 +1,6 @@

The batch import technique is outside of LEAN's awareness or control, so LEAN can't enforce good practices. However, the batch import technique is good for loading the following datasets:

      +
• Data stored in the Object Store
    • Trained AI Models
    • Well-defined historical price datasets
    • Parameters and setting imports such as Symbol lists
    • diff --git a/Resources/object-store/read-data.php b/Resources/object-store/read-data.php index bc8a996b75..00ee90a6bd 100644 --- a/Resources/object-store/read-data.php +++ b/Resources/object-store/read-data.php @@ -27,15 +27,6 @@
    -

    Bytes

    - -

    To read a Bytes object, call the ReadBytes method.

    - -
    -
    var bytesData = ObjectStore.ReadBytes($"{ProjectId}/bytesKey");
    -
    byte_data = ObjectStore.ReadBytes(f"{ProjectId}/bytes_key")
    -
    -

    Strings

    To read a string object, call the Read or ReadString method.

    @@ -60,4 +51,13 @@

    If you created the XML object from a dictionary, reconstruct the dictionary.

    var dict = xmlData.Elements().ToDictionary(x => x.Name.LocalName, x => int.Parse(x.Value));
    +
    + +

    Bytes

    + +

    To read a Bytes object, call the ReadBytes method.

    + +
    +
    var bytesData = ObjectStore.ReadBytes($"{ProjectId}/bytesKey");
    +
    byte_data = ObjectStore.ReadBytes(f"{ProjectId}/bytes_key")
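In Python, ReadBytes returns a .NET byte array. A minimal sketch of converting it, assuming you need a native Python bytes object downstream:

data = bytes(byte_data)  # convert the .NET byte[] to Python bytes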
    \ No newline at end of file diff --git a/Resources/object-store/save-data.php b/Resources/object-store/save-data.php index b4d59c4d85..7512790a68 100644 --- a/Resources/object-store/save-data.php +++ b/Resources/object-store/save-data.php @@ -22,13 +22,6 @@ quantconnect.com/project/12345 is 12345." : ""?>

    -

    Bytes

    -

    To save a Bytes object, call the SaveBytes method.

    -
    -
    var saveSuccessful = ObjectStore.SaveBytes($"{ProjectId}/bytesKey", bytesSample)
    -
    save_successful = ObjectStore.SaveBytes(f"{ProjectId}/bytes_key", bytes_sample)
    -
    -

    Strings

    To save a string object, call the Save or SaveString method.

    @@ -47,3 +40,16 @@
    var saveSuccessful = ObjectStore.SaveXml<XElement>($"{ProjectId}/xmlKey", xmlSample);
    + +

    Bytes

    +

    To save a Bytes object (for example, zipped data), call the SaveBytes method.

    +
    +
var saveSuccessful = ObjectStore.SaveBytes($"{ProjectId}/bytesKey", bytesSample);
    +
    +var zippedDataSample = Compression.ZipBytes(Encoding.UTF8.GetBytes(stringSample), "data");
+var zipSaveSuccessful = ObjectStore.SaveBytes($"{ProjectId}/bytesKey.zip", zippedDataSample);
    +
    save_successful = ObjectStore.SaveBytes(f"{ProjectId}/bytes_key", bytes_sample)
    +
    +zipped_data_sample = Compression.ZipBytes(bytes(string_sample, "utf-8"), "data")
+zip_save_successful = ObjectStore.SaveBytes(f"{ProjectId}/bytes_key.zip", zipped_data_sample)
    +
    \ No newline at end of file