-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
68 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,35 @@ | |
|
||
<title>qs2</title> | ||
|
||
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to | ||
// be compatible with the behavior of Pandoc < 2.8). | ||
// Once the DOM is parsed, strip every attribute from the heading that opens | ||
// each Pandoc section div, so attributes live only on the wrapping div. | ||
document.addEventListener('DOMContentLoaded', function(e) { | ||
// First child of every div.section whose class attribute contains "level". | ||
var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); | ||
var i, h, a; | ||
for (i = 0; i < hs.length; i++) { | ||
h = hs[i]; | ||
if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 | ||
a = h.attributes; | ||
// The loop relies on a.length shrinking as attributes are removed, so | ||
// removing a[0] repeatedly empties the collection. | ||
while (a.length > 0) h.removeAttribute(a[0].name); | ||
} | ||
}); | ||
</script> | ||
<script>// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) | ||
// v0.0.1 | ||
// Written by JooYoung Seo ([email protected]) and Atsushi Yasumoto on June 1st, 2020. | ||
|
||
// Once the DOM is parsed, mark every empty <a> inside a "sourceCode" element | ||
// with aria-hidden="true". | ||
document.addEventListener('DOMContentLoaded', function() { | ||
// Every element carrying the "sourceCode" class. | ||
const codeList = document.getElementsByClassName("sourceCode"); | ||
for (var i = 0; i < codeList.length; i++) { | ||
// All anchor descendants of the current sourceCode element. | ||
var linkList = codeList[i].getElementsByTagName('a'); | ||
for (var j = 0; j < linkList.length; j++) { | ||
// Only anchors with no inner markup at all are hidden; anchors that | ||
// contain text or child elements are left untouched. | ||
if (linkList[j].innerHTML === "") { | ||
linkList[j].setAttribute('aria-hidden', 'true'); | ||
} | ||
} | ||
} | ||
}); | ||
</script> | ||
|
||
<style type="text/css"> | ||
code{white-space: pre-wrap;} | ||
|
@@ -35,26 +64,27 @@ | |
} | ||
</style> | ||
<style type="text/css" data-origin="pandoc"> | ||
a.sourceLine { display: inline-block; line-height: 1.25; } | ||
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; } | ||
a.sourceLine:empty { height: 1.2em; } | ||
.sourceCode { overflow: visible; } | ||
code.sourceCode { white-space: pre; position: relative; } | ||
pre > code.sourceCode { white-space: pre; position: relative; } | ||
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } | ||
pre > code.sourceCode > span:empty { height: 1.2em; } | ||
code.sourceCode > span { color: inherit; text-decoration: inherit; } | ||
div.sourceCode { margin: 1em 0; } | ||
pre.sourceCode { margin: 0; } | ||
@media screen { | ||
div.sourceCode { overflow: auto; } | ||
} | ||
@media print { | ||
code.sourceCode { white-space: pre-wrap; } | ||
a.sourceLine { text-indent: -1em; padding-left: 1em; } | ||
} | ||
pre.numberSource a.sourceLine | ||
{ position: relative; left: -4em; } | ||
pre.numberSource a.sourceLine::before | ||
{ content: attr(title); | ||
pre > code.sourceCode { white-space: pre-wrap; } | ||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } | ||
} | ||
pre.numberSource code | ||
{ counter-reset: source-line 0; } | ||
pre.numberSource code > span | ||
{ position: relative; left: -4em; counter-increment: source-line; } | ||
pre.numberSource code > span > a:first-child::before | ||
{ content: counter(source-line); | ||
position: relative; left: -1em; text-align: right; vertical-align: baseline; | ||
border: none; pointer-events: all; display: inline-block; | ||
border: none; display: inline-block; | ||
-webkit-touch-callout: none; -webkit-user-select: none; | ||
-khtml-user-select: none; -moz-user-select: none; | ||
-ms-user-select: none; user-select: none; | ||
|
@@ -63,9 +93,9 @@ | |
} | ||
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } | ||
div.sourceCode | ||
{ } | ||
{ } | ||
@media screen { | ||
a.sourceLine::before { text-decoration: underline; } | ||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } | ||
} | ||
code span.al { color: #ff0000; font-weight: bold; } /* Alert */ | ||
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ | ||
|
@@ -329,46 +359,46 @@ <h1 class="title toc-ignore">qs2</h1> | |
<p><em>qs2: a framework for efficient serialization</em></p> | ||
<p><code>qs2</code> is the successor to the <code>qs</code> package. The goal is to have reliable and fast performance for saving and loading objects in R.</p> | ||
<p>The <code>qs2</code> format directly uses R serialization (via the <code>R_Serialize</code>/<code>R_Unserialize</code> C API) while improving underlying compression and disk IO patterns. If you are familiar with the <code>qs</code> package, the benefits and usage are the same.</p> | ||
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb1-1" title="1"><span class="kw">qs_save</span>(data, <span class="st">"myfile.qs2"</span>)</a> | ||
<a class="sourceLine" id="cb1-2" title="2">data <-<span class="st"> </span><span class="kw">qs_read</span>(<span class="st">"myfile.qs2"</span>)</a></code></pre></div> | ||
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true"></a><span class="kw">qs_save</span>(data, <span class="st">"myfile.qs2"</span>)</span> | ||
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true"></a>data <-<span class="st"> </span><span class="kw">qs_read</span>(<span class="st">"myfile.qs2"</span>)</span></code></pre></div> | ||
<p>Use the file extension <code>qs2</code> to distinguish it from the original <code>qs</code> package. It is not compatible with the original <code>qs</code> format.</p> | ||
<div id="installation" class="section level2"> | ||
<h2>Installation</h2> | ||
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb2-1" title="1"><span class="kw">install.packages</span>(<span class="st">"qs2"</span>)</a></code></pre></div> | ||
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true"></a><span class="kw">install.packages</span>(<span class="st">"qs2"</span>)</span></code></pre></div> | ||
<p>On Mac or Linux, you can enable multi-threading by compiling from source. It is enabled by default on Windows.</p> | ||
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb3-1" title="1">remotes<span class="op">::</span><span class="kw">install_cran</span>(<span class="st">"qs2"</span>, <span class="dt">type =</span> <span class="st">"source"</span>, <span class="dt">configure.args =</span> <span class="st">" --with-TBB --with-simd=AVX2"</span>)</a></code></pre></div> | ||
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true"></a>remotes<span class="op">::</span><span class="kw">install_cran</span>(<span class="st">"qs2"</span>, <span class="dt">type =</span> <span class="st">"source"</span>, <span class="dt">configure.args =</span> <span class="st">" --with-TBB --with-simd=AVX2"</span>)</span></code></pre></div> | ||
<p>Multi-threading in <code>qs2</code> uses the <code>Intel Thread Building Blocks</code> framework via the <code>RcppParallel</code> package.</p> | ||
</div> | ||
<div id="converting-qs2-to-rds" class="section level2"> | ||
<h2>Converting qs2 to RDS</h2> | ||
<p>Because the <code>qs2</code> format directly uses R serialization, you can convert it to RDS and vice versa.</p> | ||
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb4-1" title="1">file_qs2 <-<span class="st"> </span><span class="kw">tempfile</span>(<span class="dt">fileext =</span> <span class="st">".qs2"</span>)</a> | ||
<a class="sourceLine" id="cb4-2" title="2">file_rds <-<span class="st"> </span><span class="kw">tempfile</span>(<span class="dt">fileext =</span> <span class="st">".RDS"</span>)</a> | ||
<a class="sourceLine" id="cb4-3" title="3">x <-<span class="st"> </span><span class="kw">runif</span>(<span class="fl">1e6</span>)</a> | ||
<a class="sourceLine" id="cb4-4" title="4"></a> | ||
<a class="sourceLine" id="cb4-5" title="5"><span class="co"># save `x` with qs_save</span></a> | ||
<a class="sourceLine" id="cb4-6" title="6"><span class="kw">qs_save</span>(x, file_qs2)</a> | ||
<a class="sourceLine" id="cb4-7" title="7"></a> | ||
<a class="sourceLine" id="cb4-8" title="8"><span class="co"># convert the file to RDS</span></a> | ||
<a class="sourceLine" id="cb4-9" title="9"><span class="kw">qs_to_rds</span>(<span class="dt">input_file =</span> file_qs2, <span class="dt">output_file =</span> file_rds)</a> | ||
<a class="sourceLine" id="cb4-10" title="10"></a> | ||
<a class="sourceLine" id="cb4-11" title="11"><span class="co"># read `x` back in with `readRDS`</span></a> | ||
<a class="sourceLine" id="cb4-12" title="12">xrds <-<span class="st"> </span><span class="kw">readRDS</span>(file_rds)</a> | ||
<a class="sourceLine" id="cb4-13" title="13"><span class="kw">stopifnot</span>(<span class="kw">identical</span>(x, xrds))</a></code></pre></div> | ||
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true"></a>file_qs2 <-<span class="st"> </span><span class="kw">tempfile</span>(<span class="dt">fileext =</span> <span class="st">".qs2"</span>)</span> | ||
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true"></a>file_rds <-<span class="st"> </span><span class="kw">tempfile</span>(<span class="dt">fileext =</span> <span class="st">".RDS"</span>)</span> | ||
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true"></a>x <-<span class="st"> </span><span class="kw">runif</span>(<span class="fl">1e6</span>)</span> | ||
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true"></a></span> | ||
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true"></a><span class="co"># save `x` with qs_save</span></span> | ||
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true"></a><span class="kw">qs_save</span>(x, file_qs2)</span> | ||
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true"></a></span> | ||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true"></a><span class="co"># convert the file to RDS</span></span> | ||
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true"></a><span class="kw">qs_to_rds</span>(<span class="dt">input_file =</span> file_qs2, <span class="dt">output_file =</span> file_rds)</span> | ||
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true"></a></span> | ||
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true"></a><span class="co"># read `x` back in with `readRDS`</span></span> | ||
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true"></a>xrds <-<span class="st"> </span><span class="kw">readRDS</span>(file_rds)</span> | ||
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true"></a><span class="kw">stopifnot</span>(<span class="kw">identical</span>(x, xrds))</span></code></pre></div> | ||
</div> | ||
<div id="validating-file-integrity" class="section level2"> | ||
<h2>Validating file integrity</h2> | ||
<p>The <code>qs2</code> format saves an internal checksum. This can be used to test for file corruption before deserialization via the <code>validate_checksum</code> parameter, but has a minor performance penalty.</p> | ||
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb5-1" title="1"><span class="kw">qs_save</span>(data, <span class="st">"myfile.qs2"</span>)</a> | ||
<a class="sourceLine" id="cb5-2" title="2">data <-<span class="st"> </span><span class="kw">qs_read</span>(<span class="st">"myfile.qs2"</span>, <span class="dt">validate_checksum =</span> <span class="ot">TRUE</span>)</a></code></pre></div> | ||
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true"></a><span class="kw">qs_save</span>(data, <span class="st">"myfile.qs2"</span>)</span> | ||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true"></a>data <-<span class="st"> </span><span class="kw">qs_read</span>(<span class="st">"myfile.qs2"</span>, <span class="dt">validate_checksum =</span> <span class="ot">TRUE</span>)</span></code></pre></div> | ||
</div> | ||
<div id="the-qdata-format" class="section level1"> | ||
<h1>The qdata format</h1> | ||
<p>The package also introduces the <code>qdata</code> format which has its own serialization layout and works with only data types (vectors, lists, data frames, matrices).</p> | ||
<p>It will replace internal types (functions, promises, external pointers, environments, objects) with NULL. The <code>qdata</code> format differs from the <code>qs2</code> format in that it is NOT a general-purpose serialization format.</p> | ||
<p>The eventual goal of <code>qdata</code> is to also have interoperability with other languages, particularly <code>Python</code>.</p> | ||
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb6-1" title="1"><span class="kw">qd_save</span>(data, <span class="st">"myfile.qs2"</span>)</a> | ||
<a class="sourceLine" id="cb6-2" title="2">data <-<span class="st"> </span><span class="kw">qd_read</span>(<span class="st">"myfile.qs2"</span>)</a></code></pre></div> | ||
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true"></a><span class="kw">qd_save</span>(data, <span class="st">"myfile.qs2"</span>)</span> | ||
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true"></a>data <-<span class="st"> </span><span class="kw">qd_read</span>(<span class="st">"myfile.qs2"</span>)</span></code></pre></div> | ||
<div id="benchmarks" class="section level2"> | ||
<h2>Benchmarks</h2> | ||
<p>A summary across 4 datasets is presented below.</p> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters