ISA-tools · HLWeil · Feb 22, 2023 · Feb 25, 2023 · Feb 25, 2023 · Feb 25, 2023
diff --git a/source/_static/isajson/data_schema.json b/source/_static/isajson/data_schema.json
@@ -1,11 +1,14 @@
 {
     "$schema": "http://json-schema.org/draft-04/schema",
     "title": "ISA data schema",
-    "description": "JSON-schema representing a data file in the ISA model",
+    "description": "JSON-schema representing a data object in the ISA model",
     "type": "object",
     "properties": {
         "@id": { "type": "string", "format": "uri" },
-        "name": {
+        "filename": {
+            "type": "string"
+        },
+        "pointer": {
             "type": "string"
         },
         "type": {
@@ -16,6 +19,21 @@
                 "Image File"
             ]
         },
+        "generatedBy": {
+            "type": "string"
+        },
+        "explication": {
+            "$ref": "ontology_annotation_schema.json#"
+        },
+        "unit": {
+            "$ref": "ontology_annotation_schema.json#"
+        },
+        "objectType": {
+            "$ref": "ontology_annotation_schema.json#"
+        },
+        "label": {
+            "type": "string"
+        },
         "comments" : {
             "type": "array",
             "items": {

diff --git a/source/_static/isatab/d_sleuth.txt b/source/_static/isatab/d_sleuth.txt
@@ -0,0 +1,13 @@
+Data Pointer	Data File	Generated By	Explication	Term Source REF	Term Accession Number	Unit	Term Source REF	Term Accession Number	Object Type	Term Source REF	Term Accession Number	Label	Comment [Explanation]
+target_id	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	gene identifier	NCIT	C48664				String	NCIT	C45253	Gene ID	Gene identifier, reference to the fasta
+pval	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	p-value	NCIT	C44185				Float	NCIT	C48150	P-Value	Pvalue, has a term
+qval	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	q-value	NCIT	C64217				Float	NCIT	C48150	Q-Value	qVal, has a term
+test_stat	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	test statistic	OBCS	0000013				Float	NCIT	C48150	Test Statistic	test statistic, has a term
+rss	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	residual	STATO	0000234				Float	NCIT	C48150	Residual sum of squares	residual sum of squares, has a term
+degrees_free	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	number of degrees of freedom	STATO	0000069				Integer	NCIT	C45255	Number Of Degrees Of Freedom	degrees of freedom, has a term
+mean_obs	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	mean	NCIT	C53319	Transcript Fragment Count per Million Formula	NCIT	C181324	Float	NCIT	C48150	Mean	mean of observations, needs references
+var_obs	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	variance	NCIT	C48918				Float	NCIT	C48150	Variance	variance of observations, needs references
+tech_var	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	variance	NCIT	C48918				Float	NCIT	C48150	Technical variance	technical variance? Has a term
+sigma_sq	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	variance	NCIT	C48918				Float	NCIT	C48150	Variance	variance of observations, needs references
+smooth_sigma_sq	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	smoothed variance						Float	NCIT	C48150	Smoothed Variance	smoothed variance
+final_sigma_sq	runs/kallisto_sleuth/sleuth_dge.csv	workflows/kallisto_sleuth.R	corrected variance						Float	NCIT	C48150	Corrected Variance	adjusted variance
diff --git a/source/_static/model/data_node.csv b/source/_static/model/data_node.csv
@@ -1,2 +1,7 @@
 Property,Datatype,Description
-File name,String,"A file name or full path referencing a data file produced by the related process that MAY be packaged with, or is accessible via, the ISA reference implementation content."
+File name,String,"A file name or full path referencing a data file produced by the related process that MAY be packaged with, or is accessible via, the ISA reference implementation content."
+Pointer,String,"A pointer referencing a location inside the data file. This SHOULD always be specified when the data of interest is not the complete file, but a specific part of it."
+Generated By,String,"A file name, full path or identifier referencing the tool with which this data object was generated."
+Explication,Ontology Annotation,An ontology annotation qualifying what the data describes.
+Unit,Ontology Annotation,The unit qualifying the value stored in the data object.
+Object Type,Ontology Annotation,Specifies the format in which the value in the data object will be stored.
diff --git a/source/conf.py b/source/conf.py
@@ -66,7 +66,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = 'en'
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
@@ -339,4 +339,4 @@
 
 
 def setup(app):
-   app.add_stylesheet("theme_overrides.css")
+   app.add_css_file("theme_overrides.css")
diff --git a/source/isatab.rst b/source/isatab.rst
@@ -16,10 +16,10 @@ these examples to better understand the structure of ISA-Tab documents.
 
 Format
 ======
-ISA-Tab uses three types of file to capture the experimental metadata:
+ISA-Tab uses three types of files to capture the experimental and computational metadata:
  - Investigation file
  - Study file
- - Assay file (with associated data files)
+ - Assay file (with associated Dataset files)
 
 The Investigation file contains all the information needed to understand the overall goals and means used in an
 experiment; experimental steps (or sequences of events) are described in the Study and in the Assay file(s). For each
@@ -489,6 +489,25 @@ For example,
 
 The Assay Table file implements the ``Assay`` graphs from the ISA Abstract Model.
 
+
+Data Pointers
+-------------
+
+In cases where the input or output of a process of the ``Assay`` is not a complete ``Data File``, but rather a part of the data file, a ``Data Pointer`` column SHOULD be used to qualify the ``Data File`` column. 
+These pointer columns contain a string pointing to the header or field name by which the data object can be found in the file.
+
+The type of the ``Data Pointer`` column MUST match the corresponding ``Data File`` column: ``Image Pointer`` for ``Image File``, ``Raw Data Pointer`` for ``Raw Data File`` and ``Derived Data Pointer`` for ``Derived Data File``.
+
+We might represent measurement values of different samples being written to the same file like this:
+
++-------------+--------------+-----------------+------------------+
+| Source Name | Protocol REF | Raw Data File   | Raw Data Pointer |
++=============+==============+=================+==================+
+| source1     | measurement  | measurement.csv | source1_quant    |
++-------------+--------------+-----------------+------------------+
+| source2     | measurement  | measurement.csv | source2_quant    |
++-------------+--------------+-----------------+------------------+
+
 Special cases
 -------------
 
@@ -533,12 +552,75 @@ Capturing data resulting from the use of mass spectrometry in metabol/nomics req
 Metabolite Assignment File (inter alia); such a file is currently under development in collaboration with the
 Metabolomics Standards Initiative (MSI).
 
-Data Files
-----------
-ISA-Tab focuses on structuring experimental metadata; raw and derived data files are considered as external files.
-The Assay file can refer to one or more of these external data files. For guidelines on how to
-format these data files, users should refer to the relevant standards group or reference
-repository.
+Dataset files
+=============
+
+``Dataset`` Table files are structured with fields organized on a per-row basis. The first row MUST be used for column headers. 
+They are used to give additional metadata about the values stored in a data file or about full data files.
+In contrast to Assay and Study files, all headers besides comments MUST appear at most once.
+
+.. attention::
+    Comments are also allowed in Dataset files, in a similar fashion to how they are used in the Investigation
+    file. Columns headed with ``Comment[<comment name>]`` MAY appear anywhere in the table.
+
+``<entity> File`` MUST be used as an identifier for the file path of the data node. 
+It SHOULD be either the file name, a relative file path or an absolute file path.
+
+``<entity> Pointer`` SHOULD be used to further qualify the data node, if it is not the full data file, but a part of the data file.
+It MAY be a REGEX pattern.
+
+``Generated By`` MAY be used to specify the tool by which this data node was created.
+
+``Label`` MAY be used to give the data node an additional, free-text name.
+
+Explication
+-----------
+To contextualize the meaning of the values, an ``Explication`` MAY be used. In this case, an ``Explication`` heading MUST be present, and MAY be further annotated as an ``Ontology Annotation``.
+
+For example, to contextualize the values in the datafile as being an ``Arithmetic Mean`` qualified as an ``Ontology Annotation`` from the NCIT Ontology declared
+in the Ontology Sources with ``NCIT``:
+
++-----------------+-----------------+-------------------------+
+| Explication     | Term Source REF | Term Accession Number   |
++=================+=================+=========================+
+| Arithmetic Mean | NCIT            | C53319                  |
++-----------------+-----------------+-------------------------+
+
+Unit
+----
+Where the value of a data object is numeric, a ``Unit`` MAY be used to qualify the quantity. In this case, a ``Unit`` heading MUST be present, and MAY be further annotated as an ``Ontology Annotation``.
+
+For example, to qualify the values in the datafile with a ``Unit`` ``parts per million`` qualified as an ``Ontology Annotation`` from the Units Ontology declared
+in the Ontology Sources with ``UO``:
+
++-------------------+-----------------+----------------------------+
+| Unit              | Term Source REF | Term Accession Number      |
++===================+=================+============================+
+| parts per million | UO              | http://.../obo/UO_0000169  |
++-------------------+-----------------+----------------------------+
+
+Object Type
+-----------
+To specify the format in which a data object will appear in the data file, an ``Object Type`` SHOULD be used. In this case, an ``Object Type`` heading MUST be present, and MAY be further annotated as an ``Ontology Annotation``.
+
+For example, to specify that the data objects in the datafile are written as ``Float`` values, qualified as an ``Ontology Annotation`` from the NCIT Ontology declared
+in the Ontology Sources with ``NCIT``:
+
++-------------+-----------------+-----------------------+
+| Object Type | Term Source REF | Term Accession Number |
++=============+=================+=======================+
+| Float       | NCIT            | C48150                |
++-------------+-----------------+-----------------------+
+
+Dataset Table file
+------------------
+The ``Dataset`` file does not represent processes with inputs and outputs, like studies and assays, but rather statically describes the values that can be found in data files. 
+It contextualizes the data, so that it can be more easily interpreted without prior knowledge about it.
+
+Dataset Table files SHOULD have file names corresponding to the pattern ``d_*.txt``, e.g. ``d_Dataset01.txt``
+
+For example:
+
+.. literalinclude:: _static/isatab/d_sleuth.txt
 
-For submission or transfer, ISA-Tab files and associated data files MAY be packaged into an ISArchive, a zip file
-containing all the files together.
+The Dataset Table file implements the ``Data`` graphs from the ISA Abstract Model.