YouTube videos will accompany each notebook. When these videos are published, they can be included using the following code:
from IPython.display import YouTubeVideo
YouTubeVideo('FGHJhAFf1W0')
Be sure to change the ID to that of your video!
diff --git a/.ipynb_checkpoints/05_how_to_structure_your_NetCDF_file_or_files-checkpoint.ipynb b/.ipynb_checkpoints/05_how_to_structure_your_NetCDF_file_or_files-checkpoint.ipynb
index 7cc3c3d..e0a4e6d 100644
--- a/.ipynb_checkpoints/05_how_to_structure_your_NetCDF_file_or_files-checkpoint.ipynb
+++ b/.ipynb_checkpoints/05_how_to_structure_your_NetCDF_file_or_files-checkpoint.ipynb
@@ -14,8 +14,6 @@
 "There are far too many to go through all the examples. We will just look at a few. But the aim is that this tutorial will teach you what you should be thinking about when deciding to structure your data, so you can tackle any setup you have.\n",
 "\n",
-"NOTE TO SELF. SCREEN SHOWING IMAGE OF EXAMPLE THEN QUICK DISPLAY OF CODE AND HOW THE XARRAY DATASET LOOKS. SHOW RELEVANT SECTION OF CF CONVENTIONS WHERE APPLICABLE.\n",
-"\n",
 "## What to consider\n",
Frozen({'PRES': 320})
+FrozenMappingWarningOnValuesAccess({'PRES': 320})
/tmp/ipykernel_76988/304843689.py:1: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
+ xrds.dims['PRES']
+
320
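As the FutureWarning above suggests, Dataset.sizes is the forward-compatible way to look up a dimension length. A minimal sketch, assuming an xarray dataset xrds with a PRES dimension as in the output above:

# Dataset.sizes returns the same mapping of dimension name to length,
# without triggering the FutureWarning raised by Dataset.dims['PRES']
n_levels = xrds.sizes['PRES']
print(n_levels)  # 320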
That doesn’t look nice! If we look at the values, we can see what is happening.
It makes more sense to have depth on the y-axis, so let’s do that.
Alternatively, we can easily create a scatter plot.
Let’s try some alternative plots
We can also plot multiple variables together
But if we actually want to see the labels, we need to use the matplotlib library directly.
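A minimal sketch of what that might look like (the variable names are placeholders for whatever is in your dataset, and matplotlib is assumed to be imported as plt):

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# Label each line so that a legend can identify the variables
ax.plot(xrds['time'], xrds['sea_water_temperature'], label='sea water temperature')
ax.plot(xrds['time'], xrds['wind_speed'], label='wind speed')
ax.set_xlabel('time')
ax.legend()
plt.show()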
We can now do all kinds of things to customise this plot using the full matplotlib library.
Without the coastlines this is difficult to interpret. We can use cartopy to help us here.
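A minimal sketch of what that can look like (assuming the gridded dataset from earlier and matplotlib imported as plt; the projection choice is illustrative):

import cartopy.crs as ccrs
import matplotlib.pyplot as plt

# Draw one time step and depth level on a map projection
ax = plt.axes(projection=ccrs.PlateCarree())
xrds['sea_water_temperature'].isel(time=0, depth=0).plot(ax=ax, transform=ccrs.PlateCarree())
ax.coastlines()  # may trigger a one-off download of Natural Earth coastline data
plt.show()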
This is designed only to be a short demonstration of how easy it can be to plot data out of a CF-NetCDF file. Matplotlib is very powerful and flexible, and you can use it to create all sorts of plots! There are many good tutorials on how to use matplotlib online, so go and explore!
diff --git a/_build/html/03_extracting_data_to_different_formats.html b/_build/html/03_extracting_data_to_different_formats.html
index e94733b..ea8413a 100644
--- a/_build/html/03_extracting_data_to_different_formats.html
+++ b/_build/html/03_extracting_data_to_different_formats.html
[-1.68 -1.666 -1.661 ... nan nan nan]
+time: 2022-03-05T10:49:14.000000000
+latitude: 81.5868
+longitude: 30.7572
The object has a dimension of length 10 and a coordinate variable (time) which has a dimension of time. The values are all integers.
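A minimal sketch of the object just described (assuming numpy and xarray are imported as np and xr):

import numpy as np
import xarray as xr

# A coordinate variable shares its name with the dimension it describes
xrds = xr.Dataset(coords={'time': np.arange(10)})
xrds.sizes['time']   # 10
xrds['time'].values  # array([0, 1, 2, ..., 9])

Some more examples.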
array([[7, 5],
       [9, 6],
       [4, 1]])

array([[[31.87084598, 33.49050905],
        [30.84985151, 33.64069725],
        [30.51071245, 34.06787226]],

       [[33.54525355, 30.93298548],
        [31.96617178, 34.41262319],
        [34.62147345, 30.91279036]],

       [[33.71462586, 31.95494427],
        [34.67103075, 33.40567996],
        [30.54145318, 33.76023112]],

       [[32.82419956, 34.46975371],
        [34.07976598, 32.60474244],
        [30.8113405 , 32.79622084]],

       [[34.16893982, 32.06003363],
        [33.26530911, 34.15051125],
        [32.34662455, 32.6357126 ]]])
Hurrah! Your data are in the xarray dataset object. But are you ready to export a NetCDF file? Will that file be compliant with the FAIR principles? No! We need metadata.
-Variable attributes are metadata that describe the variables.
-The Climate & Forecast (CF) conventions dictate which variable attributes should be included for different data.
-For example for latitude:
+Variable attributes are metadata that describe the variables. Global attributes are metadata that describe the file as a whole. You can find a list of attributes provided by the Climate & Forecast (CF) conventions here:
+https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#attribute-appendix
+The table in the link above specifies which attributes can be used as global attributes and which can be used as variable attributes. Some attributes can be used as either.
+The CF conventions are light on discovery metadata. Discovery metadata are metadata that can be used to find data: for example, when and where the data were collected and by whom, some keywords, etc. So we also use the ACDD convention - the Attribute Convention for Data Discovery.
+https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3
+ACDD is a list of recommendations. SIOS advises that people follow the requirements of the Arctic Data Centre, which are compliant with the ACDD conventions; requirements are a more effective way to encourage consistency than recommendations:
+https://adc.met.no/node/4
+The CF conventions provide examples of which variable attributes you should include in your CF-NetCDF file. For example, for latitude:
https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#latitude-coordinate
Let’s replicate that setup.
-These attributes are well documented on the ACDD convention host page, here: https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#Highly_Recommended_Variable_Attributes
+Additionally, the ACDD convention recommends adding an attribute coverage_content_type, which is used to state whether the data are modelResult, physicalMeasurement or something else; see the list here:
+https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#Highly_Recommended_Variable_Attributes
+And remember we might want to select additional applicable attributes for our variables from this section of the CF conventions:
+https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#attribute-appendix
xrds['latitude'].attrs['standard_name'] = 'latitude' # One at a time
@@ -4726,7 +4737,7 @@ Variable attributes
@@ -4749,6 +4760,24 @@ Variable attributes
'units': 'μg m-3',
'coverage_content_type': 'physicalMeasurement',
}
+xrds['salinity'].attrs = {
+ 'standard_name': 'sea_water_salinity',
+ 'long_name': 'a description about each variable in your own words',
+ 'units': '1e-3',
+ 'coverage_content_type': 'physicalMeasurement',
+}
+xrds['temperature'].attrs = {
+ 'standard_name': 'sea_water_temperature',
+ 'long_name': 'a description about each variable in your own words',
+ 'units': 'degree_Celsius',
+ 'coverage_content_type': 'physicalMeasurement',
+}
+xrds['wind_speed'].attrs = {
+ 'standard_name': 'wind_speed',
+ 'long_name': 'a description about each variable in your own words',
+ 'units': 'm s-1',
+ 'coverage_content_type': 'physicalMeasurement',
+}
# And so on for each variable..
xrds
@@ -5127,45 +5156,44 @@ Variable attributes
-Global attributes#
-The CF conventions are light on discovery metadata. Discovery metadata are metadata that can be used to find data. For example, when and where the data were collected and by whom, some keywords etc. So we also use the ACDD convention - The Attribute Convention for Data Discovery.
-https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3
-This is a list of recommendations. SIOS advises that people follow the requirements of the Arctic Data Centre, here. Requirements are a more effective way to encourage consistency than recommendations. These requirements are compliant with the ACDD conventions:
-https://adc.met.no/node/4
-Go through and add each required attribute and any others you wish to. You are also welcome to add any custom attributes on top of these requirements. Similarly to variable attributes, this can either be done one by one or all in one in a dictionary.
+Global attributes#
+As mentioned above, the requirements of the Arctic Data Centre for global attributes (based on the ACDD convention) can serve as a guide for which global attributes you should include. https://adc.met.no/node/4
+And remember we might want to select additional applicable global attributes from this section of the CF conventions:
+https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#attribute-appendix
+Go through and add each required attribute and any others you wish to. You are also welcome to add any global attributes from the CF conventions as well as any custom attributes on top of these requirements. As with variable attributes, this can be done either one by one or all at once in a dictionary.
xrds.attrs['title'] = 'my title' # One by one
@@ -5183,8 +5211,8 @@ Global attributes
'creator_url': '; https://orcid.org/0000-0002-9746-544X', # ORCID is best practice if possible. Other URLs okay, or leave blank for authors that don't have one.
'time_coverage_start': '2020-05-10T08:14:58Z',
'time_coverage_end': '2020-05-10T11:51:12Z',
- 'keywords': '',
- 'keywords_vocabulary': 'GCMD:GCMD Keywords',
+ 'keywords': 'wind_speed, sea_water_temperature, sea_water_salinity, mass_concentration_of_chlorophyll_a_in_sea_water',
+ 'keywords_vocabulary': 'CF:NetCDF COARDS Climate and Forecast Standard Names',
'institution': 'Your Institution',
'publisher_name': 'Publisher Name', # Data centre where your data will be published
'publisher_email': 'publisher@email.com',
@@ -5230,8 +5258,8 @@ Global attributes
Exporting your xarray object to a NetCDF file#
-Finally you need to export your data. Firstly, you need to specify how each variable should be encoded.
+Finally you need to export your data. Firstly, you can specify how each variable should be encoded. This is an optional step - the encoding will be inferred from the data type in Python if you don't specify it manually.
Fill values: The fill value will be used to fill in any missing values. It should be an unrealistic value that will obviously show up as a spike in the data when plotted. The _FillValue is a special variable attribute that some software can understand, so when one opens the data, the fill values are replaced by NaNs again.
dtype: What type of data does your variable contain? Characters? Integers? Decimal numbers? Some commonly used dtype values are:
@@ -5265,7 +5294,7 @@ Exporting your xarray object to a NetCDF file
-# Specifiy encoding
+# Specify encoding - you can write a file without this and encoding will be assumed, but you should check in any case.
myencoding = {
'depth': {
'dtype': 'int32',
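The encoding dictionary is truncated in the hunk above; a complete sketch of what the encoding and export can look like (the variable names and fill value here are illustrative, not necessarily the author's exact choices):

myencoding = {
    'depth': {
        'dtype': 'int32',
        '_FillValue': None,  # coordinate variables should not normally need a fill value
    },
    'sea_water_temperature': {
        'dtype': 'float32',
        '_FillValue': -9999.0,  # an obviously unrealistic value
    },
}

xrds.to_netcdf('my_data.nc', encoding=myencoding)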
diff --git a/_build/html/05_how_to_structure_your_NetCDF_file_or_files.html b/_build/html/05_how_to_structure_your_NetCDF_file_or_files.html
index 7476121..cef1e91 100644
--- a/_build/html/05_how_to_structure_your_NetCDF_file_or_files.html
+++ b/_build/html/05_how_to_structure_your_NetCDF_file_or_files.html
@@ -176,7 +176,6 @@
- 06: Creating multiple CF-NetCDF files in one go
- 07: Combining data from multiple netcdf files
- 08: Ancillary variables
-- 09: Cells and cell methods
@@ -371,13 +370,12 @@ Contents
@@ -396,7 +394,6 @@ 05: How to structure your NetCDF file or files
https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html
There are far too many to go through all the examples. We will just look at a few. But the aim is that this tutorial will teach you what you should be thinking about when deciding to structure your data, so you can tackle any setup you have.
-NOTE TO SELF. IN THE VIDEO ONLY SHOW ANIMATIONS AND EXPLAIN AND REDUCE THE AMOUNT OF CODE SHOWN. OR THE VIDEO WILL BE TOO LONG. MENTION THEY CAN LOOK AT EXAMPLES IN THE NOTEBOOK ATTACHED.
What to consider#
Whenever we are creating one or many CF-NetCDF files, we should try to make sure they are
@@ -410,6 +407,35 @@ What to consider
+An appropriate level of granularity for your data#
+How many CF-NetCDF files should you divide your data into?
+There are many things to consider here. Quite often the granularity of data is not fine enough, as data creators encode their discovery-level metadata (global attributes) considering only what is useful for human interpretation and not for machine action. Finer-granularity data is easier to build services upon, such as on-the-fly visualisation of datasets or aggregation of datasets together. Finer granularity allows computers to do the data preparation so that humans can focus on interpretation and analysis.
+The data creator should also consider what granularity is useful for the data users. This will vary on a case-by-case basis. Perhaps you have a long time series of data, spanning many years. Many data users might not be interested in the entire time series, but might want to access data for a single day or month. Therefore, you might consider dividing your data into daily or monthly files to simplify data use. You could also separate different data variables into different files if you envisage that each file might attract different data users.
+Data from different stations should also be divided into different datasets. Imagine we have set up 6 weather stations and want to publish the fictitious data.
+
+We have 2 options for how to structure our data.
+
+Include all 6 time series in one NetCDF file
+Create a separate file for each time series.
+
+The best practice is to create a separate file for each time series. This might surprise some of you, and the idea of creating dozens or maybe hundreds of files might sound daunting or even ridiculous.
+Firstly, you shouldn’t think of your data in isolation. Your data are a contribution to a much larger network of data. Imagine that each marker below is a weather station, and the colour is the person or project who is responsible for the data. Imagine you are looking for data inside the red square. For many data users, the project that the data were collected as part of is irrelevant. Many users want to access data from all the projects and combine them.
+
+Sure, it is easier for you and the people in your project to group the data by project. But you already have the data and can group them how you like between yourselves. When it comes to publishing data, we need to think about the data users outside of the project. Data, if published correctly, could be used for decades into the future, perhaps longer in some cases. The majority of your data users might have never heard of your project!
+Creating individual files for each time series has another advantage; each file can have its own set of global attributes! This has many advantages, including:
+
+If one time series was processed differently, this can be described.
+Each file has its own bounding coordinates. Imagine you go to a data centre or data access portal and you are looking for data on a map. A file with many time series will show up as a box on a map. Without opening the file up, it is difficult to see which locations have been sampled. What if a potential data user is only interested in a certain location within the bounding box? A file that contains a single time series would show up as a point on a map.
+Each time series can be published separately and have its own citation - including different authors if that is desired. If someone uses only one or a few of the files, they can cite only the relevant ones. It is then clear which data they have used in their work. Many good data centres are now able to provide you with a ‘parent’ page for your entire data collection with its own DOI and recommended citation that one can cite if they are using most or all of the files.
+
+Some might see it as a hassle to download and open lots of files. Some services are already being developed to allow users to download data from many similar files into a single file. More services will surely be available in the future. When we are publishing data, we should think not only about what is possible now, but what will be possible in the future.
+But for now, in the next few tutorials, we will look at how you can create and access multiple files quickly and easily today using Python!
+Need some convincing? Let’s look at another example for vertical profiles (e.g. CTD data).
+
+
+Multiple profiles#
Let’s import the modules we will use for this tutorial.
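A minimal sketch of the imports the examples below rely on (xarray and numpy, following the xr and np aliases used throughout):

import xarray as xr
import numpy as np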
@@ -419,22 +445,54 @@ What to consider
-One profile or time series#
-The simplest setup we can have (except a single point) is data with a single dimension. This could be a time series or a vertical profile. Below is some basic code to add an depth dimension, a coordinate variable and data variable (sea water temperature) to a NetCDF file.
+Imagine we have the following 4 depth profiles:
depth = [10,20,30]
-temperature = [21.42, 21.21, 20.98]
-
-
-xrds = xr.Dataset(
+temperature_1 = [21.42, 21.21, 20.98]
+temperature_2 = [22.08, 21.56, 20.42]
+temperature_3 = [22.11, 21.38, 20.12]
+temperature_4 = [21.87, 21.01, 19.99]
+
+
+
+
+Many people publish all the data from a single cruise in one file. But we have just learned that this is not the best practice!
+If we wanted to publish all the profiles in one file, we need another dimension to distinguish between the profiles. Time is a sensible choice.
+
+
+time = [0,1,2,3]
+
+
+
+
+But now we need to make a 2D array of our temperature values.
+
+
+temperature_2d = [temperature_1, temperature_2, temperature_3, temperature_4]
+temperature_2d
+
+
+
+
+[[21.42, 21.21, 20.98],
+ [22.08, 21.56, 20.42],
+ [22.11, 21.38, 20.12],
+ [21.87, 21.01, 19.99]]
+
+
+
+
+We can fit the data into our xarray object like this:
+
+
+xrds = xr.Dataset(
coords = {
- 'depth': depth
+ 'depth': depth,
+ 'time': time
},
data_vars = {
- 'sea_water_temperature': ('depth',temperature)
+ 'sea_water_temperature': (['time','depth'],temperature_2d)
}
)
@@ -807,36 +865,34 @@ One profile or time series
-A time series for instruments that move#
-If we tried to use latitude, longitude, depth/elevation and time dimensions for our data variables, there would be a lot of empty space! Not all coordinates have to be coordinate variables with their own dimensions!
-In this case we can use time as our dimension, and everything else as 1D variables.
+But don’t forget that we now also need to include the latitude and longitude as variables. Latitude and longitude aren’t required variables if your NetCDF file contains a single vertical profile - they can be written as global attributes instead.
+But in this case, latitude and longitude can be 1D variables with a dimension of time.
-time = [0,1,2,3,4,5,6,7,8,9]
-latitude = [6.4970, 6.4756, 6.5584, 6.5087, 6.4815, 6.5029, 6.4279, 6.3409, 6.4066, 6.4134]
-longitude = [-66.6972, -66.6399, -66.5838, -66.6357, -66.7313, -66.686, -66.7192, -66.6737, -66.7594, -66.8479]
-depth = [6.6388, 5.8899, 5.0289, 4.7409, 5.5595, 5.4532, 5.7104, 6.2129, 6.2548, 6.5595]
-temperature = [10.77, 10.15, 9.69, 9.46, 9.06, 9.97, 9.66, 10.25, 10.25, 10.56]
+latitude = [78.5142,79.2833,79.9840,80.4228]
+longitude = [30.4231,30.3591,30.4994, 30.4200]
xrds = xr.Dataset(
coords = {
+ 'depth': depth,
'time': time
},
data_vars = {
- 'longitude': ('time', longitude),
+ 'sea_water_temperature': (['time','depth'],temperature_2d),
'latitude': ('time', latitude),
- 'depth': ('time', depth),
- 'sea_water_temperature': ('time', temperature)
+ 'longitude': ('time', longitude)
}
- )
+)
xrds
@@ -1207,45 +1263,35 @@ A time series for instruments that move
-Data on a multidimensional grid#
-Some data sit on a grid with multiple dimensions. Some examples are satellite data or data output from certain models. Imagine we have sea water temperature data exported from a model. There are 4 dimensions; latitude, longitude, depth and time. In this case, let’s imagine that the model exports data to a regular grid.
-I am going to use random values! If you have data, your first job is get your data into a multidimensional array. You will find some help in tutorial 04 on how to convert tabular data to a multidimensional array. ChatGPT can also be very helpful - but make sure the values are going in to the right places.
+This NetCDF file has more variables and dimensions than one that includes only a single profile. This leaves more room for variation in how people might structure their data. Sometimes you see people including latitude and longitude as coordinate variables with themselves as their dimensions, like below.
-depth = np.arange(0,1001,1) # 0 to 1000, incrementing by 1
-latitude = np.arange(75,82.1,0.1) # 75 to 82, incrementing by 0.1
-longitude = np.arange(28,32.1,0.1) # 28 to 32, incrementing by 0.1
-time = [0,1,2,3]
-
-# Create 4D array of random temperature values between 0 and 2
-temperature = np.random.uniform(0, 2, size=(len(time), len(depth), len(latitude), len(longitude)))
+latitude = [78.5142,79.2833,79.9840,80.4228]
+longitude = [30.4231,30.3591,30.4994, 30.4200]
xrds = xr.Dataset(
coords = {
- 'time': time,
'depth': depth,
- 'latitude': latitude,
- 'longitude': longitude
- },
+ 'time': time
+ },
data_vars = {
- 'sea_water_temperature': (['time', 'depth', 'latitude', 'longitude'], temperature)
+ 'sea_water_temperature': (['time','depth'],temperature_2d),
+ 'latitude': ('latitude', latitude),
+ 'longitude': ('longitude', longitude)
}
- )
+)
xrds
@@ -1616,130 +1662,34 @@ Data on a multidimensional grid
-Random points in space and time (ungridded data)#
-Often data don’t fit on a regular grid. What then?
-You could bin your data to a regular grid and specify what you have done in the metadata. You (the data creator) are best person to know if this is a suitable thing to do for your own data. However, in many cases, you will not want to bin your data.
-The best practice is to use a common ‘counter’ dimension for all your coordinates. You can think of this as an index or counter of your data points.
-This method is quite easy to write. You just need to create 1D arrays for all your coordinates and data variables.
+If we are being pedantic, we could say that latitude and longitude are not explicitly linked to time in this case. One has to make the assumption that they are linked.
+It is also easier to make mistakes when creating the files in this case - for example by accidentally encoding latitude with the wrong length.
-num_points = 100
-node = np.arange(0,num_points)
-
-# Generate some example irregular data
-latitudes = np.random.uniform(low=-90, high=90, size=num_points)
-longitudes = np.random.uniform(low=-180, high=180, size=num_points)
-data_values = np.random.rand(num_points) # Replace with your actual data
+latitude = [78.5142,79.2833,79.9840]
+longitude = [30.4231,30.3591,30.4994, 30.4200]
-# Create an xarray Dataset
xrds = xr.Dataset(
coords = {
- 'node': node
+ 'depth': depth,
+ 'time': time
},
data_vars = {
- 'data_var': ('node', data_values),
- 'latitude': ('node', latitudes),
- 'longitude': ('node', longitudes),
+ 'sea_water_temperature': (['time','depth'],temperature_2d),
+ 'latitude': ('latitude', latitude),
+ 'longitude': ('longitude', longitude)
}
)
@@ -2112,169 +2062,166 @@ Random points in space and time (ungridded data)
-Multiple time series#
-Imagine we have set up 6 weather stations and want to publish the data.
-We have 2 options for how to struture our data.
-
-Include all 6 time series in one NetCDF file
-Create a separate time series for each profile.
-
-The best practice is to create a separate file for each time series. This might suprise some of you, and the idea of creating dozens or maybe hundreds of files might sound daunting or even ridiculous.
-Firstly, you shouldn’t think of your data in isolation. Your data are a contribution to a much larger network of data. Imagine that each marker below is a weather station, and the colour is the person or project who is responsible for the data. Imagine you are a looking for data inside the red square. For many data users, the project that the data were collected as part of is irrelevant. Many users want to access data from all the projects and combine them.
-Sure, it is easier for you and the people in your project to group the data by project. But you already have the data and can group them how you like between yourselves. When it comes to publishing data, we need to think about the data users outside of the project. Data, if published correctly, could be used for decades in to the future, perhaps longer in some cases. The majority of your data users might never have heard of your project!
-Creating individual files for each time series has another advantage; each file can have its own set of global attributes! This has many advantages, including:
-
-If one time series was processed differently, this can be described.
-Each file has its own bounding coordinates. Imagine you go to a data centre or data access portal and you are looking for data on a map. A file with many time series will show up as a box on a map. Without opening the file up, it is difficult to see which locations have been sampled. What if a potential data user is only interested in a certain location within the bounding box? A file that contains a single time series would show up as a point on a map.
-Each time series can be published separately and have its own citation - including different authors if that is desired. If someone uses only one or a few of the files, they can only cite the relevant ones. It is then clear which data they have used in their work. Many good data centres are now able to provide you with a ‘parent’ page for your entire data collection with its own DOI and recommended citation that one can cite if they are using most or all of the files.
-
-Some might see it as a hassle to download and open lots of files. Some services are already being developed to allow data to download data from many similar files into a single file. More services will surely be available in the future. When we are publishing data, we should think not only about what is possible now, but what will be possible in the future.
-But for now, in the next few tutorials, we will look at how you can create and access multiple files quickly and easily today using Python!
-Need some convincing? Let’s look at another example for vertical profiles (e.g. CTD data).
-
-
-Multiple profiles#
-Imagine we have the following 4 depth profiles:
-
-
-depth = [10,20,30]
-temperature_1 = [21.42, 21.21, 20.98]
-temperature_2 = [22.08, 21.56, 20.42]
-temperature_3 = [22.11, 21.38, 20.12]
-temperature_4 = [21.87, 21.01, 19.99]
-
-
-
-
-Many people publish all the data from a single cruise in one file. But we have just learned that this is not the best practice!
-If we wanted to publish all the profiles in one file, we need another dimension to distinguish between the profiles. Time is a sensible choice.
-
-
-time = [0,1,2,3]
-
-
-
-But now we need to made a 2D array of our temperature values.
+No error! It would be easy to overlook that we have only 3 latitude values and 4 longitude values.
+If you assign a dimension of time to your latitude and longitude variables, an error is returned if, for example, your latitude variable is the wrong length.
-temperature_2d = [temperature_1, temperature_2, temperature_3, temperature_4]
-temperature_2d
+latitude = [78.5142,79.2833,79.9840]
+longitude = [30.4231,30.3591,30.4994, 30.4200]
+
+xrds = xr.Dataset(
+ coords = {
+ 'depth': depth,
+ 'time': time
+ },
+ data_vars = {
+ 'sea_water_temperature': (['time','depth'],temperature_2d),
+ 'latitude': ('time', latitude),
+ 'longitude': ('time', longitude)
+ }
+)
-[[21.42, 21.21, 20.98],
- [22.08, 21.56, 20.42],
- [22.11, 21.38, 20.12],
- [21.87, 21.01, 19.99]]
+---------------------------------------------------------------------------
+ValueError Traceback (most recent call last)
+Cell In[9], line 4
+ 1 latitude = [78.5142,79.2833,79.9840]
+ 2 longitude = [30.4231,30.3591,30.4994, 30.4200]
+----> 4 xrds = xr.Dataset(
+ 5 coords = {
+ 6 'depth': depth,
+ 7 'time': time
+ 8 },
+ 9 data_vars = {
+ 10 'sea_water_temperature': (['time','depth'],temperature_2d),
+ 11 'latitude': ('time', latitude),
+ 12 'longitude': ('time', longitude)
+ 13 }
+ 14 )
+
+File ~/anaconda3/lib/python3.11/site-packages/xarray/core/dataset.py:694, in Dataset.__init__(self, data_vars, coords, attrs)
+ 691 if isinstance(coords, Dataset):
+ 692 coords = coords._variables
+--> 694 variables, coord_names, dims, indexes, _ = merge_data_and_coords(
+ 695 data_vars, coords
+ 696 )
+ 698 self._attrs = dict(attrs) if attrs is not None else None
+ 699 self._close = None
+
+File ~/anaconda3/lib/python3.11/site-packages/xarray/core/dataset.py:423, in merge_data_and_coords(data_vars, coords)
+ 419 coords = create_coords_with_default_indexes(coords, data_vars)
+ 421 # exclude coords from alignment (all variables in a Coordinates object should
+ 422 # already be aligned together) and use coordinates' indexes to align data_vars
+--> 423 return merge_core(
+ 424 [data_vars, coords],
+ 425 compat="broadcast_equals",
+ 426 join="outer",
+ 427 explicit_coords=tuple(coords),
+ 428 indexes=coords.xindexes,
+ 429 priority_arg=1,
+ 430 skip_align_args=[1],
+ 431 )
+
+File ~/anaconda3/lib/python3.11/site-packages/xarray/core/merge.py:724, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value, skip_align_args)
+ 719 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
+ 720 variables, out_indexes = merge_collected(
+ 721 collected, prioritized, compat=compat, combine_attrs=combine_attrs
+ 722 )
+--> 724 dims = calculate_dimensions(variables)
+ 726 coord_names, noncoord_names = determine_coords(coerced)
+ 727 if compat == "minimal":
+ 728 # coordinates may be dropped in merged results
+
+File ~/anaconda3/lib/python3.11/site-packages/xarray/core/variable.py:3008, in calculate_dimensions(variables)
+ 3006 last_used[dim] = k
+ 3007 elif dims[dim] != size:
+-> 3008 raise ValueError(
+ 3009 f"conflicting sizes for dimension {dim!r}: "
+ 3010 f"length {size} on {k!r} and length {dims[dim]} on {last_used!r}"
+ 3011 )
+ 3012 return dims
+
+ValueError: conflicting sizes for dimension 'time': length 3 on 'latitude' and length 4 on {'time': 'sea_water_temperature', 'depth': 'sea_water_temperature'}
-We can fit the data into our xarray object like this:
+Okay, that is all well and good. These issues might seem small. However, often our profiles are different lengths, like this.
-xrds = xr.Dataset(
+depth_1 = [10,20,30]
+temperature_1 = [21.42, 21.21, 20.98]
+depth_2 = [10,20,30,40,50]
+temperature_2 = [22.08, 21.56, 20.42, 19.23, 18.53]
+depth_3 = [10,20,30,40,50,60]
+temperature_3 = [22.42, 21.21, 20.12, 19.45, 18.72, 16.99]
+depth_4 = [10,20]
+temperature_4 = [21.84, 21.49]
+
+
+
+
+But a 2D array needs to have profiles of equal lengths. To address this, we need to fill the rest of our 2D array with NaNs.
+
+
+# Finding the maximum length among the depth arrays
+max_depth_length = max(len(depth_1), len(depth_2), len(depth_3), len(depth_4))
+
+# Creating arrays filled with NaNs
+temp_arr_1 = np.full(max_depth_length, np.nan)
+temp_arr_2 = np.full(max_depth_length, np.nan)
+temp_arr_3 = np.full(max_depth_length, np.nan)
+temp_arr_4 = np.full(max_depth_length, np.nan)
+
+# Filling the arrays with available temperature data
+temp_arr_1[:len(temperature_1)] = temperature_1
+temp_arr_2[:len(temperature_2)] = temperature_2
+temp_arr_3[:len(temperature_3)] = temperature_3
+temp_arr_4[:len(temperature_4)] = temperature_4
+
+# Creating a 2D array
+temperature_2d = np.array([temp_arr_1, temp_arr_2, temp_arr_3, temp_arr_4])
+temperature_2d
+
+
+
+
+array([[21.42, 21.21, 20.98, nan, nan, nan],
+ [22.08, 21.56, 20.42, 19.23, 18.53, nan],
+ [22.42, 21.21, 20.12, 19.45, 18.72, 16.99],
+ [21.84, 21.49, nan, nan, nan, nan]])
+
+
+
+
+
+
+latitude = [78.5142,79.2833,79.9840,80.4228]
+longitude = [30.4231,30.3591,30.4994, 30.4200]
+
+xrds = xr.Dataset(
coords = {
- 'depth': depth,
+ 'depth': depth_3, # The longest profile
'time': time
},
data_vars = {
- 'sea_water_temperature': (['time','depth'],temperature_2d)
+ 'sea_water_temperature': (['time','depth'],temperature_2d),
+ 'latitude': ('time', latitude),
+ 'longitude': ('time', longitude)
}
)
@@ -2647,18 +2594,65 @@ Multiple profiles
+
+depth_1 = [5,20,50]
+temperature_1 = [21.42, 21.21, 20.98]
+depth_2 = [10,35,70,90,100]
+temperature_2 = [22.08, 21.56, 20.42, 19.23, 18.53]
+depth_3 = [10,25,40,60,80,100]
+temperature_3 = [22.42, 21.21, 20.12, 19.45, 18.72, 16.99]
+depth_4 = [12,24]
+temperature_4 = [21.84, 21.49]
+
+# Merge all unique depth values
+all_depths = sorted(set(depth_1 + depth_2 + depth_3 + depth_4))
+
+# Create arrays filled with NaNs for temperatures
+all_temperatures = []
+for depths, temps in [(depth_1, temperature_1), (depth_2, temperature_2),
+ (depth_3, temperature_3), (depth_4, temperature_4)]:
+ temp_arr = np.full(len(all_depths), np.nan)
+ # Adding temperature values to each array
+ for depth, temp in zip(depths, temps):
+ index = all_depths.index(depth)
+ temp_arr[index] = temp
+ all_temperatures.append(temp_arr)
+
+# Create a 2D array
+temperature_2d = np.array(all_temperatures)
+temperature_2d
+
+
+
+
+array([[21.42, nan, nan, 21.21, nan, nan, nan, nan, 20.98,
+ nan, nan, nan, nan, nan],
+ [ nan, 22.08, nan, nan, nan, nan, 21.56, nan, nan,
+ nan, 20.42, nan, 19.23, 18.53],
+ [ nan, 22.42, nan, nan, nan, 21.21, nan, 20.12, nan,
+ 19.45, nan, 18.72, nan, 16.99],
+ [ nan, nan, 21.84, nan, 21.49, nan, nan, nan, nan,
+ nan, nan, nan, nan, nan]])
+
+
+
-But don’t forget that we now also need to include the latitude and longitude as variables. Latitude and longitude aren’t required variables if your NetCDF file contains a single vertical profile - they can be written as global attributes instead.
-But in this case, latitude and longitude can be 1D variables with a dimension of time.
latitude = [78.5142,79.2833,79.9840,80.4228]
@@ -2666,7 +2660,7 @@ Multiple profiles
xrds = xr.Dataset(
coords = {
- 'depth': depth,
+ 'depth': all_depths,
'time': time
},
data_vars = {
@@ -3045,35 +3039,63 @@ Multiple profiles
+on-the-fly visualisation of data on a data centre or data access portal website
+aggregating data from several files together
+allowing users to access only a subset of the data in a file
+
+
+
+What have we learned about publishing multiple time series or profiles?#
+
+NetCDF files that include many profiles or time series can include a lot of empty space.
+NetCDF files that include only one profile or time series often require only one dimension and coordinate variable and are therefore simpler.
+Because the files are simpler, they are easier to create, easier to understand, and easier to build services upon.
+There are more ways to encode the same data in a NetCDF file that includes multiple profiles. Files with a single profile are more likely to be consistent with each other, regardless of who is creating them.
+The project or cruise that data were collected as part of is irrelevant to most data users over medium to long time scales. Your data are a contribution to a broader network of data that someone can download and use altogether - combining data from many projects or cruises.
+
+
+
+A time series for instruments that move#
+If we tried to use latitude, longitude, depth/elevation and time dimensions for our data variables, there would be a lot of empty space! Not all coordinates have to be coordinate variables with their own dimensions!
+In this case we can use time as our dimension, and everything else as 1D variables.
-latitude = [78.5142,79.2833,79.9840,80.4228]
-longitude = [30.4231,30.3591,30.4994, 30.4200]
+time = [0,1,2,3,4,5,6,7,8,9]
+latitude = [6.4970, 6.4756, 6.5584, 6.5087, 6.4815, 6.5029, 6.4279, 6.3409, 6.4066, 6.4134]
+longitude = [-66.6972, -66.6399, -66.5838, -66.6357, -66.7313, -66.686, -66.7192, -66.6737, -66.7594, -66.8479]
+depth = [6.6388, 5.8899, 5.0289, 4.7409, 5.5595, 5.4532, 5.7104, 6.2129, 6.2548, 6.5595]
+temperature = [10.77, 10.15, 9.69, 9.46, 9.06, 9.97, 9.66, 10.25, 10.25, 10.56]
xrds = xr.Dataset(
coords = {
- 'depth': depth,
'time': time
},
data_vars = {
- 'sea_water_temperature': (['time','depth'],temperature_2d),
- 'latitude': ('latitude', latitude),
- 'longitude': ('longitude', longitude)
+ 'longitude': ('time', longitude),
+ 'latitude': ('time', latitude),
+ 'depth': ('time', depth),
+ 'sea_water_temperature': ('time', temperature)
}
-)
+ )
xrds
@@ -3444,36 +3466,45 @@ Multiple profiles
+Data on a multidimensional grid#
+Some data sit on a grid with multiple dimensions. Some examples are satellite data or data output from certain models. Imagine we have sea water temperature data exported from a model. There are 4 dimensions; latitude, longitude, depth and time. In this case, let’s imagine that the model exports data to a regular grid.
+I am going to use random values! If you have data, your first job is to get your data into a multidimensional array. You will find some help in tutorial 04 on how to convert tabular data to a multidimensional array. ChatGPT can also be very helpful - but make sure the values are going into the right places.
-latitude = [78.5142,79.2833,79.9840]
-longitude = [30.4231,30.3591,30.4994, 30.4200]
+depth = np.arange(0,1001,1) # 0 to 1000, incrementing by 1
+latitude = np.arange(75,82.1,0.1) # 75 to 82, incrementing by 0.1
+longitude = np.arange(28,32.1,0.1) # 28 to 32, incrementing by 0.1
+time = [0,1,2,3]
+
+# Create 4D array of random temperature values between 0 and 2
+temperature = np.random.uniform(0, 2, size=(len(time), len(depth), len(latitude), len(longitude)))
xrds = xr.Dataset(
coords = {
+ 'time': time,
'depth': depth,
- 'time': time
- },
+ 'latitude': latitude,
+ 'longitude': longitude
+ },
data_vars = {
- 'sea_water_temperature': (['time','depth'],temperature_2d),
- 'latitude': ('latitude', latitude),
- 'longitude': ('longitude', longitude)
+ 'sea_water_temperature': (['time', 'depth', 'latitude', 'longitude'], temperature)
}
-)
+ )
xrds
@@ -3844,606 +3875,130 @@ Multiple profiles
-
-latitude = [78.5142,79.2833,79.9840]
-longitude = [30.4231,30.3591,30.4994, 30.4200]
-
-xrds = xr.Dataset(
- coords = {
- 'depth': depth,
- 'time': time
- },
- data_vars = {
- 'sea_water_temperature': (['time','depth'],temperature_2d),
- 'latitude': ('time', latitude),
- 'longitude': ('time', longitude)
- }
-)
-
-
-
-
----------------------------------------------------------------------------
-ValueError Traceback (most recent call last)
-Cell In[13], line 4
- 1 latitude = [78.5142,79.2833,79.9840]
- 2 longitude = [30.4231,30.3591,30.4994, 30.4200]
-----> 4 xrds = xr.Dataset(
- 5 coords = {
- 6 'depth': depth,
- 7 'time': time
- 8 },
- 9 data_vars = {
- 10 'sea_water_temperature': (['time','depth'],temperature_2d),
- 11 'latitude': ('time', latitude),
- 12 'longitude': ('time', longitude)
- 13 }
- 14 )
-
-File ~/anaconda3/lib/python3.11/site-packages/xarray/core/dataset.py:652, in Dataset.__init__(self, data_vars, coords, attrs)
- 649 if isinstance(coords, Dataset):
- 650 coords = coords._variables
---> 652 variables, coord_names, dims, indexes, _ = merge_data_and_coords(
- 653 data_vars, coords, compat="broadcast_equals"
- 654 )
- 656 self._attrs = dict(attrs) if attrs is not None else None
- 657 self._close = None
-
-File ~/anaconda3/lib/python3.11/site-packages/xarray/core/merge.py:569, in merge_data_and_coords(data_vars, coords, compat, join)
- 567 objects = [data_vars, coords]
- 568 explicit_coords = coords.keys()
---> 569 return merge_core(
- 570 objects,
- 571 compat,
- 572 join,
- 573 explicit_coords=explicit_coords,
- 574 indexes=Indexes(indexes, coords),
- 575 )
-
-File ~/anaconda3/lib/python3.11/site-packages/xarray/core/merge.py:761, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)
- 756 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
- 757 variables, out_indexes = merge_collected(
- 758 collected, prioritized, compat=compat, combine_attrs=combine_attrs
- 759 )
---> 761 dims = calculate_dimensions(variables)
- 763 coord_names, noncoord_names = determine_coords(coerced)
- 764 if explicit_coords is not None:
-
-File ~/anaconda3/lib/python3.11/site-packages/xarray/core/variable.py:3282, in calculate_dimensions(variables)
- 3280 last_used[dim] = k
- 3281 elif dims[dim] != size:
--> 3282 raise ValueError(
- 3283 f"conflicting sizes for dimension {dim!r}: "
- 3284 f"length {size} on {k!r} and length {dims[dim]} on {last_used!r}"
- 3285 )
- 3286 return dims
-
-ValueError: conflicting sizes for dimension 'time': length 3 on 'latitude' and length 4 on {'time': 'sea_water_temperature', 'depth': 'sea_water_temperature'}
-
-
-
-
-Okay, that is all well and good. These issues might seem small. However, often our profiles are differen lengths, like this.
-
-
-depth_1 = [10,20,30]
-temperature_1 = [21.42, 21.21, 20.98]
-depth_2 = [10,20,30,40,50]
-temperature_2 = [22.08, 21.56, 20.42, 19.23, 18.53]
-depth_3 = [10,20,30,40,50,60]
-temperature_3 = [22.42, 21.21, 20.12, 19.45, 18.72, 16.99]
-depth_4 = [10,20]
-temperature_4 = [21.84, 21.49]
-
-
-
-
-But a 2D array needs to have profiles of equal lengths. To address this, we need to fill the rest of our 2D array with NaNs.
-
-
-# Finding the maximum length among the depth arrays
-max_depth_length = max(len(depth_1), len(depth_2), len(depth_3), len(depth_4))
-
-# Creating arrays filled with NaNs
-temp_arr_1 = np.full(max_depth_length, np.nan)
-temp_arr_2 = np.full(max_depth_length, np.nan)
-temp_arr_3 = np.full(max_depth_length, np.nan)
-temp_arr_4 = np.full(max_depth_length, np.nan)
-
-# Filling the arrays with available temperature data
-temp_arr_1[:len(temperature_1)] = temperature_1
-temp_arr_2[:len(temperature_2)] = temperature_2
-temp_arr_3[:len(temperature_3)] = temperature_3
-temp_arr_4[:len(temperature_4)] = temperature_4
-
-# Creating a 2D array
-temperature_2d = np.array([temp_arr_1, temp_arr_2, temp_arr_3, temp_arr_4])
-temperature_2d
-
-
-
-
-array([[21.42, 21.21, 20.98, nan, nan, nan],
- [22.08, 21.56, 20.42, 19.23, 18.53, nan],
- [22.42, 21.21, 20.12, 19.45, 18.72, 16.99],
- [21.84, 21.49, nan, nan, nan, nan]])
-
-
-
-
-
-
-latitude = [78.5142,79.2833,79.9840,80.4228]
-longitude = [30.4231,30.3591,30.4994, 30.4200]
-
-xrds = xr.Dataset(
- coords = {
- 'depth': depth_3, # The longest profile
- 'time': time
- },
- data_vars = {
- 'sea_water_temperature': (['time','depth'],temperature_2d),
- 'latitude': ('time', latitude),
- 'longitude': ('time', longitude)
- }
-)
-
-xrds
-
-
-
-
-
-<xarray.Dataset>
-Dimensions: (time: 4, depth: 6)
-Coordinates:
- * depth (depth) int64 10 20 30 40 50 60
- * time (time) int64 0 1 2 3
-Data variables:
- sea_water_temperature (time, depth) float64 21.42 21.21 20.98 ... nan nan
- latitude (time) float64 78.51 79.28 79.98 80.42
- longitude (time) float64 30.42 30.36 30.5 30.42
-
-So all the profiles need to be extended to the maximum depth in this case. You can imagine that this could create a lot of empty space in your files!
-But there is more…
-In the example above each profile is sampled at 10 m increments. What if different depths are sampled for each profile? Well, the depth coordinate variable would have to encompass all the depths sampled across all profiles. For example:
+
+
+Random points in space and time (ungridded data)#
+Often data don’t fit on a regular grid. What then?
+You could bin your data to a regular grid and specify what you have done in the metadata. You (the data creator) are the best person to know if this is a suitable thing to do for your own data. However, in many cases, you will not want to bin your data.
+The best practice is to use a common ‘counter’ dimension for all your coordinates. You can think of this as an index or counter of your data points.
+This method is quite easy to write. You just need to create 1D arrays for all your coordinates and data variables.
-depth_1 = [5,20,50]
-temperature_1 = [21.42, 21.21, 20.98]
-depth_2 = [10,35,70,90,100]
-temperature_2 = [22.08, 21.56, 20.42, 19.23, 18.53]
-depth_3 = [10,25,40,60,80,100]
-temperature_3 = [22.42, 21.21, 20.12, 19.45, 18.72, 16.99]
-depth_4 = [12,24]
-temperature_4 = [21.84, 21.49]
-
-# Merge all unique depth values
-all_depths = sorted(set(depth_1 + depth_2 + depth_3 + depth_4))
-
-# Create arrays filled with NaNs for temperatures
-all_temperatures = []
-for depths, temps in [(depth_1, temperature_1), (depth_2, temperature_2),
- (depth_3, temperature_3), (depth_4, temperature_4)]:
- temp_arr = np.full(len(all_depths), np.nan)
- for depth, temp in zip(depths, temps):
- index = all_depths.index(depth)
- temp_arr[index] = temp
- all_temperatures.append(temp_arr)
+num_points = 100
+node = np.arange(0,num_points)
-# Create a 2D array
-temperature_2d = np.array(all_temperatures)
-temperature_2d
-
-
-
-
-array([[21.42, nan, nan, 21.21, nan, nan, nan, nan, 20.98,
- nan, nan, nan, nan, nan],
- [ nan, 22.08, nan, nan, nan, nan, 21.56, nan, nan,
- nan, 20.42, nan, 19.23, 18.53],
- [ nan, 22.42, nan, nan, nan, 21.21, nan, 20.12, nan,
- 19.45, nan, 18.72, nan, 16.99],
- [ nan, nan, 21.84, nan, 21.49, nan, nan, nan, nan,
- nan, nan, nan, nan, nan]])
-
-
-
-
-
-
-latitude = [78.5142,79.2833,79.9840,80.4228]
-longitude = [30.4231,30.3591,30.4994, 30.4200]
+# Generate some example irregular data
+latitudes = np.random.uniform(low=-90, high=90, size=num_points)
+longitudes = np.random.uniform(low=-180, high=180, size=num_points)
+data_values = np.random.rand(num_points) # Replace with your actual data
+# Create an xarray Dataset
xrds = xr.Dataset(
coords = {
- 'depth': all_depths,
- 'time': time
+ 'node': node
},
data_vars = {
- 'sea_water_temperature': (['time','depth'],temperature_2d),
- 'latitude': ('time', latitude),
- 'longitude': ('time', longitude)
+ 'data_var': ('node', data_values),
+ 'latitude': ('node', latitudes),
+ 'longitude': ('node', longitudes),
}
)
@@ -4816,47 +4371,104 @@ Multiple profiles
-on-the-fly visualisation of data on a data centre or data access portal website
-aggregating data from several files together
-allowing users to access only a subset of the data in a file
-
-
-
-What have we learned about publishing multiple time series or profiles?#
-
-NetCDF files that include many profiles or time series can include a lot of empty space.
-NetCDF files that include only one profile or time series often require only one dimension and coordinate variable, and are therefore simpler.
-Because the files are simpler, they are easier to create, easier to understand, and easier to build services upon.
-There are more ways to encode the same data in a NetCDF file that includes multiple profiles. Files with a single profile are more likely to be consistent with each other, regardless of who is creating them.
-The project or cruise that data were collected as part of is irrelevant to most data users over medium to long time scales. Your data are a contribution to a broader network of data that someone can download and use altogether - combining data from many projects or cruises.
-
Some final rules of thumb
-It is better to create many simple files than a single complicated file - within reason.
+Always publish data at the highest possible functional granularity (i.e., not individual
+measurements, but not several stations combined into one dataset either).
+Never combine data with different temporal resolutions (e.g., variables with minute and variables
+with hourly resolutions) in the same dataset.
+Never combine data with different vertical dimensions (e.g., surface observations and vertical
+profiles) in the same dataset.
Try to avoid creating files with lots of unnecessary empty space. Some is of course unavoidable, and this is fine!
The time variable can often be set to ‘unlimited’, which is useful if you might append more data later or concatenate files (see the sketch after this list).
-Data that require different sets of global attributes, or different sets of dimensions and coordinate variables, should be split up into different files.
Multiple files can be published together in a data collection and be assigned a single DOI. Parent-child relationships can also be used to assign one DOI for the parent and a different DOI for each of its children.
Think about your data as a contribution to a larger network of similar data.
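As a minimal sketch of the ‘unlimited’ time dimension mentioned in the list above (the filename is hypothetical):

# Write 'time' as an unlimited (record) dimension so more data can be appended later
xrds.to_netcdf('my_timeseries.nc', unlimited_dims=['time'])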
@@ -4927,13 +4539,12 @@ Some final rules of thumb
- What to consider
-- One profile or time series
+- An appropriate level of granularity for your data
+- Multiple profiles
+- What have we learned about publishing multiple time series or profiles?
- A time series for instruments that move
- Data on a multidimensional grid
- Random points in space and time (ungridded data)
-- Multiple time series
-- Multiple profiles
-- What have we learned about publishing multiple time series or profiles?
- Some final rules of thumb
diff --git a/_build/html/06_creating_multiple_cfnetcdf_files_in_one_go.html b/_build/html/06_creating_multiple_cfnetcdf_files_in_one_go.html
index 944d913..0c0dfe4 100644
--- a/_build/html/06_creating_multiple_cfnetcdf_files_in_one_go.html
+++ b/_build/html/06_creating_multiple_cfnetcdf_files_in_one_go.html
@@ -176,7 +176,6 @@
- 06: Creating multiple CF-NetCDF files in one go
- 07: Combining data from multiple netcdf files
- 08: Ancillary variables
-- 09: Cells and cell methods
@@ -886,14 +885,14 @@ A refresh of how to create a single NetCDF file
diff --git a/_build/html/07_combing_data_from_multiple_netcdf_files.html b/_build/html/07_combing_data_from_multiple_netcdf_files.html
index d39a24a..bdf1921 100644
--- a/_build/html/07_combing_data_from_multiple_netcdf_files.html
+++ b/_build/html/07_combing_data_from_multiple_netcdf_files.html
@@ -176,7 +176,6 @@
- 06: Creating multiple CF-NetCDF files in one go
- 07: Combining data from multiple netcdf files
- 08: Ancillary variables
-- 09: Cells and cell methods
@@ -808,12 +807,10 @@ Introducing the data
+ _NCProperties: version=2,netcdf=4.6.3,hdf5=1.10.5
Let’s look at a quick example of how we can extract the data into numpy arrays
diff --git a/_build/html/08_ancillary_variables.html b/_build/html/08_ancillary_variables.html
index bee5732..5753eae 100644
--- a/_build/html/08_ancillary_variables.html
+++ b/_build/html/08_ancillary_variables.html
@@ -65,7 +65,6 @@
-
@@ -176,7 +175,6 @@
- 06: Creating multiple CF-NetCDF files in one go
- 07: Combining data from multiple netcdf files
- 08: Ancillary variables
-- 09: Cells and cell methods
@@ -801,7 +799,7 @@ Basic example without ancillary data
@@ -1203,7 +1201,7 @@ Assigning quality or status flags
@@ -1608,7 +1606,7 @@ Assigning quality or status flags
Conventions global attribute, for example
+ Conventions: CF-1.8, ACDD-1.3, OceanSITES Manual 1.4
More work needs to be done to expand the CF conventions to standardise ancillary data. At the time of writing, a standard_name
for the volume of sea water filtered does not exist.
This is where the scientific community can help!
@@ -3207,15 +3205,6 @@ Other ancillary data
07: Combining data from multiple netcdf files
-
-
-
-
diff --git a/_build/html/09_cells_and_cell_methods.html b/_build/html/09_cells_and_cell_methods.html
index 31c03be..e56098f 100644
--- a/_build/html/09_cells_and_cell_methods.html
+++ b/_build/html/09_cells_and_cell_methods.html
@@ -65,7 +65,6 @@
-
@@ -166,7 +165,7 @@
-
+
@@ -821,11 +819,11 @@ Maximum monthly temperatures
@@ -1392,11 +1390,11 @@ Maximum monthly temperatures
time_bounds variable defines the bounds of the time
variable. We are using cell_methods
to state that values are the maximums within each cell with respect to time.
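As a rough sketch of that encoding (the variable names here are assumed, not taken from the notebook):

# Link the bounds variable and record the cell method (assumed names)
xrds['time'].attrs['bounds'] = 'time_bounds'
xrds['max_temperature'].attrs['cell_methods'] = 'time: maximum'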
@@ -1904,11 +1902,11 @@ Ice core data
@@ -2024,15 +2022,6 @@ Cell methods when you have multiple bounds
- 06: Creating multiple CF-NetCDF files in one go
- 07: Combining data from multiple netcdf files
- 08: Ancillary variables
-- 09: Cells and cell methods
diff --git a/_build/html/10_netcdf_files_with_groups.html b/_build/html/10_netcdf_files_with_groups.html
index 5b4b158..3fbf70f 100644
--- a/_build/html/10_netcdf_files_with_groups.html
+++ b/_build/html/10_netcdf_files_with_groups.html
@@ -174,7 +174,6 @@
@@ -402,12 +401,12 @@ 10: NetCDF files with groups
----> 3 xrds = xr.open_dataset(netcdf_file)
4 print(xrds)
-File ~/anaconda3/lib/python3.11/site-packages/xarray/backends/api.py:547, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
- 544 kwargs.update(backend_kwargs)
- 546 if engine is None:
---> 547 engine = plugins.guess_engine(filename_or_obj)
- 549 if from_array_kwargs is None:
- 550 from_array_kwargs = {}
+File ~/anaconda3/lib/python3.11/site-packages/xarray/backends/api.py:553, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
+ 550 kwargs.update(backend_kwargs)
+ 552 if engine is None:
+--> 553 engine = plugins.guess_engine(filename_or_obj)
+ 555 if from_array_kwargs is None:
+ 556 from_array_kwargs = {}
File ~/anaconda3/lib/python3.11/site-packages/xarray/backends/plugins.py:197, in guess_engine(store_spec)
189 else:
diff --git a/_build/jupyter_execute/82ec67859f2b6d475b61016adaa94571d907306f30789b5e08c4c0d2be3fc145.png b/_build/html/_images/5c2bd4bb90b78536cb99cb13e81d6ba6b448b1c2fff77671e712640c6bb57e1a.png
similarity index 99%
rename from _build/jupyter_execute/82ec67859f2b6d475b61016adaa94571d907306f30789b5e08c4c0d2be3fc145.png
rename to _build/html/_images/5c2bd4bb90b78536cb99cb13e81d6ba6b448b1c2fff77671e712640c6bb57e1a.png
index fb52750..6c53823 100644
Binary files a/_build/jupyter_execute/82ec67859f2b6d475b61016adaa94571d907306f30789b5e08c4c0d2be3fc145.png and b/_build/html/_images/5c2bd4bb90b78536cb99cb13e81d6ba6b448b1c2fff77671e712640c6bb57e1a.png differ
diff --git a/_build/html/_images/8c3552ccd70a2f009f605e152751e93b39b0c79da4f20274538704fcd030a818.png b/_build/html/_images/700b3601089df91995112e597e811c6917990efa6157bda31a3d2ec12ed71650.png
similarity index 99%
rename from _build/html/_images/8c3552ccd70a2f009f605e152751e93b39b0c79da4f20274538704fcd030a818.png
rename to _build/html/_images/700b3601089df91995112e597e811c6917990efa6157bda31a3d2ec12ed71650.png
index 303699c..8a65825 100644
Binary files a/_build/html/_images/8c3552ccd70a2f009f605e152751e93b39b0c79da4f20274538704fcd030a818.png and b/_build/html/_images/700b3601089df91995112e597e811c6917990efa6157bda31a3d2ec12ed71650.png differ
diff --git a/_build/html/_images/a92ad5c5a9794ea71b463ddda13d0c8bccd52e714067519340a49281cab63fed.png b/_build/html/_images/77ea2bfb90a99933b06fe992edf7392c52322ac9c82cbe33a29231100fed7d3b.png
similarity index 97%
rename from _build/html/_images/a92ad5c5a9794ea71b463ddda13d0c8bccd52e714067519340a49281cab63fed.png
rename to _build/html/_images/77ea2bfb90a99933b06fe992edf7392c52322ac9c82cbe33a29231100fed7d3b.png
index 2277463..b8e1401 100644
Binary files a/_build/html/_images/a92ad5c5a9794ea71b463ddda13d0c8bccd52e714067519340a49281cab63fed.png and b/_build/html/_images/77ea2bfb90a99933b06fe992edf7392c52322ac9c82cbe33a29231100fed7d3b.png differ
diff --git a/_build/html/_images/818b827320c4a59978006317e4e63f30c4a3af589bcade51691434dbd546793c.png b/_build/html/_images/89135a2d6d9771d18709e7add25fc312ad7712cd75e8d42f34886993731c7e06.png
similarity index 99%
rename from _build/html/_images/818b827320c4a59978006317e4e63f30c4a3af589bcade51691434dbd546793c.png
rename to _build/html/_images/89135a2d6d9771d18709e7add25fc312ad7712cd75e8d42f34886993731c7e06.png
index 1c98119..7e01965 100644
Binary files a/_build/html/_images/818b827320c4a59978006317e4e63f30c4a3af589bcade51691434dbd546793c.png and b/_build/html/_images/89135a2d6d9771d18709e7add25fc312ad7712cd75e8d42f34886993731c7e06.png differ
diff --git a/_build/html/_images/8fbfcf4bfb8f0fed1ab2aea1275f7c41a48e544a723bdd59df4e2a417d42d6f4.png b/_build/html/_images/8fbfcf4bfb8f0fed1ab2aea1275f7c41a48e544a723bdd59df4e2a417d42d6f4.png
deleted file mode 100644
index f55c923..0000000
Binary files a/_build/html/_images/8fbfcf4bfb8f0fed1ab2aea1275f7c41a48e544a723bdd59df4e2a417d42d6f4.png and /dev/null differ
diff --git a/_build/jupyter_execute/8107a7a1e711b079f3ecbb292b5761338f8e4989e6c019d6439eeddac504a2a8.png b/_build/html/_images/97e089c7a526f62b696fb2a59ed3fd9339014a72cc624393f912c447e1b00497.png
similarity index 99%
rename from _build/jupyter_execute/8107a7a1e711b079f3ecbb292b5761338f8e4989e6c019d6439eeddac504a2a8.png
rename to _build/html/_images/97e089c7a526f62b696fb2a59ed3fd9339014a72cc624393f912c447e1b00497.png
index 4c2ff9b..d7edf3d 100644
Binary files a/_build/jupyter_execute/8107a7a1e711b079f3ecbb292b5761338f8e4989e6c019d6439eeddac504a2a8.png and b/_build/html/_images/97e089c7a526f62b696fb2a59ed3fd9339014a72cc624393f912c447e1b00497.png differ
diff --git a/_build/html/_images/a52d711bbde256c96fe9c9085bfaae08866b461cd270fe72846ee879f153b530.png b/_build/html/_images/a52d711bbde256c96fe9c9085bfaae08866b461cd270fe72846ee879f153b530.png
deleted file mode 100644
index 2bf306a..0000000
Binary files a/_build/html/_images/a52d711bbde256c96fe9c9085bfaae08866b461cd270fe72846ee879f153b530.png and /dev/null differ
diff --git a/_build/html/_images/4da8b4108e28661a2fda8bbcdc4e80d04772c125e0ef7ddfce772af4a58b183d.png b/_build/html/_images/b4546a355348d1ec97e7adf35fb868e29d0b6fe4383712f4a332197583675b06.png
similarity index 99%
rename from _build/html/_images/4da8b4108e28661a2fda8bbcdc4e80d04772c125e0ef7ddfce772af4a58b183d.png
rename to _build/html/_images/b4546a355348d1ec97e7adf35fb868e29d0b6fe4383712f4a332197583675b06.png
index 4b9baf9..8aae9e2 100644
Binary files a/_build/html/_images/4da8b4108e28661a2fda8bbcdc4e80d04772c125e0ef7ddfce772af4a58b183d.png and b/_build/html/_images/b4546a355348d1ec97e7adf35fb868e29d0b6fe4383712f4a332197583675b06.png differ
diff --git a/_build/html/_images/9c76c09b4e389c3961a2fbeac82ec156274bd8a7a4059d0715896db6551dca74.png b/_build/html/_images/c0ec2de1791af233d58c94b4a3faa72a7b9fb73b20d080755898175452905fa5.png
similarity index 98%
rename from _build/html/_images/9c76c09b4e389c3961a2fbeac82ec156274bd8a7a4059d0715896db6551dca74.png
rename to _build/html/_images/c0ec2de1791af233d58c94b4a3faa72a7b9fb73b20d080755898175452905fa5.png
index 768a71a..7df87ee 100644
Binary files a/_build/html/_images/9c76c09b4e389c3961a2fbeac82ec156274bd8a7a4059d0715896db6551dca74.png and b/_build/html/_images/c0ec2de1791af233d58c94b4a3faa72a7b9fb73b20d080755898175452905fa5.png differ
diff --git a/_build/html/_images/ce157e3cd7cdb5303f4a1d6e340e52456c5f30b4250fbef7bdb92862a488bb28.png b/_build/html/_images/e3ad651fe2b82ac3051b48aee78ca4db5762a1402babe9f75a275f241ab4891f.png
similarity index 97%
rename from _build/html/_images/ce157e3cd7cdb5303f4a1d6e340e52456c5f30b4250fbef7bdb92862a488bb28.png
rename to _build/html/_images/e3ad651fe2b82ac3051b48aee78ca4db5762a1402babe9f75a275f241ab4891f.png
index ef1fda2..e6aa098 100644
Binary files a/_build/html/_images/ce157e3cd7cdb5303f4a1d6e340e52456c5f30b4250fbef7bdb92862a488bb28.png and b/_build/html/_images/e3ad651fe2b82ac3051b48aee78ca4db5762a1402babe9f75a275f241ab4891f.png differ
diff --git a/_build/html/_images/e1974b82226341f2743ad88dde69b92f5703ef391c07c3ddafc0fe3ebced9c89.png b/_build/html/_images/e6476b144a06386d7b2d62dd220fe331a28ee06c0f635bcd53b021f7eb9c4275.png
similarity index 99%
rename from _build/html/_images/e1974b82226341f2743ad88dde69b92f5703ef391c07c3ddafc0fe3ebced9c89.png
rename to _build/html/_images/e6476b144a06386d7b2d62dd220fe331a28ee06c0f635bcd53b021f7eb9c4275.png
index c2fc0b6..d6c5450 100644
Binary files a/_build/html/_images/e1974b82226341f2743ad88dde69b92f5703ef391c07c3ddafc0fe3ebced9c89.png and b/_build/html/_images/e6476b144a06386d7b2d62dd220fe331a28ee06c0f635bcd53b021f7eb9c4275.png differ
diff --git a/_build/html/_images/e8e4b6019afa6a43e4ae6d4c5134907a55d4d49190fb06d953a44598bafe1c4a.png b/_build/html/_images/e8e4b6019afa6a43e4ae6d4c5134907a55d4d49190fb06d953a44598bafe1c4a.png
new file mode 100644
index 0000000..3e481ef
Binary files /dev/null and b/_build/html/_images/e8e4b6019afa6a43e4ae6d4c5134907a55d4d49190fb06d953a44598bafe1c4a.png differ
diff --git a/_build/html/_images/ed3919da7ce4aaceac2746b4e5633448282551236deafbf48d69d986d72331b5.png b/_build/html/_images/ed3919da7ce4aaceac2746b4e5633448282551236deafbf48d69d986d72331b5.png
new file mode 100644
index 0000000..966a69e
Binary files /dev/null and b/_build/html/_images/ed3919da7ce4aaceac2746b4e5633448282551236deafbf48d69d986d72331b5.png differ
diff --git a/_build/html/_images/ce4668d08595062b5b8c05a8391187dd954212b943943eb2ebb644d79fad4ffd.png b/_build/html/_images/fddc15bf159581ae91d5c2412d71b38c8b4b038568b5dce9a7ee981c3e39c3a0.png
similarity index 97%
rename from _build/html/_images/ce4668d08595062b5b8c05a8391187dd954212b943943eb2ebb644d79fad4ffd.png
rename to _build/html/_images/fddc15bf159581ae91d5c2412d71b38c8b4b038568b5dce9a7ee981c3e39c3a0.png
index b7dcfb0..3687637 100644
Binary files a/_build/html/_images/ce4668d08595062b5b8c05a8391187dd954212b943943eb2ebb644d79fad4ffd.png and b/_build/html/_images/fddc15bf159581ae91d5c2412d71b38c8b4b038568b5dce9a7ee981c3e39c3a0.png differ
diff --git a/_build/html/_sources/02_creating_plots.ipynb b/_build/html/_sources/02_creating_plots.ipynb
index 1b4252f..53b63a3 100644
--- a/_build/html/_sources/02_creating_plots.ipynb
+++ b/_build/html/_sources/02_creating_plots.ipynb
@@ -414,12 +414,12 @@
" pi_institution: University Centre in Svalbard\n",
" pi_email: annav@unis.no\n",
" sea_floor_depth_below_sea_surface: 332.58\n",
- " _NCProperties: version=2,netcdf=4.6.3,hdf5=1.10.5 "
],
"text/plain": [
"\n",
@@ -1818,7 +1818,7 @@
" date_issued: 2023-01-08T18:33:09Z"
]
},
- "execution_count": 9,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -1839,17 +1839,17 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 15,
"id": "dc4be44a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 10,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
@@ -1878,7 +1878,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
"id": "4d943974",
"metadata": {},
"outputs": [
@@ -1904,6 +1904,7 @@
"ax.set_yticks(range(-90, 91, 30), crs=ccrs.PlateCarree())\n",
"ax.set_xlabel('Longitude')\n",
"ax.set_ylabel('Latitude')\n",
+ "plt.savefig('data/media/temperature_anomalies.png')\n",
"plt.show()"
]
},
@@ -1932,7 +1933,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/_build/html/_sources/03_extracting_data_to_different_formats.ipynb b/_build/html/_sources/03_extracting_data_to_different_formats.ipynb
index 36e5e22..23b2b46 100644
--- a/_build/html/_sources/03_extracting_data_to_different_formats.ipynb
+++ b/_build/html/_sources/03_extracting_data_to_different_formats.ipynb
@@ -2423,9 +2423,7 @@
"cell_type": "code",
"execution_count": 29,
"id": "e3ba19ed",
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -4357,7 +4355,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/_build/html/_sources/04_creating_a_cfnetcdf_file.ipynb b/_build/html/_sources/04_creating_a_cfnetcdf_file.ipynb
index 97af9e4..e008735 100644
--- a/_build/html/_sources/04_creating_a_cfnetcdf_file.ipynb
+++ b/_build/html/_sources/04_creating_a_cfnetcdf_file.ipynb
@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 1,
"id": "c6364167",
"metadata": {},
"outputs": [],
@@ -37,7 +37,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 2,
"id": "48890b93",
"metadata": {},
"outputs": [
@@ -410,7 +410,7 @@
"<xarray.Dataset>\n",
"Dimensions: ()\n",
"Data variables:\n",
- " *empty*
"
+ " *empty*"
],
"text/plain": [
"\n",
@@ -419,7 +419,7 @@
" *empty*"
]
},
- "execution_count": 21,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -449,7 +449,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 3,
"id": "b05c78fc",
"metadata": {},
"outputs": [
@@ -824,7 +824,7 @@
"Coordinates:\n",
" * time (time) int64 0 1 2 3 4 5 6 7 8 9\n",
"Data variables:\n",
- " *empty* "
+ " *empty*"
],
"text/plain": [
"\n",
@@ -835,7 +835,7 @@
" *empty*"
]
},
- "execution_count": 22,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -874,7 +874,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 4,
"id": "eb16f3f7",
"metadata": {},
"outputs": [
@@ -896,7 +896,7 @@
" dtype='datetime64[s]')"
]
},
- "execution_count": 23,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -919,7 +919,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 5,
"id": "8fcf87b9",
"metadata": {},
"outputs": [
@@ -931,7 +931,7 @@
" 64800, 68400, 72000, 75600, 79200, 82800])"
]
},
- "execution_count": 24,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -943,7 +943,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 6,
"id": "8c58ad4c",
"metadata": {},
"outputs": [
@@ -954,7 +954,7 @@
" 17, 18, 19, 20, 21, 22, 23])"
]
},
- "execution_count": 25,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -974,7 +974,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 7,
"id": "554ba044",
"metadata": {},
"outputs": [
@@ -1349,12 +1349,12 @@
"Coordinates:\n",
" * time (time) int64 0 3600 7200 10800 14400 ... 72000 75600 79200 82800\n",
"Data variables:\n",
- " *empty*
- timePandasIndex
PandasIndex(Index([ 0, 3600, 7200, 10800, 14400, 18000, 21600, 25200, 28800, 32400,\n",
+ " 36000, 39600, 43200, 46800, 50400, 54000, 57600, 61200, 64800, 68400,\n",
+ " 72000, 75600, 79200, 82800],\n",
+ " dtype='int64', name='time'))
"
],
"text/plain": [
"\n",
@@ -1365,7 +1365,7 @@
" *empty*"
]
},
- "execution_count": 26,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -1392,7 +1392,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 8,
"id": "98930259",
"metadata": {},
"outputs": [
@@ -1769,7 +1769,7 @@
" * latitude (latitude) float64 78.54 79.14 80.71\n",
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
- " *empty* "
+ " *empty*"
],
"text/plain": [
"\n",
@@ -1782,7 +1782,7 @@
" *empty*"
]
},
- "execution_count": 27,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -1819,7 +1819,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 9,
"id": "0f645cb6",
"metadata": {},
"outputs": [
@@ -2196,7 +2196,7 @@
" * latitude (latitude) float64 78.54 79.14 80.71\n",
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
- " chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2 "
+ " chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2"
],
"text/plain": [
"\n",
@@ -2209,7 +2209,7 @@
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2"
]
},
- "execution_count": 28,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -2232,19 +2232,19 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 10,
"id": "d422e4d3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([[7, 6],\n",
- " [6, 4],\n",
- " [2, 8]])"
+ "array([[5, 0],\n",
+ " [3, 2],\n",
+ " [9, 3]])"
]
},
- "execution_count": 29,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -2256,7 +2256,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 11,
"id": "fa63b113",
"metadata": {},
"outputs": [
@@ -2634,9 +2634,9 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8 "
+ " wind_speed (latitude, longitude) int64 5 0 3 2 9 3"
],
"text/plain": [
"\n",
@@ -2647,10 +2647,10 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8"
+ " wind_speed (latitude, longitude) int64 5 0 3 2 9 3"
]
},
- "execution_count": 30,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -2670,7 +2670,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 12,
"id": "e274e2f1",
"metadata": {},
"outputs": [
@@ -3041,26 +3041,26 @@
" fill: currentColor;\n",
"}\n",
"<xarray.DataArray 'wind_speed' (latitude: 3, longitude: 2)>\n",
- "array([[7, 6],\n",
- " [6, 4],\n",
- " [2, 8]])\n",
+ "array([[5, 0],\n",
+ " [3, 2],\n",
+ " [9, 3]])\n",
"Coordinates:\n",
" * latitude (latitude) float64 78.54 79.14 80.71\n",
- " * longitude (longitude) float64 30.01 28.73
"
+ " * longitude (longitude) float64 30.01 28.73
"
],
"text/plain": [
"\n",
- "array([[7, 6],\n",
- " [6, 4],\n",
- " [2, 8]])\n",
+ "array([[5, 0],\n",
+ " [3, 2],\n",
+ " [9, 3]])\n",
"Coordinates:\n",
" * latitude (latitude) float64 78.54 79.14 80.71\n",
" * longitude (longitude) float64 30.01 28.73"
]
},
- "execution_count": 31,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -3081,7 +3081,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 13,
"id": "56ba3981",
"metadata": {},
"outputs": [
@@ -3459,17 +3459,17 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8\n",
- " temperature (latitude, longitude, depth) int64 26 23 21 20 ... 28 24 26- depthPandasIndex
PandasIndex(Index([0, 10, 20, 50, 100], dtype='int64', name='depth'))
- latitudePandasIndex
PandasIndex(Index([78.5425, 79.1423, 80.7139], dtype='float64', name='latitude'))
- longitudePandasIndex
PandasIndex(Index([30.0131, 28.7269], dtype='float64', name='longitude'))
"
],
"text/plain": [
"\n",
@@ -3480,11 +3480,11 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8\n",
- " temperature (latitude, longitude, depth) int64 26 23 21 20 ... 28 24 26"
+ " wind_speed (latitude, longitude) int64 5 0 3 2 9 3\n",
+ " temperature (latitude, longitude, depth) int64 20 29 24 21 ... 23 21 23"
]
},
- "execution_count": 32,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -3510,7 +3510,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 14,
"id": "d22ac157",
"metadata": {},
"outputs": [
@@ -3547,210 +3547,210 @@
" 0 \n",
" 78.5425 \n",
" 30.0131 \n",
- " 33.198072 \n",
+ " 32.500291 \n",
" \n",
" \n",
" 1 \n",
" 0 \n",
" 78.5425 \n",
" 28.7269 \n",
- " 32.967388 \n",
+ " 30.470107 \n",
" \n",
" \n",
" 2 \n",
" 0 \n",
" 79.1423 \n",
" 30.0131 \n",
- " 33.766404 \n",
+ " 32.986480 \n",
" \n",
" \n",
" 3 \n",
" 0 \n",
" 79.1423 \n",
" 28.7269 \n",
- " 33.862252 \n",
+ " 34.677304 \n",
" \n",
" \n",
" 4 \n",
" 0 \n",
" 80.7139 \n",
" 30.0131 \n",
- " 34.866701 \n",
+ " 32.532454 \n",
" \n",
" \n",
" 5 \n",
" 0 \n",
" 80.7139 \n",
" 28.7269 \n",
- " 34.801337 \n",
+ " 34.603980 \n",
" \n",
" \n",
" 6 \n",
" 10 \n",
" 78.5425 \n",
" 30.0131 \n",
- " 31.713436 \n",
+ " 30.693671 \n",
" \n",
" \n",
" 7 \n",
" 10 \n",
" 78.5425 \n",
" 28.7269 \n",
- " 34.493179 \n",
+ " 34.611547 \n",
" \n",
" \n",
" 8 \n",
" 10 \n",
" 79.1423 \n",
" 30.0131 \n",
- " 30.428292 \n",
+ " 32.143802 \n",
" \n",
" \n",
" 9 \n",
" 10 \n",
" 79.1423 \n",
" 28.7269 \n",
- " 31.707314 \n",
+ " 31.304417 \n",
" \n",
" \n",
" 10 \n",
" 10 \n",
" 80.7139 \n",
" 30.0131 \n",
- " 30.655009 \n",
+ " 30.732836 \n",
" \n",
" \n",
" 11 \n",
" 10 \n",
" 80.7139 \n",
" 28.7269 \n",
- " 34.506142 \n",
+ " 32.731361 \n",
" \n",
" \n",
" 12 \n",
" 20 \n",
" 78.5425 \n",
" 30.0131 \n",
- " 34.876919 \n",
+ " 31.281876 \n",
" \n",
" \n",
" 13 \n",
" 20 \n",
" 78.5425 \n",
" 28.7269 \n",
- " 32.274340 \n",
+ " 33.793631 \n",
" \n",
" \n",
" 14 \n",
" 20 \n",
" 79.1423 \n",
" 30.0131 \n",
- " 34.076707 \n",
+ " 33.943875 \n",
" \n",
" \n",
" 15 \n",
" 20 \n",
" 79.1423 \n",
" 28.7269 \n",
- " 32.099654 \n",
+ " 31.368625 \n",
" \n",
" \n",
" 16 \n",
" 20 \n",
" 80.7139 \n",
" 30.0131 \n",
- " 33.985081 \n",
+ " 30.620448 \n",
" \n",
" \n",
" 17 \n",
" 20 \n",
" 80.7139 \n",
" 28.7269 \n",
- " 30.194662 \n",
+ " 32.272004 \n",
" \n",
" \n",
" 18 \n",
" 50 \n",
" 78.5425 \n",
" 30.0131 \n",
- " 31.446841 \n",
+ " 34.451398 \n",
" \n",
" \n",
" 19 \n",
" 50 \n",
" 78.5425 \n",
" 28.7269 \n",
- " 32.130476 \n",
+ " 30.676179 \n",
" \n",
" \n",
" 20 \n",
" 50 \n",
" 79.1423 \n",
" 30.0131 \n",
- " 34.057877 \n",
+ " 32.713458 \n",
" \n",
" \n",
" 21 \n",
" 50 \n",
" 79.1423 \n",
" 28.7269 \n",
- " 32.506849 \n",
+ " 32.465119 \n",
" \n",
" \n",
" 22 \n",
" 50 \n",
" 80.7139 \n",
" 30.0131 \n",
- " 33.239367 \n",
+ " 34.994735 \n",
" \n",
" \n",
" 23 \n",
" 50 \n",
" 80.7139 \n",
" 28.7269 \n",
- " 34.818830 \n",
+ " 32.510465 \n",
" \n",
" \n",
" 24 \n",
" 100 \n",
" 78.5425 \n",
" 30.0131 \n",
- " 30.640971 \n",
+ " 33.118576 \n",
" \n",
" \n",
" 25 \n",
" 100 \n",
" 78.5425 \n",
" 28.7269 \n",
- " 34.394051 \n",
+ " 34.556056 \n",
" \n",
" \n",
" 26 \n",
" 100 \n",
" 79.1423 \n",
" 30.0131 \n",
- " 33.060688 \n",
+ " 33.539166 \n",
" \n",
" \n",
" 27 \n",
" 100 \n",
" 79.1423 \n",
" 28.7269 \n",
- " 30.693648 \n",
+ " 33.368916 \n",
" \n",
" \n",
" 28 \n",
" 100 \n",
" 80.7139 \n",
" 30.0131 \n",
- " 31.607544 \n",
+ " 33.915107 \n",
" \n",
" \n",
" 29 \n",
" 100 \n",
" 80.7139 \n",
" 28.7269 \n",
- " 33.201887 \n",
+ " 30.555867 \n",
" \n",
" \n",
"\n",
@@ -3758,39 +3758,39 @@
],
"text/plain": [
" Depth Latitude Longitude Salinity\n",
- "0 0 78.5425 30.0131 33.198072\n",
- "1 0 78.5425 28.7269 32.967388\n",
- "2 0 79.1423 30.0131 33.766404\n",
- "3 0 79.1423 28.7269 33.862252\n",
- "4 0 80.7139 30.0131 34.866701\n",
- "5 0 80.7139 28.7269 34.801337\n",
- "6 10 78.5425 30.0131 31.713436\n",
- "7 10 78.5425 28.7269 34.493179\n",
- "8 10 79.1423 30.0131 30.428292\n",
- "9 10 79.1423 28.7269 31.707314\n",
- "10 10 80.7139 30.0131 30.655009\n",
- "11 10 80.7139 28.7269 34.506142\n",
- "12 20 78.5425 30.0131 34.876919\n",
- "13 20 78.5425 28.7269 32.274340\n",
- "14 20 79.1423 30.0131 34.076707\n",
- "15 20 79.1423 28.7269 32.099654\n",
- "16 20 80.7139 30.0131 33.985081\n",
- "17 20 80.7139 28.7269 30.194662\n",
- "18 50 78.5425 30.0131 31.446841\n",
- "19 50 78.5425 28.7269 32.130476\n",
- "20 50 79.1423 30.0131 34.057877\n",
- "21 50 79.1423 28.7269 32.506849\n",
- "22 50 80.7139 30.0131 33.239367\n",
- "23 50 80.7139 28.7269 34.818830\n",
- "24 100 78.5425 30.0131 30.640971\n",
- "25 100 78.5425 28.7269 34.394051\n",
- "26 100 79.1423 30.0131 33.060688\n",
- "27 100 79.1423 28.7269 30.693648\n",
- "28 100 80.7139 30.0131 31.607544\n",
- "29 100 80.7139 28.7269 33.201887"
+ "0 0 78.5425 30.0131 32.500291\n",
+ "1 0 78.5425 28.7269 30.470107\n",
+ "2 0 79.1423 30.0131 32.986480\n",
+ "3 0 79.1423 28.7269 34.677304\n",
+ "4 0 80.7139 30.0131 32.532454\n",
+ "5 0 80.7139 28.7269 34.603980\n",
+ "6 10 78.5425 30.0131 30.693671\n",
+ "7 10 78.5425 28.7269 34.611547\n",
+ "8 10 79.1423 30.0131 32.143802\n",
+ "9 10 79.1423 28.7269 31.304417\n",
+ "10 10 80.7139 30.0131 30.732836\n",
+ "11 10 80.7139 28.7269 32.731361\n",
+ "12 20 78.5425 30.0131 31.281876\n",
+ "13 20 78.5425 28.7269 33.793631\n",
+ "14 20 79.1423 30.0131 33.943875\n",
+ "15 20 79.1423 28.7269 31.368625\n",
+ "16 20 80.7139 30.0131 30.620448\n",
+ "17 20 80.7139 28.7269 32.272004\n",
+ "18 50 78.5425 30.0131 34.451398\n",
+ "19 50 78.5425 28.7269 30.676179\n",
+ "20 50 79.1423 30.0131 32.713458\n",
+ "21 50 79.1423 28.7269 32.465119\n",
+ "22 50 80.7139 30.0131 34.994735\n",
+ "23 50 80.7139 28.7269 32.510465\n",
+ "24 100 78.5425 30.0131 33.118576\n",
+ "25 100 78.5425 28.7269 34.556056\n",
+ "26 100 79.1423 30.0131 33.539166\n",
+ "27 100 79.1423 28.7269 33.368916\n",
+ "28 100 80.7139 30.0131 33.915107\n",
+ "29 100 80.7139 28.7269 30.555867"
]
},
- "execution_count": 33,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -3834,35 +3834,35 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 15,
"id": "326cd6d1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([[[33.19807212, 32.96738805],\n",
- " [33.76640441, 33.86225242],\n",
- " [34.86670052, 34.80133678]],\n",
+ "array([[[32.50029145, 30.47010654],\n",
+ " [32.98648027, 34.67730424],\n",
+ " [32.53245412, 34.60398017]],\n",
"\n",
- " [[31.71343578, 34.49317854],\n",
- " [30.4282916 , 31.70731397],\n",
- " [30.65500853, 34.50614172]],\n",
+ " [[30.69367084, 34.61154746],\n",
+ " [32.14380155, 31.30441667],\n",
+ " [30.7328361 , 32.73136052]],\n",
"\n",
- " [[34.8769191 , 32.27434012],\n",
- " [34.07670683, 32.09965368],\n",
- " [33.98508066, 30.19466169]],\n",
+ " [[31.28187595, 33.79363095],\n",
+ " [33.94387511, 31.36862496],\n",
+ " [30.6204477 , 32.27200395]],\n",
"\n",
- " [[31.44684121, 32.13047582],\n",
- " [34.05787675, 32.50684866],\n",
- " [33.23936714, 34.81882987]],\n",
+ " [[34.45139803, 30.67617875],\n",
+ " [32.7134583 , 32.46511861],\n",
+ " [34.99473467, 32.51046499]],\n",
"\n",
- " [[30.64097065, 34.39405084],\n",
- " [33.06068836, 30.69364772],\n",
- " [31.60754384, 33.20188711]]])"
+ " [[33.1185761 , 34.55605586],\n",
+ " [33.53916637, 33.36891646],\n",
+ " [33.91510697, 30.55586746]]])"
]
},
- "execution_count": 34,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -3874,7 +3874,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 16,
"id": "7a03dd71",
"metadata": {},
"outputs": [
@@ -4252,36 +4252,36 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8\n",
- " temperature (latitude, longitude, depth) int64 26 23 21 20 ... 28 24 26\n",
- " salinity (depth, latitude, longitude) float64 33.2 32.97 ... 33.2 "
+ " wind_speed (latitude, longitude) int64 5 0 3 2 9 3\n",
+ " temperature (latitude, longitude, depth) int64 20 29 24 21 ... 23 21 23\n",
+ " salinity (depth, latitude, longitude) float64 32.5 30.47 ... 30.56
"
],
"text/plain": [
"\n",
@@ -4292,12 +4292,12 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8\n",
- " temperature (latitude, longitude, depth) int64 26 23 21 20 ... 28 24 26\n",
- " salinity (depth, latitude, longitude) float64 33.2 32.97 ... 33.2"
+ " wind_speed (latitude, longitude) int64 5 0 3 2 9 3\n",
+ " temperature (latitude, longitude, depth) int64 20 29 24 21 ... 23 21 23\n",
+ " salinity (depth, latitude, longitude) float64 32.5 30.47 ... 30.56"
]
},
- "execution_count": 35,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -4313,28 +4313,38 @@
"id": "38edbeeb",
"metadata": {},
"source": [
- "## Variable attributes\n",
+ "## Metadata (attributes)\n",
"\n",
"Hurrah! Your data are in the xarray dataset object. But are you ready to export a NetCDF file? Will that file be compliant with the FAIR principles? No! We need metadata.\n",
"\n",
- "Variable attributes are metadata that describe the variables.\n",
+ "Variable attributes are metadata that describe the variables. Global attributes are metadata that describe the file as a whole. You can find a list of attributes here provided by the Climate & Forecast (CF) conventions:\n",
+ "https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#attribute-appendix\n",
+ "\n",
+ "The table in the link above specifies which attributes can be used as global attributes and which can be used as variable attributes. Some attributes can be used as either.\n",
"\n",
- "The Climate & Forecast (CF) conventions dictate which variable attributes should be included for different data. \n",
+ "The CF conventions are light on discovery metadata. Discovery metadata are metadata that can be used to find data. For example, when and where the data were collected and by whom, some keywords etc. So we also use the ACDD convention - The Attribute Convention for Data Discovery.\n",
+ "https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3\n",
+ "\n",
+ "This is a list of recommendations. SIOS advises that people follow the requirements of the Arctic Data Centre, here. Requirements are a more effective way to encourage consistency than recommendations. These requirements are compliant with the ACDD conventions:\n",
+ "https://adc.met.no/node/4\n",
"\n",
- "https://cfconventions.org/\n",
+ "### Variable attributes\n",
"\n",
- "For example for latitude:\n",
+ "The CF conventions provide examples of which variable attributes you should be including in your CF-NetCDF file. For example for latitude:\n",
"https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#latitude-coordinate\n",
"\n",
"Let's replicate that setup.\n",
"\n",
- "These attributes are well documented on the ACDD convention host page, here: https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#Highly_Recommended_Variable_Attributes\n",
- "\n"
+ "Additionally, the ACDD convention recommends that and attribute *coverage_content_type* is also added, which is used to state whether the data are *modelResult*, *physicalMeasurement* or something else, see the list here: \n",
+ "https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#Highly_Recommended_Variable_Attributes\n",
+ "\n",
+ "And remember we might want to select additional applicable attributes for our variables from this section of the CF conventions:\n",
+ "https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#attribute-appendix\n"
]
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 17,
"id": "c5fb71d5",
"metadata": {},
"outputs": [
@@ -4712,7 +4722,7 @@
" standard_name: latitude\n",
" long_name: latitude\n",
" units: degrees_north\n",
- " coverage_content_type: coordinate
"
+ " coverage_content_type: coordinate"
],
"text/plain": [
"\n",
@@ -4726,7 +4736,7 @@
" coverage_content_type: coordinate"
]
},
- "execution_count": 36,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -4747,7 +4757,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 25,
"id": "0f75eb79",
"metadata": {},
"outputs": [
@@ -5125,36 +5135,50 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8\n",
- " temperature (latitude, longitude, depth) int64 26 23 21 20 ... 28 24 26\n",
- " salinity (depth, latitude, longitude) float64 33.2 32.97 ... 33.2 "
+ " wind_speed (latitude, longitude) int64 5 0 3 2 9 3\n",
+ " temperature (latitude, longitude, depth) int64 20 29 24 21 ... 23 21 23\n",
+ " salinity (depth, latitude, longitude) float64 32.5 30.47 ... 30.56\n",
+ "Attributes: (12/26)\n",
+ " id: your_unique_id_here\n",
+ " naming_authority: institution that provides the id\n",
+ " title: my title\n",
+ " summary: analagous to an abstract in the paper, describing t...\n",
+ " creator_type: person\n",
+ " creator_name: John Smith; Luke Marsden\n",
+ " ... ...\n",
+ " geospatial_lat_min: 78.5425\n",
+ " geospatial_lat_max: 80.7139\n",
+ " geospatial_lon_min: 28.7269\n",
+ " geospatial_lon_max: 30.0131\n",
+ " date_created: 2024-01-19T10:23:56Z\n",
+ " history: File created at 2024-01-19T10:23:56Z using xarray i..."
],
"text/plain": [
"\n",
@@ -5165,12 +5189,26 @@
" * longitude (longitude) float64 30.01 28.73\n",
"Data variables:\n",
" chlorophyll_a (depth) float64 21.5 18.5 17.6 16.8 15.2\n",
- " wind_speed (latitude, longitude) int64 7 6 6 4 2 8\n",
- " temperature (latitude, longitude, depth) int64 26 23 21 20 ... 28 24 26\n",
- " salinity (depth, latitude, longitude) float64 33.2 32.97 ... 33.2"
+ " wind_speed (latitude, longitude) int64 5 0 3 2 9 3\n",
+ " temperature (latitude, longitude, depth) int64 20 29 24 21 ... 23 21 23\n",
+ " salinity (depth, latitude, longitude) float64 32.5 30.47 ... 30.56\n",
+ "Attributes: (12/26)\n",
+ " id: your_unique_id_here\n",
+ " naming_authority: institution that provides the id\n",
+ " title: my title\n",
+ " summary: analagous to an abstract in the paper, describing t...\n",
+ " creator_type: person\n",
+ " creator_name: John Smith; Luke Marsden\n",
+ " ... ...\n",
+ " geospatial_lat_min: 78.5425\n",
+ " geospatial_lat_max: 80.7139\n",
+ " geospatial_lon_min: 28.7269\n",
+ " geospatial_lon_max: 30.0131\n",
+ " date_created: 2024-01-19T10:23:56Z\n",
+ " history: File created at 2024-01-19T10:23:56Z using xarray i..."
]
},
- "execution_count": 37,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -5195,6 +5233,24 @@
" 'units': 'μg m-3',\n",
" 'coverage_content_type': 'physicalMeasurement',\n",
"}\n",
+ "xrds['salinity'].attrs = {\n",
+ " 'standard_name': 'sea_water_salinity',\n",
+ " 'long_name': 'a description about each variable in your own words',\n",
+ " 'units': '1e-3',\n",
+ " 'coverage_content_type': 'physicalMeasurement',\n",
+ "}\n",
+ "xrds['temperature'].attrs = {\n",
+ " 'standard_name': 'sea_water_temperature',\n",
+ " 'long_name': 'a description about each variable in your own words',\n",
+ " 'units': 'degreesC',\n",
+ " 'coverage_content_type': 'physicalMeasurement',\n",
+ "}\n",
+ "xrds['wind_speed'].attrs = {\n",
+ " 'standard_name': 'wind_speed',\n",
+ " 'long_name': 'a description about each variable in your own words',\n",
+ " 'units': 'm s-1',\n",
+ " 'coverage_content_type': 'physicalMeasurement',\n",
+ "}\n",
"# And so on for each variable..\n",
"\n",
"xrds\n"
@@ -5205,20 +5261,19 @@
"id": "a414e2d9",
"metadata": {},
"source": [
- "## Global attributes\n",
+ "### Global attributes\n",
"\n",
- "The CF conventions are light on discovery metadata. Discovery metadata are metadata that can be used to find data. For example, when and where the data were collected and by whom, some keywords etc. So we also use the ACDD convention - The Attribute Convention for Data Discovery.\n",
- "https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3\n",
+ "As mentioned above, the requirements of the Arctic Data Centre for global attributes (based on the ACDD convention) can serve as a guide for which global attributes you should be including. https://adc.met.no/node/4\n",
"\n",
- "This is a list of recommendations. SIOS advises that people follow the requirements of the Arctic Data Centre, here. Requirements are a more effective way to encourage consistency than recommendations. These requirements are compliant with the ACDD conventions:\n",
- "https://adc.met.no/node/4\n",
+ "And remember we might want to select additional applicable global attributes from this section of the CF conventions:\n",
+ "https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#attribute-appendix\n",
"\n",
- "Go through and add each required attribute and any others you wish to. You are also welcome to add any custom attributes on top of these requirements. Similarly to variable attributes, this can either be done one by one or all in one in a dictionary. "
+ "Go through and add each required attribute and any others you wish to. You are also welcome to add any global attributes from the CF conventions as well as any custom attributes on top of these requirements. Similarly to variable attributes, this can either be done one by one or all in one in a dictionary. "
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 26,
"id": "e8ab76b7",
"metadata": {},
"outputs": [],
@@ -5238,8 +5293,8 @@
" 'creator_url': '; https://orcid.org/0000-0002-9746-544X', # OrcID is best practice if possible. Other URLs okay, or leave blank for authors that don't have one.\n",
" 'time_coverage_start': '2020-05-10T08:14:58Z',\n",
" 'time_coverage_end': '2020-05-10T11:51:12Z',\n",
- " 'keywords': '',\n",
- " 'keywords_vocabulary': 'GCMD:GCMD Keywords',\n",
+ " 'keywords': 'wind_speed, sea_water_temperature, sea_water_salinity, mass_concentration_of_chlorophyll_a_in_sea_water',\n",
+ " 'keywords_vocabulary': 'CF:NetCDF COARDS Climate and Forecast Standard Names',\n",
" 'institution': 'Your Institution',\n",
" 'publisher_name': 'Publisher Name', # Data centre where your data will be published\n",
" 'publisher_email': 'publisher@email.com',\n",
@@ -5260,7 +5315,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 27,
"id": "7b66deba",
"metadata": {},
"outputs": [],
@@ -5281,7 +5336,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 28,
"id": "5771cbc3",
"metadata": {},
"outputs": [
@@ -5299,8 +5354,8 @@
" 'creator_url': '; https://orcid.org/0000-0002-9746-544X',\n",
" 'time_coverage_start': '2020-05-10T08:14:58Z',\n",
" 'time_coverage_end': '2020-05-10T11:51:12Z',\n",
- " 'keywords': '',\n",
- " 'keywords_vocabulary': 'GCMD:GCMD Keywords',\n",
+ " 'keywords': 'wind_speed, sea_water_temperature, sea_water_salinity, mass_concentration_of_chlorophyll_a_in_sea_water',\n",
+ " 'keywords_vocabulary': 'CF:NetCDF COARDS Climate and Forecast Standard Names',\n",
" 'institution': 'Your Institution',\n",
" 'publisher_name': 'Publisher Name',\n",
" 'publisher_email': 'publisher@email.com',\n",
@@ -5312,11 +5367,11 @@
" 'geospatial_lat_max': 80.7139,\n",
" 'geospatial_lon_min': 28.7269,\n",
" 'geospatial_lon_max': 30.0131,\n",
- " 'date_created': '2024-01-03T12:50:57Z',\n",
- " 'history': 'File created at 2024-01-03T12:50:57Z using xarray in Python by John Smith'}"
+ " 'date_created': '2024-01-19T10:26:01Z',\n",
+ " 'history': 'File created at 2024-01-19T10:26:01Z using xarray in Python by John Smith'}"
]
},
- "execution_count": 40,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -5335,7 +5390,7 @@
"source": [
"## Exporting your xarray object to a NetCDF file\n",
"\n",
- "Finally you need to export your data. Firstly, you need to specify how each variable should be encoded.\n",
+ "Finally you need to export your data. Firstly, you can specify how each variable should be encoded. This is an optional step - it will be assumed from the data type in python if you don't specify the encoding manually.\n",
"\n",
"* Fill values: The fill value will be used to fill in any missing values. It should be an unrealistic value that will obviously show up as a spike in the data when plotted. The _FillValue is a special variable attribute that some softwares can understand, so when one opens the data, the fill values are replaced by NaNs again.\n",
"* dtype: What type of data does your variable contain? Characters? Integers? Decimal numbers? Some commonly used *dtype* values are:\n",
@@ -5346,12 +5401,12 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 29,
"id": "7081b03d",
"metadata": {},
"outputs": [],
"source": [
- "# Specifiy encoding\n",
+ "# Specifiy encoding - you can write a file without this and encoding will be assumed, but you should check in any case.\n",
"myencoding = {\n",
" 'depth': {\n",
" 'dtype': 'int32',\n",
@@ -5418,7 +5473,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/_build/html/_sources/05_how_to_structure_your_NetCDF_file_or_files.ipynb b/_build/html/_sources/05_how_to_structure_your_NetCDF_file_or_files.ipynb
index e2111a0..e0a4e6d 100644
--- a/_build/html/_sources/05_how_to_structure_your_NetCDF_file_or_files.ipynb
+++ b/_build/html/_sources/05_how_to_structure_your_NetCDF_file_or_files.ipynb
@@ -14,8 +14,6 @@
"\n",
"There are far too many to go through all the examples. We will just look at a few. But the aim is that this tutorial will teach you what you should be thinking about when deciding to structure your data, so you can tackle any setup you have.\n",
"\n",
- "NOTE TO SELF. IN THE VIDEO ONLY SHOW ANIMATIONS AND EXPLAIN AND REDUCE THE AMOUNT OF CODE SHOWN. OR THE VIDEO WILL BE TOO LONG. MENTION THEY CAN LOOK AT EXAMPLES IN THE NOTEBOOK ATTACHED.\n",
- "\n",
"## What to consider\n",
"\n",
"Whenever we are creating one or many CF-NetCDF files, we should try to make sure they are \n",
@@ -27,12 +25,55 @@
"* Metadata should describe the data to an appropriate level of *granularity*. Finer granulairty means there are more files each with there own metadata to describe the data they contain, so the data are described at a higher resolution.\n",
"* The data and metadata should be easy to read, write, understand and use.\n",
"\n",
+ "## An appropriate level of granularity for your data\n",
+ "\n",
+ "How many files CF-NetCDF files should you divide your data into?\n",
+ "\n",
+ "There are many things to consider here. Quite often the granularity of data is not fine enough, as data creators encode their discovery level metadata (global attributes) considering only what is useful for human interpretation and not for machine action. Finer granularity data is easier to build services upon, such as on-the-fly visualisation of datasets, or aggregates datasets together. Finer granularity data allows computers do the data preparation so that humans can focus on interpretation and analysis.\n",
+ "\n",
+ "The data creator should also consider what granularity is useful for the data users. This will vary on a case-by-case basis. Perhaps you have a long time series of data, spanning many years. Many data users might not be interested in the entire time series, but might want to access data for a single day or month. Therefore, you might consider dividing your data into daily or monthly files to simplify data use. You could also separate different data variables into different files if you envisage that each file might attract different data users.\n",
+ "\n",
+ "Data from different stations should also be divided into different datasets. Imagine we have set up 6 weather stations and want to publish the fictitious data.\n",
+ "\n",
+ "\n",
+ "\n",
+ "We have 2 options for how to struture our data.\n",
+ "1. Include all 6 time series in one NetCDF file\n",
+ "2. Create a separate time series for each profile.\n",
+ "\n",
+ "The best practice is to create a separate file for each time series. This might suprise some of you, and the idea of creating dozens or maybe hundreds of files might sound daunting or even ridiculous.\n",
+ "\n",
+ "Firstly, you shouldn't think of your data in isolation. **Your data are a contribution to a much larger network of data**. Imagine that each marker below is a weather station, and the colour is the person or project who is responsible for the data. Imagine you are a looking for data inside the red square. For many data users, the project that the data were collected as part of is irrelevant. Many users want to access data from all the projects and combine them.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Sure, it is easier for you and the people in your project to group the data by project. But you already have the data and can group them how you like between yourselves. When it comes to publishing data, we need to think about the data users outside of the project. Data, if published correctly, could be used for decades in to the future, perhaps longer in some cases. The majority of your data users might have never heard of your project!\n",
+ "\n",
+ "Creating individual files for each time series has another advantage; each file can have its own set of global attributes! This has many advantages, including:\n",
+ "* If one time series was processed differently, this can be described.\n",
+ "* Each file has its own bounding coordinates. Imagine you go to a data centre or data access portal and you are looking for data on a map. A file with many time series will show up as a box on a map. Without opening the file up, it is difficult to see which locations have been sampled. What if a potential data user is only interested in a certain location within the bounding box? A file that contains a single time series would show up as a point on a map.\n",
+ "* Each time series can be published separately and have its own citation - including different authors if that is desired. If someone uses only one or a few of the files, they can only cite the relevant ones. It is then clear which data they have used in their work. Many good data centres are now able to provide you with a 'parent' page for your entire data collection with its own DOI and recommended citation that one can cite if they are using most or all of the files.\n",
+ "\n",
+ "Some might see it as a hassle to download and open lots of files. Some services are already being developed to allow data to download data from many similar files into a single file. More services will surely be available in the future. When we are publishing data, we should think not only about what is possible now, but what will be possible in the future. \n",
+ "\n",
+ "But for now, in the next few tutorials, we will look at how you can create and access multiple files quickly and easily today using Python!\n",
+ "\n",
+ "Need some convincing? Let's look at another example for vertical profiles (e.g. CTD data)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0851d141",
+ "metadata": {},
+ "source": [
+ "## Multiple profiles\n",
+ "\n",
"Let's import the modules we will use for this tutorial."
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "67135b6b",
"metadata": {},
"outputs": [],
@@ -43,18 +84,91 @@
},
{
"cell_type": "markdown",
- "id": "3994ec7f",
+ "id": "e05df89e-437d-4c98-b4cc-58ef297d76a1",
+ "metadata": {},
+ "source": [
+ "Imagine we have the following 4 depth profiles:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "3e189c74",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "depth = [10,20,30]\n",
+ "temperature_1 = [21.42, 21.21, 20.98]\n",
+ "temperature_2 = [22.08, 21.56, 20.42]\n",
+ "temperature_3 = [22.11, 21.38, 20.12]\n",
+ "temperature_4 = [21.87, 21.01, 19.99]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b28454d",
"metadata": {},
"source": [
- "## One profile or time series\n",
+ "Many people publish all the data from a single cruise in one file. But we have just learned that this is not the best practice! \n",
"\n",
- "The simplest setup we can have (except a single point) is data with a single dimension. This could be a time series or a vertical profile. Below is some basic code to add an depth dimension, a coordinate variable and data variable (sea water temperature) to a NetCDF file. "
+ "If we wanted to publish all the profiles in one file, we need another dimension to distinguish between the profiles. Time is a sensible choice."
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "77cde279",
+ "execution_count": 3,
+ "id": "6ed61391",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "time = [0,1,2,3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "518de385",
+ "metadata": {},
+ "source": [
+ "But now we need to made a 2D array of our temperature values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "f33df992",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[[21.42, 21.21, 20.98],\n",
+ " [22.08, 21.56, 20.42],\n",
+ " [22.11, 21.38, 20.12],\n",
+ " [21.87, 21.01, 19.99]]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "temperature_2d = [temperature_1, temperature_2, temperature_3, temperature_4]\n",
+ "temperature_2d"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b571ebb4",
+ "metadata": {},
+ "source": [
+ "We can fit the data into our xarray object like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "bb09ff8a",
"metadata": {},
"outputs": [
{
@@ -424,37 +538,39 @@
" fill: currentColor;\n",
"}\n",
"<xarray.Dataset>\n",
- "Dimensions: (depth: 3)\n",
+ "Dimensions: (time: 4, depth: 3)\n",
"Coordinates:\n",
" * depth (depth) int64 10 20 30\n",
+ " * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " sea_water_temperature (depth) float64 21.42 21.21 20.98
"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
],
"text/plain": [
"\n",
- "Dimensions: (depth: 3)\n",
+ "Dimensions: (time: 4, depth: 3)\n",
"Coordinates:\n",
" * depth (depth) int64 10 20 30\n",
+ " * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " sea_water_temperature (depth) float64 21.42 21.21 20.98"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
]
},
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "depth = [10,20,30]\n",
- "temperature = [21.42, 21.21, 20.98]\n",
- "\n",
- "\n",
"xrds = xr.Dataset(\n",
" coords = {\n",
- " 'depth': depth\n",
+ " 'depth': depth,\n",
+ " 'time': time\n",
" },\n",
" data_vars = {\n",
- " 'sea_water_temperature': ('depth',temperature)\n",
+ " 'sea_water_temperature': (['time','depth'],temperature_2d)\n",
" }\n",
")\n",
"\n",
@@ -463,20 +579,18 @@
},
{
"cell_type": "markdown",
- "id": "8b6dd359",
+ "id": "1688db2d",
"metadata": {},
"source": [
- "## A time series for instruments that move\n",
- "\n",
- "If we tried to use latitude, longitude, depth/elevation and time dimensions for our data variables, there would be a lot of empty space! Not all coordinates have to be coordinate variables with their own dimensions! \n",
+ "But don't forget that we now also need to include the latitude and longitude as variables. Latitude and longitude aren't required variables if your NetCDF file contains a single vertical profile - they can be written as global attributes instead. \n",
"\n",
- "In this case we can use time as our dimension, and everything else as 1D variables."
+ "But in this case, latitude and longitude can be 1D variables with a dimension of time."
]
},
{
"cell_type": "code",
- "execution_count": 20,
- "id": "c5550b5c",
+ "execution_count": 6,
+ "id": "347a8ae1",
"metadata": {},
"outputs": [
{
@@ -846,74 +960,66 @@
" fill: currentColor;\n",
"}\n",
"<xarray.Dataset>\n",
- "Dimensions: (time: 10)\n",
+ "Dimensions: (time: 4, depth: 3)\n",
"Coordinates:\n",
- " * time (time) int64 0 1 2 3 4 5 6 7 8 9\n",
+ " * depth (depth) int64 10 20 30\n",
+ " * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " longitude (time) float64 -66.7 -66.64 -66.58 ... -66.76 -66.85\n",
- " latitude (time) float64 6.497 6.476 6.558 ... 6.407 6.413\n",
- " depth (time) float64 6.639 5.89 5.029 ... 6.213 6.255 6.559\n",
- " sea_water_temperature (time) float64 10.77 10.15 9.69 ... 10.25 10.25 10.56
"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99\n",
+ " latitude (time) float64 78.51 79.28 79.98 80.42\n",
+ " longitude (time) float64 30.42 30.36 30.5 30.42"
],
"text/plain": [
"\n",
- "Dimensions: (time: 10)\n",
+ "Dimensions: (time: 4, depth: 3)\n",
"Coordinates:\n",
- " * time (time) int64 0 1 2 3 4 5 6 7 8 9\n",
+ " * depth (depth) int64 10 20 30\n",
+ " * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " longitude (time) float64 -66.7 -66.64 -66.58 ... -66.76 -66.85\n",
- " latitude (time) float64 6.497 6.476 6.558 ... 6.407 6.413\n",
- " depth (time) float64 6.639 5.89 5.029 ... 6.213 6.255 6.559\n",
- " sea_water_temperature (time) float64 10.77 10.15 9.69 ... 10.25 10.25 10.56"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99\n",
+ " latitude (time) float64 78.51 79.28 79.98 80.42\n",
+ " longitude (time) float64 30.42 30.36 30.5 30.42"
]
},
- "execution_count": 20,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "time = [0,1,2,3,4,5,6,7,8,9]\n",
- "latitude = [6.4970, 6.4756, 6.5584, 6.5087, 6.4815, 6.5029, 6.4279, 6.3409, 6.4066, 6.4134]\n",
- "longitude = [-66.6972, -66.6399, -66.5838, -66.6357, -66.7313, -66.686, -66.7192, -66.6737, -66.7594, -66.8479]\n",
- "depth = [6.6388, 5.8899, 5.0289, 4.7409, 5.5595, 5.4532, 5.7104, 6.2129, 6.2548, 6.5595]\n",
- "temperature = [10.77, 10.15, 9.69, 9.46, 9.06, 9.97, 9.66, 10.25, 10.25, 10.56]\n",
+ "latitude = [78.5142,79.2833,79.9840,80.4228]\n",
+ "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
"\n",
"xrds = xr.Dataset(\n",
" coords = {\n",
+ " 'depth': depth,\n",
" 'time': time\n",
" },\n",
" data_vars = {\n",
- " 'longitude': ('time', longitude),\n",
+ " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
" 'latitude': ('time', latitude),\n",
- " 'depth': ('time', depth),\n",
- " 'sea_water_temperature': ('time', temperature)\n",
+ " 'longitude': ('time', longitude)\n",
" }\n",
- " )\n",
+ ")\n",
"\n",
- "xrds\n"
+ "xrds"
]
},
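If you want latitude and longitude treated as auxiliary coordinates rather than ordinary data variables - so that, as the CF conventions expect, the data variable carries a coordinates attribute like "latitude longitude" in the written file - one way is to promote them after building the dataset. A minimal sketch, with an illustrative filename:

# Promote latitude and longitude from data variables to non-dimension
# coordinates. On export, xarray records them in the 'coordinates'
# attribute of sea_water_temperature, as CF expects.
xrds = xrds.set_coords(['latitude', 'longitude'])
xrds.to_netcdf('profiles.nc')  # illustrative filename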
{
"cell_type": "markdown",
- "id": "9b5e067e",
+ "id": "a840d640",
"metadata": {},
"source": [
- "## Data on a multidimensional grid\n",
- "\n",
- "Some data sit on a grid with multiple dimensions. Some examples are satellite data or data output from certain models. Imagine we have sea water temperature data exported from a model. There are 4 dimensions; latitude, longitude, depth and time. In this case, let's imagine that the model exports data to a regular grid.\n",
- "\n",
- "I am going to use random values! If you have data, your first job is get your data into a multidimensional array. You will find some help in tutorial 04 on how to convert tabular data to a multidimensional array. ChatGPT can also be very helpful - but make sure the values are going in to the right places. "
+ "This NetCDF file has more variables and dimensions than one that includes only a single profile. This leaves more room for variation in how people might structure their data. Sometimes you see people including latitude and longitude as coordinate variables with themselves as their dimensions, like below. "
]
},
{
"cell_type": "code",
- "execution_count": 11,
- "id": "2349ca67",
+ "execution_count": 7,
+ "id": "2ca952c4",
"metadata": {},
"outputs": [
{
@@ -1283,165 +1389,68 @@
" fill: currentColor;\n",
"}\n",
"<xarray.Dataset>\n",
- "Dimensions: (time: 4, depth: 1001, latitude: 71, longitude: 42)\n",
+ "Dimensions: (time: 4, depth: 3, latitude: 4, longitude: 4)\n",
"Coordinates:\n",
+ " * latitude (latitude) float64 78.51 79.28 79.98 80.42\n",
+ " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
+ " * depth (depth) int64 10 20 30\n",
" * time (time) int64 0 1 2 3\n",
- " * depth (depth) int64 0 1 2 3 4 5 ... 996 997 998 999 1000\n",
- " * latitude (latitude) float64 75.0 75.1 75.2 ... 81.8 81.9 82.0\n",
- " * longitude (longitude) float64 28.0 28.1 28.2 ... 31.9 32.0 32.1\n",
"Data variables:\n",
- " sea_water_temperature (time, depth, latitude, longitude) float64 1.737 ....
"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
],
"text/plain": [
"\n",
- "Dimensions: (time: 4, depth: 1001, latitude: 71, longitude: 42)\n",
+ "Dimensions: (time: 4, depth: 3, latitude: 4, longitude: 4)\n",
"Coordinates:\n",
+ " * latitude (latitude) float64 78.51 79.28 79.98 80.42\n",
+ " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
+ " * depth (depth) int64 10 20 30\n",
" * time (time) int64 0 1 2 3\n",
- " * depth (depth) int64 0 1 2 3 4 5 ... 996 997 998 999 1000\n",
- " * latitude (latitude) float64 75.0 75.1 75.2 ... 81.8 81.9 82.0\n",
- " * longitude (longitude) float64 28.0 28.1 28.2 ... 31.9 32.0 32.1\n",
"Data variables:\n",
- " sea_water_temperature (time, depth, latitude, longitude) float64 1.737 ...."
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
]
},
- "execution_count": 11,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "depth = np.arange(0,1001,1) # 0 to 1000, incrementing by 1\n",
- "latitude = np.arange(75,82.1,0.1) # 75 to 82, incrementing by 0.1\n",
- "longitude = np.arange(28,32.1,0.1) # 28 to 32, incrementing by 0.1\n",
- "time = [0,1,2,3]\n",
- "\n",
- "# Create 4D array of random temperature values between 0 and 2\n",
- "temperature = np.random.uniform(0, 2, size=(len(time), len(depth), len(latitude), len(longitude)))\n",
+ "latitude = [78.5142,79.2833,79.9840,80.4228]\n",
+ "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
"\n",
"xrds = xr.Dataset(\n",
" coords = {\n",
- " 'time': time,\n",
" 'depth': depth,\n",
- " 'latitude': latitude,\n",
- " 'longitude': longitude\n",
- " },\n",
+ " 'time': time\n",
+ " },\n",
" data_vars = {\n",
- " 'sea_water_temperature': (['time', 'depth', 'latitude', 'longitude'], temperature)\n",
+ " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
+ " 'latitude': ('latitude', latitude),\n",
+ " 'longitude': ('longitude', longitude)\n",
" }\n",
- " )\n",
+ ")\n",
"\n",
"xrds"
]
},
{
"cell_type": "markdown",
- "id": "74547a11",
+ "id": "02173350",
"metadata": {},
"source": [
- "## Random points in space and time (ungridded data)\n",
- "\n",
- "Often data don't fit on a regular grid. What then?\n",
- "\n",
- "You could bin your data to a regular grid and specify what you have done in the metadata. You (the data creator) are best person to know if this is a suitable thing to do for your own data. However, in many cases, you will not want to bin your data.\n",
- "\n",
- "The best practice is to use a common 'counter' dimension for all your coordinates. You can think of this as an index or counter of your data points.\n",
+ "If we are being pedantic, we could say that latitude and longitude are not explicitely linked to time in this case. One has to make the assumption that they are linked. \n",
"\n",
- "This method is quite easy to write. You just need to create 1D arrays for all your coordinates and data variables."
+ "It is also easier to make mistakes when creating the files in this case - for example by accidentally encoding latitude with the wrong length. "
]
},
{
"cell_type": "code",
- "execution_count": 36,
- "id": "3810c917",
+ "execution_count": 8,
+ "id": "a58011a7",
"metadata": {},
"outputs": [
{
@@ -1811,126 +1820,48 @@
" fill: currentColor;\n",
"}\n",
"<xarray.Dataset>\n",
- "Dimensions: (node: 100)\n",
+ "Dimensions: (time: 4, depth: 3, latitude: 3, longitude: 4)\n",
"Coordinates:\n",
- " * node (node) int64 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99\n",
+ " * latitude (latitude) float64 78.51 79.28 79.98\n",
+ " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
+ " * depth (depth) int64 10 20 30\n",
+ " * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " data_var (node) float64 0.01378 0.1594 0.6346 ... 0.6874 0.5476 0.182\n",
- " latitude (node) float64 -23.37 69.95 25.08 55.56 ... 9.822 41.32 -23.22\n",
- " longitude (node) float64 -136.3 108.5 15.2 -90.64 ... 173.6 -10.53 -2.535
"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99
"
],
"text/plain": [
"\n",
- "Dimensions: (node: 100)\n",
+ "Dimensions: (time: 4, depth: 3, latitude: 3, longitude: 4)\n",
"Coordinates:\n",
- " * node (node) int64 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99\n",
+ " * latitude (latitude) float64 78.51 79.28 79.98\n",
+ " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
+ " * depth (depth) int64 10 20 30\n",
+ " * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " data_var (node) float64 0.01378 0.1594 0.6346 ... 0.6874 0.5476 0.182\n",
- " latitude (node) float64 -23.37 69.95 25.08 55.56 ... 9.822 41.32 -23.22\n",
- " longitude (node) float64 -136.3 108.5 15.2 -90.64 ... 173.6 -10.53 -2.535"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
]
},
- "execution_count": 36,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "num_points = 100\n",
- "node = np.arange(0,num_points)\n",
- "\n",
- "# Generate some example irregular data\n",
- "latitudes = np.random.uniform(low=-90, high=90, size=num_points)\n",
- "longitudes = np.random.uniform(low=-180, high=180, size=num_points)\n",
- "data_values = np.random.rand(num_points) # Replace with your actual data\n",
+ "latitude = [78.5142,79.2833,79.9840]\n",
+ "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
"\n",
- "# Create an xarray Dataset\n",
"xrds = xr.Dataset(\n",
" coords = {\n",
- " 'node': node\n",
+ " 'depth': depth,\n",
+ " 'time': time\n",
" },\n",
" data_vars = {\n",
- " 'data_var': ('node', data_values),\n",
- " 'latitude': ('node', latitudes),\n",
- " 'longitude': ('node', longitudes),\n",
+ " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
+ " 'latitude': ('latitude', latitude),\n",
+ " 'longitude': ('longitude', longitude)\n",
" }\n",
")\n",
"\n",
@@ -1939,140 +1870,131 @@
},
{
"cell_type": "markdown",
- "id": "cc3a1a65",
- "metadata": {},
- "source": [
- "## Multiple time series\n",
- "\n",
- "Imagine we have set up 6 weather stations and want to publish the data.\n",
- "\n",
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "id": "392b5fae",
+ "id": "a2085ce5",
"metadata": {},
"source": [
- "We have 2 options for how to struture our data.\n",
- "1. Include all 6 time series in one NetCDF file\n",
- "2. Create a separate time series for each profile.\n",
- "\n",
- "The best practice is to create a separate file for each time series. This might suprise some of you, and the idea of creating dozens or maybe hundreds of files might sound daunting or even ridiculous.\n",
- "\n",
- "Firstly, you shouldn't think of your data in isolation. **Your data are a contribution to a much larger network of data**. Imagine that each marker below is a weather station, and the colour is the person or project who is responsible for the data. Imagine you are a looking for data inside the red square. For many data users, the project that the data were collected as part of is irrelevant. Many users want to access data from all the projects and combine them.\n",
+ "No error! It would be easy to overlook that we have only 3 latitude values and 4 longitude values.\n",
"\n",
- ""
+ "If you make assign a dimension of time to your latitude and longitude variables, an error is returned if, for example, your latitude variable is the wrong length."
]
},
{
- "cell_type": "markdown",
- "id": "ca50b898",
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "77ed2b0d",
"metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "conflicting sizes for dimension 'time': length 3 on 'latitude' and length 4 on {'time': 'sea_water_temperature', 'depth': 'sea_water_temperature'}",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[9], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m latitude \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m78.5142\u001b[39m,\u001b[38;5;241m79.2833\u001b[39m,\u001b[38;5;241m79.9840\u001b[39m]\n\u001b[1;32m 2\u001b[0m longitude \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m30.4231\u001b[39m,\u001b[38;5;241m30.3591\u001b[39m,\u001b[38;5;241m30.4994\u001b[39m, \u001b[38;5;241m30.4200\u001b[39m]\n\u001b[0;32m----> 4\u001b[0m xrds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mDataset(\n\u001b[1;32m 5\u001b[0m coords \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdepth\u001b[39m\u001b[38;5;124m'\u001b[39m: depth,\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m: time\n\u001b[1;32m 8\u001b[0m },\n\u001b[1;32m 9\u001b[0m data_vars \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msea_water_temperature\u001b[39m\u001b[38;5;124m'\u001b[39m: ([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdepth\u001b[39m\u001b[38;5;124m'\u001b[39m],temperature_2d),\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlatitude\u001b[39m\u001b[38;5;124m'\u001b[39m: (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, latitude),\n\u001b[1;32m 12\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlongitude\u001b[39m\u001b[38;5;124m'\u001b[39m: (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, longitude)\n\u001b[1;32m 13\u001b[0m }\n\u001b[1;32m 14\u001b[0m )\n",
+ "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/dataset.py:694\u001b[0m, in \u001b[0;36mDataset.__init__\u001b[0;34m(self, data_vars, coords, attrs)\u001b[0m\n\u001b[1;32m 691\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(coords, Dataset):\n\u001b[1;32m 692\u001b[0m coords \u001b[38;5;241m=\u001b[39m coords\u001b[38;5;241m.\u001b[39m_variables\n\u001b[0;32m--> 694\u001b[0m variables, coord_names, dims, indexes, _ \u001b[38;5;241m=\u001b[39m merge_data_and_coords(\n\u001b[1;32m 695\u001b[0m data_vars, coords\n\u001b[1;32m 696\u001b[0m )\n\u001b[1;32m 698\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_attrs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(attrs) \u001b[38;5;28;01mif\u001b[39;00m attrs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 699\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+ "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/dataset.py:423\u001b[0m, in \u001b[0;36mmerge_data_and_coords\u001b[0;34m(data_vars, coords)\u001b[0m\n\u001b[1;32m 419\u001b[0m coords \u001b[38;5;241m=\u001b[39m create_coords_with_default_indexes(coords, data_vars)\n\u001b[1;32m 421\u001b[0m \u001b[38;5;66;03m# exclude coords from alignment (all variables in a Coordinates object should\u001b[39;00m\n\u001b[1;32m 422\u001b[0m \u001b[38;5;66;03m# already be aligned together) and use coordinates' indexes to align data_vars\u001b[39;00m\n\u001b[0;32m--> 423\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge_core(\n\u001b[1;32m 424\u001b[0m [data_vars, coords],\n\u001b[1;32m 425\u001b[0m compat\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbroadcast_equals\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 426\u001b[0m join\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mouter\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 427\u001b[0m explicit_coords\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mtuple\u001b[39m(coords),\n\u001b[1;32m 428\u001b[0m indexes\u001b[38;5;241m=\u001b[39mcoords\u001b[38;5;241m.\u001b[39mxindexes,\n\u001b[1;32m 429\u001b[0m priority_arg\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m 430\u001b[0m skip_align_args\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m],\n\u001b[1;32m 431\u001b[0m )\n",
+ "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/merge.py:724\u001b[0m, in \u001b[0;36mmerge_core\u001b[0;34m(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value, skip_align_args)\u001b[0m\n\u001b[1;32m 719\u001b[0m prioritized \u001b[38;5;241m=\u001b[39m _get_priority_vars_and_indexes(aligned, priority_arg, compat\u001b[38;5;241m=\u001b[39mcompat)\n\u001b[1;32m 720\u001b[0m variables, out_indexes \u001b[38;5;241m=\u001b[39m merge_collected(\n\u001b[1;32m 721\u001b[0m collected, prioritized, compat\u001b[38;5;241m=\u001b[39mcompat, combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs\n\u001b[1;32m 722\u001b[0m )\n\u001b[0;32m--> 724\u001b[0m dims \u001b[38;5;241m=\u001b[39m calculate_dimensions(variables)\n\u001b[1;32m 726\u001b[0m coord_names, noncoord_names \u001b[38;5;241m=\u001b[39m determine_coords(coerced)\n\u001b[1;32m 727\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compat \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mminimal\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 728\u001b[0m \u001b[38;5;66;03m# coordinates may be dropped in merged results\u001b[39;00m\n",
+ "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/variable.py:3008\u001b[0m, in \u001b[0;36mcalculate_dimensions\u001b[0;34m(variables)\u001b[0m\n\u001b[1;32m 3006\u001b[0m last_used[dim] \u001b[38;5;241m=\u001b[39m k\n\u001b[1;32m 3007\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m dims[dim] \u001b[38;5;241m!=\u001b[39m size:\n\u001b[0;32m-> 3008\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 3009\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconflicting sizes for dimension \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdim\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3010\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlength \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msize\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m on \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m and length \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdims[dim]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m on \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlast_used\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3011\u001b[0m )\n\u001b[1;32m 3012\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dims\n",
+ "\u001b[0;31mValueError\u001b[0m: conflicting sizes for dimension 'time': length 3 on 'latitude' and length 4 on {'time': 'sea_water_temperature', 'depth': 'sea_water_temperature'}"
+ ]
+ }
+ ],
"source": [
- "Sure, it is easier for you and the people in your project to group the data by project. But you already have the data and can group them how you like between yourselves. When it comes to publishing data, we need to think about the data users outside of the project. Data, if published correctly, could be used for decades in to the future, perhaps longer in some cases. The majority of your data users might never have heard of your project!\n",
- "\n",
- "Creating individual files for each time series has another advantage; each file can have its own set of global attributes! This has many advantages, including:\n",
- "* If one time series was processed differently, this can be described.\n",
- "* Each file has its own bounding coordinates. Imagine you go to a data centre or data access portal and you are looking for data on a map. A file with many time series will show up as a box on a map. Without opening the file up, it is difficult to see which locations have been sampled. What if a potential data user is only interested in a certain location within the bounding box? A file that contains a single time series would show up as a point on a map.\n",
- "* Each time series can be published separately and have its own citation - including different authors if that is desired. If someone uses only one or a few of the files, they can only cite the relevant ones. It is then clear which data they have used in their work. Many good data centres are now able to provide you with a 'parent' page for your entire data collection with its own DOI and recommended citation that one can cite if they are using most or all of the files.\n",
- "\n",
- "Some might see it as a hassle to download and open lots of files. Some services are already being developed to allow data to download data from many similar files into a single file. More services will surely be available in the future. When we are publishing data, we should think not only about what is possible now, but what will be possible in the future. \n",
- "\n",
- "But for now, in the next few tutorials, we will look at how you can create and access multiple files quickly and easily today using Python!\n",
+ "latitude = [78.5142,79.2833,79.9840]\n",
+ "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
"\n",
- "Need some convincing? Let's look at another example for vertical profiles (e.g. CTD data)."
+ "xrds = xr.Dataset(\n",
+ " coords = {\n",
+ " 'depth': depth,\n",
+ " 'time': time\n",
+ " },\n",
+ " data_vars = {\n",
+ " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
+ " 'latitude': ('time', latitude),\n",
+ " 'longitude': ('time', longitude)\n",
+ " }\n",
+ ")"
]
},
{
"cell_type": "markdown",
- "id": "0851d141",
+ "id": "d41d2d46",
"metadata": {},
"source": [
- "## Multiple profiles\n",
- "\n",
- "Imagine we have the following 4 depth profiles:"
+ "Okay, that is all well and good. These issues might seem small. However, often our profiles are different lengths, like this."
]
},
{
"cell_type": "code",
- "execution_count": 22,
- "id": "3e189c74",
+ "execution_count": 10,
+ "id": "93f5f5dd",
"metadata": {},
"outputs": [],
"source": [
- "depth = [10,20,30]\n",
+ "depth_1 = [10,20,30]\n",
"temperature_1 = [21.42, 21.21, 20.98]\n",
- "temperature_2 = [22.08, 21.56, 20.42]\n",
- "temperature_3 = [22.11, 21.38, 20.12]\n",
- "temperature_4 = [21.87, 21.01, 19.99]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "3b28454d",
- "metadata": {},
- "source": [
- "Many people publish all the data from a single cruise in one file. But we have just learned that this is not the best practice! \n",
- "\n",
- "If we wanted to publish all the profiles in one file, we need another dimension to distinguish between the profiles. Time is a sensible choice."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "id": "6ed61391",
- "metadata": {},
- "outputs": [],
- "source": [
- "time = [0,1,2,3]"
+ "depth_2 = [10,20,30,40,50]\n",
+ "temperature_2 = [22.08, 21.56, 20.42, 19.23, 18.53]\n",
+ "depth_3 = [10,20,30,40,50,60]\n",
+ "temperature_3 = [22.42, 21.21, 20.12, 19.45, 18.72, 16.99]\n",
+ "depth_4 = [10,20]\n",
+ "temperature_4 = [21.84, 21.49]"
]
},
{
"cell_type": "markdown",
- "id": "518de385",
+ "id": "00f830df",
"metadata": {},
"source": [
- "But now we need to made a 2D array of our temperature values."
+ "But a 2D array needs to have profiles of equal lengths. To address this, we need to fill the rest of our 2D array with NaNs."
]
},
{
"cell_type": "code",
- "execution_count": 24,
- "id": "f33df992",
+ "execution_count": 11,
+ "id": "971d6377",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[[21.42, 21.21, 20.98],\n",
- " [22.08, 21.56, 20.42],\n",
- " [22.11, 21.38, 20.12],\n",
- " [21.87, 21.01, 19.99]]"
+ "array([[21.42, 21.21, 20.98, nan, nan, nan],\n",
+ " [22.08, 21.56, 20.42, 19.23, 18.53, nan],\n",
+ " [22.42, 21.21, 20.12, 19.45, 18.72, 16.99],\n",
+ " [21.84, 21.49, nan, nan, nan, nan]])"
]
},
- "execution_count": 24,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "temperature_2d = [temperature_1, temperature_2, temperature_3, temperature_4]\n",
+ "# Finding the maximum length among the depth arrays\n",
+ "max_depth_length = max(len(depth_1), len(depth_2), len(depth_3), len(depth_4))\n",
+ "\n",
+ "# Creating arrays filled with NaNs\n",
+ "temp_arr_1 = np.full(max_depth_length, np.nan)\n",
+ "temp_arr_2 = np.full(max_depth_length, np.nan)\n",
+ "temp_arr_3 = np.full(max_depth_length, np.nan)\n",
+ "temp_arr_4 = np.full(max_depth_length, np.nan)\n",
+ "\n",
+ "# Filling the arrays with available temperature data\n",
+ "temp_arr_1[:len(temperature_1)] = temperature_1\n",
+ "temp_arr_2[:len(temperature_2)] = temperature_2\n",
+ "temp_arr_3[:len(temperature_3)] = temperature_3\n",
+ "temp_arr_4[:len(temperature_4)] = temperature_4\n",
+ "\n",
+ "# Creating a 2D array\n",
+ "temperature_2d = np.array([temp_arr_1, temp_arr_2, temp_arr_3, temp_arr_4])\n",
"temperature_2d"
]
},
- {
- "cell_type": "markdown",
- "id": "b571ebb4",
- "metadata": {},
- "source": [
- "We can fit the data into our xarray object like this:"
- ]
- },
{
"cell_type": "code",
- "execution_count": 25,
- "id": "bb09ff8a",
+ "execution_count": 12,
+ "id": "24d71c33",
"metadata": {},
"outputs": [
{
@@ -2442,39 +2364,48 @@
" fill: currentColor;\n",
"}\n",
"<xarray.Dataset>\n",
- "Dimensions: (time: 4, depth: 3)\n",
+ "Dimensions: (time: 4, depth: 6)\n",
"Coordinates:\n",
- " * depth (depth) int64 10 20 30\n",
+ " * depth (depth) int64 10 20 30 40 50 60\n",
" * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99
"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 20.98 ... nan nan\n",
+ " latitude (time) float64 78.51 79.28 79.98 80.42\n",
+ " longitude (time) float64 30.42 30.36 30.5 30.42"
],
"text/plain": [
"\n",
- "Dimensions: (time: 4, depth: 3)\n",
+ "Dimensions: (time: 4, depth: 6)\n",
"Coordinates:\n",
- " * depth (depth) int64 10 20 30\n",
+ " * depth (depth) int64 10 20 30 40 50 60\n",
" * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
+ " sea_water_temperature (time, depth) float64 21.42 21.21 20.98 ... nan nan\n",
+ " latitude (time) float64 78.51 79.28 79.98 80.42\n",
+ " longitude (time) float64 30.42 30.36 30.5 30.42"
]
},
- "execution_count": 25,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
+ "latitude = [78.5142,79.2833,79.9840,80.4228]\n",
+ "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
+ "\n",
"xrds = xr.Dataset(\n",
" coords = {\n",
- " 'depth': depth,\n",
+ " 'depth': depth_3, # The longest profile\n",
" 'time': time\n",
" },\n",
" data_vars = {\n",
- " 'sea_water_temperature': (['time','depth'],temperature_2d)\n",
+ " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
+ " 'latitude': ('time', latitude),\n",
+ " 'longitude': ('time', longitude)\n",
" }\n",
")\n",
"\n",
@@ -2483,447 +2414,73 @@
},
{
"cell_type": "markdown",
- "id": "1688db2d",
+ "id": "fdd313c4",
"metadata": {},
"source": [
- "But don't forget that we now also need to include the latitude and longitude as variables. Latitude and longitude aren't required variables if your NetCDF file contains a single vertical profile - they can be written as global attributes instead. \n",
+ "So all the profiles need to be extended to the maximum depth in this case. You can imagine that this could create a lot of empty space in your files!\n",
"\n",
- "But in this case, latitude and longitude can be 1D variables with a dimension of time."
+ "But there is more... \n",
+ "\n",
+ "In the example above each profile is sampled at 10 m increments. What if different depths are sampled for each profile? Well, the depth coordinate variable would have to encompass all the depths sampled across all profiles. For example:"
]
},
{
"cell_type": "code",
- "execution_count": 26,
- "id": "347a8ae1",
+ "execution_count": 13,
+ "id": "2e8d2811",
"metadata": {},
"outputs": [
{
- "data": {
- "text/html": [
- "\n",
- "<xarray.Dataset>\n",
- "Dimensions: (time: 4, depth: 3)\n",
- "Coordinates:\n",
- " * depth (depth) int64 10 20 30\n",
- " * time (time) int64 0 1 2 3\n",
- "Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99\n",
- " latitude (time) float64 78.51 79.28 79.98 80.42\n",
- " longitude (time) float64 30.42 30.36 30.5 30.42
"
- ],
- "text/plain": [
- "\n",
- "Dimensions: (time: 4, depth: 3)\n",
- "Coordinates:\n",
- " * depth (depth) int64 10 20 30\n",
- " * time (time) int64 0 1 2 3\n",
- "Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99\n",
- " latitude (time) float64 78.51 79.28 79.98 80.42\n",
- " longitude (time) float64 30.42 30.36 30.5 30.42"
+ "data": {
+ "text/plain": [
+ "array([[21.42, nan, nan, 21.21, nan, nan, nan, nan, 20.98,\n",
+ " nan, nan, nan, nan, nan],\n",
+ " [ nan, 22.08, nan, nan, nan, nan, 21.56, nan, nan,\n",
+ " nan, 20.42, nan, 19.23, 18.53],\n",
+ " [ nan, 22.42, nan, nan, nan, 21.21, nan, 20.12, nan,\n",
+ " 19.45, nan, 18.72, nan, 16.99],\n",
+ " [ nan, nan, 21.84, nan, 21.49, nan, nan, nan, nan,\n",
+ " nan, nan, nan, nan, nan]])"
]
},
- "execution_count": 26,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "latitude = [78.5142,79.2833,79.9840,80.4228]\n",
- "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
+ "depth_1 = [5,20,50]\n",
+ "temperature_1 = [21.42, 21.21, 20.98]\n",
+ "depth_2 = [10,35,70,90,100]\n",
+ "temperature_2 = [22.08, 21.56, 20.42, 19.23, 18.53]\n",
+ "depth_3 = [10,25,40,60,80,100]\n",
+ "temperature_3 = [22.42, 21.21, 20.12, 19.45, 18.72, 16.99]\n",
+ "depth_4 = [12,24]\n",
+ "temperature_4 = [21.84, 21.49]\n",
"\n",
- "xrds = xr.Dataset(\n",
- " coords = {\n",
- " 'depth': depth,\n",
- " 'time': time\n",
- " },\n",
- " data_vars = {\n",
- " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
- " 'latitude': ('time', latitude),\n",
- " 'longitude': ('time', longitude)\n",
- " }\n",
- ")\n",
+ "# Merge all unique depth values\n",
+ "all_depths = sorted(set(depth_1 + depth_2 + depth_3 + depth_4))\n",
"\n",
- "xrds"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a840d640",
- "metadata": {},
- "source": [
- "This NetCDF file has more variables and dimensions than one that includes only a single profile. This leaves more room for variation in how people might structure their data. Sometimes you see people including latitude and longitude as coordinate variables with themselves as their dimensions, like below. "
+ "# Create arrays filled with NaNs for temperatures\n",
+ "all_temperatures = []\n",
+ "for depths, temps in [(depth_1, temperature_1), (depth_2, temperature_2),\n",
+ " (depth_3, temperature_3), (depth_4, temperature_4)]:\n",
+ " temp_arr = np.full(len(all_depths), np.nan)\n",
+ " # Adding temperature values to each array\n",
+ " for depth, temp in zip(depths, temps):\n",
+ " index = all_depths.index(depth)\n",
+ " temp_arr[index] = temp\n",
+ " all_temperatures.append(temp_arr)\n",
+ "\n",
+ "# Create a 2D array\n",
+ "temperature_2d = np.array(all_temperatures)\n",
+ "temperature_2d"
]
},
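As an aside, xarray can do this merging and NaN-padding for you: if each profile is built as its own small dataset with its own depth coordinate, xr.concat aligns them on the union of all sampled depths and fills the gaps with NaN. A sketch using the same example data:

import pandas as pd
import xarray as xr

# One small dataset per profile, each with its own depth coordinate
profiles = [
    xr.Dataset(coords={'depth': depths},
               data_vars={'sea_water_temperature': ('depth', temps)})
    for depths, temps in [(depth_1, temperature_1), (depth_2, temperature_2),
                          (depth_3, temperature_3), (depth_4, temperature_4)]
]

# Concatenating along a new 'time' dimension aligns the depth coordinates
# (an outer join) and pads the missing combinations with NaN.
combined = xr.concat(profiles, dim=pd.Index([0, 1, 2, 3], name='time'))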
{
"cell_type": "code",
- "execution_count": 27,
- "id": "2ca952c4",
+ "execution_count": 14,
+ "id": "9d413281",
"metadata": {},
"outputs": [
{
@@ -3293,31 +2850,35 @@
" fill: currentColor;\n",
"}\n",
"<xarray.Dataset>\n",
- "Dimensions: (time: 4, depth: 3, latitude: 4, longitude: 4)\n",
+ "Dimensions: (time: 4, depth: 14)\n",
"Coordinates:\n",
- " * latitude (latitude) float64 78.51 79.28 79.98 80.42\n",
- " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
- " * depth (depth) int64 10 20 30\n",
+ " * depth (depth) int64 5 10 12 20 24 25 ... 50 60 70 80 90 100\n",
" * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99
"
+ " sea_water_temperature (time, depth) float64 21.42 nan nan ... nan nan nan\n",
+ " latitude (time) float64 78.51 79.28 79.98 80.42\n",
+ " longitude (time) float64 30.42 30.36 30.5 30.42"
],
"text/plain": [
"\n",
- "Dimensions: (time: 4, depth: 3, latitude: 4, longitude: 4)\n",
+ "Dimensions: (time: 4, depth: 14)\n",
"Coordinates:\n",
- " * latitude (latitude) float64 78.51 79.28 79.98 80.42\n",
- " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
- " * depth (depth) int64 10 20 30\n",
+ " * depth (depth) int64 5 10 12 20 24 25 ... 50 60 70 80 90 100\n",
" * time (time) int64 0 1 2 3\n",
"Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
+ " sea_water_temperature (time, depth) float64 21.42 nan nan ... nan nan nan\n",
+ " latitude (time) float64 78.51 79.28 79.98 80.42\n",
+ " longitude (time) float64 30.42 30.36 30.5 30.42"
]
},
- "execution_count": 27,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -3328,13 +2889,13 @@
"\n",
"xrds = xr.Dataset(\n",
" coords = {\n",
- " 'depth': depth,\n",
+ " 'depth': all_depths,\n",
" 'time': time\n",
" },\n",
" data_vars = {\n",
" 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
- " 'latitude': ('latitude', latitude),\n",
- " 'longitude': ('longitude', longitude)\n",
+ " 'latitude': ('time', latitude),\n",
+ " 'longitude': ('time', longitude)\n",
" }\n",
")\n",
"\n",
@@ -3343,18 +2904,41 @@
},
{
"cell_type": "markdown",
- "id": "02173350",
+ "id": "739a487e",
"metadata": {},
"source": [
- "If we are being pedantic, we could say that latitude and longitude are not explicitely linked to time in this case. One has to make the assumption that they are linked. \n",
+ "That is a lot of empty space! Only 16 out of 56 points contain values! This gets worse the more profiles you have!\n",
"\n",
- "It is also easier to make mistakes when creating the files in this case - for example by accidentally encoding latitude with the wrong length. "
+ "All of this makes it more difficult to build useful services on top of your data. This could be \n",
+ "* on-the-fly visualisation of data on a data centre or data access portal website\n",
+ "* aggregating data from several files together\n",
+ "* allowing users to access only a subset of the data in a file\n",
+ "\n",
+ "## What have we learned about publishing multiple time series or profiles?\n",
+ "\n",
+ "* NetCDF files that include many profiles or time series can include a lot of empty space.\n",
+ "* NetCDF files that include only one profile or time series often require only one dimension and coordinate variables and are therefore simpler.\n",
+ "* Because the files are simpler, they are easier to create, easier to understand, and easier to build services upon.\n",
+ "* There are more different ways to encode the same data in a NetCDF file that includes multiple profiles. Files with a single profile are more likely to be consistent with each other, regardless of who is creating them.\n",
+ "* The project or cruise that data were collected as part of is arbitary to most data users over medium to long time scales. Your data are a contribution to a broader network of data that someone can download and use altogether - combining data from many projects or cruises."
+ ]
+ },
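To make the last points concrete, here is a sketch of writing one file per profile from the combined dataset above, and of reading many such files back together. The filenames, the dropna step and open_mfdataset are illustrative choices, not the only way to do this:

import xarray as xr

# Write each profile to its own file, dropping the all-NaN padding first.
for i in xrds['time'].values:
    profile = xrds.sel(time=i).dropna(dim='depth', subset=['sea_water_temperature'])
    profile.to_netcdf(f'profile_{i}.nc')  # illustrative filename

# A data user can still open the whole collection in one call.
recombined = xr.open_mfdataset('profile_*.nc', combine='nested', concat_dim='time')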
+ {
+ "cell_type": "markdown",
+ "id": "8b6dd359",
+ "metadata": {},
+ "source": [
+ "## A time series for instruments that move\n",
+ "\n",
+ "If we tried to use latitude, longitude, depth/elevation and time dimensions for our data variables, there would be a lot of empty space! Not all coordinates have to be coordinate variables with their own dimensions! \n",
+ "\n",
+ "In this case we can use time as our dimension, and everything else as 1D variables."
]
},
{
"cell_type": "code",
- "execution_count": 28,
- "id": "a58011a7",
+ "execution_count": 15,
+ "id": "c5550b5c",
"metadata": {},
"outputs": [
{
@@ -3714,191 +3298,84 @@
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2,\n",
- ".xr-no-icon {\n",
- " display: inline-block;\n",
- " vertical-align: middle;\n",
- " width: 1em;\n",
- " height: 1.5em !important;\n",
- " stroke-width: 0;\n",
- " stroke: currentColor;\n",
- " fill: currentColor;\n",
- "}\n",
- "<xarray.Dataset>\n",
- "Dimensions: (time: 4, depth: 3, latitude: 3, longitude: 4)\n",
- "Coordinates:\n",
- " * latitude (latitude) float64 78.51 79.28 79.98\n",
- " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
- " * depth (depth) int64 10 20 30\n",
- " * time (time) int64 0 1 2 3\n",
- "Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99
"
- ],
- "text/plain": [
- "\n",
- "Dimensions: (time: 4, depth: 3, latitude: 3, longitude: 4)\n",
- "Coordinates:\n",
- " * latitude (latitude) float64 78.51 79.28 79.98\n",
- " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n",
- " * depth (depth) int64 10 20 30\n",
- " * time (time) int64 0 1 2 3\n",
- "Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "latitude = [78.5142,79.2833,79.9840]\n",
- "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
- "\n",
- "xrds = xr.Dataset(\n",
- " coords = {\n",
- " 'depth': depth,\n",
- " 'time': time\n",
- " },\n",
- " data_vars = {\n",
- " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
- " 'latitude': ('latitude', latitude),\n",
- " 'longitude': ('longitude', longitude)\n",
- " }\n",
- ")\n",
- "\n",
- "xrds"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a2085ce5",
- "metadata": {},
- "source": [
- "No error! It would be easy to overlook that we have only 3 latitude values and 4 longitude values.\n",
- "\n",
- "If you make assign a dimension of time to your latitude and longitude variables, an error is returned if, for example, your latitude variable is the wrong length."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "id": "77ed2b0d",
- "metadata": {},
- "outputs": [
- {
- "ename": "ValueError",
- "evalue": "conflicting sizes for dimension 'time': length 3 on 'latitude' and length 4 on {'time': 'sea_water_temperature', 'depth': 'sea_water_temperature'}",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[29], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m latitude \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m78.5142\u001b[39m,\u001b[38;5;241m79.2833\u001b[39m,\u001b[38;5;241m79.9840\u001b[39m]\n\u001b[1;32m 2\u001b[0m longitude \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m30.4231\u001b[39m,\u001b[38;5;241m30.3591\u001b[39m,\u001b[38;5;241m30.4994\u001b[39m, \u001b[38;5;241m30.4200\u001b[39m]\n\u001b[0;32m----> 4\u001b[0m xrds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mDataset(\n\u001b[1;32m 5\u001b[0m coords \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdepth\u001b[39m\u001b[38;5;124m'\u001b[39m: depth,\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m: time\n\u001b[1;32m 8\u001b[0m },\n\u001b[1;32m 9\u001b[0m data_vars \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msea_water_temperature\u001b[39m\u001b[38;5;124m'\u001b[39m: ([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdepth\u001b[39m\u001b[38;5;124m'\u001b[39m],temperature_2d),\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlatitude\u001b[39m\u001b[38;5;124m'\u001b[39m: (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, latitude),\n\u001b[1;32m 12\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlongitude\u001b[39m\u001b[38;5;124m'\u001b[39m: (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, longitude)\n\u001b[1;32m 13\u001b[0m }\n\u001b[1;32m 14\u001b[0m )\n",
- "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/dataset.py:652\u001b[0m, in \u001b[0;36mDataset.__init__\u001b[0;34m(self, data_vars, coords, attrs)\u001b[0m\n\u001b[1;32m 649\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(coords, Dataset):\n\u001b[1;32m 650\u001b[0m coords \u001b[38;5;241m=\u001b[39m coords\u001b[38;5;241m.\u001b[39m_variables\n\u001b[0;32m--> 652\u001b[0m variables, coord_names, dims, indexes, _ \u001b[38;5;241m=\u001b[39m merge_data_and_coords(\n\u001b[1;32m 653\u001b[0m data_vars, coords, compat\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbroadcast_equals\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 654\u001b[0m )\n\u001b[1;32m 656\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_attrs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(attrs) \u001b[38;5;28;01mif\u001b[39;00m attrs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 657\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
- "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/merge.py:569\u001b[0m, in \u001b[0;36mmerge_data_and_coords\u001b[0;34m(data_vars, coords, compat, join)\u001b[0m\n\u001b[1;32m 567\u001b[0m objects \u001b[38;5;241m=\u001b[39m [data_vars, coords]\n\u001b[1;32m 568\u001b[0m explicit_coords \u001b[38;5;241m=\u001b[39m coords\u001b[38;5;241m.\u001b[39mkeys()\n\u001b[0;32m--> 569\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge_core(\n\u001b[1;32m 570\u001b[0m objects,\n\u001b[1;32m 571\u001b[0m compat,\n\u001b[1;32m 572\u001b[0m join,\n\u001b[1;32m 573\u001b[0m explicit_coords\u001b[38;5;241m=\u001b[39mexplicit_coords,\n\u001b[1;32m 574\u001b[0m indexes\u001b[38;5;241m=\u001b[39mIndexes(indexes, coords),\n\u001b[1;32m 575\u001b[0m )\n",
- "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/merge.py:761\u001b[0m, in \u001b[0;36mmerge_core\u001b[0;34m(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)\u001b[0m\n\u001b[1;32m 756\u001b[0m prioritized \u001b[38;5;241m=\u001b[39m _get_priority_vars_and_indexes(aligned, priority_arg, compat\u001b[38;5;241m=\u001b[39mcompat)\n\u001b[1;32m 757\u001b[0m variables, out_indexes \u001b[38;5;241m=\u001b[39m merge_collected(\n\u001b[1;32m 758\u001b[0m collected, prioritized, compat\u001b[38;5;241m=\u001b[39mcompat, combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs\n\u001b[1;32m 759\u001b[0m )\n\u001b[0;32m--> 761\u001b[0m dims \u001b[38;5;241m=\u001b[39m calculate_dimensions(variables)\n\u001b[1;32m 763\u001b[0m coord_names, noncoord_names \u001b[38;5;241m=\u001b[39m determine_coords(coerced)\n\u001b[1;32m 764\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m explicit_coords \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
- "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/xarray/core/variable.py:3282\u001b[0m, in \u001b[0;36mcalculate_dimensions\u001b[0;34m(variables)\u001b[0m\n\u001b[1;32m 3280\u001b[0m last_used[dim] \u001b[38;5;241m=\u001b[39m k\n\u001b[1;32m 3281\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m dims[dim] \u001b[38;5;241m!=\u001b[39m size:\n\u001b[0;32m-> 3282\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 3283\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconflicting sizes for dimension \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdim\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3284\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlength \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msize\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m on \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m and length \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdims[dim]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m on \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlast_used\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3285\u001b[0m )\n\u001b[1;32m 3286\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dims\n",
- "\u001b[0;31mValueError\u001b[0m: conflicting sizes for dimension 'time': length 3 on 'latitude' and length 4 on {'time': 'sea_water_temperature', 'depth': 'sea_water_temperature'}"
- ]
- }
- ],
- "source": [
- "latitude = [78.5142,79.2833,79.9840]\n",
- "longitude = [30.4231,30.3591,30.4994, 30.4200]\n",
- "\n",
- "xrds = xr.Dataset(\n",
- " coords = {\n",
- " 'depth': depth,\n",
- " 'time': time\n",
- " },\n",
- " data_vars = {\n",
- " 'sea_water_temperature': (['time','depth'],temperature_2d),\n",
- " 'latitude': ('time', latitude),\n",
- " 'longitude': ('time', longitude)\n",
- " }\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "d41d2d46",
- "metadata": {},
- "source": [
- "Okay, that is all well and good. These issues might seem small. However, often our profiles are differen lengths, like this."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "id": "93f5f5dd",
- "metadata": {},
- "outputs": [],
- "source": [
- "depth_1 = [10,20,30]\n",
- "temperature_1 = [21.42, 21.21, 20.98]\n",
- "depth_2 = [10,20,30,40,50]\n",
- "temperature_2 = [22.08, 21.56, 20.42, 19.23, 18.53]\n",
- "depth_3 = [10,20,30,40,50,60]\n",
- "temperature_3 = [22.42, 21.21, 20.12, 19.45, 18.72, 16.99]\n",
- "depth_4 = [10,20]\n",
- "temperature_4 = [21.84, 21.49]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "00f830df",
- "metadata": {},
- "source": [
- "But a 2D array needs to have profiles of equal lengths. To address this, we need to fill the rest of our 2D array with NaNs."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "id": "971d6377",
- "metadata": {},
- "outputs": [
- {
- "data": {
+ ".xr-no-icon {\n",
+ " display: inline-block;\n",
+ " vertical-align: middle;\n",
+ " width: 1em;\n",
+ " height: 1.5em !important;\n",
+ " stroke-width: 0;\n",
+ " stroke: currentColor;\n",
+ " fill: currentColor;\n",
+ "}\n",
+ "<xarray.Dataset>\n",
+ "Dimensions: (time: 10)\n",
+ "Coordinates:\n",
+ " * time (time) int64 0 1 2 3 4 5 6 7 8 9\n",
+ "Data variables:\n",
+ " longitude (time) float64 -66.7 -66.64 -66.58 ... -66.76 -66.85\n",
+ " latitude (time) float64 6.497 6.476 6.558 ... 6.407 6.413\n",
+ " depth (time) float64 6.639 5.89 5.029 ... 6.213 6.255 6.559\n",
+ " sea_water_temperature (time) float64 10.77 10.15 9.69 ... 10.25 10.25 10.56
"
+ ],
"text/plain": [
- "array([[21.42, 21.21, 20.98, nan, nan, nan],\n",
- " [22.08, 21.56, 20.42, 19.23, 18.53, nan],\n",
- " [22.42, 21.21, 20.12, 19.45, 18.72, 16.99],\n",
- " [21.84, 21.49, nan, nan, nan, nan]])"
+ "\n",
+ "Dimensions: (time: 10)\n",
+ "Coordinates:\n",
+ " * time (time) int64 0 1 2 3 4 5 6 7 8 9\n",
+ "Data variables:\n",
+ " longitude (time) float64 -66.7 -66.64 -66.58 ... -66.76 -66.85\n",
+ " latitude (time) float64 6.497 6.476 6.558 ... 6.407 6.413\n",
+ " depth (time) float64 6.639 5.89 5.029 ... 6.213 6.255 6.559\n",
+ " sea_water_temperature (time) float64 10.77 10.15 9.69 ... 10.25 10.25 10.56"
]
},
- "execution_count": 31,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "# Finding the maximum length among the depth arrays\n",
- "max_depth_length = max(len(depth_1), len(depth_2), len(depth_3), len(depth_4))\n",
+ "time = [0,1,2,3,4,5,6,7,8,9]\n",
+ "latitude = [6.4970, 6.4756, 6.5584, 6.5087, 6.4815, 6.5029, 6.4279, 6.3409, 6.4066, 6.4134]\n",
+ "longitude = [-66.6972, -66.6399, -66.5838, -66.6357, -66.7313, -66.686, -66.7192, -66.6737, -66.7594, -66.8479]\n",
+ "depth = [6.6388, 5.8899, 5.0289, 4.7409, 5.5595, 5.4532, 5.7104, 6.2129, 6.2548, 6.5595]\n",
+ "temperature = [10.77, 10.15, 9.69, 9.46, 9.06, 9.97, 9.66, 10.25, 10.25, 10.56]\n",
"\n",
- "# Creating arrays filled with NaNs\n",
- "temp_arr_1 = np.full(max_depth_length, np.nan)\n",
- "temp_arr_2 = np.full(max_depth_length, np.nan)\n",
- "temp_arr_3 = np.full(max_depth_length, np.nan)\n",
- "temp_arr_4 = np.full(max_depth_length, np.nan)\n",
+ "xrds = xr.Dataset(\n",
+ " coords = {\n",
+ " 'time': time\n",
+ " },\n",
+ " data_vars = {\n",
+ " 'longitude': ('time', longitude),\n",
+ " 'latitude': ('time', latitude),\n",
+ " 'depth': ('time', depth),\n",
+ " 'sea_water_temperature': ('time', temperature)\n",
+ " }\n",
+ " )\n",
"\n",
- "# Filling the arrays with available temperature data\n",
- "temp_arr_1[:len(temperature_1)] = temperature_1\n",
- "temp_arr_2[:len(temperature_2)] = temperature_2\n",
- "temp_arr_3[:len(temperature_3)] = temperature_3\n",
- "temp_arr_4[:len(temperature_4)] = temperature_4\n",
+ "xrds\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9b5e067e",
+ "metadata": {},
+ "source": [
+ "## Data on a multidimensional grid\n",
"\n",
- "# Creating a 2D array\n",
- "temperature_2d = np.array([temp_arr_1, temp_arr_2, temp_arr_3, temp_arr_4])\n",
- "temperature_2d"
+ "Some data sit on a grid with multiple dimensions. Some examples are satellite data or data output from certain models. Imagine we have sea water temperature data exported from a model. There are 4 dimensions; latitude, longitude, depth and time. In this case, let's imagine that the model exports data to a regular grid.\n",
+ "\n",
+ "I am going to use random values! If you have data, your first job is get your data into a multidimensional array. You will find some help in tutorial 04 on how to convert tabular data to a multidimensional array. ChatGPT can also be very helpful - but make sure the values are going in to the right places. "
]
},
{
"cell_type": "code",
- "execution_count": 32,
- "id": "24d71c33",
+ "execution_count": 17,
+ "id": "2349ca67",
"metadata": {},
"outputs": [
{
@@ -4268,122 +3745,165 @@
" fill: currentColor;\n",
"}\n",
"<xarray.Dataset>\n",
- "Dimensions: (time: 4, depth: 6)\n",
+ "Dimensions: (time: 4, depth: 1001, latitude: 71, longitude: 42)\n",
"Coordinates:\n",
- " * depth (depth) int64 10 20 30 40 50 60\n",
" * time (time) int64 0 1 2 3\n",
+ " * depth (depth) int64 0 1 2 3 4 5 ... 996 997 998 999 1000\n",
+ " * latitude (latitude) float64 75.0 75.1 75.2 ... 81.8 81.9 82.0\n",
+ " * longitude (longitude) float64 28.0 28.1 28.2 ... 31.9 32.0 32.1\n",
"Data variables:\n",
- " sea_water_temperature (time, depth) float64 21.42 21.21 20.98 ... nan nan\n",
- " latitude (time) float64 78.51 79.28 79.98 80.42\n",
- " longitude (time) float64 30.42 30.36 30.5 30.42
"
+ " sea_water_temperature (time, depth, latitude, longitude) float64 1.229 ....
<xarray.Dataset>\n", - "Dimensions: (time: 4, depth: 14)\n", + "Dimensions: (node: 100)\n", "Coordinates:\n", - " * depth (depth) int64 5 10 12 20 24 25 ... 50 60 70 80 90 100\n", - " * time (time) int64 0 1 2 3\n", + " * node (node) int64 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99\n", "Data variables:\n", - " sea_water_temperature (time, depth) float64 21.42 nan nan ... nan nan nan\n", - " latitude (time) float64 78.51 79.28 79.98 80.42\n", - " longitude (time) float64 30.42 30.36 30.5 30.42
YouTube videos will accompany each notebook. When these videos are published, they can be included using the following code:
+from IPython.display import YouTubeVideo
+YouTubeVideo('FGHJhAFf1W0')
+
Be sure to change the id of the video!
+ + + + +PandasIndex(Index([-87.5, -82.5, -77.5, -72.5, -67.5, -62.5, -57.5, -52.5, -47.5, -42.5,\n", + " -37.5, -32.5, -27.5, -22.5, -17.5, -12.5, -7.5, -2.5, 2.5, 7.5,\n", + " 12.5, 17.5, 22.5, 27.5, 32.5, 37.5, 42.5, 47.5, 52.5, 57.5,\n", + " 62.5, 67.5, 72.5, 77.5, 82.5, 87.5],\n", + " dtype='float32', name='lat'))
PandasIndex(Index([ 2.5, 7.5, 12.5, 17.5, 22.5, 27.5, 32.5, 37.5, 42.5, 47.5,\n", + " 52.5, 57.5, 62.5, 67.5, 72.5, 77.5, 82.5, 87.5, 92.5, 97.5,\n", + " 102.5, 107.5, 112.5, 117.5, 122.5, 127.5, 132.5, 137.5, 142.5, 147.5,\n", + " 152.5, 157.5, 162.5, 167.5, 172.5, 177.5, 182.5, 187.5, 192.5, 197.5,\n", + " 202.5, 207.5, 212.5, 217.5, 222.5, 227.5, 232.5, 237.5, 242.5, 247.5,\n", + " 252.5, 257.5, 262.5, 267.5, 272.5, 277.5, 282.5, 287.5, 292.5, 297.5,\n", + " 302.5, 307.5, 312.5, 317.5, 322.5, 327.5, 332.5, 337.5, 342.5, 347.5,\n", + " 352.5, 357.5],\n", + " dtype='float32', name='lon'))
PandasIndex(Index([0.0], dtype='float32', name='z'))
PandasIndex(Index([-87.5, -82.5, -77.5, -72.5, -67.5, -62.5, -57.5, -52.5, -47.5, -42.5,\n", + " -37.5, -32.5, -27.5, -22.5, -17.5, -12.5, -7.5, -2.5, 2.5, 7.5,\n", + " 12.5, 17.5, 22.5, 27.5, 32.5, 37.5, 42.5, 47.5, 52.5, 57.5,\n", + " 62.5, 67.5, 72.5, 77.5, 82.5, 87.5],\n", + " dtype='float32', name='lat'))
PandasIndex(Index([ 2.5, 7.5, 12.5, 17.5, 22.5, 27.5, 32.5, 37.5, 42.5, 47.5,\n", + " 52.5, 57.5, 62.5, 67.5, 72.5, 77.5, 82.5, 87.5, 92.5, 97.5,\n", + " 102.5, 107.5, 112.5, 117.5, 122.5, 127.5, 132.5, 137.5, 142.5, 147.5,\n", + " 152.5, 157.5, 162.5, 167.5, 172.5, 177.5, 182.5, 187.5, 192.5, 197.5,\n", + " 202.5, 207.5, 212.5, 217.5, 222.5, 227.5, 232.5, 237.5, 242.5, 247.5,\n", + " 252.5, 257.5, 262.5, 267.5, 272.5, 277.5, 282.5, 287.5, 292.5, 297.5,\n", + " 302.5, 307.5, 312.5, 317.5, 322.5, 327.5, 332.5, 337.5, 342.5, 347.5,\n", + " 352.5, 357.5],\n", + " dtype='float32', name='lon'))
PandasIndex(Index([0.0], dtype='float32', name='z'))
PandasIndex(Index([ 0, 3600, 7200, 10800, 14400, 18000, 21600, 25200, 28800, 32400,\n", + " 36000, 39600, 43200, 46800, 50400, 54000, 57600, 61200, 64800, 68400,\n", + " 72000, 75600, 79200, 82800],\n", + " dtype='int64', name='time'))
<xarray.DataArray 'wind_speed' (latitude: 3, longitude: 2)>\n", - "array([[1, 8],\n", - " [4, 1],\n", - " [0, 2]])\n", + "array([[7, 5],\n", + " [9, 6],\n", + " [4, 1]])\n", "Coordinates:\n", " * latitude (latitude) float64 78.54 79.14 80.71\n", - " * longitude (longitude) float64 30.01 28.73" + " * longitude (longitude) float64 30.01 28.73 " ], "text/plain": [ "
PandasIndex(Index([0, 10, 20, 50, 100], dtype='int64', name='depth'))
PandasIndex(Index([78.5425, 79.1423, 80.7139], dtype='float64', name='latitude'))
PandasIndex(Index([30.0131, 28.7269], dtype='float64', name='longitude'))
<xarray.Dataset>\n", - "Dimensions: (depth: 3)\n", + "Dimensions: (time: 4, depth: 3)\n", "Coordinates:\n", " * depth (depth) int64 10 20 30\n", + " * time (time) int64 0 1 2 3\n", "Data variables:\n", - " sea_water_temperature (depth) float64 21.42 21.21 20.98" + " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99 " ], "text/plain": [ "
<xarray.Dataset>\n", - "Dimensions: (time: 10)\n", + "Dimensions: (time: 4, depth: 3)\n", "Coordinates:\n", - " * time (time) int64 0 1 2 3 4 5 6 7 8 9\n", + " * depth (depth) int64 10 20 30\n", + " * time (time) int64 0 1 2 3\n", "Data variables:\n", - " longitude (time) float64 -66.7 -66.64 -66.58 ... -66.76 -66.85\n", - " latitude (time) float64 6.497 6.476 6.558 ... 6.407 6.413\n", - " depth (time) float64 6.639 5.89 5.029 ... 6.213 6.255 6.559\n", - " sea_water_temperature (time) float64 10.77 10.15 9.69 ... 10.25 10.25 10.56" + " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99\n", + " latitude (time) float64 78.51 79.28 79.98 80.42\n", + " longitude (time) float64 30.42 30.36 30.5 30.42 " ], "text/plain": [ "
<xarray.Dataset>\n", - "Dimensions: (time: 4, depth: 1001, latitude: 71, longitude: 42)\n", + "Dimensions: (time: 4, depth: 3, latitude: 4, longitude: 4)\n", "Coordinates:\n", + " * latitude (latitude) float64 78.51 79.28 79.98 80.42\n", + " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n", + " * depth (depth) int64 10 20 30\n", " * time (time) int64 0 1 2 3\n", - " * depth (depth) int64 0 1 2 3 4 5 ... 996 997 998 999 1000\n", - " * latitude (latitude) float64 75.0 75.1 75.2 ... 81.8 81.9 82.0\n", - " * longitude (longitude) float64 28.0 28.1 28.2 ... 31.9 32.0 32.1\n", "Data variables:\n", - " sea_water_temperature (time, depth, latitude, longitude) float64 1.932 ...." + " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99 " ], "text/plain": [ "
<xarray.Dataset>\n", - "Dimensions: (node: 100)\n", + "Dimensions: (time: 4, depth: 3, latitude: 3, longitude: 4)\n", "Coordinates:\n", - " * node (node) int64 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99\n", + " * latitude (latitude) float64 78.51 79.28 79.98\n", + " * longitude (longitude) float64 30.42 30.36 30.5 30.42\n", + " * depth (depth) int64 10 20 30\n", + " * time (time) int64 0 1 2 3\n", "Data variables:\n", - " data_var (node) float64 0.8881 0.6197 0.886 ... 0.3992 0.1609 0.6582\n", - " latitude (node) float64 11.52 40.92 -35.42 81.54 ... 42.43 80.09 -81.26\n", - " longitude (node) float64 -37.15 50.15 -65.14 35.36 ... -120.0 -48.52 -58.31" + " sea_water_temperature (time, depth) float64 21.42 21.21 ... 21.01 19.99 " ], "text/plain": [ "
Output: profiles combined onto a shared depth axis (10 to 60 m), with nan wherever a level was not sampled:

<xarray.Dataset>
Dimensions:                (time: 4, depth: 6)
Coordinates:
  * depth                  (depth) int64 10 20 30 40 50 60
  * time                   (time) int64 0 1 2 3
Data variables:
    sea_water_temperature  (time, depth) float64 21.42 21.21 20.98 ... nan nan
    latitude               (time) float64 78.51 79.28 79.98 80.42
    longitude              (time) float64 30.42 30.36 30.5 30.42
Output: the same combination onto the union of two non-overlapping depth grids (14 levels), so most cells are nan:

<xarray.Dataset>
Dimensions:                (time: 4, depth: 14)
Coordinates:
  * depth                  (depth) int64 5 10 12 20 24 25 ... 50 60 70 80 90 100
  * time                   (time) int64 0 1 2 3
Data variables:
    sea_water_temperature  (time, depth) float64 21.42 nan nan ... nan nan nan
    latitude               (time) float64 78.51 79.28 79.98 80.42
    longitude              (time) float64 30.42 30.36 30.5 30.42
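The nan padding in the two outputs above is what xarray produces when profiles on different depth grids are aligned onto the union of their depth values. A minimal sketch, assuming a hypothetical second profile that covers deeper levels at the first time step only, and reusing ds from the earlier sketch:

import xarray as xr

# Hypothetical deeper profile, measured only at time step 0
deeper = xr.DataArray(
    [[18.2, 17.9, 17.5]],
    dims=["time", "depth"],
    coords={"time": [0], "depth": [40, 50, 60]},
    name="sea_water_temperature",
)

# concat uses an outer join on the non-concatenated dimensions,
# so missing time/depth combinations are filled with nan
combined = xr.concat([ds["sea_water_temperature"], deeper], dim="depth")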
Output: a trajectory (e.g. a ship or glider track); position, depth and temperature all vary along a single time dimension:

<xarray.Dataset>
Dimensions:                (time: 10)
Coordinates:
  * time                   (time) int64 0 1 2 3 4 5 6 7 8 9
Data variables:
    longitude              (time) float64 -66.7 -66.64 -66.58 ... -66.76 -66.85
    latitude               (time) float64 6.497 6.476 6.558 ... 6.407 6.413
    depth                  (time) float64 6.639 5.89 5.029 ... 6.213 6.255 6.559
    sea_water_temperature  (time) float64 10.77 10.15 9.69 ... 10.25 10.25 10.56
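A minimal sketch of a trajectory dataset; the value ranges are hypothetical and merely chosen to resemble the output above:

import numpy as np
import xarray as xr

n = 10  # number of points along the track

trajectory = xr.Dataset(
    data_vars={
        "longitude": ("time", np.random.uniform(-66.9, -66.5, n)),
        "latitude": ("time", np.random.uniform(6.4, 6.6, n)),
        "depth": ("time", np.random.uniform(5.0, 7.0, n)),
        "sea_water_temperature": ("time", np.random.uniform(9.5, 11.0, n)),
    },
    coords={"time": np.arange(n)},
)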
Output: a regular 4D grid; latitude and longitude are dimensions in their own right and the data variable spans (time, depth, latitude, longitude):

<xarray.Dataset>
Dimensions:                (time: 4, depth: 1001, latitude: 71, longitude: 42)
Coordinates:
  * time                   (time) int64 0 1 2 3
  * depth                  (depth) int64 0 1 2 3 4 5 ... 996 997 998 999 1000
  * latitude               (latitude) float64 75.0 75.1 75.2 ... 81.8 81.9 82.0
  * longitude              (longitude) float64 28.0 28.1 28.2 ... 31.9 32.0 32.1
Data variables:
    sea_water_temperature  (time, depth, latitude, longitude) float64 1.229 ...
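A minimal sketch of the gridded case, filled with random values (note that an array of this size is roughly 100 MB in memory):

import numpy as np
import xarray as xr

time = np.arange(4)
depth = np.arange(1001)                  # 0 to 1000 m at 1 m spacing
latitude = np.arange(75.0, 82.05, 0.1)   # 71 points
longitude = np.arange(28.0, 32.15, 0.1)  # 42 points

grid = xr.Dataset(
    data_vars={
        "sea_water_temperature": (
            ["time", "depth", "latitude", "longitude"],
            np.random.rand(time.size, depth.size, latitude.size, longitude.size),
        ),
    },
    coords={
        "time": time,
        "depth": depth,
        "latitude": latitude,
        "longitude": longitude,
    },
)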
Output: an unstructured grid; there is a single node dimension, and each node's position is stored in auxiliary latitude and longitude variables:

<xarray.Dataset>
Dimensions:    (node: 100)
Coordinates:
  * node       (node) int64 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99
Data variables:
    data_var   (node) float64 0.9497 0.6762 0.1775 ... 0.6252 0.7844 0.4039
    latitude   (node) float64 -33.93 84.84 -13.17 69.12 ... 61.13 -15.98 -64.3
    longitude  (node) float64 121.7 90.23 99.55 -44.3 ... 150.3 -118.3 114.8
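A minimal sketch of the unstructured case, with hypothetical random positions and values:

import numpy as np
import xarray as xr

n_nodes = 100

nodes = xr.Dataset(
    data_vars={
        "data_var": ("node", np.random.rand(n_nodes)),
        "latitude": ("node", np.random.uniform(-90, 90, n_nodes)),
        "longitude": ("node", np.random.uniform(-180, 180, n_nodes)),
    },
    coords={"node": np.arange(n_nodes)},
)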
[xarray index repr residue: monthly time steps given as days since 1970-01-01 (19358 to 19692) and depth levels 0, 10, 20, 30.]