diff --git a/docs/source/_templates/_api_ref.pandas.dataframe_templ.rst b/docs/source/_templates/_api_ref.pandas.dataframe_templ.rst new file mode 100644 index 000000000..9cec8fe73 --- /dev/null +++ b/docs/source/_templates/_api_ref.pandas.dataframe_templ.rst @@ -0,0 +1,313 @@ +.. _api_ref.pandas.dataframe: +.. include:: ./../ext_links.txt + +DataFrame +========= +.. currentmodule:: pandas + +This is the main `Pandas*`_ data structure representing a table of rows and columns. + +DataFrame is a two-dimensional structure with labeled axes. It can be thought of as a dictionary-like +container for :class:`Series <pandas.Series>` objects. + +Constructor +----------- + +.. sdc_toctree + DataFrame + +Attributes/Operators +-------------------- + +.. sdc_toctree + DataFrame.index + DataFrame.columns + DataFrame.dtypes + DataFrame.select_dtypes + DataFrame.values + DataFrame.axes + DataFrame.ndim + DataFrame.size + DataFrame.shape + DataFrame.memory_usage + DataFrame.empty + +Type Conversions +---------------- + +.. sdc_toctree + DataFrame.astype + DataFrame.infer_objects + DataFrame.copy + DataFrame.isna + DataFrame.notna + DataFrame.bool + +Indexing and Iteration +---------------------- + +.. sdc_toctree + DataFrame.head + DataFrame.at + DataFrame.iat + DataFrame.loc + DataFrame.iloc + DataFrame.insert + DataFrame.__iter__ + DataFrame.items + DataFrame.iteritems + DataFrame.keys + DataFrame.iterrows + DataFrame.itertuples + DataFrame.lookup + DataFrame.pop + DataFrame.tail + DataFrame.xs + DataFrame.get + DataFrame.isin + DataFrame.where + DataFrame.mask + DataFrame.query + +For more information on ``.at``, ``.iat``, ``.loc``, and +``.iloc``, see the :ref:`indexing documentation <indexing>`. + +Binary Operator Functions +------------------------- + +.. 
sdc_toctree + DataFrame.add + DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow + DataFrame.dot + DataFrame.radd + DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq + DataFrame.combine + DataFrame.combine_first + +User-Defined Functions, GroupBy & Window +---------------------------------------- + +.. sdc_toctree + DataFrame.apply + DataFrame.applymap + DataFrame.pipe + DataFrame.agg + DataFrame.aggregate + DataFrame.transform + DataFrame.groupby + DataFrame.rolling + DataFrame.expanding + DataFrame.ewm + +.. _api_ref.dataframe.stats: + +Computations, Descriptive Statistics +------------------------------------ + +.. sdc_toctree + DataFrame.abs + DataFrame.all + DataFrame.any + DataFrame.clip + DataFrame.corr + DataFrame.corrwith + DataFrame.count + DataFrame.cov + DataFrame.cummax + DataFrame.cummin + DataFrame.cumprod + DataFrame.cumsum + DataFrame.describe + DataFrame.diff + DataFrame.eval + DataFrame.kurt + DataFrame.kurtosis + DataFrame.mad + DataFrame.max + DataFrame.mean + DataFrame.median + DataFrame.min + DataFrame.mode + DataFrame.pct_change + DataFrame.prod + DataFrame.product + DataFrame.quantile + DataFrame.rank + DataFrame.round + DataFrame.sem + DataFrame.skew + DataFrame.sum + DataFrame.std + DataFrame.var + DataFrame.nunique + +Re-Indexing, Selection, Label Manipulation +------------------------------------------ + +.. 
sdc_toctree + DataFrame.add_prefix + DataFrame.add_suffix + DataFrame.align + DataFrame.at_time + DataFrame.between_time + DataFrame.drop + DataFrame.drop_duplicates + DataFrame.duplicated + DataFrame.equals + DataFrame.filter + DataFrame.first + DataFrame.head + DataFrame.idxmax + DataFrame.idxmin + DataFrame.last + DataFrame.reindex + DataFrame.reindex_like + DataFrame.rename + DataFrame.rename_axis + DataFrame.reset_index + DataFrame.sample + DataFrame.set_axis + DataFrame.set_index + DataFrame.tail + DataFrame.take + DataFrame.truncate + +Missing Data Handling +--------------------- + +.. sdc_toctree + DataFrame.dropna + DataFrame.fillna + DataFrame.replace + DataFrame.interpolate + +Re-Shaping, Sorting, Transposing +-------------------------------- + +.. sdc_toctree + DataFrame.droplevel + DataFrame.pivot + DataFrame.pivot_table + DataFrame.reorder_levels + DataFrame.sort_values + DataFrame.sort_index + DataFrame.nlargest + DataFrame.nsmallest + DataFrame.swaplevel + DataFrame.stack + DataFrame.unstack + DataFrame.swapaxes + DataFrame.melt + DataFrame.explode + DataFrame.squeeze + DataFrame.to_xarray + DataFrame.T + DataFrame.transpose + +Combining, Joining, Merging +----------------------------- + +.. sdc_toctree + DataFrame.append + DataFrame.assign + DataFrame.join + DataFrame.merge + DataFrame.update + +Time Series +----------- + +.. sdc_toctree + DataFrame.asfreq + DataFrame.asof + DataFrame.shift + DataFrame.slice_shift + DataFrame.tshift + DataFrame.first_valid_index + DataFrame.last_valid_index + DataFrame.resample + DataFrame.to_period + DataFrame.to_timestamp + DataFrame.tz_convert + DataFrame.tz_localize + +.. _api_ref.dataframe.plotting: + +Plotting +-------- +``DataFrame.plot`` is both a callable method and a namespace attribute for +specific plotting methods of the form ``DataFrame.plot.<kind>``. + +.. 
sdc_toctree + DataFrame.plot + DataFrame.plot.area + DataFrame.plot.bar + DataFrame.plot.barh + DataFrame.plot.box + DataFrame.plot.density + DataFrame.plot.hexbin + DataFrame.plot.hist + DataFrame.plot.kde + DataFrame.plot.line + DataFrame.plot.pie + DataFrame.plot.scatter + DataFrame.boxplot + DataFrame.hist + +.. _api_ref.dataframe.sparse: + +Sparse Accessor +--------------- + +Sparse-``dtype`` specific methods and attributes are provided under the +``DataFrame.sparse`` accessor. + +.. sdc_toctree + DataFrame.sparse.density + DataFrame.sparse.from_spmatrix + DataFrame.sparse.to_coo + DataFrame.sparse.to_dense + +Serialization, Input-Output, Conversion +--------------------------------------- + +.. sdc_toctree + DataFrame.from_dict + DataFrame.from_records + DataFrame.info + DataFrame.to_parquet + DataFrame.to_pickle + DataFrame.to_csv + DataFrame.to_hdf + DataFrame.to_sql + DataFrame.to_dict + DataFrame.to_excel + DataFrame.to_json + DataFrame.to_html + DataFrame.to_feather + DataFrame.to_latex + DataFrame.to_stata + DataFrame.to_msgpack + DataFrame.to_gbq + DataFrame.to_records + DataFrame.to_string + DataFrame.to_clipboard + DataFrame.style diff --git a/docs/source/_templates/_api_ref.pandas.series_templ.rst b/docs/source/_templates/_api_ref.pandas.series_templ.rst index 6bf9981f2..53eaabda2 100644 --- a/docs/source/_templates/_api_ref.pandas.series_templ.rst +++ b/docs/source/_templates/_api_ref.pandas.series_templ.rst @@ -69,7 +69,7 @@ Indexing and Iteration For more information on ``.at``, ``.iat``, ``.loc``, and ``.iloc``, see the :ref:`indexing documentation <indexing>`. -Binary operator functions +Binary Operator Functions ------------------------- .. 
sdc_toctree diff --git a/docs/source/apireference.rst b/docs/source/apireference.rst index 1857d152a..9f4e2a917 100644 --- a/docs/source/apireference.rst +++ b/docs/source/apireference.rst @@ -7,3 +7,4 @@ API Reference :maxdepth: 2 Series: Columnar Data Structure <./_api_ref/api_ref.pandas.series.rst> + Dataframe: Tabular Data Structure <./_api_ref/api_ref.pandas.dataframe.rst> diff --git a/docs/source/buildscripts/__init__.py b/docs/source/buildscripts/__init__.py new file mode 100644 index 000000000..a8421bb14 --- /dev/null +++ b/docs/source/buildscripts/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** diff --git a/docs/source/buildscripts/apiref_generator.py b/docs/source/buildscripts/apiref_generator.py index 674d7f080..d39f3e796 100644 --- a/docs/source/buildscripts/apiref_generator.py +++ b/docs/source/buildscripts/apiref_generator.py @@ -26,12 +26,20 @@ # ***************************************************************************** import pandas -from sdc_object_utils import init_pandas_structure, init_sdc_structure, init_pandas_sdc_dict, get_sdc_object, get_obj +from sdc_object_utils import init_pandas_structure, init_sdc_structure, init_pandas_sdc_dict +from sdc_object_utils import get_sdc_object_by_pandas_name, get_obj from sdc_object_utils import get_class_methods, get_class_attributes, get_fully_qualified_name from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent, get_short_description from sdc_doc_utils import split_in_sections, get_docstring, create_heading_str, cut_sdc_dev_guide import os + +APIREF_TEMPLATE_FNAMES = [ + './_templates/_api_ref.pandas.series_templ.rst', + './_templates/_api_ref.pandas.dataframe_templ.rst', +] + + APIREF_REL_PATH = './_api_ref/' @@ -168,7 +176,7 @@ def reformat_explicit_markup(text): line = lines[0] if line.strip().startswith('.. versionchanged') or line.strip().startswith('.. versionadded') or \ - line.strip().startswith('.. deprecated'): + line.strip().startswith('.. 
deprecated') or line.strip().startswith('.. _'): new_text += line + '\n' # Here if found explicit markup if len(lines) > 1: @@ -385,7 +393,7 @@ def _reformat_returns(title, text): return text -def generate_simple_object_doc(pandas_obj, short_doc_flag=False, doc_from_pandas_flag=True, add_sdc_sections=True, +def generate_simple_object_doc(pandas_name, short_doc_flag=False, doc_from_pandas_flag=True, add_sdc_sections=True, unsupported_warning=True, reformat_pandas=True): """ Generates documentation for Pandas object obj according to flags. @@ -393,7 +401,7 @@ def generate_simple_object_doc(pandas_obj, short_doc_flag=False, doc_from_pandas For complex objects such as modules and classes the function does not go to sub-objects, i.e. to class attributes and sub-modules of the module. - :param pandas_obj: Pandas object for which documentation to be generated. + :param pandas_name: Pandas object for which documentation to be generated. :param short_doc_flag: Flag to indicate that only short description for the object is needed. :param doc_from_pandas_flag: Flag to indicate that the documentation must be taken from Pandas docstring. This docstring can be extended with Intel SDC specific sections. 
These are See Also, Examples, @@ -409,6 +417,7 @@ def generate_simple_object_doc(pandas_obj, short_doc_flag=False, doc_from_pandas """ doc = '' + pandas_obj = get_obj(pandas_name) if pandas_obj is None: return doc # Empty documentation for no-object @@ -450,7 +459,7 @@ def generate_simple_object_doc(pandas_obj, short_doc_flag=False, doc_from_pandas return doc # Here if additional sections from Intel SDC object needs to be added to pandas_obj docstring - sdc_obj = get_sdc_object(pandas_obj) + sdc_obj = get_sdc_object_by_pandas_name(pandas_name) if sdc_obj is None: if unsupported_warning: if reformat_pandas: @@ -541,8 +550,7 @@ def write_simple_object_rst_file(pandas_name, short_doc_flag=False, doc_from_pan :param doc_from_pandas_flag: Flag, if ``True``, derive the description from Pandas docstring for the object. :param add_sdc_sections: Flag, if ``True``, extend the docstring with respective Intel SDC sections (if any) """ - pandas_obj = get_obj(pandas_name) - doc = generate_simple_object_doc(pandas_obj, short_doc_flag, doc_from_pandas_flag, add_sdc_sections) + doc = generate_simple_object_doc(pandas_name, short_doc_flag, doc_from_pandas_flag, add_sdc_sections) if doc is None or doc == '': return @@ -612,14 +620,13 @@ def parse_templ_rst(fname_templ): indent = get_indent(line) line = line.strip() full_name = current_module_name + '.' + line - obj = get_obj(full_name) - short_description = generate_simple_object_doc(obj, short_doc_flag=True).strip() + short_description = generate_simple_object_doc(full_name, short_doc_flag=True).strip() new_line = reindent(':ref:`', indent) + line + ' <' + full_name + '>`\n' + \ reindent(short_description, indent+4) + '\n' fout.write(new_line) doc.pop(0) - full_description = generate_simple_object_doc(obj, short_doc_flag=False) + full_description = generate_simple_object_doc(full_name, short_doc_flag=False) f = open_file_for_write(APIREF_REL_PATH + full_name + '.rst') f.write('.. 
_' + full_name + ':\n\n:orphan:\n\n') f.write(create_heading_str(full_name, '*') + '\n\n') @@ -645,11 +652,18 @@ def write_class_rst_files(cls, short_doc_flag=False, doc_from_pandas_flag=True, def generate_api_reference(): + """ + Master function for API Reference generation. + + This function initializes all required data structures and parses the required templates to + generate the final RST files, which look and feel like the Pandas API Reference. + """ init_pandas_structure() init_sdc_structure() init_pandas_sdc_dict() - parse_templ_rst('./_templates/_api_ref.pandas.series_templ.rst') + for templ_fname in APIREF_TEMPLATE_FNAMES: + parse_templ_rst(templ_fname) if __name__ == "__main__": diff --git a/docs/source/buildscripts/sdc_doc_utils.py b/docs/source/buildscripts/sdc_doc_utils.py index c01a4da9b..797f56a5b 100644 --- a/docs/source/buildscripts/sdc_doc_utils.py +++ b/docs/source/buildscripts/sdc_doc_utils.py @@ -320,6 +320,7 @@ def split_in_sections(doc): :seealso: NumPy style `example `_ """ + doc = reindent(doc, 0) sections = doc.split('\n\n') # Sections are separated by empty lines titled_sections = [] diff --git a/docs/source/buildscripts/sdc_object_utils.py b/docs/source/buildscripts/sdc_object_utils.py index 31e335b21..1f14f0ca9 100644 --- a/docs/source/buildscripts/sdc_object_utils.py +++ b/docs/source/buildscripts/sdc_object_utils.py @@ -98,6 +98,25 @@ def get_sdc_object(pandas_obj): return None # There is no match in Intel SDC to pandas_obj +def get_sdc_object_by_pandas_name(pandas_name): + """ + Returns the Intel SDC object corresponding to the Pandas object whose name is given as the string ``pandas_name``. + + This function is needed because :func:`get_sdc_object` cannot uniquely match Intel SDC and Pandas objects. + For example, the same Pandas object represents both the :meth:`Series.get` and :meth:`DataFrame.get` methods. As a result, + :func:`get_sdc_object` will return **some** SDC object that matches the respective Pandas object. 
If you need + a unique match between Pandas and Intel SDC, use the :func:`get_sdc_object_by_pandas_name` function instead (which + should be the case for the majority of use cases). + + :param pandas_name: Name of the Pandas object (as a string) to be matched with an Intel SDC object + :return: Intel SDC object corresponding to the Pandas object named ``pandas_name``, or ``None`` if there is no match + """ + if pandas_name in pandas_sdc_dict: + return pandas_sdc_dict[pandas_name] + else: + return None # There is no match in Intel SDC to pandas_name + + def init_pandas_sdc_dict(): """ Initializes global dictionary that performs mapping between Pandas objects and SDC objects. @@ -118,6 +137,7 @@ def _map_sdc_to_pandas(sdc_obj): pandas_name = extract_pandas_name_from(text) pandas_obj = get_obj(pandas_name) pandas_sdc_dict[pandas_obj] = sdc_obj + pandas_sdc_dict[pandas_name] = sdc_obj return False global pandas_sdc_dict