From e9b7b3d2e1ad778351c72574ddb1068e97a90e41 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 22 Jan 2024 14:31:04 -0800 Subject: [PATCH 01/29] Process notebooks with pypandoc and a custom pandoc filter --- .github/workflows/build-tutorials.yml | 3 +- conf.py | 5 +- custom_pandoc_filter.py | 68 +++++++++++++++++++++++++++ post_process_notebooks.py | 46 ++++++++++++++++++ 4 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 custom_pandoc_filter.py create mode 100644 post_process_notebooks.py diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index 809b9ad4bf..a5bbc3beea 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -187,7 +187,8 @@ jobs: echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash docker exec -t "${container_name}" sh -c ".jenkins/build.sh" - + docker exec -t "${container_name}" python "./post_process_notebooks.py" + - name: Upload docs preview uses: seemethere/upload-artifact-s3@v5 if: ${{ github.event_name == 'pull_request' }} diff --git a/conf.py b/conf.py index e0d1d6fda6..a088886d9d 100644 --- a/conf.py +++ b/conf.py @@ -107,7 +107,10 @@ def reset_seeds(gallery_conf, fname): "# https://pytorch.org/tutorials/beginner/colab\n" "%matplotlib inline"), 'reset_modules': (reset_seeds), - 'ignore_pattern': r'_torch_export_nightly_tutorial.py' + 'ignore_pattern': r'_torch_export_nightly_tutorial.py', + 'pypandoc': {'extra_args': ['--mathjax'], + 'filters': ['./custom_pandoc_filter.py'], + }, } if os.getenv('GALLERY_PATTERN'): diff --git a/custom_pandoc_filter.py b/custom_pandoc_filter.py new file mode 100644 index 0000000000..335aaf1e50 --- /dev/null +++ b/custom_pandoc_filter.py @@ -0,0 +1,68 @@ +from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock +import markdown +import re + +def to_markdown(item): + if item['t'] == 'Str': + return item['c'] + elif item['t'] == 'Space': + 
return ' ' + elif item['t'] == 'Link': + # Assuming the link text is always in the first item + return f"[{item['c'][1][0]['c']}]({item['c'][2][0]})" + elif item['t'] == 'Code': + return f"`{item['c'][1]}`" + elif item['t'] == 'CodeBlock': + return f"```\n{item['c'][1]}\n```" + +def process_admonitions(key, value, format, meta): + if key == 'Div': + [[ident, classes, keyvals], contents] = value + if 'note' in classes: + color = '#54c7ec' + label = 'NOTE:' + elif 'tip' in classes: + color = '#6bcebb' + label = 'TIP:' + elif 'warning' in classes: + color = '#e94f3b' + label = 'WARNING:' + else: + return + + note_content = [] + for block in contents: + if 't' in block and block['t'] == 'Para': + for item in block['c']: + if item['t'] == 'Str': + note_content.append(Str(item['c'])) + elif item['t'] == 'Space': + note_content.append(Space()) + elif item['t'] == 'Link': + note_content.append(Link(*item['c'])) + elif item['t'] == 'Code': + note_content.append(Code(*item['c'])) + elif 't' in block and block['t'] == 'CodeBlock': + note_content.append(CodeBlock(*block['c'])) + + note_content_md = ''.join(to_markdown(item) for item in note_content) + html_content = markdown.markdown(note_content_md) + + return [{'t': 'RawBlock', 'c': ['html', f'
{label}
']}, {'t': 'RawBlock', 'c': ['html', '
']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '
']}] + + elif key == 'RawBlock': + [format, content] = value + if format == 'html' and 'iframe' in content: + # Extract the video URL + video_url = content.split('src="')[1].split('"')[0] + # Create the Python code to display the video + html_code = f""" +from IPython.display import display, HTML +html_code = \""" +{content} +\""" +display(HTML(html_code)) +""" + +if __name__ == "__main__": + toJSONFilter(process_admonitions) diff --git a/post_process_notebooks.py b/post_process_notebooks.py new file mode 100644 index 0000000000..88f225eba9 --- /dev/null +++ b/post_process_notebooks.py @@ -0,0 +1,46 @@ +import nbformat as nbf +import os +import re + +def get_gallery_dirs(conf_path): + """Execute the conf.py file and return the gallery directories.""" + namespace = {} + exec(open(conf_path).read(), namespace) + sphinx_gallery_conf = namespace['sphinx_gallery_conf'] + print(f"Processing directories: {', '.join(sphinx_gallery_conf['gallery_dirs'])}") + return sphinx_gallery_conf['gallery_dirs'] + +def process_notebook(notebook_path): + """Read and process a notebook file.""" + print(f'Processing file: {notebook_path}') + notebook = nbf.read(notebook_path, as_version=4) + for cell in notebook.cells: + if cell.cell_type == 'markdown': + cell.source = process_content(cell.source) + nbf.write(notebook, notebook_path) + +def process_content(content): + """Remove extra syntax from the content of a Markdown cell.""" + content = re.sub(r'```{=html}\n\n```', '">', content) + content = re.sub(r'<\/div>\n```', '\n', content) + content = re.sub(r'```{=html}\n\n```', '\n', content) + content = re.sub(r'```{=html}', '', content) + content = re.sub(r'

\n```', '

', content) + return content + +def process_directory(notebook_dir): + """Process all notebook files in a directory and its subdirectories.""" + for root, dirs, files in os.walk(notebook_dir): + for filename in files: + if filename.endswith('.ipynb'): + process_notebook(os.path.join(root, filename)) + +def main(): + """Main function to process all directories specified in the conf.py file.""" + conf_path = 'conf.py' + for notebook_dir in get_gallery_dirs(conf_path): + process_directory(notebook_dir) + +if __name__ == "__main__": + main() From aeec8e81370e3e79b71d6241f966da97808c0eea Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 23 Jan 2024 08:16:56 -0800 Subject: [PATCH 02/29] Update --- .github/workflows/build-tutorials.yml | 1 - .jenkins/build.sh | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index a5bbc3beea..2bf7ab3258 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -187,7 +187,6 @@ jobs: echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash docker exec -t "${container_name}" sh -c ".jenkins/build.sh" - docker exec -t "${container_name}" python "./post_process_notebooks.py" - name: Upload docs preview uses: seemethere/upload-artifact-s3@v5 diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 6d9c69d317..eeae2f4c35 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -63,6 +63,9 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then # Step 3: Run `make docs` to generate HTML files and static files for these tutorials make docs + # Step 3.1: Run the post-processing script: + python3 ../post_process_notebooks.py + # Step 4: If any of the generated files are not related the tutorial files we want to run, # then we remove them set +x From 617bccd8480ebb1dfc98c88ebaf0a34864490569 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 23 Jan 2024 08:40:24 -0800 Subject: [PATCH 
03/29] Update --- .github/workflows/build-tutorials.yml | 1 - .jenkins/build.sh | 2 +- post_process_notebooks.py => .jenkins/post_process_notebooks.py | 0 3 files changed, 1 insertion(+), 2 deletions(-) rename post_process_notebooks.py => .jenkins/post_process_notebooks.py (100%) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index 2bf7ab3258..b931d9fbbc 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -187,7 +187,6 @@ jobs: echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash docker exec -t "${container_name}" sh -c ".jenkins/build.sh" - - name: Upload docs preview uses: seemethere/upload-artifact-s3@v5 if: ${{ github.event_name == 'pull_request' }} diff --git a/.jenkins/build.sh b/.jenkins/build.sh index eeae2f4c35..c353c58a17 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -64,7 +64,7 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then make docs # Step 3.1: Run the post-processing script: - python3 ../post_process_notebooks.py + python .jenkins/post_process_notebooks.py # Step 4: If any of the generated files are not related the tutorial files we want to run, # then we remove them diff --git a/post_process_notebooks.py b/.jenkins/post_process_notebooks.py similarity index 100% rename from post_process_notebooks.py rename to .jenkins/post_process_notebooks.py From b5c9ed880c6224b3342ee04066f1a40b1f88dfc4 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 23 Jan 2024 10:36:58 -0800 Subject: [PATCH 04/29] Update --- beginner_source/nn_tutorial.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/beginner_source/nn_tutorial.py b/beginner_source/nn_tutorial.py index b45200fd49..40a28a95f0 100644 --- a/beginner_source/nn_tutorial.py +++ b/beginner_source/nn_tutorial.py @@ -10,6 +10,8 @@ # We recommend running this tutorial as a notebook, not a script. To download the notebook (``.ipynb``) file, # click the link at the top of the page. 
# +# Testing... +# # PyTorch provides the elegantly designed modules and classes `torch.nn `_ , # `torch.optim `_ , # `Dataset `_ , From 384308fcac4830bc22cc4b6971be55848fa2905d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 25 Jan 2024 08:24:00 -0800 Subject: [PATCH 05/29] Update --- conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf.py b/conf.py index a088886d9d..1eb35f7e3a 100644 --- a/conf.py +++ b/conf.py @@ -108,9 +108,9 @@ def reset_seeds(gallery_conf, fname): "%matplotlib inline"), 'reset_modules': (reset_seeds), 'ignore_pattern': r'_torch_export_nightly_tutorial.py', - 'pypandoc': {'extra_args': ['--mathjax'], - 'filters': ['./custom_pandoc_filter.py'], - }, + 'pypandoc': {'extra_args': ['--mathjax'], + 'filters': ['./custom_pandoc_filter.py'], + }, } if os.getenv('GALLERY_PATTERN'): From 04c50a9f7c37655aebcbf4ae2947296b30f7a55b Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 29 Jan 2024 10:45:54 -0800 Subject: [PATCH 06/29] Update --- custom_pandoc_filter.py => .jenkins/custom_pandoc_filter.py | 0 conf.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename custom_pandoc_filter.py => .jenkins/custom_pandoc_filter.py (100%) diff --git a/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py similarity index 100% rename from custom_pandoc_filter.py rename to .jenkins/custom_pandoc_filter.py diff --git a/conf.py b/conf.py index 1eb35f7e3a..f328279263 100644 --- a/conf.py +++ b/conf.py @@ -109,7 +109,7 @@ def reset_seeds(gallery_conf, fname): 'reset_modules': (reset_seeds), 'ignore_pattern': r'_torch_export_nightly_tutorial.py', 'pypandoc': {'extra_args': ['--mathjax'], - 'filters': ['./custom_pandoc_filter.py'], + 'filters': ['.jenkins/custom_pandoc_filter.py'], }, } From 253250cafcbf9b3c189a89c1bcfc91738d1462a5 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 29 Jan 2024 14:41:39 -0800 Subject: [PATCH 07/29] Install pypandoc --- conf.py | 1 + requirements.txt | 5 +++-- 2 
files changed, 4 insertions(+), 2 deletions(-) diff --git a/conf.py b/conf.py index f328279263..2b79a03876 100644 --- a/conf.py +++ b/conf.py @@ -41,6 +41,7 @@ from custom_directives import IncludeDirective, GalleryItemDirective, CustomGalleryItemDirective, CustomCalloutItemDirective, CustomCardItemDirective import distutils.file_util import re +import pypandoc from get_sphinx_filenames import SPHINX_SHOULD_RUN import plotly.io as pio diff --git a/requirements.txt b/requirements.txt index d93af01585..7bcff69b64 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,8 +6,9 @@ sphinx-gallery==0.11.1 sphinx_design docutils==0.16 sphinx-copybutton -tqdm==4.66.1 -numpy==1.24.4 +pypandoc==1.12 +tqdm +numpy matplotlib librosa torch From 17f60342cae83010fbf25ccf1ff71532dfea88d0 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 30 Jan 2024 08:26:00 -0800 Subject: [PATCH 08/29] Install pypandoc --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7bcff69b64..86503c152c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ sphinx-gallery==0.11.1 sphinx_design docutils==0.16 sphinx-copybutton +pandoc==2.3 pypandoc==1.12 tqdm numpy From c841731d2c1f80e01a6f7465afd1954d4055f339 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 1 Feb 2024 13:30:16 -0800 Subject: [PATCH 09/29] Install dependencies --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 86503c152c..3849bb693c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,8 @@ docutils==0.16 sphinx-copybutton pandoc==2.3 pypandoc==1.12 +pandocfilters +markdown tqdm numpy matplotlib From 9609be0f59d1ebf70e88ad49c067338e8e836b25 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 2 Feb 2024 09:54:31 -0800 Subject: [PATCH 10/29] Update --- .github/workflows/build-tutorials.yml | 6 ++++++ conf.py | 1 - requirements.txt | 1 - 3 files changed, 6 
insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index b931d9fbbc..76b90d08c8 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -54,6 +54,12 @@ jobs: - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG uses: pytorch/test-infra/.github/actions/setup-nvidia@main + - name: Install pandoc (does not install from pypi) + run: | + set -ex + apt-get update + apt-get install -y pandoc + - name: Calculate docker image shell: bash id: docker-image diff --git a/conf.py b/conf.py index 2b79a03876..f328279263 100644 --- a/conf.py +++ b/conf.py @@ -41,7 +41,6 @@ from custom_directives import IncludeDirective, GalleryItemDirective, CustomGalleryItemDirective, CustomCalloutItemDirective, CustomCardItemDirective import distutils.file_util import re -import pypandoc from get_sphinx_filenames import SPHINX_SHOULD_RUN import plotly.io as pio diff --git a/requirements.txt b/requirements.txt index 3849bb693c..4db3242d66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ sphinx-gallery==0.11.1 sphinx_design docutils==0.16 sphinx-copybutton -pandoc==2.3 pypandoc==1.12 pandocfilters markdown From ba5eafb5b79bbcc89e62fdafd8d0a9faf006019e Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 2 Feb 2024 09:59:52 -0800 Subject: [PATCH 11/29] Update --- .github/workflows/build-tutorials.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index 76b90d08c8..8abb6f3562 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -56,9 +56,8 @@ jobs: - name: Install pandoc (does not install from pypi) run: | - set -ex - apt-get update - apt-get install -y pandoc + sudo apt-get update + sudo apt-get install -y pandoc - name: Calculate docker image shell: bash From 62fe5974f7e1b5a5d62e635e80f053033cda01fe 
Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 2 Feb 2024 10:04:35 -0800 Subject: [PATCH 12/29] Update --- .github/workflows/build-tutorials.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index 8abb6f3562..4a3c2c2fbf 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -54,11 +54,6 @@ jobs: - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG uses: pytorch/test-infra/.github/actions/setup-nvidia@main - - name: Install pandoc (does not install from pypi) - run: | - sudo apt-get update - sudo apt-get install -y pandoc - - name: Calculate docker image shell: bash id: docker-image @@ -67,6 +62,11 @@ jobs: # for some reason, pip installs it in a different place than what is looked at in the py file pip3 install requests==2.26 + + # Install pandoc (does not install from pypi) + sudo apt-get update + sudo apt-get install -y pandoc + pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py) echo "docker-image=${DOCKER_IMAGE}:${pyTorchDockerImageTag}" >> "${GITHUB_OUTPUT}" From 218604bd20601544ebd0d5a6f03e9ca1de7154b9 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 2 Feb 2024 10:11:20 -0800 Subject: [PATCH 13/29] Update --- .github/workflows/build-tutorials.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index 4a3c2c2fbf..dc0f24d367 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -63,10 +63,6 @@ jobs: # for some reason, pip installs it in a different place than what is looked at in the py file pip3 install requests==2.26 - # Install pandoc (does not install from pypi) - sudo apt-get update - sudo apt-get install -y pandoc - pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py) echo 
"docker-image=${DOCKER_IMAGE}:${pyTorchDockerImageTag}" >> "${GITHUB_OUTPUT}" From 686cd52325cac0c3132054b4d264151a66de7f37 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 2 Feb 2024 10:11:50 -0800 Subject: [PATCH 14/29] Update --- .jenkins/build.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index c353c58a17..89c69af766 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -15,6 +15,10 @@ sudo apt-get update || sudo apt-get install libgnutls30 sudo apt-get update sudo apt-get install -y --no-install-recommends unzip p7zip-full sox libsox-dev libsox-fmt-all rsync +# Install pandoc (does not install from pypi) +sudo apt-get update +sudo apt-get install -y pandoc + # NS: Path to python runtime should already be part of docker container # export PATH=/opt/conda/bin:$PATH rm -rf src From f219b50f3a79d2762d0a928e494955b20cb943af Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 5 Feb 2024 08:17:58 -0800 Subject: [PATCH 15/29] Add --toc extra arg --- conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf.py b/conf.py index f328279263..6c577b6feb 100644 --- a/conf.py +++ b/conf.py @@ -108,7 +108,7 @@ def reset_seeds(gallery_conf, fname): "%matplotlib inline"), 'reset_modules': (reset_seeds), 'ignore_pattern': r'_torch_export_nightly_tutorial.py', - 'pypandoc': {'extra_args': ['--mathjax'], + 'pypandoc': {'extra_args': ['--mathjax', '--toc'], 'filters': ['.jenkins/custom_pandoc_filter.py'], }, } From 008d86cc1978ee4492f6f4c95f143932c8b367bb Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 09:53:56 -0800 Subject: [PATCH 16/29] Updates --- .jenkins/custom_pandoc_filter.py | 82 +++++++++++++++++++++++++++--- .jenkins/post_process_notebooks.py | 74 ++++++++++++++++++++++----- conf.py | 4 +- 3 files changed, 138 insertions(+), 22 deletions(-) diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py index 335aaf1e50..ae3314d2cf 
100644 --- a/.jenkins/custom_pandoc_filter.py +++ b/.jenkins/custom_pandoc_filter.py @@ -1,21 +1,34 @@ from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock import markdown import re +import html +import markdown.inlinepatterns +import os +import nbformat as nbf -def to_markdown(item): +def to_markdown(item, skip_octicon=False): + # A handler function to process strings, links, code, and code + # blocks if item['t'] == 'Str': return item['c'] elif item['t'] == 'Space': return ' ' elif item['t'] == 'Link': - # Assuming the link text is always in the first item - return f"[{item['c'][1][0]['c']}]({item['c'][2][0]})" + link_text = ''.join(to_markdown(i, skip_octicon) for i in item['c'][1]) + return f'{link_text}' elif item['t'] == 'Code': - return f"`{item['c'][1]}`" + # Need to remove octicon as they don't render in .ipynb + if any(value == 'octicon' for key, value in item['c'][0][2]): + return '' + else: + # Escape the code and wrap it in tags + return f'{html.escape(item["c"][1])}' elif item['t'] == 'CodeBlock': - return f"```\n{item['c'][1]}\n```" + # Escape the code block and wrap it in
 tags
+        return f'
{html.escape(item["c"][1])}
' def process_admonitions(key, value, format, meta): + # Replace admonitions with proper HTML. if key == 'Div': [[ident, classes, keyvals], contents] = value if 'note' in classes: @@ -49,14 +62,17 @@ def process_admonitions(key, value, format, meta): html_content = markdown.markdown(note_content_md) return [{'t': 'RawBlock', 'c': ['html', f'
{label}
']}, {'t': 'RawBlock', 'c': ['html', '
']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '
']}] - elif key == 'RawBlock': + # this is needed for the cells that have embedded video. + # We add a special tag to those: ``` {python, .jupyter-code-cell} + # The post-processing script then finds those and generates separate + # code cells that can load video. [format, content] = value if format == 'html' and 'iframe' in content: # Extract the video URL video_url = content.split('src="')[1].split('"')[0] # Create the Python code to display the video - html_code = f""" + python_code = f""" from IPython.display import display, HTML html_code = \""" {content} @@ -64,5 +80,55 @@ def process_admonitions(key, value, format, meta): display(HTML(html_code)) """ + return {'t': 'CodeBlock', 'c': [['', ['python', 'jupyter-code-cell'], []], python_code]} + + +def process_images(key, value, format, meta): + # Add https://pytorch.org/tutorials/ to images so that they + # load correctly in the notebook. + if key == 'Image': + [ident, classes, keyvals], caption, [src, title] = value + if not src.startswith('http'): + while src.startswith('../'): + src = src[3:] + if src.startswith('/_static'): + src = src[1:] + src = 'https://pytorch.org/tutorials/' + src + return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]} + +def process_grids(key, value, format, meta): + # Generate side by side grid cards. Only for the two-cards layout + # that we use in the tutorial template. + if key == 'Div': + [[ident, classes, keyvals], contents] = value + if 'grid' in classes: + columns = ['
', + '
'] + column_num = 0 + for block in contents: + if 't' in block and block['t'] == 'Div' and 'grid-item-card' in block['c'][0][1]: + item_html = '' + for item in block['c'][1]: + if item['t'] == 'Para': + item_html += '

' + ''.join(to_markdown(i) for i in item['c']) + '

' + elif item['t'] == 'BulletList': + item_html += '
    ' + for list_item in item['c']: + item_html += '
  • ' + ''.join(to_markdown(i) for i in list_item[0]['c']) + '
  • ' + item_html += '
' + columns[column_num] += item_html + column_num = (column_num + 1) % 2 + columns = [column + '
' for column in columns] + return {'t': 'RawBlock', 'c': ['html', ''.join(columns)]} + +def is_code_block(item): + return item['t'] == 'Code' and 'octicon' in item['c'][1] +def process_all(key, value, format, meta): + new_value = process_admonitions(key, value, format, meta) + if new_value is None: + new_value = process_images(key, value, format, meta) + if new_value is None: + new_value = process_grids(key, value, format, meta) + return new_value if __name__ == "__main__": - toJSONFilter(process_admonitions) + toJSONFilter(process_all) diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index 88f225eba9..6241cde907 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -2,25 +2,71 @@ import os import re +# Pattern to search ``` {.python .jupyter-code-cell} +pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL) + def get_gallery_dirs(conf_path): - """Execute the conf.py file and return the gallery directories.""" + """ + Execute the conf.py file and return the gallery directories. + This is needed to make sure the script runs through all + dirs. + """ namespace = {} exec(open(conf_path).read(), namespace) sphinx_gallery_conf = namespace['sphinx_gallery_conf'] print(f"Processing directories: {', '.join(sphinx_gallery_conf['gallery_dirs'])}") return sphinx_gallery_conf['gallery_dirs'] -def process_notebook(notebook_path): - """Read and process a notebook file.""" +def process_video_cell(notebook_path): + """ + This function finds the code blocks with the + "``` {.python .jupyter-code-cell}" code bocks and slices them + into a separe code cell (instead of markdown) which allows to + load the video in the notebook. The rest of the content is placed + in a new markdown cell. 
+ """ print(f'Processing file: {notebook_path}') notebook = nbf.read(notebook_path, as_version=4) - for cell in notebook.cells: + + # Iterate over markdown cells + for i, cell in enumerate(notebook.cells): if cell.cell_type == 'markdown': - cell.source = process_content(cell.source) + match = pattern.search(cell.source) + if match: + # Extract the parts before and after the video code block + before_html_block = match.group(1) + code_block = match.group(2) + + # Add a comment to run the cell to display the video + code_block = "# Run this cell to load the video\n" + code_block + # Create a new code cell + new_code_cell = nbf.v4.new_code_cell(source=code_block) + + # Replace the original markdown cell with the part before the code block + cell.source = before_html_block + + # Insert the new code cell after the current one + notebook.cells.insert(i+1, new_code_cell) + + # If there is content after the HTML code block, create a new markdown cell + if len(match.group(3).strip()) > 0: + after_html_block = match.group(3) + new_markdown_cell = nbf.v4.new_markdown_cell(source=after_html_block) + # Create a new markdown cell and add the content after code block there + notebook.cells.insert(i+2, new_markdown_cell) + + else: + # Remove ```{=html} from the code block + cell.source = remove_html_tagk(cell.source) + nbf.write(notebook, notebook_path) -def process_content(content): - """Remove extra syntax from the content of a Markdown cell.""" +def remove_html_tag(content): + """ + Pandoc adds an extraneous ```{=html} ``` to raw HTML blocks which + prevents it from rendering correctly. This function removes + ```{=html} that we don't need. + """ content = re.sub(r'```{=html}\n\n```', '">', content) content = re.sub(r'<\/div>\n```', '
\n', content) @@ -29,18 +75,20 @@ def process_content(content): content = re.sub(r'

\n```', '

', content) return content -def process_directory(notebook_dir): - """Process all notebook files in a directory and its subdirectories.""" +def walk_dir(notebook_dir): + """ + Walk the dir and process all notebook files in + the gallery directory and its subdirectories. + """ for root, dirs, files in os.walk(notebook_dir): for filename in files: if filename.endswith('.ipynb'): - process_notebook(os.path.join(root, filename)) + process_video_cell(os.path.join(root, filename)) def main(): - """Main function to process all directories specified in the conf.py file.""" - conf_path = 'conf.py' + conf_path = '../conf.py' for notebook_dir in get_gallery_dirs(conf_path): - process_directory(notebook_dir) + walk_dir(notebook_dir) if __name__ == "__main__": main() diff --git a/conf.py b/conf.py index 6c577b6feb..755c6d7eec 100644 --- a/conf.py +++ b/conf.py @@ -42,7 +42,9 @@ import distutils.file_util import re from get_sphinx_filenames import SPHINX_SHOULD_RUN - +import pandocfilters +import pandoc +import pypandoc import plotly.io as pio pio.renderers.default = 'sphinx_gallery' From f74983d945452efc97695d432b57c5ec54e483ea Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 10:32:27 -0800 Subject: [PATCH 17/29] Updates --- conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/conf.py b/conf.py index 755c6d7eec..ecddd97a28 100644 --- a/conf.py +++ b/conf.py @@ -43,7 +43,6 @@ import re from get_sphinx_filenames import SPHINX_SHOULD_RUN import pandocfilters -import pandoc import pypandoc import plotly.io as pio pio.renderers.default = 'sphinx_gallery' From d45b48bdb7a002e24f01373b5c30a99597d24677 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 10:54:31 -0800 Subject: [PATCH 18/29] Updates --- .jenkins/custom_pandoc_filter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py index ae3314d2cf..d0565e6a12 100644 --- a/.jenkins/custom_pandoc_filter.py +++ 
b/.jenkins/custom_pandoc_filter.py @@ -26,6 +26,8 @@ def to_markdown(item, skip_octicon=False): elif item['t'] == 'CodeBlock': # Escape the code block and wrap it in
 tags
         return f'
{html.escape(item["c"][1])}
' + else: + return '' def process_admonitions(key, value, format, meta): # Replace admonitions with proper HTML. From 6efe44b8960c87ddeb048d312c0b99c199f31373 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 11:21:50 -0800 Subject: [PATCH 19/29] Correct path to conf.py --- .jenkins/post_process_notebooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index 6241cde907..535c8270e7 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -86,7 +86,7 @@ def walk_dir(notebook_dir): process_video_cell(os.path.join(root, filename)) def main(): - conf_path = '../conf.py' + conf_path = './conf.py' for notebook_dir in get_gallery_dirs(conf_path): walk_dir(notebook_dir) From 67a7be62b6727fee2f22ffceab8042c571d5f364 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 11:44:57 -0800 Subject: [PATCH 20/29] Fix typo --- .jenkins/post_process_notebooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index 535c8270e7..66482007b0 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -57,7 +57,7 @@ def process_video_cell(notebook_path): else: # Remove ```{=html} from the code block - cell.source = remove_html_tagk(cell.source) + cell.source = remove_html_tag(cell.source) nbf.write(notebook, notebook_path) From f318f2de65eca9badde96f518860575f60bc42e8 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 13:24:27 -0800 Subject: [PATCH 21/29] Fix --- .jenkins/post_process_notebooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index 66482007b0..132acd0c63 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -33,6 +33,7 @@ def 
process_video_cell(notebook_path): if cell.cell_type == 'markdown': match = pattern.search(cell.source) if match: + print(f'Match found in cell {i}: {match.group(0)[:100]}...') # Extract the parts before and after the video code block before_html_block = match.group(1) code_block = match.group(2) From af71efd74c6c50a188bd7bcde728f611a592a120 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 13:59:31 -0800 Subject: [PATCH 22/29] Fix --- .jenkins/post_process_notebooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index 132acd0c63..3abf04f535 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -3,7 +3,7 @@ import re # Pattern to search ``` {.python .jupyter-code-cell} -pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL) +pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\npython_code = """\n.*?\n"""\ndisplay\(HTML\(python_code\)\))\n```(.*)', re.DOTALL) def get_gallery_dirs(conf_path): """ From 764a8d6aed986f155098d91ea55a1f0c95b26dd8 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 14:40:52 -0800 Subject: [PATCH 23/29] Upgrade nbformat to 5.9.2 --- .github/workflows/build-tutorials.yml | 2 +- .jenkins/post_process_notebooks.py | 2 +- beginner_source/nn_tutorial.py | 2 -- conf.py | 3 ++- requirements.txt | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index dc0f24d367..809b9ad4bf 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -62,7 +62,6 @@ jobs: # for some reason, pip installs it in a different place than what is looked at in the py file pip3 install 
requests==2.26 - pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py) echo "docker-image=${DOCKER_IMAGE}:${pyTorchDockerImageTag}" >> "${GITHUB_OUTPUT}" @@ -188,6 +187,7 @@ jobs: echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash docker exec -t "${container_name}" sh -c ".jenkins/build.sh" + - name: Upload docs preview uses: seemethere/upload-artifact-s3@v5 if: ${{ github.event_name == 'pull_request' }} diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index 3abf04f535..132acd0c63 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -3,7 +3,7 @@ import re # Pattern to search ``` {.python .jupyter-code-cell} -pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\npython_code = """\n.*?\n"""\ndisplay\(HTML\(python_code\)\))\n```(.*)', re.DOTALL) +pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL) def get_gallery_dirs(conf_path): """ diff --git a/beginner_source/nn_tutorial.py b/beginner_source/nn_tutorial.py index 40a28a95f0..b45200fd49 100644 --- a/beginner_source/nn_tutorial.py +++ b/beginner_source/nn_tutorial.py @@ -10,8 +10,6 @@ # We recommend running this tutorial as a notebook, not a script. To download the notebook (``.ipynb``) file, # click the link at the top of the page. # -# Testing... 
-# # PyTorch provides the elegantly designed modules and classes `torch.nn `_ , # `torch.optim `_ , # `Dataset `_ , diff --git a/conf.py b/conf.py index ecddd97a28..36bf506f86 100644 --- a/conf.py +++ b/conf.py @@ -75,7 +75,8 @@ 'sphinx.ext.intersphinx', 'sphinx_copybutton', 'sphinx_gallery.gen_gallery', - 'sphinx_design' + 'sphinx_design', + 'nbsphinx' ] intersphinx_mapping = { diff --git a/requirements.txt b/requirements.txt index d94b7c5ff9..92c2dab9ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ torchx torchrl==0.3.0 tensordict==0.3.0 ax-platform -nbformat>=4.2.0 +nbformat>=5.9.2 datasets transformers torchmultimodal-nightly # needs to be updated to stable as soon as it's avaialable From 8931f7cfa4f7989b236335604960436e504d856c Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 15:25:08 -0800 Subject: [PATCH 24/29] Clear imports, install nbsphinx --- .jenkins/custom_pandoc_filter.py | 4 ---- requirements.txt | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py index d0565e6a12..2dde91a164 100644 --- a/.jenkins/custom_pandoc_filter.py +++ b/.jenkins/custom_pandoc_filter.py @@ -1,10 +1,6 @@ from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock import markdown -import re import html -import markdown.inlinepatterns -import os -import nbformat as nbf def to_markdown(item, skip_octicon=False): # A handler function to process strings, links, code, and code diff --git a/requirements.txt b/requirements.txt index 92c2dab9ae..68c4d6dcf9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ sphinx==5.0.0 sphinx-gallery==0.11.1 sphinx_design +nbsphinx docutils==0.16 sphinx-copybutton pypandoc==1.12 From 72b744f33c667540d175f0573651778f147c9b66 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 8 Mar 2024 19:04:01 -0800 Subject: [PATCH 25/29] Run script on master ---
.jenkins/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 89c69af766..14f29bc223 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -147,6 +147,9 @@ elif [[ "${JOB_TYPE}" == "manager" ]]; then bash $DIR/remove_invisible_code_block_batch.sh docs python .jenkins/validate_tutorials_built.py + # Step 5.1: Run post-processing script on .ipynb files: + python .jenkins/post_process_notebooks.py + # Step 6: Copy generated HTML files and static files to S3 7z a manager.7z docs awsv2 s3 cp manager.7z s3://${BUCKET_NAME}/${COMMIT_ID}/manager.7z From 40acba902606d86f84972b64209ec75dfe3a775f Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 11 Mar 2024 11:07:13 -0700 Subject: [PATCH 26/29] Modify the walk function. --- .jenkins/post_process_notebooks.py | 34 +++++++++++++----------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index 132acd0c63..a648427efa 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -2,21 +2,18 @@ import os import re +""" +This post-processing script needs to run after the .ipynb files are +generated. The script removes extraneous ```{=html} syntax from the +admonitions and splits the cells that have video iframe into a +separate code cell that can be run to load the video directly +in the notebook. This script is included in build.sh. +""" + + # Pattern to search ``` {.python .jupyter-code-cell} pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL) -def get_gallery_dirs(conf_path): - """ - Execute the conf.py file and return the gallery directories. - This is needed to make sure the script runs through all - dirs. 
- """ - namespace = {} - exec(open(conf_path).read(), namespace) - sphinx_gallery_conf = namespace['sphinx_gallery_conf'] - print(f"Processing directories: {', '.join(sphinx_gallery_conf['gallery_dirs'])}") - return sphinx_gallery_conf['gallery_dirs'] - def process_video_cell(notebook_path): """ This function finds the code blocks with the @@ -48,6 +45,7 @@ def process_video_cell(notebook_path): # Insert the new code cell after the current one notebook.cells.insert(i+1, new_code_cell) + print(f'New code cell created with source: {new_code_cell.source}') # If there is content after the HTML code block, create a new markdown cell if len(match.group(3).strip()) > 0: @@ -76,20 +74,18 @@ def remove_html_tag(content): content = re.sub(r'

\n```', '

', content) return content -def walk_dir(notebook_dir): +def walk_dir(downloads_dir): """ Walk the dir and process all notebook files in - the gallery directory and its subdirectories. + the _downloads directory and its subdirectories. """ - for root, dirs, files in os.walk(notebook_dir): + for root, dirs, files in os.walk(downloads_dir): for filename in files: if filename.endswith('.ipynb'): process_video_cell(os.path.join(root, filename)) - def main(): - conf_path = './conf.py' - for notebook_dir in get_gallery_dirs(conf_path): - walk_dir(notebook_dir) + downloads_dir = './docs/_downloads' + walk_dir(downloads_dir) if __name__ == "__main__": main() From ba9d005575e5ec467dd9060c3fe2541ade1c4f6d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 12 Mar 2024 15:57:18 -0700 Subject: [PATCH 27/29] Apply suggestions from code review Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com> --- .jenkins/custom_pandoc_filter.py | 37 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py index 2dde91a164..693f15a51c 100644 --- a/.jenkins/custom_pandoc_filter.py +++ b/.jenkins/custom_pandoc_filter.py @@ -43,7 +43,7 @@ def process_admonitions(key, value, format, meta): note_content = [] for block in contents: - if 't' in block and block['t'] == 'Para': + if block.get('t') == 'Para': for item in block['c']: if item['t'] == 'Str': note_content.append(Str(item['c'])) @@ -53,7 +53,7 @@ def process_admonitions(key, value, format, meta): note_content.append(Link(*item['c'])) elif item['t'] == 'Code': note_content.append(Code(*item['c'])) - elif 't' in block and block['t'] == 'CodeBlock': + elif block.get('t') == 'CodeBlock': note_content.append(CodeBlock(*block['c'])) note_content_md = ''.join(to_markdown(item) for item in note_content) @@ -84,15 +84,17 @@ def process_admonitions(key, value, format, meta): def process_images(key, value, format, 
meta): # Add https://pytorch.org/tutorials/ to images so that they # load correctly in the notebook. - if key == 'Image': - [ident, classes, keyvals], caption, [src, title] = value - if not src.startswith('http'): - while src.startswith('../'): - src = src[3:] - if src.startswith('/_static'): - src = src[1:] - src = 'https://pytorch.org/tutorials/' + src - return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]} + if key != 'Image': + return None + [ident, classes, keyvals], caption, [src, title] = value + if not src.startswith('http'): + while src.startswith('../'): + src = src[3:] + if src.startswith('/_static'): + src = src[1:] + src = 'https://pytorch.org/tutorials/' + src + + return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]} def process_grids(key, value, format, meta): # Generate side by side grid cards. Only for the two-cards layout @@ -121,12 +123,15 @@ def process_grids(key, value, format, meta): def is_code_block(item): return item['t'] == 'Code' and 'octicon' in item['c'][1] + + def process_all(key, value, format, meta): - new_value = process_admonitions(key, value, format, meta) - if new_value is None: - new_value = process_images(key, value, format, meta) - if new_value is None: - new_value = process_grids(key, value, format, meta) + for transform in [process_admonitions, process_images, process_grids]: + new_value = transform(key, value, format, meta) + if new_value is not None: + break return new_value + + if __name__ == "__main__": toJSONFilter(process_all) From 96673f6eeccbb9e2f3a0a6af47f90ae8b169d362 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 12 Mar 2024 15:57:31 -0700 Subject: [PATCH 28/29] Update pins --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 68c4d6dcf9..843362dd09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,8 +10,8 @@ sphinx-copybutton pypandoc==1.12 pandocfilters 
markdown -tqdm -numpy +tqdm==4.66.1 +numpy==1.24.4 matplotlib librosa torch From 7adf3f6e2a99436785c30a4dbcf4c9a8d25ef33c Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 12 Mar 2024 16:23:50 -0700 Subject: [PATCH 29/29] Formatting cleanup --- .jenkins/custom_pandoc_filter.py | 6 ++++-- .jenkins/post_process_notebooks.py | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py index 693f15a51c..f4ceb0df11 100644 --- a/.jenkins/custom_pandoc_filter.py +++ b/.jenkins/custom_pandoc_filter.py @@ -25,6 +25,7 @@ def to_markdown(item, skip_octicon=False): else: return '' + def process_admonitions(key, value, format, meta): # Replace admonitions with proper HTML. if key == 'Div': @@ -59,7 +60,7 @@ def process_admonitions(key, value, format, meta): note_content_md = ''.join(to_markdown(item) for item in note_content) html_content = markdown.markdown(note_content_md) - return [{'t': 'RawBlock', 'c': ['html', f'
{label}
']}, {'t': 'RawBlock', 'c': ['html', '
']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '
']}] + return [{'t': 'RawBlock', 'c': ['html', f'
{label}
']}, {'t': 'RawBlock', 'c': ['html', '
']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '
']}] elif key == 'RawBlock': # this is needed for the cells that have embedded video. # We add a special tag to those: ``` {python, .jupyter-code-cell} @@ -93,9 +94,10 @@ def process_images(key, value, format, meta): if src.startswith('/_static'): src = src[1:] src = 'https://pytorch.org/tutorials/' + src - + return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]} + def process_grids(key, value, format, meta): # Generate side by side grid cards. Only for the two-cards layout # that we use in the tutorial template. diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py index a648427efa..81f51766c3 100644 --- a/.jenkins/post_process_notebooks.py +++ b/.jenkins/post_process_notebooks.py @@ -14,6 +14,7 @@ # Pattern to search ``` {.python .jupyter-code-cell} pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL) + def process_video_cell(notebook_path): """ This function finds the code blocks with the @@ -60,6 +61,7 @@ def process_video_cell(notebook_path): nbf.write(notebook, notebook_path) + def remove_html_tag(content): """ Pandoc adds an extraneous ```{=html} ``` to raw HTML blocks which @@ -74,6 +76,7 @@ def remove_html_tag(content): content = re.sub(r'

\n```', '

', content) return content + def walk_dir(downloads_dir): """ Walk the dir and process all notebook files in @@ -83,9 +86,12 @@ def walk_dir(downloads_dir): for filename in files: if filename.endswith('.ipynb'): process_video_cell(os.path.join(root, filename)) + + def main(): downloads_dir = './docs/_downloads' walk_dir(downloads_dir) + if __name__ == "__main__": main()