diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 503dba3..3382471 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -3,7 +3,6 @@ version: 2 updates: - # Set update schedule for GitHub Actions - package-ecosystem: "github-actions" directory: "/" diff --git a/.github/workflows/link-checker-pr.yml b/.github/workflows/link-checker-pr.yml index 7e9a47b..e0ff799 100644 --- a/.github/workflows/link-checker-pr.yml +++ b/.github/workflows/link-checker-pr.yml @@ -13,9 +13,9 @@ jobs: with: # Avoid using single or double quotes for multiline patterns files: | - **.md + **.md matrix: true - + linkChecker: runs-on: ubuntu-latest needs: changedFiles diff --git a/.github/workflows/link-checker.yml b/.github/workflows/link-checker.yml index badf569..bab6a2a 100644 --- a/.github/workflows/link-checker.yml +++ b/.github/workflows/link-checker.yml @@ -5,7 +5,7 @@ on: branches: - main schedule: - - cron: '0 4 * * *' + - cron: "0 4 * * *" jobs: linkChecker: runs-on: ubuntu-latest diff --git a/.github/workflows/pdf.yml b/.github/workflows/pdf.yml index 12273c3..e50e149 100644 --- a/.github/workflows/pdf.yml +++ b/.github/workflows/pdf.yml @@ -16,7 +16,7 @@ jobs: - uses: actions/setup-node@v4 with: - node-version: '20' + node-version: "20" - name: Install uploader run: npm install @iomeg/zenodo-upload @@ -33,7 +33,7 @@ jobs: --user $(id -u):$(id -g) \ -e "PDF_OUTPUT_NAME=guide-nlesc.pdf" \ ghcr.io/kernoeb/docker-docsify-pdf:latest - + - name: Upload PDF as an artifact if: always() uses: actions/upload-artifact@v4 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..6350268 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/rbubley/mirrors-prettier + rev: v3.4.2 + hooks: + - id: prettier diff --git a/CITATION.cff b/CITATION.cff index efa7ee4..5b89bb8 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -109,7 +109,7 @@ authors: - affiliation: "Netherlands eScience Center" family-names: Zapata given-names: Felipe - orcid: "https://orcid.org/0000-0001-8286-677X" + orcid: "https://orcid.org/0000-0001-8286-677X" - affiliation: "Netherlands eScience Center" family-names: Bakker given-names: Tom diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 84d7ab9..5ae8e37 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,5 @@ # Contributing to this Guide + - [Who? You!](#who_you) - [Audience](#audience) - [Scope](#scope) @@ -6,7 +7,6 @@ - [Technical details (docsify)](#technical-details) - [Zen of the Guide](#zen-of-the-guide) - # Who? You! This guide is primarily written by the Research Software Engineers at the Netherlands eScience Center. @@ -16,12 +16,13 @@ Contributions by anyone (also outside the Center) are most welcome! While everybody is encouraged to contribute where they can, we appoint maintainers for specific pages to regularly keep things up to date and think along with contributors. To see who is responsible for which part of the guide see the maintainer listed at the top of a page. -If you are interested in becoming a chapter owner for a page that is listed as *unmaintained*, please open a pull request to add your name instead of *unmaintained*. +If you are interested in becoming a chapter owner for a page that is listed as _unmaintained_, please open a pull request to add your name instead of _unmaintained_. ## Editorial board The editors make sure content is in line with [the scope](#scope), that it is maintainable and that it is maintained. 
In practice they will: + - track, lead towards satisfactory conclusion of and when necessary (in case of disagreement) decide on issues, discussions and pull requests, - flag content that needs to be updated or removed, - ask for input from page maintainers or other contributors, @@ -30,21 +31,20 @@ In practice they will: and do any other regular editing tasks. Currently the team consists of: + - Bouwe Andela [@bouweandela](https://github.com/bouweandela) (research software engineer) - Carlos Martínez Ortiz [@c-martinez](https://github.com/c-martinez) (community manager) - Patrick Bos [@egpbos](https://github.com/egpbos) (technology lead) - - # Audience Our eScience Center _RSEs_ are the prototypical audience members, in particular those starting out in some unfamiliar area of technology. Some characteristics include: + - They are interested in _intermediate to advanced level_ best practices. If there are already ten easily found blog posts about it, it doesn't have to be in the Guide. - They are a _programmer or researcher_ that is already familiar with some other programming language or software-related technology. - They may be generally interested (in particular topics of eScience practice and research software development in general or how this is done at the eScience Center specifically), but their main aim is towards _practical_ application, not to create a literature study of the current landscape of (research) software. - # Scope To make sure the information in this guide stays relevant and up to date it is intentionally low on technical details. @@ -73,13 +73,12 @@ In practice, this means the Guide (for now) will mostly consist of language guid It can also sometimes function as a staging/draft area for eventually moving content to the Turing Way. However, we will urge you to contribute to the Turing Way directly. - ## For significant changes / additions, especially new chapters + Please check if your contribution fits in [The Turing Way](https://github.com/the-turing-way/the-turing-way) before considering contributing to this guide. Feel free to ask the [editors](#editorial-board) if you are unsure or open an [issue](https://github.com/NLeSC/guide/issues) to discuss it. If it does not fit, please open an [issue](https://github.com/NLeSC/guide/issues) to discuss your planned contribution before starting to work on it, to avoid disappointment later. - # How? ## Style, form @@ -92,14 +91,25 @@ A well written piece of advice should contain the following information: 4. Long how: also explain other options for implementing advice, e.g. _here's a list of some more version control programs and/or services which we can recommend_. ## Technical + Please use branches and pull requests to contribute content. If you are not part of the Netherlands eScience Center organization but would still like to contribute please do by submitting a pull request from a fork. ```shell git clone https://github.com/NLeSC/guide.git +cd guide git branch newbranch git checkout newbranch ``` +Please install [pre-commit](https://pre-commit.com/) and enable the pre-commit +hooks by running + +```shell +pre-commit install +``` + +to automatically format your changes when committing. + Add your new awesome feature, fix bugs, make other changes. 
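If you would like to check the formatting yourself before committing, the hooks can also be run by hand. A minimal sketch (the Markdown file name below is just a placeholder):

```shell
# Run every configured hook (currently prettier) against all files in the repository
pre-commit run --all-files

# Or let the hooks run automatically as part of a normal commit
git add some_page.md
git commit -m "Describe your change"
```

If prettier reformats a file during the commit, pre-commit aborts the commit; stage the reformatted file with `git add` and commit again.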
To preview changes locally, host the repo with a static file web server: @@ -116,7 +126,7 @@ To check if there are any broken links use [lychee](https://github.com/lycheever docker run --init -it -v `pwd`:/docs lycheeverse/lychee /docs --config=docs/lychee.toml ``` -If everything works as it should, ``git add``, ``commit`` and ``push`` like normal. +If everything works as it should, `git add`, `commit` and `push` like normal. If you have made a significant contribution to the guide, please make sure to add yourself to the `CITATION.cff` file so your name can be included in the list of authors of the guide. @@ -125,7 +135,6 @@ If you have made a significant contribution to the guide, please make sure to ad We host a PDF version of the guide on [Zenodo](https://doi.org/10.5281/zenodo.4020564). To update it a [new release](https://github.com/NLeSC/guide/releases) needs to be made of the guide. This will trigger a GitHub action to create a new Zenodo version with the PDF file. - # Technical details The basics of how the Guide is implemented. @@ -133,19 +142,20 @@ The basics of how the Guide is implemented. The Guide is rendered by [docsify](https://docsify.js.org) and hosted on GitHub Pages. Deployment is "automatic" from the main branch, because docsify requires no build step into static HTML pages, but rather generates HTML dynamically from the MarkDown files in the Guide repository. The only configuration that was necessary for this automatic deployment is: + 1. The [index.html](https://github.com/NLeSC/guide/blob/main/index.html) file in the root directory that loads docsify. 2. The empty [.nojekyll](https://github.com/NLeSC/guide/blob/main/.nojekyll) file, which tells GitHub that we're not dealing with Jekyll here (the GitHub Pages default). 3. Telling GitHub in the Settings -> Pages menu to load the Pages content from the root directory. -4. The [_sidebar.md](https://github.com/NLeSC/guide/blob/main/_sidebar.md) file for the table of contents. +4. The [\_sidebar.md](https://github.com/NLeSC/guide/blob/main/_sidebar.md) file for the table of contents. Plugins that we use: + - The [docsify full text search plugin](https://docsify.js.org/#/plugins?id=full-text-search) - The [docsify Google Analytics plugin](https://docsify.js.org/#/plugins?id=google-analytics) - [Prism](https://docsify.js.org/#/language-highlight) is used for language highlighting. If you want to change anything in this part, please discuss in an issue. - # Zen of the Guide 0. Help your colleagues. diff --git a/README.md b/README.md index 7a446b5..dded3e8 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ This is a guide to research software development at the Netherlands eScience Cen It is a living document, written by and for our research software engineers (RSEs) and our collaborators. We write it for two reasons: + 1. To have a trusted source for quickly getting started on selected software development topics. We hope this will help RSEs (including our future selves!) to get off to a flying start on new projects in software/technological areas they are not yet familiar with. 2. To discuss and reach consensus on such topics/areas. @@ -42,6 +43,7 @@ You'll hone your writing skills while you're at it. See the [Contributing to this Guide](/CONTRIBUTING.md) chapter if you want to know more about how you can help, or ask one of the editors. 
Currently the editorial team consists of: + - Bouwe Andela [@bouweandela](https://github.com/bouweandela) (research software engineer) - Carlos Martínez Ortiz [@c-martinez](https://github.com/c-martinez) (community manager) - Patrick Bos [@egpbos](https://github.com/egpbos) (technology lead) diff --git a/_sidebar.md b/_sidebar.md index 94a427c..8a7624a 100644 --- a/_sidebar.md +++ b/_sidebar.md @@ -1,15 +1,14 @@ - -* [Introduction](/README.md) -* [Best practices](/best_practices.md) -* [Language Guides](/language_guides/languages_overview.md) - * [Bash](/language_guides/bash.md) - * [JavaScript and TypeScript](/language_guides/javascript.md) - * [Python](/language_guides/python.md) - * [R](/language_guides/r.md) - * [C and C++](/language_guides/ccpp.md) - * [Fortran](/language_guides/fortran.md) -* [Technology Guides](/technology/technology_overview.md) - * [GPU programming](/technology/gpu.md) - * [UX - User Experience](/technology/user_experience.md) - * [Datasets](/technology/datasets.md) -* [Contributing to this Guide](/CONTRIBUTING.md) +- [Introduction](/README.md) +- [Best practices](/best_practices.md) +- [Language Guides](/language_guides/languages_overview.md) + - [Bash](/language_guides/bash.md) + - [JavaScript and TypeScript](/language_guides/javascript.md) + - [Python](/language_guides/python.md) + - [R](/language_guides/r.md) + - [C and C++](/language_guides/ccpp.md) + - [Fortran](/language_guides/fortran.md) +- [Technology Guides](/technology/technology_overview.md) + - [GPU programming](/technology/gpu.md) + - [UX - User Experience](/technology/user_experience.md) + - [Datasets](/technology/datasets.md) +- [Contributing to this Guide](/CONTRIBUTING.md) diff --git a/best_practices.md b/best_practices.md index dd146c2..8e59bc5 100644 --- a/best_practices.md +++ b/best_practices.md @@ -104,10 +104,12 @@ developing in. Below is a list of editors that support many programming languages. Integrated Development Environments (IDEs): + - [Visual Studio Code](https://code.visualstudio.com/) - modern editor with extensive plugin ecosystem that can make it as powerful as most IDEs - [JetBrains IDEs](https://www.jetbrains.com/ides/) - specialized IDEs for Python, C++, Java and web, all using the IntelliJ framework - [Eclipse](https://www.eclipse.org/ide/) - a bit older but still nice Text editors: + - [vim](https://www.vim.org/) - classic text editor - [emacs](https://www.gnu.org/software/emacs/) - classic text editor diff --git a/index.html b/index.html index 750dfb7..1c4120a 100644 --- a/index.html +++ b/index.html @@ -1,36 +1,42 @@ - + + + + Netherlands eScience Center Guide + + + + + + + - - - Netherlands eScience Center Guide - - - - - - - - - -
+ + + + + + diff --git a/language_guides/bash.md b/language_guides/bash.md index 3b90456..1274816 100644 --- a/language_guides/bash.md +++ b/language_guides/bash.md @@ -1,7 +1,6 @@ # Bash -*Page maintainer: Bouwe Andela* [@bouweandela](https://github.com/bouweandela) - +_Page maintainer: Bouwe Andela_ [@bouweandela](https://github.com/bouweandela) Bash is both a command line interface, also known as a **shell**, and a scripting language. @@ -41,19 +40,22 @@ learn - the name and function of [commonly used command line tools](#Commonly-used-command-line-tools) ### Bash keyboard shortcuts + An introduction to [bash keyboard shortcuts](https://www.tecmint.com/linux-command-line-bash-shortcut-keys/) can be found here. -Note that Bash can also be configured such that it uses the *vi* keyboard -shortcuts instead of the default *emacs* ones, which can be useful if you +Note that Bash can also be configured such that it uses the _vi_ keyboard +shortcuts instead of the default _emacs_ ones, which can be useful if you [prefer vi](https://skeptics.stackexchange.com/questions/17492/does-emacs-cause-emacs-pinky). ### Bash aliases + [Bash aliases](https://linuxize.com/post/how-to-create-bash-aliases/) allow you to define shorthands for commands you use often. Typically these are defined in the `~/.bashrc` or `~/.bash_aliases` file. ### Commonly used command line tools + It is recommended that you know at least the names and use of the following command line tools. The details of how to use a tool exactly can easily be found by searching the @@ -82,7 +84,7 @@ efficient if you already know the name of the command you are looking for. **Working with text** Here we list the most commonly used Bash tools that are built to manipulate -*lines of text*. +_lines of text_. The nice thing about these tools is that you can combine them by streaming the output of one tool to become the input of the next tool. Have a look at the @@ -103,7 +105,7 @@ like `>` for output and `<` for input to a command from a text file. - `cat` - Print the content of a file - `head` - Print the first n lines - `tail` - Print the last n lines -- `tee` - Read from standard input and write to standard output and file +- `tee` - Read from standard input and write to standard output and file - `less` - Read text - `sort` - Sort lines of text - `uniq` - Keep unique lines @@ -179,9 +181,11 @@ to make sure that your bash script is as likely to do what you think it should do as possible. In addition to that, always start the script with + ```bash set -euo pipefail ``` + this will stop the script if there is - `-e` a command that exits with a non-zero exit code @@ -191,6 +195,7 @@ this will stop the script if there is an exit code other than zero usually indicates that an error occurred. If needed, you can temporarily allow this kind of error for a single line by wrapping it like this + ```bash set +e false # A command that returns a non-zero exit code diff --git a/language_guides/ccpp.md b/language_guides/ccpp.md index 0765721..1189293 100644 --- a/language_guides/ccpp.md +++ b/language_guides/ccpp.md @@ -1,19 +1,21 @@ # C and C++ -*Page maintainer: Johan Hidding* [@jhidding](https://github.com/jhidding) - +_Page maintainer: Johan Hidding_ [@jhidding](https://github.com/jhidding) C++ is one of the hardest languages to learn. Entering a project where C++ coding is needed should not be taken lightly. This guide focusses on tools and documentation for use of C++ in an open-source environment. 
### Standards + The latest ratified standard of C++ is C++17. The first standardised version of C++ is from 1998. The next version of C++ is scheduled for 2020. With these updates (especially the 2011 one) the preferred style of C++ changed drastically. As a result, a program written in 1998 looks very different from one from 2018, but it still compiles. There are many videos on Youtube describing some of these changes and how they can be used to make your code look better (i.e. more maintainable). This goes with a warning: Don't try to be too smart; other people still have to understand your code. ## Practical use + ### Compilers + There are two main-stream open-source C++ compilers. -* [GCC](https://gcc.gnu.org/) -* [LLVM - CLANG](http://llvm.org/) +- [GCC](https://gcc.gnu.org/) +- [LLVM - CLANG](http://llvm.org/) Overall, these compilers are more or less similar in terms of features, language support, compile times and (perhaps most importantly) performance of the generated binaries. The generated binary performance does differ for specific algorithms. @@ -26,25 +28,27 @@ If you need every last erg of performance, some cluster environments have the In These compilers come with a lot of options. Some basic literacy in GCC and CLANG: -* `-O` changes optimisation levels -* `-std=c++xx` sets the C++ standard used -* `-I*path*` add path to search for include files -* `-o*file*` output file -* `-c` only compile, do not link -* `-Wall` be more verbose with warnings +- `-O` changes optimisation levels +- `-std=c++xx` sets the C++ standard used +- `-I*path*` add path to search for include files +- `-o*file*` output file +- `-c` only compile, do not link +- `-Wall` be more verbose with warnings And linker flags: -* `-l*library*` links to a library -* `-L*path*` add path to search for libraries -* `-shared` make a shared library -* `-Wl,-z,defs` ensures all symbols are accounted for when linking to a shared object +- `-l*library*` links to a library +- `-L*path*` add path to search for libraries +- `-shared` make a shared library +- `-Wl,-z,defs` ensures all symbols are accounted for when linking to a shared object ### Interpreter + There **is** a C++ interpreter called [Cling](https://rawgit.com/vgvassilev/cling/master/www/index.html). This also comes with a [Jupyter notebook kernel](http://jupyter.org/try). ### Build systems + There are several build systems that handle C/C++. Currently, [the CMake system is most popular](https://www.jetbrains.com/research/devecosystem-2018/cpp/). It is not actually a build system itself; it generates build files based on (in theory) platform-independent and compiler-independent configuration files. @@ -53,7 +57,7 @@ Some popular IDEs keep automatic count for CMake, or are even completely built a The major drawback of CMake is the confusing documentation, but this is generally made up for in terms of community support. When Googling for ways to write your CMake files, make sure you look for "modern CMake", which is a style that has been gaining traction in the last few years and makes everything better (e.g. dependency management, but also just the CMake files themselves). -Traditionally, the auto-tools suite (AutoConf and AutoMake) was *the* way to build things on Unix; you'll probably know the three command salute: +Traditionally, the auto-tools suite (AutoConf and AutoMake) was _the_ way to build things on Unix; you'll probably know the three command salute: > ./configure --prefix=~/.local ... 
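As a rough sketch of how the two workflows compare in practice (the options and install prefix below are illustrative only), a modern CMake project is usually configured into a separate build directory and then built, while the autotools salute finishes with the familiar `make` steps:

```shell
# Modern CMake: configure out-of-source, then build
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build

# Classic autotools equivalent
./configure --prefix="$HOME/.local"
make
make install
```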
@@ -68,37 +72,38 @@ Microsoft Visual Studio has its own project model / build system and a library l We do not recommend these if you don't also supply an option for building with CMake or Autotools. Another modern alternative that has been gaining attention mainly in the GNU/Gnome/Linux world is [Meson](http://mesonbuild.com/), which is also based on [Ninja](https://ninja-build.org/). - ### Package management + There is no standard package manager like `pip`, `npm` or `gem` for C++. This means that you will have to choose depending on your particular circumstances what tool to use for installing libraries and, possibly, packaging the tools you yourself built. Some important factors include: + - Whether or not you have root/admin access to your system - What kind of environment/ecosystem you are working in. For instance: - * There are many tools targeted specifically at HPC/cluster environments. - * Specific communities (e.g. NLP research or bioinformatics) may have gravitated towards specific tools, so you'll probably want to use those for maximum impact. + - There are many tools targeted specifically at HPC/cluster environments. + - Specific communities (e.g. NLP research or bioinformatics) may have gravitated towards specific tools, so you'll probably want to use those for maximum impact. - Whether software is packaged at all; many C/C++ tools only come in source form, hopefully with [build setup configuration](#build-systems). - #### Yes root access + If you have root/admin access to your system, the first go-to for libraries may be your OS package manager. If the target package is not in there, try to see if there is an equivalent library that is, and see what kind of software uses it. - #### No root access + A good, cross-platform option nowadays is to use [`miniconda`](https://conda.io/miniconda.html), which works on Linux, macOS and Windows. The `conda-forge` channel especially has a lot of C++ libraries. Specify that you want to use this channel with command line option `-c conda-forge`. The `bioconda` channel in turn builds upon the `conda-forge` libraries, hosting a lot of bioinformatics tools. - #### Managing non-packaged software -If you do have to install a programm, which depends on a specific version of a library which depends on a specific version of another library, you enter what is called *dependency hell*. + +If you do have to install a programm, which depends on a specific version of a library which depends on a specific version of another library, you enter what is called _dependency hell_. Some agility in compiling and installing libraries is essential. You can install libraries in `/usr/local` or in `${HOME}/.local` if you aren't root, but there you have no package management. -Many HPC administrations provide [environment modules](https://modules.readthedocs.io/en/latest/) (`module avail`), which allow you to easily populate your `$PATH` and other environment variables to find the respective package. You can also write your own module files to solve your *dependency hell*. +Many HPC administrations provide [environment modules](https://modules.readthedocs.io/en/latest/) (`module avail`), which allow you to easily populate your `$PATH` and other environment variables to find the respective package. You can also write your own module files to solve your _dependency hell_. A lot of libraries come with a package description for `pkg-config`. These descriptions are installed in `/usr/lib/pkgconfig`. 
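A short sketch of how `pkg-config` is typically used, assuming here that the GSL development files (and their `gsl.pc` description) are installed:

```shell
# Let pkg-config supply the compile and link flags for a library
g++ main.cpp $(pkg-config --cflags --libs gsl) -o main

# Libraries installed under your own prefix need their pkgconfig directory on the search path
export PKG_CONFIG_PATH="$HOME/.local/lib/pkgconfig:$PKG_CONFIG_PATH"
```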
@@ -110,10 +115,11 @@ If you want to keep things organized on systems where you use multiple versions You install each library in its own directory (`~/.local/pkg/` for instance), then running `xstow` will create symlinks to the files in the `~/.local` directory (one above the XStow package directory). Using XStow in this way alows you to keep a single additional search path when compiling your next library. - #### Packaging software + In case you find the manual compilation too cumbersome, or want to conveniently distribute software (your own or perhaps one of your project's dependencies that the author did not package themselves), you'll have to build your own package. The above solutions are good defaults for this, but there are some additional options that are widely used. + - For distribution to root/admin users: system package managers (Linux: `apt`, `yum`, `pacman`, macOS: Homebrew, Macports) - For distribution to any users: [Conda](https://conda.io/miniconda.html) and [Conan](https://conan.io/) are cross-platform (Linux, macOS, Windows) - For distribution to HPC/cluster users: see options below @@ -122,14 +128,15 @@ When choosing which system to build your package for, it is imporant to consider If any of these tools are already widely used in your audience, pick that one. If not, it is really up to your personal preferences, as all tools have their pros and cons. Some general guidelines could be: + - prefer multi-platform over single platform - prefer widely used over obscure (even if it's technically magnificent, if nobody uses it, it's useless for distributing your software) - prefer multi-language over single language (especially for C++, because it is so often used to build libraries that power higher level languages) But, as the state of the package management ecosystem shows, in practice, there will be many exceptions to these guidelines. - #### HPC/cluster environments + One way around this if the system does use `module` is to use [Easybuild](https://easybuild.readthedocs.io/en/latest/), which makes installing modules in your home directory quite easy. Many recipes (called Easyblocks) for building packages or whole toolchains are [available online](https://easybuild.readthedocs.io/en/latest/version-specific/Supported_software.html). These are written in Python. @@ -148,33 +155,34 @@ Note that C++20 will bring Modules, which can be used as an alternative to inclu This will allow for easier packaging and will probably cause the package management landscape to change considerably. For this reason, it may be wise at this time to keep your options open and keep an eye on developments within the different package management solutions. - - ### Editors + This is largely a matter of taste, but not always. In theory, given that there are many good command line tools available for working with C(++) code, any code editor will do to write C(++). Some people also prefer to avoid relying on IDEs too much; by helping your memory they can also help you to write less maintainable code. People of this persuasion would usually recommend any of the following editors: -* Vim, recommended plugins: - + [NERDTree](https://github.com/scrooloose/nerdtree) file explorer. 
- + [editorconfig](https://github.com/editorconfig/editorconfig-vim) - + [stl.vim](https://www.vim.org/scripts/script.php?script_id=4293) adds STL to syntax highlighting - + [Syntastic](https://github.com/scrooloose/syntastic) - + Integrated debugging using [Clewn](http://clewn.sourceforge.net/) -* Emacs: - + Has GDB mode for debugging. -* More modern editors: Atom / Sublime Text / VS Code - + Rich plugin ecosystem - + Easier on the eyes... I mean modern OS/GUI integration + +- Vim, recommended plugins: + - [NERDTree](https://github.com/scrooloose/nerdtree) file explorer. + - [editorconfig](https://github.com/editorconfig/editorconfig-vim) + - [stl.vim](https://www.vim.org/scripts/script.php?script_id=4293) adds STL to syntax highlighting + - [Syntastic](https://github.com/scrooloose/syntastic) + - Integrated debugging using [Clewn](http://clewn.sourceforge.net/) +- Emacs: + - Has GDB mode for debugging. +- More modern editors: Atom / Sublime Text / VS Code + - Rich plugin ecosystem + - Easier on the eyes... I mean modern OS/GUI integration In practice, sometimes you run into large/complex existing projects and navigating these can be really hard, especially when you just start working on the project. In these cases, an IDE can really help. Intelligent code suggestions, easy jumping between code segments in different files, integrated debugging, testing, VCS, etc. can make the learning curve a lot less steep. Good/popular IDEs are -* CLion -* Visual Studio (Windows only, but many people swear by it) -* Eclipse + +- CLion +- Visual Studio (Windows only, but many people swear by it) +- Eclipse ### Code and program quality analysis @@ -185,48 +193,61 @@ C++ (and C) compilers come with built in linters and tools to check that your pr While most IDEs and some editors offer automatic formatting of files, [clang-format](http://clang.llvm.org/docs/ClangFormat.html) is a standalone tool, which offers sensible defaults and a huge range of customisation options. Integrating it into the CI workflow guarantees that checked in code adheres to formatting guidelines. #### Static code analysis with GCC + To use the GCC linter, use the following set of compiler flags when compiling C++ code: + ``` -O2 -Wall -Wextra -Wcast-align -Wcast-qual -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wlogical-op -Wmissing-declarations -Wmissing-include-dirs -Wnoexcept -Wold-style-cast -Woverloaded-virtual -Wredundant-decls -Wshadow -Wsign-conversion -Wsign-promo -Wstrict-null-sentinel -Wstrict-overflow=5 -Wswitch-default -Wundef -Wno-unused ``` + and these flags when compiling C code: + ``` -O2 -Wall -Wextra -Wformat-nonliteral -Wcast-align -Wpointer-arith -Wbad-function-cast -Wmissing-prototypes -Wstrict-prototypes -Wmissing-declarations -Winline -Wundef -Wnested-externs -Wcast-qual -Wshadow -Wwrite-strings -Wno-unused-parameter -Wfloat-equal ``` + Use at least optimization level 2 (`-O2`) to have GCC perform code analysis up to a level where you get all warnings. Use the `-Werror` flag to turn warnings into errors, i.e. your code won't compile if you have warnings. See this [post](https://stackoverflow.com/questions/5088460/flags-to-enable-thorough-and-verbose-g-warnings) for an explanation of why this is a reasonable selection of warning flags. #### Static code analysis with Clang (LLVM) + Clang has the very convenient flag + ``` -Weverything ``` + A good strategy is probably to start out using this flag and then disable any warnings that you do not find useful. 
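A hedged example of what such a starting point might look like (the disabled warnings below are only illustrative; keep the ones that are useful for your code base and silence the ones that only produce noise):

```shell
clang++ -std=c++17 -O2 -Weverything \
  -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded \
  -c example.cpp
```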
#### Static code analysis with cppcheck + An additional good tool that detects many issues is cppcheck. Most editors/IDEs have plugins to use it automatically. #### Dynamic program analysis using `-fsanitize` + Both GCC and Clang allow you to compile your code with the `-fsanitize=` flag, which will instrument your program to detect various errors quickly. The most useful option is probably + ``` -fsanitize=address -O2 -fno-omit-frame-pointer -g ``` + which is a fast memory error detector. There are also other options available like `-fsanitize=thread` and `-fsanitize=undefined`. See the GCC man page or the [Clang online manual](https://clang.llvm.org/docs/index.html) for more information. #### Dynamic program analysis using the valgrind suite of tools -The [valgrind suite of tools](http://valgrind.org/info/tools.html) has tools similar to what is provided by the `-fsanitize` compiler flag as well as various profiling tools. Using the valgrind tool memcheck to detect memory errors is typically slower than using compiler provided option, so this might be something you will want to do less often. You will probably want to compile your code with debug symbols enabled (`-g`) in order to get useful output with memcheck. When using the profilers, keep in mind that a [statistical profiler](https://en.wikipedia.org/wiki/Profiling_%28computer_programming%29#Statistical_profilers) may give you more realistic results. + +The [valgrind suite of tools](http://valgrind.org/info/tools.html) has tools similar to what is provided by the `-fsanitize` compiler flag as well as various profiling tools. Using the valgrind tool memcheck to detect memory errors is typically slower than using compiler provided option, so this might be something you will want to do less often. You will probably want to compile your code with debug symbols enabled (`-g`) in order to get useful output with memcheck. When using the profilers, keep in mind that a [statistical profiler](https://en.wikipedia.org/wiki/Profiling_%28computer_programming%29#Statistical_profilers) may give you more realistic results. ### Automated code refactoring Sometimes you have to update large parts of your code base a little bit, like when you move from one standard to another or you changed a function definition. Although this can be accomplished with a `sed` command using regular expressions, this approach is dangerous, if you use macros, your code is not formatted properly etc.... [Clang-tidy](https://clang.llvm.org/extra/clang-tidy/) can do these things and many more by using the abstract syntax tree of the compiler instead of the source code files to refactor your code and thus is much more robust but also powerful. - ### Debugging + Most of your time programming C(++) will probably be spent on debugging. At some point, surrounding every line of your code with `printf("here %d", i++);` will no longer avail you and you will need a more powerful tool. With a debugger, you can inspect the program while it is running. @@ -235,96 +256,108 @@ When paused, you can inspect the current values of variables, manually step forw Learning to use these powerful tools is a very good time investment. There are some really good CppCon videos about debugging on YouTube. -* GDB - the GNU Debugger, many graphical front-ends are based on GDB. -* LLDB - the LLVM debugger. This is the go-to GDB alternative for the LLVM toolchain, especially on macOS where GDB is hard to setup. -* DDD - primitive GUI frontend for GDB. 
-* The IDEs mentioned above either have custom built-in debuggers or provide an interface to GDB or LLDB. - +- GDB - the GNU Debugger, many graphical front-ends are based on GDB. +- LLDB - the LLVM debugger. This is the go-to GDB alternative for the LLVM toolchain, especially on macOS where GDB is hard to setup. +- DDD - primitive GUI frontend for GDB. +- The IDEs mentioned above either have custom built-in debuggers or provide an interface to GDB or LLDB. ## Libraries + Historically, many C and C++ projects have seemed rather hestitant about using external dependencies (perhaps due to the poor dependency management situation mentioned above). However, many good (scientific) computing libraries are available today that you should consider using if applicable. Here follows a list of libraries that we recommend and/or have experience with. These can typically be installed from a wide range of [package managers](#package-management). ### Usual suspects + These scientific libraries are well known, widely used and have a lot of good online documentation. -* [GNU Scientific library (GSL)](https://www.gnu.org/software/gsl/doc/html/index.html) -* [FFTW](http://www.fftw.org): Fastest Fourier Transform in the West -* [OpenMPI](https://www.open-mpi.org). Use with caution, since it will strongly define the structure of your code, which may or may not be desirable. +- [GNU Scientific library (GSL)](https://www.gnu.org/software/gsl/doc/html/index.html) +- [FFTW](http://www.fftw.org): Fastest Fourier Transform in the West +- [OpenMPI](https://www.open-mpi.org). Use with caution, since it will strongly define the structure of your code, which may or may not be desirable. ### Boost + This is what the Google style guide has to say about Boost: -> * **Definition:** The Boost library collection is a popular collection of peer-reviewed, free, open-source C++ libraries. -> * **Pros:** Boost code is generally very high-quality, is widely portable, and fills many important gaps in the C++ standard library, such as type traits and better binders. -> * **Cons:** Some Boost libraries encourage coding practices which can hamper readability, such as metaprogramming and other advanced template techniques, and an excessively "functional" style of programming. +> - **Definition:** The Boost library collection is a popular collection of peer-reviewed, free, open-source C++ libraries. +> - **Pros:** Boost code is generally very high-quality, is widely portable, and fills many important gaps in the C++ standard library, such as type traits and better binders. +> - **Cons:** Some Boost libraries encourage coding practices which can hamper readability, such as metaprogramming and other advanced template techniques, and an excessively "functional" style of programming. As a general rule, don't use Boost when there is equivalent STL functionality. ### xtensor + [xtensor](http://github.com/xtensor-stack/xtensor) is a modern (C++14) N-dimensional tensor (array, matrix, etc) library for numerical work in the style of Python's NumPy. It aims for maximum performance (and in most cases it succeeds) and has an active development community. This library features, among other things: -* Lazy-evaluation: only calculate when necessary. -* Extensible template expressions: automatically optimize many subsequent operations into one "kernel". -* NumPy style syntax, including broadcasting. -* C++ STL style interfaces for easy integration with STL functionality. 
-* [Very low-effort integration with today's main data science languages Python](https://blog.esciencecenter.nl/irregular-data-in-pandas-using-c-88ce311cb9ef?gi=23ebfce3ae77), R and Julia. -This all makes xtensor a very interesting choice compared to similar older libraries like Eigen and Armadillo. +- Lazy-evaluation: only calculate when necessary. +- Extensible template expressions: automatically optimize many subsequent operations into one "kernel". +- NumPy style syntax, including broadcasting. +- C++ STL style interfaces for easy integration with STL functionality. +- [Very low-effort integration with today's main data science languages Python](https://blog.esciencecenter.nl/irregular-data-in-pandas-using-c-88ce311cb9ef?gi=23ebfce3ae77), R and Julia. + This all makes xtensor a very interesting choice compared to similar older libraries like Eigen and Armadillo. ### General purpose, I/O -* Configuration file reading and writing: - * [yaml-cpp](https://github.com/jbeder/yaml-cpp): A YAML parser and emitter in C++ - * [JSON for Modern C++](https://nlohmann.github.io/json/) -* Command line argument parsing: - * [argagg](https://github.com/vietjtnguyen/argagg) - * [Clara](https://github.com/catchorg/Clara) -* [fmt](https://github.com/fmtlib/fmt): pythonic string formatting -* [hdf5](https://github.com/HDFGroup/hdf5): The popular HDF5 binary format C++ interface. +- Configuration file reading and writing: + - [yaml-cpp](https://github.com/jbeder/yaml-cpp): A YAML parser and emitter in C++ + - [JSON for Modern C++](https://nlohmann.github.io/json/) +- Command line argument parsing: + - [argagg](https://github.com/vietjtnguyen/argagg) + - [Clara](https://github.com/catchorg/Clara) +- [fmt](https://github.com/fmtlib/fmt): pythonic string formatting +- [hdf5](https://github.com/HDFGroup/hdf5): The popular HDF5 binary format C++ interface. ### Parallel processing -* [oneAPI Threading Building Blocks](https://oneapi-src.github.io/oneTBB/) (oneTBB): template library for task parallelism -* [ZeroMQ](http://zeromq.org): lower level flexible communication library with a unified interface for message passing between threads and processes, but also between separate machines via TCP. +- [oneAPI Threading Building Blocks](https://oneapi-src.github.io/oneTBB/) (oneTBB): template library for task parallelism +- [ZeroMQ](http://zeromq.org): lower level flexible communication library with a unified interface for message passing between threads and processes, but also between separate machines via TCP. ## Style + ### Style guides + Good style is not just about layout and linting on trailing whitespace. It will mean the difference between a blazing fast code and a broken one. -* [C++ Core Guidelines](http://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines) -* [Guidelines Support Library](https://github.com/Microsoft/GSL) -* [Google Style Guide](https://google.github.io/styleguide/cppguide.html) -* [Google Style Guide - github](https://github.com/google/styleguide) Contains the CppLint linter. +- [C++ Core Guidelines](http://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines) +- [Guidelines Support Library](https://github.com/Microsoft/GSL) +- [Google Style Guide](https://google.github.io/styleguide/cppguide.html) +- [Google Style Guide - github](https://github.com/google/styleguide) Contains the CppLint linter. ### Project layout + A C++ project will usually have directories `/src` for source codes, `/doc` for Doxygen output, `/test` for testing code. Some people like to put header files in `/include`. 
In C++ though, many header files will contain functioning code (templates and inline functions). This makes the separation between code and interface a bit murky. In this case, it can make more sense to put headers and implementation in the same tree, but different communities will have different opinions on this. A third option that is sometimes used is to make separate "template implementation" header files. ## Sustainability + ### Testing + Use [Google Test](https://github.com/google/googletest). It is light-weight, good and is used a lot. [Catch2](https://github.com/catchorg/Catch2) is also pretty good, well maintained and has native support in the CLion IDE. ### Documentation + Use [Doxygen](http://www.doxygen.nl/). It is the de-facto standard way of inlining documentation into comment sections of your code. The output is very ugly. Mini-tutorial: run `doxygen -g` (preferably inside a `doc` folder) in a new project to set things up, from then on, run `doxygen` to (re-)generate the documentation. A newer but less mature option is [cldoc](http://jessevdk.github.io/cldoc/). ## Resources + ### Online -* [CppCon videos](https://www.youtube.com/user/CppCon): Many really good talks recorded at the various CppCon meetings. -* [CppReference.com](http://en.cppreference.com/w/) -* [C++ Annotations](http://www.icce.rug.nl/documents/cplusplus/) -* [CPlusPlus.com](http://www.cplusplus.com/) -* [Modern C++, according to Microsoft](https://msdn.microsoft.com/en-us/library/hh279654.aspx) + +- [CppCon videos](https://www.youtube.com/user/CppCon): Many really good talks recorded at the various CppCon meetings. +- [CppReference.com](http://en.cppreference.com/w/) +- [C++ Annotations](http://www.icce.rug.nl/documents/cplusplus/) +- [CPlusPlus.com](http://www.cplusplus.com/) +- [Modern C++, according to Microsoft](https://msdn.microsoft.com/en-us/library/hh279654.aspx) ### Books -* Bjarne Soustrup - The C++ Language -* Scott Meyers - Effective Modern C++ + +- Bjarne Soustrup - The C++ Language +- Scott Meyers - Effective Modern C++ diff --git a/language_guides/fortran.md b/language_guides/fortran.md index 36d352b..d26217f 100644 --- a/language_guides/fortran.md +++ b/language_guides/fortran.md @@ -1,7 +1,6 @@ # Fortran -*Page maintainer: Gijs van den Oord* [@goord](https://github.com/goord) - +_Page maintainer: Gijs van den Oord_ [@goord](https://github.com/goord) **Disclaimer: In general the Netherlands eScience Center does not recommend using Fortran. However, in some cases it is the only viable option, for instance if a project builds upon existing code written in this language. This section will be restricted to Fortran90, which captures majority of Fortran source code.** @@ -10,21 +9,23 @@ numerical compute workloads, with no existing alternative. In this case it is re ## Recommended sources of information -* [Fortran90 official documentation](http://www.fortran90.org/) -* [Fortran wiki](http://fortranwiki.org/fortran/show/HomePage) -* [Fortran90 handbook](http://micro.ustc.edu.cn/Fortran/Fortran%2090%20Handbook.pdf) +- [Fortran90 official documentation](http://www.fortran90.org/) +- [Fortran wiki](http://fortranwiki.org/fortran/show/HomePage) +- [Fortran90 handbook](http://micro.ustc.edu.cn/Fortran/Fortran%2090%20Handbook.pdf) ## Compilers -* **gfortran**: the official GNU Fortran compiler and part of the gcc compiler suite. 
-* **ifort**: the Intel Fortran compiler, widely used in academia and industry because of its superior performance, but +- **gfortran**: the official GNU Fortran compiler and part of the gcc compiler suite. +- **ifort**: the Intel Fortran compiler, widely used in academia and industry because of its superior performance, but unfortunately this is commercial software so not recommended. The same holds for the Portland compiler **pgfortran** ## Debuggers and diagnostic tools + There exist many commercial performance profiling tools by Intel and the Portland Group which we shall not discuss here. Most important freely available alternatives are -* **gdb**: the GNU debugger, part of the gcc compiler suite. Use the **-g** option to compile with debugging symbols. -* **gprof**: the GNU profiler, part of gcc too. Use the **-p** option to compile with profiling enabled. -* **valgrind**: to detect memory leaks. + +- **gdb**: the GNU debugger, part of the gcc compiler suite. Use the **-g** option to compile with debugging symbols. +- **gprof**: the GNU profiler, part of gcc too. Use the **-p** option to compile with profiling enabled. +- **valgrind**: to detect memory leaks. ## Editors and IDEs @@ -36,10 +37,10 @@ completion and refactoring tools one might consider the [CBFortran](http://cbfor If working on an existing code base, adopt the existing conventions. Otherwise we recommend the standard conventions, described in the [official documentation](http://www.fortran90.org/src/best-practices.html#fortran-style-guide) and the [Fortran company style guide](http://www.fortran.com/). We would like to add the following advice: -* Use free-form text input style (the default), with a maximal line width well below the 132 characters imposed by the Fortran90 standard. -* When a method does not need to alter any data in any module and returns a single value, use a function for it, otherwise use a subroutine. Minimize the latter to reasonable extent. -* Use the intent attributes in subroutine variable declarations as it makes the code much easier to understand. -* Use a performance-driven approach to the architecture, do not use the object-oriented features of Fortran90 if they slow down execution. Encapsulation by modules is perfectly acceptable. -* Add concise comments to modules and routines, and add comments to less obvious lines of code. -* Provide a test suite with your code, containing both unit and integration tests. Both automake and cmake provide test +- Use free-form text input style (the default), with a maximal line width well below the 132 characters imposed by the Fortran90 standard. +- When a method does not need to alter any data in any module and returns a single value, use a function for it, otherwise use a subroutine. Minimize the latter to reasonable extent. +- Use the intent attributes in subroutine variable declarations as it makes the code much easier to understand. +- Use a performance-driven approach to the architecture, do not use the object-oriented features of Fortran90 if they slow down execution. Encapsulation by modules is perfectly acceptable. +- Add concise comments to modules and routines, and add comments to less obvious lines of code. +- Provide a test suite with your code, containing both unit and integration tests. Both automake and cmake provide test suite functionality; if you create your makefile yourself, add a separate testing target. 
diff --git a/language_guides/javascript.md b/language_guides/javascript.md index f3ab5b0..3331ff9 100644 --- a/language_guides/javascript.md +++ b/language_guides/javascript.md @@ -1,6 +1,6 @@ # JavaScript -*Page maintainer: Ewan Cahen* [@ewan-escience](https://github.com/ewan-escience) +_Page maintainer: Ewan Cahen_ [@ewan-escience](https://github.com/ewan-escience) [JavaScript](https://en.wikipedia.org/wiki/JavaScript) (JS) is a programming language that is one of the three (together with [HTML](https://en.wikipedia.org/wiki/HTML) and [CSS](https://en.wikipedia.org/wiki/CSS)) core technologies of the web. It is essential if you want to write interactive webpages or web applications, because JavaScript is, apart from [WebAssembly](https://webassembly.org/), the only programming language that runs in modern browsers. Furthermore, JS can also run [outside of the browser](/language_guides/javascript?id=javascript-outside-of-the-browser), e.g. for running short scripts or full-blown servers. @@ -25,11 +25,11 @@ Before you pick a framework, you should first consider what you are trying to bu Currently, the most popular frameworks are (ordered by popularity according to the [StackOverflow 2024 Developer Survey](https://survey.stackoverflow.co/2024/technology#1-web-frameworks-and-technologies)) -* [React](https://react.dev/) -* [Angular](https://angular.dev/) -* [Vue.js](https://vuejs.org/) -* [Svelte](https://svelte.dev/) -* [SolidJS](https://www.solidjs.com/) +- [React](https://react.dev/) +- [Angular](https://angular.dev/) +- [Vue.js](https://vuejs.org/) +- [Svelte](https://svelte.dev/) +- [SolidJS](https://www.solidjs.com/) ### React @@ -74,15 +74,16 @@ Solid has a meta-framework called [SolidStart](https://start.solidjs.com/). Most JavaScript is run in web browsers, but if you want to run it outside of a browser (e.g. as a server or to run a script locally), you'll need a JavaScript **runtime**. These are the main runtimes available: -* [Node.js](https://nodejs.org) is the most used runtime, mainly for being the only available runtime for a long time. This gives the advantage that there is a lot of documentation available (official and unofficial, e.g. forums) and that many tools are available for Node.js. It comes with a [package manager (npm)](https://www.npmjs.com/) that allows you to install packages from a huge library. Its installation instructions can be found [here](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs). -* [Deno](https://deno.com/) can be seen as a successor to Node.js and tries to improve on it in a few ways, most notably: - * [built-in support](https://docs.deno.com/runtime/fundamentals/typescript/) for TypeScript - * a better [security model](https://docs.deno.com/runtime/fundamentals/typescript/) - * built-in tooling, like a [linter and formatter](https://docs.deno.com/runtime/fundamentals/linting_and_formatting/) - * [compiling](https://docs.deno.com/runtime/reference/cli/compiler/) to standalone executables - +- [Node.js](https://nodejs.org) is the most used runtime, mainly for being the only available runtime for a long time. This gives the advantage that there is a lot of documentation available (official and unofficial, e.g. forums) and that many tools are available for Node.js. It comes with a [package manager (npm)](https://www.npmjs.com/) that allows you to install packages from a huge library. Its installation instructions can be found [here](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs). 
+- [Deno](https://deno.com/) can be seen as a successor to Node.js and tries to improve on it in a few ways, most notably: + - [built-in support](https://docs.deno.com/runtime/fundamentals/typescript/) for TypeScript + - a better [security model](https://docs.deno.com/runtime/fundamentals/typescript/) + - built-in tooling, like a [linter and formatter](https://docs.deno.com/runtime/fundamentals/linting_and_formatting/) + - [compiling](https://docs.deno.com/runtime/reference/cli/compiler/) to standalone executables + Its installation instructions can be found [here](https://docs.deno.com/runtime/getting_started/installation/) -* [Bun](https://bun.sh/), the youngest runtime of the three. Its focus is on speed, reduced complexity and enhanced developer productivity (read more [here](https://bun.sh/docs)). Just like Deno, it comes with [built-in TypeScript support](https://bun.sh/docs/runtime/typescript), can [compile to standalone executables](https://bun.sh/docs/bundler/executables) and it aims to be fully [compatible with Node.js](https://bun.sh/docs/runtime/nodejs-apis). Its installation instructions can be found [here](https://bun.sh/docs/installation). + +- [Bun](https://bun.sh/), the youngest runtime of the three. Its focus is on speed, reduced complexity and enhanced developer productivity (read more [here](https://bun.sh/docs)). Just like Deno, it comes with [built-in TypeScript support](https://bun.sh/docs/runtime/typescript), can [compile to standalone executables](https://bun.sh/docs/bundler/executables) and it aims to be fully [compatible with Node.js](https://bun.sh/docs/runtime/nodejs-apis). Its installation instructions can be found [here](https://bun.sh/docs/installation). A more comprehensive comparison can be found [in this guide](https://zerotomastery.io/blog/deno-vs-node-vs-bun-comparison-guide/). @@ -92,32 +93,32 @@ To answer this question, you should consider what is important for you and your Choose Node.js if: -* you need a stable, mature and a well established runtime with a large community around it; -* you need to use dependencies that should most likely "just work"; -* you cannot convince the people you work with to install something else; -* you don't need any particular feature of any of its competitors. +- you need a stable, mature and a well established runtime with a large community around it; +- you need to use dependencies that should most likely "just work"; +- you cannot convince the people you work with to install something else; +- you don't need any particular feature of any of its competitors. Choose Deno if: -* you want a relatively mature runtime with a lot of features built in; -* you want out-of-the-box TypeScript support; -* you like its security model; -* you want a complete package with a linter and formatter included; -* you don't mind spending some time if something does not work directly. +- you want a relatively mature runtime with a lot of features built in; +- you want out-of-the-box TypeScript support; +- you like its security model; +- you want a complete package with a linter and formatter included; +- you don't mind spending some time if something does not work directly. Choose Bun if: -* you are willing to take a risk using a relatively new runtime; -* you want out-of-the-box TypeScript support; -* you want to use one of Bun's particular features; -* you need maximum performance (though you should benchmark for your use case first and consider using a different programming language). 
+- you are willing to take a risk using a relatively new runtime; +- you want out-of-the-box TypeScript support; +- you want to use one of Bun's particular features; +- you need maximum performance (though you should benchmark for your use case first and consider using a different programming language). ## Editors and IDEs These are some good JavaScript editors: -* [WebStorm](https://www.jetbrains.com/webstorm/) by JetBrains. It is free (as in monetary cost) for [non-commercial use](https://www.jetbrains.com/legal/docs/toolbox/license_non-commercial/); otherwise you have to buy a licence. Most of its features are also available in other IDEs of JetBrains, like [IntelliJ IDEA ultimate](https://www.jetbrains.com/idea/), [PyCharm professional](https://www.jetbrains.com/pycharm/) and [Rider](https://www.jetbrains.com/rider/). You can compare the products of JetBrains [here](https://www.jetbrains.com/products/compare/?product=webstorm&product=idea). Note that the free version of WebStorm will [collect data](https://blog.jetbrains.com/blog/2024/10/24/webstorm-and-rider-are-now-free-for-non-commercial-use/#anonymous-data-collection) anonymously, _without_ the option to disable it. WebStorm comes with a lot of [functionality included](https://www.jetbrains.com/webstorm/features/), but also gives access to a [Marketplace of plugins](https://plugins.jetbrains.com/). -* [Visual Studio Code](https://code.visualstudio.com), an open source and free (as in monetary cost) editor by Microsoft. By default, it collects [telemetry data](https://code.visualstudio.com/docs/getstarted/telemetry), but that can be [disabled](https://code.visualstudio.com/docs/getstarted/telemetry#_disable-telemetry-reporting). VSCode has a [limited feature set](https://code.visualstudio.com/docs/editor/whyvscode) out of the box, which can be enhanced with [extensions](https://marketplace.visualstudio.com/vscode). +- [WebStorm](https://www.jetbrains.com/webstorm/) by JetBrains. It is free (as in monetary cost) for [non-commercial use](https://www.jetbrains.com/legal/docs/toolbox/license_non-commercial/); otherwise you have to buy a licence. Most of its features are also available in other IDEs of JetBrains, like [IntelliJ IDEA ultimate](https://www.jetbrains.com/idea/), [PyCharm professional](https://www.jetbrains.com/pycharm/) and [Rider](https://www.jetbrains.com/rider/). You can compare the products of JetBrains [here](https://www.jetbrains.com/products/compare/?product=webstorm&product=idea). Note that the free version of WebStorm will [collect data](https://blog.jetbrains.com/blog/2024/10/24/webstorm-and-rider-are-now-free-for-non-commercial-use/#anonymous-data-collection) anonymously, _without_ the option to disable it. WebStorm comes with a lot of [functionality included](https://www.jetbrains.com/webstorm/features/), but also gives access to a [Marketplace of plugins](https://plugins.jetbrains.com/). +- [Visual Studio Code](https://code.visualstudio.com), an open source and free (as in monetary cost) editor by Microsoft. By default, it collects [telemetry data](https://code.visualstudio.com/docs/getstarted/telemetry), but that can be [disabled](https://code.visualstudio.com/docs/getstarted/telemetry#_disable-telemetry-reporting). VSCode has a [limited feature set](https://code.visualstudio.com/docs/editor/whyvscode) out of the box, which can be enhanced with [extensions](https://marketplace.visualstudio.com/vscode). ## Debugging @@ -125,16 +126,16 @@ In web development, debugging is typically done in the browser. 
Read [this artic There is documentation for each browser on their [dev tools](https://en.wikipedia.org/wiki/Web_development_tools): -* [Firefox](https://firefox-source-docs.mozilla.org/devtools-user/) -* [Chrome](https://developer.chrome.com/docs/devtools) -* [Edge](https://learn.microsoft.com/en-us/microsoft-edge/devtools-guide-chromium/overview) -* [Safari](https://developer.apple.com/safari/tools/) +- [Firefox](https://firefox-source-docs.mozilla.org/devtools-user/) +- [Chrome](https://developer.chrome.com/docs/devtools) +- [Edge](https://learn.microsoft.com/en-us/microsoft-edge/devtools-guide-chromium/overview) +- [Safari](https://developer.apple.com/safari/tools/) -There are also debugging guides for the various JS runtimes: +There are also debugging guides for the various JS runtimes: -* [Node.js](https://nodejs.org/en/learn/getting-started/debugging) -* [Deno](https://docs.deno.com/runtime/fundamentals/debugging/) -* [Bun](https://bun.sh/docs/runtime/debugger) +- [Node.js](https://nodejs.org/en/learn/getting-started/debugging) +- [Deno](https://docs.deno.com/runtime/fundamentals/debugging/) +- [Bun](https://bun.sh/docs/runtime/debugger) When using a (meta-)framework, also have a look at its documentation. @@ -153,6 +154,7 @@ python3 -m http.server 8000 ``` + Then open the web browser to http://localhost:8000. ## Documentation :id=js-docs @@ -163,13 +165,12 @@ Then open the web browser to http://localhost:8000. The various runtimes have testing functionality included, so you don't have to install extra dependencies: -* [Node.js](https://nodejs.org/en/learn/test-runner/introduction) -* [Deno](https://docs.deno.com/runtime/fundamentals/testing/) -* [Bun](https://bun.sh/guides/test/run-tests) +- [Node.js](https://nodejs.org/en/learn/test-runner/introduction) +- [Deno](https://docs.deno.com/runtime/fundamentals/testing/) +- [Bun](https://bun.sh/guides/test/run-tests) If these don't suffice, a nice overview of popular testing frameworks can be found [here](https://raygun.com/blog/javascript-unit-testing-frameworks/). - ### Testing with browsers To interact with web browsers use [Selenium](https://www.selenium.dev/). @@ -200,15 +201,14 @@ Also have a look at the [Airbnb JavaScript Style Guide](https://github.com/airbn For more in-depth analyses, you can use a code quality and analysis tool. -* [SonarCloud](https://sonarcloud.io) is an open platform to manage code quality which can also show code coverage and count test results over time. It easily [integrates with GitHub](https://github.com/apps/sonarcloud). -* [Codacy](https://www.codacy.com) can analyze [many different languages](https://docs.codacy.com/getting-started/supported-languages-and-tools/) using open source tools. It also offers [GitHub integration](https://docs.codacy.com/repositories-configure/integrations/github-integration/). -* [Code climate](https://codeclimate.com/quality) can analyze JavaScript (and Ruby, PHP). Can analyze Java (best supported), C, C++, Python, JavaScript and TypeScript. +- [SonarCloud](https://sonarcloud.io) is an open platform to manage code quality which can also show code coverage and count test results over time. It easily [integrates with GitHub](https://github.com/apps/sonarcloud). +- [Codacy](https://www.codacy.com) can analyze [many different languages](https://docs.codacy.com/getting-started/supported-languages-and-tools/) using open source tools. It also offers [GitHub integration](https://docs.codacy.com/repositories-configure/integrations/github-integration/). 
+- [Code Climate](https://codeclimate.com/quality) can analyze Java (best supported), C, C++, Python, JavaScript, TypeScript, Ruby and PHP.

## Showing code examples

You can use [jsfiddle](https://jsfiddle.net/), which shows you a live preview of your web page while you fiddle with the underlying HTML, JavaScript and CSS code.

-
## TypeScript

https://www.typescriptlang.org/

@@ -223,11 +223,11 @@ This section highlights the differences with JavaScript. For topics without sign

To learn about TypeScript, the following resources are available:

-* Official [TypeScript documentation](https://www.typescriptlang.org/docs/) and [tutorial](https://www.typescriptlang.org/docs/handbook/intro.html)
-* [Single video tutorial](https://www.youtube.com/watch?v=d56mG7DezGs) and [playlist tutorial](https://www.youtube.com/playlist?list=PL4cUxeGkcC9gUgr39Q_yD6v-bSyMwKPUI)
-* Tutorials on debugging TypeScript in [Chrome](https://blog.logrocket.com/how-to-debug-typescript-chrome/) and [Firefox](https://hacks.mozilla.org/2019/09/debugging-typescript-in-firefox-devtools/). If you are using a framework, consult the documentation of that framework for additional ways of debugging
-* [The Definitive TypeScript 5.0 Guide](https://www.sitepen.com/blog/update-the-definitive-typescript-guide)
-* The [W3Schools TypeScript tutorial](https://www.w3schools.com/typescript/index.php)
+- Official [TypeScript documentation](https://www.typescriptlang.org/docs/) and [tutorial](https://www.typescriptlang.org/docs/handbook/intro.html)
+- [Single video tutorial](https://www.youtube.com/watch?v=d56mG7DezGs) and [playlist tutorial](https://www.youtube.com/playlist?list=PL4cUxeGkcC9gUgr39Q_yD6v-bSyMwKPUI)
+- Tutorials on debugging TypeScript in [Chrome](https://blog.logrocket.com/how-to-debug-typescript-chrome/) and [Firefox](https://hacks.mozilla.org/2019/09/debugging-typescript-in-firefox-devtools/). If you are using a framework, consult the documentation of that framework for additional ways of debugging
+- [The Definitive TypeScript 5.0 Guide](https://www.sitepen.com/blog/update-the-definitive-typescript-guide)
+- The [W3Schools TypeScript tutorial](https://www.w3schools.com/typescript/index.php)

### Quickstart

@@ -245,6 +245,7 @@ npm install --save-dev @types/
```

For example say we want to use the `react` package which we installed using `npm`:
+
```shell
npm install react --save
```

@@ -257,8 +258,8 @@ Install it with:
npm install --save-dev @types/react
```

-The ``--save-dev`` flag saves this installation to the package.json file as a development dependency.
-Do not use ``--save`` for types because a production build will have been transpiled to JavaScript and has no use for TypeScript types.
+The `--save-dev` flag saves this installation to the package.json file as a development dependency.
+Do not use `--save` for types because a production build will have been transpiled to JavaScript and has no use for TypeScript types.

### Debugging

diff --git a/language_guides/languages_overview.md b/language_guides/languages_overview.md
index dd9f965..95edf8c 100644
--- a/language_guides/languages_overview.md
+++ b/language_guides/languages_overview.md
@@ -1,7 +1,6 @@
# Language Guides

-*Page maintainer: Patrick Bos* [@egpbos](https://github.com/egpbos)
-
+_Page maintainer: Patrick Bos_ [@egpbos](https://github.com/egpbos)

This chapter provides practical info on each of the main programming languages of the Netherlands eScience Center.
@@ -23,7 +22,6 @@ Each chapter should contain: - Recommended additional packages and libraries - Available templates - ## Preferred Languages At the Netherlands eScience Center we prefer Java and Python over C++ and Perl, as these languages in general produce more sustainable code. It is not always possible to choose which libraries we use, as almost all projects have existing code as a starting point. diff --git a/language_guides/python.md b/language_guides/python.md index 02dcba3..c24214a 100644 --- a/language_guides/python.md +++ b/language_guides/python.md @@ -1,7 +1,6 @@ # Python -*Page maintainer: Bouwe Andela* [@bouweandela](https://github.com/bouweandela) - +_Page maintainer: Bouwe Andela_ [@bouweandela](https://github.com/bouweandela) Python is the "dynamic language of choice" of the Netherlands eScience Center. We use it for data analysis and data science projects, and for many other types of projects: workflow management, visualization, natural language processing, web-based tools and much more. @@ -23,15 +22,15 @@ It is possible to write Python that is both Python 2 and Python 3 compatible (e. If you need Python 2 because of old, incompatible Python 2 libraries, strongly consider upgrading those libraries to Python 3 or replacing them altogether. Building and/or using Python 2 is probably discouraged even more than, say, using Fortran 77, since at least Fortran 77 compilers are still being maintained. -* [Things you’re probably not using in Python 3 – but should](https://datawhatnow.com/things-you-are-probably-not-using-in-python-3-but-should/) -* [Six](https://pypi.org/project/six/): Python 2 and 3 Compatibility Library -* [2to3](https://docs.python.org/2/library/2to3.html): Automated Python 2 to 3 code translation -* [python-modernize](https://github.com/mitsuhiko/python-modernize): wrapper around 2to3 +- [Things you’re probably not using in Python 3 – but should](https://datawhatnow.com/things-you-are-probably-not-using-in-python-3-but-should/) +- [Six](https://pypi.org/project/six/): Python 2 and 3 Compatibility Library +- [2to3](https://docs.python.org/2/library/2to3.html): Automated Python 2 to 3 code translation +- [python-modernize](https://github.com/mitsuhiko/python-modernize): wrapper around 2to3 ## Learning Python -* A popular way to learn Python is by doing it the hard way at http://learnpythonthehardway.org/ -* Using [`pylint`](https://www.pylint.org) and [`yapf`](https://github.com/google/yapf) while learning Python is an easy way to get familiar with best practices and commonly used coding styles +- A popular way to learn Python is by doing it the hard way at http://learnpythonthehardway.org/ +- Using [`pylint`](https://www.pylint.org) and [`yapf`](https://github.com/google/yapf) while learning Python is an easy way to get familiar with best practices and commonly used coding styles ## Dependencies and package management @@ -45,10 +44,10 @@ We strongly recommend creating isolated "virtual environments" for each Python p These can be created with `venv` or with `conda`. Advantages over installing packages system-wide or in a single user folder: -* Installs Python modules when you are not root. -* Contains all Python dependencies so the environment keeps working after an upgrade. -* Keeps environments clean for each project, so you don't get more than you need (and can easily reproduce that minimal working situation). 
-* Lets you select the Python version per environment, so you can test code compatibility between Python versions +- Installs Python modules when you are not root. +- Contains all Python dependencies so the environment keeps working after an upgrade. +- Keeps environments clean for each project, so you don't get more than you need (and can easily reproduce that minimal working situation). +- Lets you select the Python version per environment, so you can test code compatibility between Python versions ### Pip + a virtual environment @@ -83,10 +82,12 @@ Miniforge includes both the faster `mamba` as well as the traditional `conda`. ## Building and packaging code ### Making an installable package + To create an installable Python package you will have to create a `pyproject.toml` file. This will contain three kinds of information: metadata about your project, information on how to build and install your package, and configuration settings for any tools your project may use. Our [Python template](https://github.com/NLeSC/python-template) already does this for you. #### Project metadata + Your project metadata will be under the `[project]` header, and includes such information as the name, version number, description and dependencies. The [Python Packaging User Guide](https://packaging.python.org/en/latest/specifications/pyproject-toml/#declaring-project-metadata-the-project-table) has more information on what else can or should be added here. For your dependencies, you should keep version constraints to a minimum; use, in order of descending preference: no constraints, lower bounds, lower + upper bounds, exact versions. @@ -97,52 +98,57 @@ If instead you are writing a new `pyproject.toml` for an existing project, a rec It is possible to find the full list of currently installed packages with `pip freeze` or `conda list`, but note that this is not ideal for listing dependencies in `pyproject.toml`, because it also lists all dependencies of the dependencies that you use. #### Build system + Besides specifying your project's own metadata, you also have to specify a build-system under the `[build-system]` header. We currently recommend using [`hatchling`](https://pypi.org/project/hatchling/) or [`setuptools`](https://setuptools.pypa.io/en/latest/build_meta.html). Note that Python's build system landscape is still in flux, so be sure to look upthe some current practices in the [packaging guide's section on build backends](https://packaging.python.org/en/latest/tutorials/packaging-projects/#choosing-a-build-backend) and [authoritative blogs like this one](https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html). One important thing to note is that use of `setup.py` and `setup.cfg` has been officially deprecated and we should migrate away from that. #### Tool configuration + Finally, `pyproject.toml` can be used to specify the configuration for any other tools like `pytest`, `ruff` and `mypy` your project may use. Each of these gets their own section in your `pyproject.toml` instead of using their own file, saving you from having dozens of such files in your project. #### Installation + When the `pyproject.toml` is written, your package can be installed with + ``` pip install -e . ``` -The `-e` flag will install your package in editable mode, i.e. it will create a symlink to your package in the installation location instead of copying the package. 
This is convenient when developing, because any changes you make to the source code will immediately be available for use in the installed version. + +The `-e` flag will install your package in editable mode, i.e. it will create a symlink to your package in the installation location instead of copying the package. This is convenient when developing, because any changes you make to the source code will immediately be available for use in the installed version. Set up continuous integration to test your installation setup. You can use `pyroma` as a linter for your installation configuration. ### Packaging and distributing your package + For packaging your code, you can either use `pip` or `conda`. Neither of them is [better than the other](https://jakevdp.github.io/blog/2016/08/25/conda-myths-and-misconceptions/) -- they are different; use the one which is more suitable for your project. `pip` may be more suitable for distributing pure python packages, and it provides some support for binary dependencies using [`wheels`](http://pythonwheels.com). `conda` may be more suitable when you have external dependencies which cannot be packaged in a wheel. #### Build via the [Python Package Index (PyPI)](https://pypi.org) so that the package can be installed with pip -* [General instructions](https://packaging.python.org/en/latest/tutorials/packaging-projects/) -* We recommend to configure GitHub Actions to upload the package to PyPI automatically for each release. - * For new repositories, it is recommended to use [trusted publishing](https://docs.pypi.org/trusted-publishers/) because it is more secure than using secret tokens from GitHub. - * For a workflow using secret tokens instead, see this [example workflow in DIANNA](https://github.com/dianna-ai/dianna/blob/main/.github/workflows/release.yml). - * You can follow [these instructions](https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/) to set up GitHub Actions workflows with trusted publishing. - * The [`verbose`](https://github.com/marketplace/actions/pypi-publish#for-debugging) option for pypi workflows is useful to see why a workflow failed. - * To avoid unnecessary workflow runs, you can follow the example in the [sirup package](https://github.com/ivory-tower-private-power/sirup/blob/main/.github/workflows/release.yml): manually trigger pushes to pypi and investigate potential bugs during this process with a manual upload. -* Manual uploads with twine - * Because PyPI and Test PyPI require Two-Factor Authentication per January 2024, you need to mimick GitHub's trusted publishing to publish manually with `twine`. - * You can follow the section on "The manual way" as described [here](https://docs.pypi.org/trusted-publishers/using-a-publisher/). -* Additional guidelines: - * Packages should be uploaded to PyPI using [your own account](https://pypi.org/account/register) - * For packages developed in a team or organization, it is recommended that you create a team or organizational account on PyPI and add that as a collaborator with the owner rule. This will allow your team or organization to maintain the package even if individual contributors at some point move on to do other things. At the Netherlands eScience Center, we are a fairly small organization, so we use a single backup account (`nlesc`). 
-  * When distributing code through PyPI, non-python files (such as `requirements.txt`) will not be packaged automatically, you need to [add them to](https://stackoverflow.com/questions/1612733/including-non-python-files-with-setup-py) a `MANIFEST.in` file.
-  * To test whether your distribution will work correctly before uploading to PyPI, you can run `python -m build` in the root of your repository. Then try installing your package with `pip install dist/tar.gz.`
-  * `python -m build` will also build [Python wheels](http://pythonwheels.com/), the current standard for [distributing](https://packaging.python.org/distributing/#wheels) Python packages. This will work out of the box for pure Python code, without C extensions. If C extensions are used, each OS needs to have its own wheel. The [manylinux](https://github.com/pypa/manylinux) Docker images can be used for building wheels compatible with multiple Linux distributions. Wheel building can be automated using GitHub Actions or another CI solution, where you can build on all three major platforms using a build matrix.
-
+- [General instructions](https://packaging.python.org/en/latest/tutorials/packaging-projects/)
+- We recommend configuring GitHub Actions to upload the package to PyPI automatically for each release.
+  - For new repositories, it is recommended to use [trusted publishing](https://docs.pypi.org/trusted-publishers/) because it is more secure than using secret tokens from GitHub.
+  - For a workflow using secret tokens instead, see this [example workflow in DIANNA](https://github.com/dianna-ai/dianna/blob/main/.github/workflows/release.yml).
+  - You can follow [these instructions](https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/) to set up GitHub Actions workflows with trusted publishing.
+  - The [`verbose`](https://github.com/marketplace/actions/pypi-publish#for-debugging) option for PyPI workflows is useful to see why a workflow failed.
+  - To avoid unnecessary workflow runs, you can follow the example in the [sirup package](https://github.com/ivory-tower-private-power/sirup/blob/main/.github/workflows/release.yml): manually trigger pushes to PyPI and investigate potential bugs during this process with a manual upload.
+- Manual uploads with twine
+  - Because PyPI and Test PyPI require Two-Factor Authentication as of January 2024, you need to mimic GitHub's trusted publishing to publish manually with `twine`.
+  - You can follow the section on "The manual way" as described [here](https://docs.pypi.org/trusted-publishers/using-a-publisher/).
+- Additional guidelines:
+  - Packages should be uploaded to PyPI using [your own account](https://pypi.org/account/register).
+  - For packages developed in a team or organization, it is recommended that you create a team or organizational account on PyPI and add that as a collaborator with the owner role. This will allow your team or organization to maintain the package even if individual contributors at some point move on to do other things. At the Netherlands eScience Center, we are a fairly small organization, so we use a single backup account (`nlesc`).
+  - When distributing code through PyPI, non-python files (such as `requirements.txt`) will not be packaged automatically, you need to [add them to](https://stackoverflow.com/questions/1612733/including-non-python-files-with-setup-py) a `MANIFEST.in` file.
+ - To test whether your distribution will work correctly before uploading to PyPI, you can run `python -m build` in the root of your repository. Then try installing your package with `pip install dist/tar.gz.` + - `python -m build` will also build [Python wheels](http://pythonwheels.com/), the current standard for [distributing](https://packaging.python.org/distributing/#wheels) Python packages. This will work out of the box for pure Python code, without C extensions. If C extensions are used, each OS needs to have its own wheel. The [manylinux](https://github.com/pypa/manylinux) Docker images can be used for building wheels compatible with multiple Linux distributions. Wheel building can be automated using GitHub Actions or another CI solution, where you can build on all three major platforms using a build matrix. #### [Build using conda](https://conda-forge.org/docs/maintainer/adding_pkgs.html) -* **Make use of [conda-forge](https://conda-forge.org/) whenever possible**, since it provides many automated build services that save you tons of work, compared to using your own conda repository. It also has a very active community for when you need help. -* Use BioConda or custom channels (hosted on GitHub) as alternatives if need be. +- **Make use of [conda-forge](https://conda-forge.org/) whenever possible**, since it provides many automated build services that save you tons of work, compared to using your own conda repository. It also has a very active community for when you need help. +- Use BioConda or custom channels (hosted on GitHub) as alternatives if need be. ## Editors and IDEs @@ -179,6 +185,7 @@ In fact, some tools have started to make use of this in runtime modules as well, See [this guide](https://realpython.com/python-type-checking/) to learn more about type hints. Some tools to know about that make use of type hints: + - [Type checkers](https://www.infoworld.com/article/2260170/4-python-type-checkers-to-keep-your-code-clean.html) are static code analysis tools that check your code based on the type hints you provide. It is highly recommended that you use a type checker. Choose [mypy](https://mypy-lang.org/) if you are unsure which one to choose. @@ -230,49 +237,48 @@ We currently setup [Sonarcloud](https://sonarcloud.io/) by default in our [Pytho To reproduce the Sonarcloud pipeline locally, you can use [SonarLint](https://www.sonarlint.org/) in your IDE. If you use another editor, perhaps it is more convenient to pick another service like Codacy or Codecov. - ## Debugging and profiling ### Debugging -* Python has its own debugger called [pdb](https://docs.python.org/3/library/pdb.html). It is a part of the Python distribution. -* [pudb](https://github.com/inducer/pudb) is a console-based Python debugger which can easily be installed using pip. -* If you are looking for IDEs with debugging capabilities, see the [Editors and IDEs section](#editors-and-ides). -* If you are using Windows, [Python Tools for Visual Studio](https://github.com/Microsoft/PTVS) adds Python support for Visual Studio. -* If you would like to integrate [pdb](https://docs.python.org/3/library/pdb.html) with `vim`, you can use [Pyclewn](https://sourceforge.net/projects/pyclewn). +- Python has its own debugger called [pdb](https://docs.python.org/3/library/pdb.html). It is a part of the Python distribution. +- [pudb](https://github.com/inducer/pudb) is a console-based Python debugger which can easily be installed using pip. 
+- If you are looking for IDEs with debugging capabilities, see the [Editors and IDEs section](#editors-and-ides). +- If you are using Windows, [Python Tools for Visual Studio](https://github.com/Microsoft/PTVS) adds Python support for Visual Studio. +- If you would like to integrate [pdb](https://docs.python.org/3/library/pdb.html) with `vim`, you can use [Pyclewn](https://sourceforge.net/projects/pyclewn). -* List of other available software can be found on the [Python wiki page on debugging tools](https://wiki.python.org/moin/PythonDebuggingTools). +- List of other available software can be found on the [Python wiki page on debugging tools](https://wiki.python.org/moin/PythonDebuggingTools). -* If you are looking for some tutorials to get started: +- If you are looking for some tutorials to get started: - - https://pymotw.com/2/pdb - - https://github.com/spiside/pdb-tutorial - - https://www.jetbrains.com/help/pycharm/2016.3/debugging.html - - https://waterprogramming.wordpress.com/2015/09/10/debugging-in-python-using-pycharm/ - - http://www.pydev.org/manual_101_run.html + - https://pymotw.com/2/pdb + - https://github.com/spiside/pdb-tutorial + - https://www.jetbrains.com/help/pycharm/2016.3/debugging.html + - https://waterprogramming.wordpress.com/2015/09/10/debugging-in-python-using-pycharm/ + - http://www.pydev.org/manual_101_run.html ### Profiling There are a number of available profiling tools that are suitable for different situations. -* [cProfile](https://docs.python.org/2/library/profile.html) measures number of function calls and how much CPU time they take. The output can be further analyzed using the `pstats` module. -* For more fine-grained, line-by-line CPU time profiling, two modules can be used: - - [line_profiler](https://github.com/rkern/line_profiler) provides a function decorator that measures the time spent on each line inside the function. - - [pprofile](https://github.com/vpelletier/pprofile) is less intrusive; it simply times entire Python scripts line-by-line. It can give output in callgrind format, which allows you to study the statistics and call tree in `kcachegrind` (often used for analyzing c(++) profiles from `valgrind`). +- [cProfile](https://docs.python.org/2/library/profile.html) measures number of function calls and how much CPU time they take. The output can be further analyzed using the `pstats` module. +- For more fine-grained, line-by-line CPU time profiling, two modules can be used: + - [line_profiler](https://github.com/rkern/line_profiler) provides a function decorator that measures the time spent on each line inside the function. + - [pprofile](https://github.com/vpelletier/pprofile) is less intrusive; it simply times entire Python scripts line-by-line. It can give output in callgrind format, which allows you to study the statistics and call tree in `kcachegrind` (often used for analyzing c(++) profiles from `valgrind`). More realistic profiling information can usually be obtained by using statistical or sampling profilers. The profilers listed below all create nice flame graphs. -* [vprof](https://github.com/nvdv/vprof) -* [Pyflame](https://github.com/uber/pyflame) -* [nylas-perftools](https://github.com/nylas/nylas-perftools) +- [vprof](https://github.com/nvdv/vprof) +- [Pyflame](https://github.com/uber/pyflame) +- [nylas-perftools](https://github.com/nylas/nylas-perftools) ## Logging -* [logging](https://docs.python.org/3/library/logging.html) module is the most commonly used tool to track events in Python code. 
-* Tutorials: - - [Official Python Logging Tutorial](https://docs.python.org/3/howto/logging.html#logging-basic-tutorial) - - http://docs.python-guide.org/en/latest/writing/logging - - [Python logging best practices](https://www.datadoghq.com/blog/python-logging-best-practices/) +- [logging](https://docs.python.org/3/library/logging.html) module is the most commonly used tool to track events in Python code. +- Tutorials: + - [Official Python Logging Tutorial](https://docs.python.org/3/howto/logging.html#logging-basic-tutorial) + - http://docs.python-guide.org/en/latest/writing/logging + - [Python logging best practices](https://www.datadoghq.com/blog/python-logging-best-practices/) ## Documentation @@ -280,7 +286,6 @@ It is recommended that you [write documentation](https://book.the-turing-way.org A popular and recommended solution for hosting documentation is [Read the Docs](https://readthedocs.org). It can automatically build documentation for projects hosted on [GitHub, GitLab, and Bitbucket](https://docs.readthedocs.io/en/stable/reference/git-integration.html). - ### Building documentation There are several tools for building webpages with documentation. @@ -305,18 +310,18 @@ It is recommended that you [routinely test any code examples in your documentati ### General scientific -* [NumPy](http://www.numpy.org/) -* [SciPy](https://www.scipy.org/) -* [Pandas](http://pandas.pydata.org/) data analysis toolkit -* [scikit-learn](http://scikit-learn.org/): machine learning in Python -* [Cython](http://cython.org/) speed up Python code by using C types and calling C functions -* [dask](http://dask.pydata.org) larger than memory arrays and parallel execution +- [NumPy](http://www.numpy.org/) +- [SciPy](https://www.scipy.org/) +- [Pandas](http://pandas.pydata.org/) data analysis toolkit +- [scikit-learn](http://scikit-learn.org/): machine learning in Python +- [Cython](http://cython.org/) speed up Python code by using C types and calling C functions +- [dask](http://dask.pydata.org) larger than memory arrays and parallel execution ### IPython and Jupyter notebooks (aka IPython notebooks) [IPython](https://ipython.org/) is an interactive Python interpreter -- very much the same as the standard Python interactive interpreter, but with some [extra features](http://ipython.readthedocs.io/en/stable/interactive/index.html) (tab completion, shell commands, in-line help, etc). -[Jupyter](http://jupyter.org/) notebooks (formerly know as IPython notebooks) are browser based interactive Python enviroments. It incorporates the same features as the IPython console, plus some extras like in-line plotting. [Look at some examples](https://nbviewer.jupyter.org/github/ipython/ipython/blob/4.0.x/examples/IPython%20Kernel/Index.ipynb) to find out more. Within a notebook you can alternate code with Markdown comments (and even LaTeX), which is great for reproducible research. +[Jupyter](http://jupyter.org/) notebooks (formerly know as IPython notebooks) are browser based interactive Python enviroments. It incorporates the same features as the IPython console, plus some extras like in-line plotting. [Look at some examples](https://nbviewer.jupyter.org/github/ipython/ipython/blob/4.0.x/examples/IPython%20Kernel/Index.ipynb) to find out more. Within a notebook you can alternate code with Markdown comments (and even LaTeX), which is great for reproducible research. [Notebook extensions](https://github.com/ipython-contrib/jupyter_contrib_nbextensions) adds extra functionalities to notebooks. 
[JupyterLab](https://github.com/jupyterlab/jupyterlab) is a web-based environment with a lot of improvements and integrated tools. @@ -327,46 +332,47 @@ It is good practice to restart the kernel and run the notebook from start to fin ### Visualization -* [Matplotlib](http://matplotlib.org) has been the standard in scientific visualization. It supports quick-and-dirty plotting through the `pyplot` submodule. Its object oriented interface can be somewhat arcane, but is highly customizable and runs natively on many platforms, making it compatible with all major OSes and environments. It supports most sources of data, including native Python objects, Numpy and Pandas. - - [Seaborn](http://stanford.edu/~mwaskom/software/seaborn/index.html) is a Python visualisation library based on Matplotlib and aimed towards statistical analysis. It supports numpy, pandas, scipy and statmodels. -* Web-based: - - [Bokeh](https://github.com/bokeh/bokeh) is Interactive Web Plotting for Python. - - [Plotly](https://plot.ly/) is another platform for interactive plotting through a web browser, including in Jupyter notebooks. - - [altair](https://github.com/ellisonbg/altair) is a _grammar of graphics_ style declarative statistical visualization library. It does not render visualizations itself, but rather outputs Vega-Lite JSON data. This can lead to a simplified workflow. - - [ggplot](https://github.com/yhat/ggpy) is a plotting library imported from R. +- [Matplotlib](http://matplotlib.org) has been the standard in scientific visualization. It supports quick-and-dirty plotting through the `pyplot` submodule. Its object oriented interface can be somewhat arcane, but is highly customizable and runs natively on many platforms, making it compatible with all major OSes and environments. It supports most sources of data, including native Python objects, Numpy and Pandas. + - [Seaborn](http://stanford.edu/~mwaskom/software/seaborn/index.html) is a Python visualisation library based on Matplotlib and aimed towards statistical analysis. It supports numpy, pandas, scipy and statmodels. +- Web-based: + - [Bokeh](https://github.com/bokeh/bokeh) is Interactive Web Plotting for Python. + - [Plotly](https://plot.ly/) is another platform for interactive plotting through a web browser, including in Jupyter notebooks. + - [altair](https://github.com/ellisonbg/altair) is a _grammar of graphics_ style declarative statistical visualization library. It does not render visualizations itself, but rather outputs Vega-Lite JSON data. This can lead to a simplified workflow. + - [ggplot](https://github.com/yhat/ggpy) is a plotting library imported from R. ### Parallelisation CPython (the official and mainstream Python implementation) is not built for parallel processing due to the [global interpreter lock](https://wiki.python.org/moin/GlobalInterpreterLock). Note that the GIL only applies to actual Python code, so compiled modules like e.g. `numpy` do not suffer from it. Having said that, there are many ways to run Python code in parallel: -* The [multiprocessing](https://docs.python.org/3/library/multiprocessing.html) module is the standard way to do parallel executions in one or multiple machines, it circumvents the GIL by creating multiple Python processess. -* A much simpler alternative in Python 3 is the [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html) module. 
-* [IPython / Jupyter notebooks have built-in parallel and distributed computing capabilities](https://ipython.org/ipython-doc/3/parallel/) -* Many modules have parallel capabilities or can be compiled to have them. -* At the eScience Center, we have developed the [Noodles package](https://research-software-directory.org/software/noodles) for creating computational workflows and automatically parallelizing it by dispatching independent subtasks to parallel and/or distributed systems. + +- The [multiprocessing](https://docs.python.org/3/library/multiprocessing.html) module is the standard way to do parallel executions in one or multiple machines, it circumvents the GIL by creating multiple Python processess. +- A much simpler alternative in Python 3 is the [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html) module. +- [IPython / Jupyter notebooks have built-in parallel and distributed computing capabilities](https://ipython.org/ipython-doc/3/parallel/) +- Many modules have parallel capabilities or can be compiled to have them. +- At the eScience Center, we have developed the [Noodles package](https://research-software-directory.org/software/noodles) for creating computational workflows and automatically parallelizing it by dispatching independent subtasks to parallel and/or distributed systems. ### Web Frameworks There are convenient Python web frameworks available: -* [flask](http://flask.pocoo.org/) -* [CherryPy](https://cherrypy.dev/) -* [Django](https://www.djangoproject.com/) -* [bottle](http://bottlepy.org/) (similar to flask, but a bit more light-weight for a JSON-REST service) -* [FastAPI](https://fastapi.tiangolo.com): again, similar to flask in functionality, but uses modern Python features like async and type hints with runtime behavioral effects. +- [flask](http://flask.pocoo.org/) +- [CherryPy](https://cherrypy.dev/) +- [Django](https://www.djangoproject.com/) +- [bottle](http://bottlepy.org/) (similar to flask, but a bit more light-weight for a JSON-REST service) +- [FastAPI](https://fastapi.tiangolo.com): again, similar to flask in functionality, but uses modern Python features like async and type hints with runtime behavioral effects. We have recommended `flask` in the past, but FastAPI has become more popular recently. ### NLP/text mining -* [nltk](http://www.nltk.org/) Natural Language Toolkit -* [Pattern](https://github.com/clips/pattern): web/text mining module -* [gensim](https://radimrehurek.com/gensim/): Topic modeling +- [nltk](http://www.nltk.org/) Natural Language Toolkit +- [Pattern](https://github.com/clips/pattern): web/text mining module +- [gensim](https://radimrehurek.com/gensim/): Topic modeling ### Creating programs with command line arguments -* For run-time configuration via command-line options, the built-in [`argparse`](https://docs.python.org/library/argparse.html) module usually suffices. -* A more complete solution is [`ConfigArgParse`](https://github.com/bw2/ConfigArgParse). This (almost) drop-in replacement for `argparse` allows you to not only specify configuration options via command-line options, but also via (ini or yaml) configuration files and via environment variables. -* Other popular libraries are [`click`](https://click.palletsprojects.com) and [`fire`](https://google.github.io/python-fire/). -* [Typer](https://typer.tiangolo.com): make a command-line application by using type hints with runtime effects. Very low on boilerplate for simple cases, but also allows for more complex cases. Uses `click` internally. 
+- For run-time configuration via command-line options, the built-in [`argparse`](https://docs.python.org/library/argparse.html) module usually suffices. +- A more complete solution is [`ConfigArgParse`](https://github.com/bw2/ConfigArgParse). This (almost) drop-in replacement for `argparse` allows you to not only specify configuration options via command-line options, but also via (ini or yaml) configuration files and via environment variables. +- Other popular libraries are [`click`](https://click.palletsprojects.com) and [`fire`](https://google.github.io/python-fire/). +- [Typer](https://typer.tiangolo.com): make a command-line application by using type hints with runtime effects. Very low on boilerplate for simple cases, but also allows for more complex cases. Uses `click` internally. diff --git a/language_guides/r.md b/language_guides/r.md index e2dddf1..63f9535 100644 --- a/language_guides/r.md +++ b/language_guides/r.md @@ -1,35 +1,39 @@ # R -*Page maintainers: [Malte Lüken](https://github.com/maltelueken) and [Pablo Rodríguez-Sánchez](https://github.com/PabRod)* . +_Page maintainers: [Malte Lüken](https://github.com/maltelueken) and [Pablo Rodríguez-Sánchez](https://github.com/PabRod)_ . ## What is R? R is a functional programming language and software environment for statistical computing and graphics: https://www.r-project.org/. ### Philosophy and typical use cases + R is particularly popular in the social, health, and biological sciences where it is used for statistical modeling. R can also be used for signal processing (e.g. FFT), machine learning, image analyses, and natural language processing. The R syntax is similar to that of Matlab and Python in terms of compactness and readability, which makes it a good prototyping language for science. One of the strengths of R is the large number of available open source statistical packages, often developed by domain experts. For example, R-package [Seewave](http://rug.mnhn.fr/seewave/) is specialised in sound analyses. Packages are typically released on CRAN [The Comprehensive R Archive Network](http://cran.r-project.org). ### Some crucial differences with Python + Are you familiar with Python? Then kickstart your R journey by reading this [blog post](https://towardsdatascience.com/the-starter-guide-for-transitioning-your-python-projects-to-r-8de4122b04ad). ### Recommended sources of information + All R functions come with documentation in a standardized format. Some R packages have their own google group. Further, stackoverflow and standard search engines can lead you to answers to issues. If you prefer books, consider the following resources: -* [R for Data Science](https://r4ds.had.co.nz/) by Hadley Wickham, -* [Advanced R](https://adv-r.hadley.nz/) by Hadley Wickham, -* [Writing better R code](http://www.bioconductor.org/help/course-materials/2013/CSAMA2013/friday/afternoon/R-programming.pdf) by Laurent Gatto. - +- [R for Data Science](https://r4ds.had.co.nz/) by Hadley Wickham, +- [Advanced R](https://adv-r.hadley.nz/) by Hadley Wickham, +- [Writing better R code](http://www.bioconductor.org/help/course-materials/2013/CSAMA2013/friday/afternoon/R-programming.pdf) by Laurent Gatto. ## Getting started ### Setting up R + To install R check detailed description at [CRAN website](http://cran.r-project.org). #### IDE + R programs can be written in any text editor. R code can be run from the command line or interactively within R environment, that can be started with `R` command in the shell. To quit R environment type `q()`. 
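+
+As a minimal sketch (using a hypothetical script name `analysis.R`), both ways of running R from the shell look like this:
+
+```shell
+# run a script non-interactively
+Rscript analysis.R
+
+# or start an interactive session; leave it again with q()
+R
+```
+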
Said this, it is highly recommended to use an integrated development environment (IDE). The most popular one is [RStudio / Posit](https://posit.co/products/open-source/rstudio/). It is free and quite powerful. It features editor with code completion, command line environment, file manager, package manager and history lookup among others. @@ -41,9 +45,11 @@ Note you will have to install RStudio in addition to installing R. Please note t Within RStudio you can work on ad-hoc code or create a project. Compared with Python an R project is a bit like a virtual environment as it preserves the workspace and installed packages for that project. Creating a project is needed to build an R package. A project is created via the menu at the top of the screen. ### Installing compilers and runtimes + Not needed as most functions in R are already compiled in C, nevertheless R has compiling functionality as described in the [R manual](https://stat.ethz.ch/R-manual/R-devel/library/compiler/html/compile.html). See [overview by Hadley Wickham](http://r-pkgs.had.co.nz/src.html). ## Coding style conventions + We recommend following the [Tidyverse style guide](https://style.tidyverse.org/). Its guidelines can be automatically followed using linters such as: @@ -51,11 +57,13 @@ Its guidelines can be automatically followed using linters such as: - [lintr](https://github.com/r-lib/lintr) ### The `<-` operator + Assigning variables with `<-` instead of `=` is recommended, although **most** of the time both are equivalent. If you are interested in the controversy around assignment operators, check out this [blog post](https://csgillespie.wordpress.com/2010/11/16/assignment-operators-in-r-vs/). ### `%>%` and `|>` + The symbols `%>%` and `|>` represent the pipe operator. The first one is part of the `magrittr` package, and it gained so much popularity that a similar operator, `|>`, was added as part of native R since version 4.1.0. For details on the differences between the two, see this [blog post](https://www.tidyverse.org/blog/2023/04/base-vs-magrittr-pipe/). They just add syntactic sugar to the way we pass a variable to a function. @@ -76,14 +84,17 @@ grades |> remove_nans() |> mean() |> print() You can think of it as a production chain, were an object (the `grades`) passes through three machines, one that removes the `NaN`s, another one that takes the mean, and a last one that prints the result. ## Recommended additional packages and libraries + One of the strengths of R is its community, that creates and maintains a constellation of packages. Very rarely will you use just base R. Here we give you a list of usual packages, starting by one solving the first problem you'll find... how to manage that many packages! ### Managing environments with `renv` + [`renv`](https://rstudio.github.io/renv/articles/renv.html) allows you to create and manage a dependencies library on a per-project basis. It also keeps track of the specific versions of each package used in the project, which is great for reproducibility... and avoiding future headhaches! ### Plotting with basic functions and ggplot2 and ggvis + For a generic impression about plotting with R, see: https://www.r-graph-gallery.com/all-graphs The basic R installation comes with a wide range of functions to plot data to a window on your screen or to a file. If you need to quickly inspect your data or create a custom-made static plot then the basic functions offer the building blocks to do the job. 
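+
+As a minimal sketch of such a quick inspection with the basic functions (random data stands in for your own here):
+
+```r
+x <- rnorm(1000)                     # example data
+hist(x, main = "Quick look at x")    # plot to the screen
+
+png("histogram.png")                 # or send the next plot to a file instead
+hist(x, main = "Quick look at x")
+dev.off()
+```
+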
There is a [Statmethods.net tutorial with some examples of plotting options in R](http://www.statmethods.net/graphs/index.html). @@ -93,38 +104,44 @@ However, externally contributed plotting packages may offer easier syntax or con In summary, it is good to familiarize yourself with both the basic plotting functions as well as the contributed graphics packages. In theory, the basic plot functions can do everything that ggplot2 can do, it is mostly a matter of how much you like either syntax and how much freedom you need to tailor the visualisation to your use case. ### Building interactive web applications with shiny + Thanks to [shiny.app](https://shiny.posit.co/) it is possible to make interactive web application in R without the need to write javascript or html. ### Building reports with knitr + [knitr](https://yihui.name/knitr/) is an R package designed to build dynamic reports in R. It's possible to generate on the fly new pdf or html documents with results of computations embedded inside. ### Preparing data for analysis + There are packages that ease tidying up messy data, e.g. [tidyr](https://github.com/hadley/tidyr) and [reshape2](https://github.com/hadley/reshape). The idea of tidy and messy data is explained in a [tidy data](http://vita.had.co.nz/papers/tidy-data.html) paper by Hadley Wickham. There is also the google group [manipulatr](https://groups.google.com/forum/#!forum/manipulatr) to discuss topics related to data manipulation in R. ### Speeding up code + Speeding up code always start with knowing where your bottlenecks are. The following profiling tools will help you doing so: -* Introduction to [profiling in R](https://bookdown.org/rdpeng/rprogdatascience/profiling-r-code.html) +- Introduction to [profiling in R](https://bookdown.org/rdpeng/rprogdatascience/profiling-r-code.html) Some rules of thumb that can quickly improve your code are the follwing: -* Avoid loops, use `apply` functionals instead -* Try to use vectorized functions -* Checkout the [`purrr`](https://purrr.tidyverse.org/) package -* If you are really in a hurry, consider communicating with `C++` code using [`Rcpp`](https://www.rcpp.org/). +- Avoid loops, use `apply` functionals instead +- Try to use vectorized functions +- Checkout the [`purrr`](https://purrr.tidyverse.org/) package +- If you are really in a hurry, consider communicating with `C++` code using [`Rcpp`](https://www.rcpp.org/). For a deeper introduction to the many optimization methods, check the free ebook: -* [Efficient R programming](https://csgillespie.github.io/efficientR/), by Colin Gillespie and Robin Lovelace. +- [Efficient R programming](https://csgillespie.github.io/efficientR/), by Colin Gillespie and Robin Lovelace. ## Package development ### Building R packages + There is a great tutorial written by Hadley Wickam describing all the nitty gritty of building your own package in R. It's called [R packages](http://r-pkgs.had.co.nz). For a quicker introduction, consider this software Carpentries' [lesson on R packages](https://carpentries-incubator.github.io/lesson-R-packaging/), originated and developed at our Center! ### Package documentation + Read [Documentation](http://r-pkgs.had.co.nz/man.html) chapter of Hadleys [R packages](http://r-pkgs.had.co.nz) book for details about documenting R code. Customary R uses `.Rd` files in `/man` directory for documentation. These files and folders are automatically created by RStudio when you create a new project from your existing R-function files. 
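+
+As a sketch of what this looks like in practice (for a hypothetical function `add_numbers()`), `roxygen2` comments placed directly above a function are turned into the corresponding `man/add_numbers.Rd` file when you run `devtools::document()`; see the notes on `roxygen` below:
+
+```r
+#' Add two numbers
+#'
+#' @param x A numeric value.
+#' @param y A numeric value.
+#' @return The sum of `x` and `y`.
+#' @export
+add_numbers <- function(x, y) {
+  x + y
+}
+```
+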
@@ -137,6 +154,7 @@ Read more about vignettes in [Package documentation](http://r-pkgs.had.co.nz/vig Read more about `roxygen` syntax on it's [github page](https://github.com/yihui/roxygen2). `roxygen` will also populate `NAMESPACE` file which is necessary to manage package level imports. ## Available templates + Most of the templating is nativelly managed by the [`usethis`](https://usethis.r-lib.org/) package. It contains functions that create the boilerplate for you, reducing the burden on your memory and reducing chances for errors. In the snippet below you can see how it feels to use it. @@ -150,25 +168,29 @@ usethis::use_citation() # Adds a citation file # etc... ``` + Having said this, these others can serve as inspiration: -* https://rapporter.github.io/rapport/ -* https://shiny.posit.co/r/articles/build/templates/ -* https://bookdown.org/yihui/rmarkdown/document-templates.html +- https://rapporter.github.io/rapport/ +- https://shiny.posit.co/r/articles/build/templates/ +- https://bookdown.org/yihui/rmarkdown/document-templates.html ## Testing, Checking, Debugging and Profiling ### Testing and checking + [Testthat](https://github.com/hadley/testthat) is a testing package by Hadley Wickham. [Testing chapter](http://r-pkgs.had.co.nz/tests.html) of a book [R packages](http://r-pkgs.had.co.nz) describes in detail testing process in R with use of `testthat`. Further, [testthat: Get Started with Testing](https://journal.r-project.org/archive/2011-1/RJournal_2011-1_Wickham.pdf) by Whickham may also provide a good starting point. See also [checking](http://r-pkgs.had.co.nz/check.html) and [testing](http://r-pkgs.had.co.nz/tests.html) R packages. note that within RStudio R package check and R package test can be done via simple toolbar clicks. ### Continuous integration + [Continuous integration](https://book.the-turing-way.org/reproducible-research/ci) should be done with an online service. We recommend using GitHub actions. ### Debugging and Profiling -Debugging is possible in RStudio, see [link](https://support.posit.co/hc/en-us/articles/205612627-Debugging-with-RStudio). For profiling tips see [link](http://adv-r.had.co.nz/Profiling.html) +Debugging is possible in RStudio, see [link](https://support.posit.co/hc/en-us/articles/205612627-Debugging-with-RStudio). 
For profiling tips see [link](http://adv-r.had.co.nz/Profiling.html) ## Not in this tutorial yet: -* Logging + +- Logging diff --git a/styles.css b/styles.css index f95c5ff..3324c87 100644 --- a/styles.css +++ b/styles.css @@ -1,20 +1,20 @@ /* General theme*/ body { - --theme-color: #009FE3; + --theme-color: #009fe3; } /* Sidebar element order */ .sidebar { - display: flex; - flex-direction: column; + display: flex; + flex-direction: column; } .sidebar .app-name { - order: 1; - margin: 10px 10px 0 10px; + order: 1; + margin: 10px 10px 0 10px; } .sidebar .search { - order: 2 + order: 2; } .sidebar .sidebar-nav { - order: 3 + order: 3; } diff --git a/technology/datasets.md b/technology/datasets.md index 642a72f..01ce0f3 100644 --- a/technology/datasets.md +++ b/technology/datasets.md @@ -1,16 +1,17 @@ # Working with tabular data -*Page maintainers: Suvayu Ali* [@suvayu](https://github.com/suvayu) *, Flavio Hafner* [@f-hafner](https://github.com/f-hafner) *and Reggie Cushing* [@recap](https://github.com/recap) +_Page maintainers: Suvayu Ali_ [@suvayu](https://github.com/suvayu) _, Flavio Hafner_ [@f-hafner](https://github.com/f-hafner) _and Reggie Cushing_ [@recap](https://github.com/recap) -There are several solutions available to you as an RSE, with their own pros and cons. You should evaluate which one works best for your project, and project partners, and pick one. Sometimes it might be, that you need to combine two different types of technologies. Here are some examples from our experience. +There are several solutions available to you as an RSE, with their own pros and cons. You should evaluate which one works best for your project, and project partners, and pick one. Sometimes it might be, that you need to combine two different types of technologies. Here are some examples from our experience. You will encounter datasets in various file formats like: + - CSV/Excel - Parquet - HDF5/NetCDF - JSON/JSON-LD -Or local database files like SQLite. It is important to note, the various trade-offs between these formats. For instance, doing a random seek is difficult with a large dataset for non-binary formats like: CSV, Excel, or JSON. In such cases you should consider formats like Parquet, or HDF5/NetCDF. Non-binary files can also be imported into local databases like SQLite or DuckDB. Below we compare some options to work with datasets in these formats. +Or local database files like SQLite. It is important to note, the various trade-offs between these formats. For instance, doing a random seek is difficult with a large dataset for non-binary formats like: CSV, Excel, or JSON. In such cases you should consider formats like Parquet, or HDF5/NetCDF. Non-binary files can also be imported into local databases like SQLite or DuckDB. Below we compare some options to work with datasets in these formats. It's also good to know about [Apache Arrow](https://arrow.apache.org), which is not itself a file format, but a specification for a memory layout of (binary) data. There is an ecosystem of libraries for all major languages to handle data in this format. @@ -18,50 +19,60 @@ It is used as the back-end of [many data handling projects](https://arrow.apache ## Local database -When you have a relational dataset, it is recommended that you use a database. Using local databases like SQLite and DuckDB can be very easy because of no setup requirements. But they come with some some limitations; for instance, multiple users cannot write to the database simultaneously. 
+When you have a relational dataset, it is recommended that you use a database. Using local databases like SQLite and DuckDB can be very easy because of no setup requirements. But they come with some some limitations; for instance, multiple users cannot write to the database simultaneously. + +SQLite is a transactional database, so if you have a dataset that is changing with time (e.g. you are adding new rows), it would be more appropriate. However in research often we work with static databases, and are interested mostly in analytical tasks. For such a case, DuckDB is a more appropriate alternative. Between the two, -SQLite is a transactional database, so if you have a dataset that is changing with time (e.g. you are adding new rows), it would be more appropriate. However in research often we work with static databases, and are interested mostly in analytical tasks. For such a case, DuckDB is a more appropriate alternative. Between the two, - DuckDB can also create views (virtual tables) from other sources like files, other databases, but with SQLite you always have to import the data before running any queries. - DuckDB is multi-threaded. This can be an advantage for large databases, where aggregation queries tend to be faster than sqlite. - - However if you have a really large dataset, say 100Ms of rows, and want to perform a deeply nested query, it would require substantial amount of memory, making it unfeasible to run on personal laptops. - - There are options to customize memory handling, and push what is possible on a single machine. - - You need to limit the memory usage to prevent the operatings system, or shell from preemptively killing it. You can choose a value about 50% of your system's RAM. - ```sql - SET memory_limit = '5GB'; - ``` - By default, DuckDB spills over to disk when memory usage grows beyond the above limit. You can verify the temporary directory by running: - ```sql - SELECT current_setting('temp_directory') AS temp_directory; - ``` - Note, if your query is deeply nested, you should have sufficient disk space for DuckDB to use; e.g. for 4 nested levels of `INNER JOIN` combined with a `GROUP BY`, we observed a disk spill over of 30x the original dataset. However we found this was not always reliable. - - In this kind of borderline cases, it might be possible to address the limitation by splitting the workload into chunks, and aggregating later, or by considering one of the alternatives mentioned below. - - You can also optimize the queries for DuckDB, but that requires a deeper dive into the documentation, and understanding how DuckDB query optimisation works. + + - However if you have a really large dataset, say 100Ms of rows, and want to perform a deeply nested query, it would require substantial amount of memory, making it unfeasible to run on personal laptops. + - There are options to customize memory handling, and push what is possible on a single machine. + + You need to limit the memory usage to prevent the operatings system, or shell from preemptively killing it. You can choose a value about 50% of your system's RAM. + + ```sql + SET memory_limit = '5GB'; + ``` + + By default, DuckDB spills over to disk when memory usage grows beyond the above limit. You can verify the temporary directory by running: + + ```sql + SELECT current_setting('temp_directory') AS temp_directory; + ``` + + Note, if your query is deeply nested, you should have sufficient disk space for DuckDB to use; e.g. 

 # Useful libraries

 ## Database APIs

 - [SQLAlchemy](https://www.sqlalchemy.org/)
-  - In Python, interfacing to SQL databases like SQLite, MySQL or PostgreSQL is often done using [SQLAlchemy](https://www.sqlalchemy.org/), which is an Object Relational Mapper (ORM) that allows you to map tables to Python classes. Note that you still need to use a lot of manual SQL outside of Python to manage the database. However, SQLAlchemy allows you to use the data in a Pythonic way once you have the database layout figured out.
+  - In Python, interfacing with SQL databases like SQLite, MySQL or PostgreSQL is often done using [SQLAlchemy](https://www.sqlalchemy.org/), which is an Object Relational Mapper (ORM) that allows you to map tables to Python classes. Note that you may still need some manual SQL outside of Python to manage the database. However, SQLAlchemy allows you to use the data in a Pythonic way once you have the database layout figured out, as sketched below.
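+
+As a minimal sketch of that idea (using SQLAlchemy's 2.0-style declarative API; the table and column names are invented):
+
+```python
+# Sketch: map a table to a Python class with SQLAlchemy's 2.0-style ORM.
+from sqlalchemy import create_engine, select
+from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
+
+
+class Base(DeclarativeBase):
+    pass
+
+
+class Measurement(Base):
+    __tablename__ = "measurements"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    station: Mapped[str]
+    temperature: Mapped[float]
+
+
+engine = create_engine("sqlite:///measurements.db")
+Base.metadata.create_all(engine)  # create the table if it does not exist
+
+with Session(engine) as session:
+    session.add(Measurement(station="De Bilt", temperature=11.5))
+    session.commit()
+    warm = session.scalars(
+        select(Measurement).where(Measurement.temperature > 10)
+    ).all()
+```
+
+Once the mapping exists, queries are composed in Python with `select()` instead of hand-written SQL strings.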

 ## Data processing libraries on a single machine
+
 - Pandas
-  - The standard tool for working with dataframes, and widely used in analytics or machine learning workflows. Note however how Pandas uses memory, because certain APIs create copies, while others do not. So if you are chaining multiple operations, it is preferable to use APIs that avoid copies.
+  - The standard tool for working with dataframes, widely used in analytics and machine learning workflows. Note, however, how Pandas uses memory: certain APIs create copies, while others do not. So if you are chaining multiple operations, it is preferable to use APIs that avoid copies.
 - Vaex
-  - Vaex is an alternative that focuses on out-of-core processing (larger than memory), and has some lazy evaluation capabilities.
+  - Vaex is an alternative that focuses on out-of-core processing (larger than memory) and has some lazy evaluation capabilities.
 - Polars
-  - An alternative to Pandas (started in 2020), which is primarily written in Rust. Compared to pandas, it is multi-threaded and does lazy evaluation with query optimisation, so much more performant. However since it is newer, documentation is not as complete. It also allows you to write your own custom extensions in Rust.
+  - An alternative to Pandas (started in 2020), which is primarily written in Rust. Compared to pandas, it is multi-threaded and does lazy evaluation with query optimisation, which makes it much more performant. However, since it is newer, its documentation is not as complete. It also allows you to write your own custom extensions in Rust.
 - [Apache Datafusion](https://datafusion.apache.org/)
-  - A very fast, extensible query engine for building high-quality data-centric systems in [Rust](http://rustlang.org/), using the [Apache Arrow](https://arrow.apache.org/) in-memory format. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community.
+  - A very fast, extensible query engine for building high-quality data-centric systems in [Rust](http://rustlang.org/), using the [Apache Arrow](https://arrow.apache.org/) in-memory format. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community.

 ## Distributed/multi-node data processing libraries
+
 - Dask
-  - `dask.dataframe` and `dask.array` provides the same API as pandas and numpy respectively, making it easy to switch.
-  - When working with multiple nodes, it requires communication across nodes (which is network bound).
+  - `dask.dataframe` and `dask.array` provide the same APIs as pandas and numpy respectively, making it easy to switch.
+  - When working with multiple nodes, it requires communication across nodes (which is network bound).
 - Ray
 - Apache Spark
diff --git a/technology/gpu.md b/technology/gpu.md
index 3a6c26b..1b1dc6a 100644
--- a/technology/gpu.md
+++ b/technology/gpu.md
@@ -1,143 +1,142 @@
 # GPU Programming Languages
-*Page maintainer: Alessio Sclocco* [@isazi](https://github.com/isazi)
-
+_Page maintainer: Alessio Sclocco_ [@isazi](https://github.com/isazi)

 ## Learning Resources
-
-* Carpentries GPU Programming course
-  * [Lesson material](https://carpentries-incubator.github.io/lesson-gpu-programming/)
-* Introduction to CUDA C
-  * [Slides](http://developer.download.nvidia.com/compute/developertrainingmaterials/presentations/cuda_language/Introduction_to_CUDA_C.pptx)
-  * [Video](http://on-demand.gputechconf.com/gtc/2012/video/S0624-Monday-Introduction-to-CUDA-C.mp4)
-* Introduction to OpenACC
-  * [Slides](http://developer.download.nvidia.com/compute/developertrainingmaterials/presentations/openacc/Introduction_To_OpenACC.pptx)
-* Introduction to HIP Programming
-  * [Video](https://www.youtube.com/watch?v=3ejUwypP0bI)
-* SYCL Introduction and Best Practices
-  * [Video](https://www.youtube.com/watch?v=TbkrODiVDQY)
-* CSCS GPU Programming with Julia
-  * [Course recordings](https://github.com/omlins/julia-gpu-course)
-
+
+- Carpentries GPU Programming course
+  - [Lesson material](https://carpentries-incubator.github.io/lesson-gpu-programming/)
+- Introduction to CUDA C
+  - [Slides](http://developer.download.nvidia.com/compute/developertrainingmaterials/presentations/cuda_language/Introduction_to_CUDA_C.pptx)
+  - [Video](http://on-demand.gputechconf.com/gtc/2012/video/S0624-Monday-Introduction-to-CUDA-C.mp4)
+- Introduction to OpenACC
+  - [Slides](http://developer.download.nvidia.com/compute/developertrainingmaterials/presentations/openacc/Introduction_To_OpenACC.pptx)
+- Introduction to HIP Programming
+  - [Video](https://www.youtube.com/watch?v=3ejUwypP0bI)
+- SYCL Introduction and Best Practices
+  - [Video](https://www.youtube.com/watch?v=TbkrODiVDQY)
+- CSCS GPU Programming with Julia + - [Course recordings](https://github.com/omlins/julia-gpu-course) + ## Documentation -* CUDA - * [C programming guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html) - * [Runtime API](https://docs.nvidia.com/cuda/cuda-runtime-api/) - * [Driver API](https://docs.nvidia.com/cuda/cuda-driver-api/index.html) - * [Fortran programming guide](https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html) -* HIP - * [Kernel language syntax](https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html) - * [Runtime API](https://rocm.docs.amd.com/projects/HIP/en/latest/reference/hip_runtime_api_reference.html) -* SYCL - * [Specification](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) - * [Reference guide](https://www.khronos.org/files/sycl/sycl-2020-reference-guide.pdf) -* OpenCL - * [Guide](https://github.com/KhronosGroup/OpenCL-Guide) - * [API](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html) - * [OpenCL C specification](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html) - * [Reference guide](https://www.khronos.org/files/opencl30-reference-guide.pdf) -* OpenACC - * [Programming guide](https://www.openacc.org/sites/default/files/inline-files/OpenACC_Programming_Guide_0_0.pdf) - * [Reference guide](https://www.openacc.org/sites/default/files/inline-files/API%20Guide%202.7.pdf) -* OpenMP - * [Reference guide](https://www.openmp.org/wp-content/uploads/OpenMPRef-5.0-111802-web.pdf) +- CUDA + - [C programming guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html) + - [Runtime API](https://docs.nvidia.com/cuda/cuda-runtime-api/) + - [Driver API](https://docs.nvidia.com/cuda/cuda-driver-api/index.html) + - [Fortran programming guide](https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html) +- HIP + - [Kernel language syntax](https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html) + - [Runtime API](https://rocm.docs.amd.com/projects/HIP/en/latest/reference/hip_runtime_api_reference.html) +- SYCL + - [Specification](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) + - [Reference guide](https://www.khronos.org/files/sycl/sycl-2020-reference-guide.pdf) +- OpenCL + - [Guide](https://github.com/KhronosGroup/OpenCL-Guide) + - [API](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html) + - [OpenCL C specification](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html) + - [Reference guide](https://www.khronos.org/files/opencl30-reference-guide.pdf) +- OpenACC + - [Programming guide](https://www.openacc.org/sites/default/files/inline-files/OpenACC_Programming_Guide_0_0.pdf) + - [Reference guide](https://www.openacc.org/sites/default/files/inline-files/API%20Guide%202.7.pdf) +- OpenMP + - [Reference guide](https://www.openmp.org/wp-content/uploads/OpenMPRef-5.0-111802-web.pdf) ## Overview of Libraries -* CUDA - * [cuBLAS](http://docs.nvidia.com/cuda/cublas/index.html) - * [NVBLAS](http://docs.nvidia.com/cuda/nvblas/index.html) - * [cuFFT](http://docs.nvidia.com/cuda/cufft/index.html) - * [cuGRAPH](https://docs.rapids.ai/api/cugraph/stable/) - * [cuRAND](http://docs.nvidia.com/cuda/curand/index.html) - * [cuSPARSE](http://docs.nvidia.com/cuda/cusparse/index.html) -* HIP - * [hipBLAS](https://rocm.docs.amd.com/projects/hipBLAS/en/latest/index.html) - * [hipFFT](https://rocm.docs.amd.com/projects/hipFFT/en/latest/index.html) - * 
[hipRAND](https://rocm.docs.amd.com/projects/hipRAND/en/latest/index.html) - * [hipSPARSE](https://rocm.docs.amd.com/projects/hipSPARSE/en/latest/index.html) -* SYCL - * [OneAPI BLAS](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/blas-routines.html) - * [OneAPI FFT](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/fourier-transform-functions.html) - * [OneAPI sparse](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/sparse-blas-routines.html) - * [OneAPI random number generators](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/random-number-generators.html) -* OpenCL - * [CLBlast](https://github.com/CNugteren/CLBlast) - * [clFFT](https://github.com/clMathLibraries/clFFT) +- CUDA + - [cuBLAS](http://docs.nvidia.com/cuda/cublas/index.html) + - [NVBLAS](http://docs.nvidia.com/cuda/nvblas/index.html) + - [cuFFT](http://docs.nvidia.com/cuda/cufft/index.html) + - [cuGRAPH](https://docs.rapids.ai/api/cugraph/stable/) + - [cuRAND](http://docs.nvidia.com/cuda/curand/index.html) + - [cuSPARSE](http://docs.nvidia.com/cuda/cusparse/index.html) +- HIP + - [hipBLAS](https://rocm.docs.amd.com/projects/hipBLAS/en/latest/index.html) + - [hipFFT](https://rocm.docs.amd.com/projects/hipFFT/en/latest/index.html) + - [hipRAND](https://rocm.docs.amd.com/projects/hipRAND/en/latest/index.html) + - [hipSPARSE](https://rocm.docs.amd.com/projects/hipSPARSE/en/latest/index.html) +- SYCL + - [OneAPI BLAS](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/blas-routines.html) + - [OneAPI FFT](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/fourier-transform-functions.html) + - [OneAPI sparse](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/sparse-blas-routines.html) + - [OneAPI random number generators](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-dpcpp/2025-0/random-number-generators.html) +- OpenCL + - [CLBlast](https://github.com/CNugteren/CLBlast) + - [clFFT](https://github.com/clMathLibraries/clFFT) ## Source-to-source Translation -* CUDA to HIP - * [hipify](https://github.com/ROCm/HIPIFY) -* CUDA to SYCL - * [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) -* CUDA to OpenCL - * [cutocl](https://github.com/benvanwerkhoven/cutocl) +- CUDA to HIP + - [hipify](https://github.com/ROCm/HIPIFY) +- CUDA to SYCL + - [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) +- CUDA to OpenCL + - [cutocl](https://github.com/benvanwerkhoven/cutocl) ## Foreign Function Interfaces -* C++ - * CUDA - * [cudawrappers](https://github.com/nlesc-recruit/cudawrappers) - * OpenCL - * [CLHPP](https://github.com/KhronosGroup/OpenCL-CLHPP) -* Python - * CUDA - * [PyCuda](https://mathema.tician.de/software/pycuda/) - * [CuPy](https://cupy.dev/) - * [cuda-python](https://nvidia.github.io/cuda-python/) - * HIP - * [PyHIP](https://github.com/jatinx/PyHIP) - * SYCL - * [dpctl](https://github.com/IntelPython/dpctl) - * OpenCL - * [PyOpenCL](https://mathema.tician.de/software/pycuda/) -* Julia - * CUDA - * [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) - * HIP - * [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) - * SYCL - * [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) -* Java - * CUDA - * [JCuda](http://www.jcuda.org/) - * OpenCL - * [JOCL](http://www.jocl.org/) +- C++ + - CUDA + - [cudawrappers](https://github.com/nlesc-recruit/cudawrappers) + - OpenCL + - 
[CLHPP](https://github.com/KhronosGroup/OpenCL-CLHPP) +- Python + - CUDA + - [PyCuda](https://mathema.tician.de/software/pycuda/) + - [CuPy](https://cupy.dev/) + - [cuda-python](https://nvidia.github.io/cuda-python/) + - HIP + - [PyHIP](https://github.com/jatinx/PyHIP) + - SYCL + - [dpctl](https://github.com/IntelPython/dpctl) + - OpenCL + - [PyOpenCL](https://mathema.tician.de/software/pycuda/) +- Julia + - CUDA + - [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) + - HIP + - [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) + - SYCL + - [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) +- Java + - CUDA + - [JCuda](http://www.jcuda.org/) + - OpenCL + - [JOCL](http://www.jocl.org/) ## High-Level Abstractions -* C++ - * [Kokkos](https://github.com/kokkos/kokkos) - * [Raja](https://github.com/LLNL/RAJA) -* Python - * [Numba](https://numba.pydata.org/) - * [pykokkos](https://github.com/kokkos/pykokkos) +- C++ + - [Kokkos](https://github.com/kokkos/kokkos) + - [Raja](https://github.com/LLNL/RAJA) +- Python + - [Numba](https://numba.pydata.org/) + - [pykokkos](https://github.com/kokkos/pykokkos) ## Debugging and Profiling Tools -* CUDA - * [Nsight Systems](https://developer.nvidia.com/nsight-systems) - * [Nsight Compute](https://developer.nvidia.com/nsight-compute) - * [CUDA-GDB](http://docs.nvidia.com/cuda/cuda-gdb/index.html) - * [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/index.html) -* HIP - * [omniperf](https://github.com/AMDResearch/omniperf) - * [rocprof](https://github.com/ROCm/rocprofiler) -* SYCL - * [oneprof](https://github.com/intel/pti-gpu/tree/master/tools/oneprof) - * [onetrace](https://github.com/intel/pti-gpu/tree/master/tools/onetrace) +- CUDA + - [Nsight Systems](https://developer.nvidia.com/nsight-systems) + - [Nsight Compute](https://developer.nvidia.com/nsight-compute) + - [CUDA-GDB](http://docs.nvidia.com/cuda/cuda-gdb/index.html) + - [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/index.html) +- HIP + - [omniperf](https://github.com/AMDResearch/omniperf) + - [rocprof](https://github.com/ROCm/rocprofiler) +- SYCL + - [oneprof](https://github.com/intel/pti-gpu/tree/master/tools/oneprof) + - [onetrace](https://github.com/intel/pti-gpu/tree/master/tools/onetrace) ## Performance Optimization -* [PRACE best practice guide on modern accelerators](https://zenodo.org/records/5839488) -* [CUDA best practices](https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html) -* [OneAPI SYCL best practices](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/2025-0/optimize-your-sycl-applications.html) +- [PRACE best practice guide on modern accelerators](https://zenodo.org/records/5839488) +- [CUDA best practices](https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html) +- [OneAPI SYCL best practices](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/2025-0/optimize-your-sycl-applications.html) ## Auto-tuning -* Kernel Tuner - * [GitHub repository](https://github.com/KernelTuner/kernel_tuner) - * [Documentation](https://kerneltuner.github.io/kernel_tuner/stable/) - * [Tutorial](https://github.com/KernelTuner/kernel_tuner_tutorial) +- Kernel Tuner + - [GitHub repository](https://github.com/KernelTuner/kernel_tuner) + - [Documentation](https://kerneltuner.github.io/kernel_tuner/stable/) + - [Tutorial](https://github.com/KernelTuner/kernel_tuner_tutorial) diff --git a/technology/technology_overview.md b/technology/technology_overview.md index bfffda1..a374fcd 100644 --- 
a/technology/technology_overview.md
+++ b/technology/technology_overview.md
@@ -1,9 +1,9 @@
 # Technology Guides

-*Page maintainer: Patrick Bos* [@egpbos](https://github.com/egpbos)
+_Page maintainer: Patrick Bos_ [@egpbos](https://github.com/egpbos)

 These chapters are based on our experiences with using specific software technologies.
 The main audience is RSEs familiar with basic computing and programming concepts.

-The purpose of these chapters is for someone unfamiliar with the specific technology to get a quick overview of the most important concepts, practices and tools, without going into too much detail (we provide links to further reading material for more).
\ No newline at end of file
+The purpose of these chapters is for someone unfamiliar with the specific technology to get a quick overview of the most important concepts, practices and tools, without going into too much detail (we provide links to further reading material for more).
diff --git a/technology/user_experience.md b/technology/user_experience.md
index 1fa775d..de3d7bf 100644
--- a/technology/user_experience.md
+++ b/technology/user_experience.md
@@ -1,13 +1,13 @@
 # User Experience (UX)

-*Page maintainer: Jesus Garcia* [@ctwhome](https://github.com/ctwhome)
-
+_Page maintainer: Jesus Garcia_ [@ctwhome](https://github.com/ctwhome)

 User Experience Design (UX) is a broad, holistic science that combines many cognitive and brain science disciplines like psychology and sociology, content strategies, and arts and aesthetics by following human-centered approaches.

 > Human-centred design is an approach to interactive systems development that aims to make systems usable and useful by focusing on the users, their needs and requirements, and applying human factors/ergonomics and usability knowledge and techniques. This approach enhances effectiveness and efficiency, improves human well-being, user satisfaction, accessibility, sustainability, and counteracts possible adverse effects on human health, safety, and performance. [HCDSociety](https://human-centered-design.org/about/)

 ## Table of contents
+
 - UX disciplines
 - Design thinking process
 - Designing software
@@ -30,20 +30,19 @@ The known UX umbrella diagram represents the different disciplines of UX:

-*Author/Copyright holder: J.G. Gonzalez and The Netherlands eScience Center. Copyright: Apache License 2.0*
-
-
+_Author/Copyright holder: J.G. Gonzalez and The Netherlands eScience Center. Copyright: Apache License 2.0_

 ### Design Thinking
+
 Design thinking is an approach, mindset, or ideology for product development. According to the [IxDF (Interaction Design Foundation)](https://interaction-design.org), Design thinking achieves all these advantages at the same time:

-* It is a user-centered process that starts with user data, creates design artifacts that address real and not imaginary user needs, and then tests those artifacts with real users.
-* It leverages the collective expertise and establishes a shared language and buy-in amongst your team.
-* It encourages innovation by exploring multiple avenues for the same problem.
+- It is a user-centered process that starts with user data, creates design artifacts that address real and not imaginary user needs, and then tests those artifacts with real users.
+- It leverages the collective expertise and establishes a shared language and buy-in amongst your team.
+- It encourages innovation by exploring multiple avenues for the same problem.

-*Author/Copyright holder: Teo Yu Siang and Interaction Design Foundation. Copyright licence: CC BY-NC-SA 3.0*
+_Author/Copyright holder: Teo Yu Siang and Interaction Design Foundation. Copyright licence: CC BY-NC-SA 3.0_

 You can find more information about Design Thinking on the [IxDF page](https://www.interaction-design.org/literature/topics/design-thinking).

@@ -57,15 +56,14 @@ When delivering software iteratively, one of the common approaches to follow is

- *Author/Copyright holder: J.G. Gonzalez and The Netherlands eScience Center. Copyright: Apache License 2.0*
+_Author/Copyright holder: J.G. Gonzalez and The Netherlands eScience Center. Copyright: Apache License 2.0_

 While the MVP (Minimum Viable Product) approach focuses on providing users with a way to explore the product and understand its main intent, the MLP (Minimum Lovable Product) approach focuses on essential features rather than the bare minimum expected from software of its class. Going beyond bare functionality, the attention is driven towards a great user experience. The outcome must contain all elements in the pyramid, being **functional, reliable, usable, and pleasurable.**

-
 ### Tools and resources

 Design tools used for Visual Design, Prototyping, and IxD testing that are collaborative, real-time, online, and multiplatform:

-* [Figma](https://www.figma.com/)
-* [Miro](https://miro.com/)
-* [Whimsical](https://whimsical.com/)
+- [Figma](https://www.figma.com/)
+- [Miro](https://miro.com/)
+- [Whimsical](https://whimsical.com/)