From 634091a40a0ae1c6e6a15ccb7ad295bb1d84e6bc Mon Sep 17 00:00:00 2001 From: Claus Wilke Date: Sat, 8 Jul 2017 16:07:29 -0500 Subject: [PATCH] update to new name --- R/iris_plot.R | 2 +- R/themes.R | 10 +++++----- background_grids.Rmd | 10 +++++----- index.Rmd | 4 ++-- introduction.Rmd | 2 +- notes.Rmd | 1 + overlapping_points.Rmd | 4 ++-- professional_figures.Rproj | 18 ------------------ small_axis_labels.Rmd | 8 ++++---- 9 files changed, 21 insertions(+), 38 deletions(-) delete mode 100644 professional_figures.Rproj diff --git a/R/iris_plot.R b/R/iris_plot.R index c0fc5bad..75a23a6b 100644 --- a/R/iris_plot.R +++ b/R/iris_plot.R @@ -16,6 +16,6 @@ iris %>% scale_x_continuous(limits=c(1.95, 4.6), expand=c(0, 0)) + scale_y_continuous(limits=c(3.95, 8.1), expand=c(0, 0)) -> iris_plot -p <- plot_grid(NULL, iris_plot + theme_pf_grid(), NULL, nrow=1, rel_widths=c(0.02, 1, 0.04)) +p <- plot_grid(NULL, iris_plot + theme_dviz_grid(), NULL, nrow=1, rel_widths=c(0.02, 1, 0.04)) save_plot("../figures/iris.png", p, base_height=3.708, base_width=6, dpi=150) save_plot("../figures/iris.pdf", p, base_height=3.708, base_width=6) diff --git a/R/themes.R b/R/themes.R index 658e7040..79458a4e 100644 --- a/R/themes.R +++ b/R/themes.R @@ -1,13 +1,13 @@ -theme_pf <- function(font_size = 14, font_family = ""){ +theme_dviz <- function(font_size = 14, font_family = ""){ theme_cowplot(font_size = font_size, font_family = font_family) } -theme_pf_half_open <- function(font_size = 14, font_family = ""){ +theme_dviz_half_open <- function(font_size = 14, font_family = ""){ theme_cowplot(font_size = font_size, font_family = font_family) } # horizontal grid lines only -theme_pf_hgrid <- function(font_size = 14, font_family = "") { +theme_dviz_hgrid <- function(font_size = 14, font_family = "") { color = "grey90" line_size = 0.5 @@ -31,7 +31,7 @@ theme_pf_hgrid <- function(font_size = 14, font_family = "") { # vertical grid lines only -theme_pf_vgrid <- function(font_size = 14, font_family = "") { 
+theme_dviz_vgrid <- function(font_size = 14, font_family = "") { color = "grey90" line_size = 0.5 @@ -54,7 +54,7 @@ theme_pf_vgrid <- function(font_size = 14, font_family = "") { } # grid lines along major axis ticks, no axes -theme_pf_grid <- function(font_size = 14, font_family = "") { +theme_dviz_grid <- function(font_size = 14, font_family = "") { color = "grey90" line_size = 0.5 diff --git a/background_grids.Rmd b/background_grids.Rmd index 47e3b186..abcf318b 100644 --- a/background_grids.Rmd +++ b/background_grids.Rmd @@ -55,7 +55,7 @@ The grid is too busy, and the gray background in the legend is distracting. We could try to remove the grid altogether, but I think that is a worse option: ```{r warning=FALSE} -stamp_bad(price_plot + theme_pf_half_open()) +stamp_bad(price_plot + theme_dviz_half_open()) ``` Now the curves seem to just float in space, and it's difficult to see where they go. In addition, since all prices are indexed to 100 in June 2012, at a minimum this value should be marked in the plot. Thus, one option would be to add a thin horizontal line at $y=100$: ```{r warning=FALSE} @@ -71,7 +71,7 @@ price_plot2 <- ggplot(prices, aes(x=date, y=price_indexed, color=ticker)) + expand=c(0,0)) + scale_y_continuous(name="stock price, indexed", expand=c(0,0)) + - theme_pf_half_open() + theme_dviz_half_open() @@ -81,7 +81,7 @@ stamp_good(price_plot2) Alternatively, we can use just a minimal grid. In particular, for a plot where we are primarily interested in the change in $y$ values, vertical grid lines are not needed. Moreover, grid lines positioned at only the major axis ticks will often be sufficient. And, the axis line can be omitted or made very thin. Thus, we arrive at this plot: ```{r warning=FALSE} -stamp_good(price_plot + theme_pf_hgrid()) +stamp_good(price_plot + theme_dviz_hgrid()) ``` Note that for such a minimal grid, we generally draw the lines orthogonally to direction along which the numbers of interest vary. 
Thus, if instead of plotting the stock price over time we plot the five-year increase, as horizontal bars, then we will want to use vertical lines instead: @@ -103,7 +103,7 @@ perc_plot <- ggplot(perc_increase, aes(x=ticker, y=perc)) + breaks=c("FB", "GOOG", "MSFT", "AAPL"), labels=c("Facebook", "Alphabet", "Microsoft", "Apple")) + coord_flip() + - theme_pf_vgrid() + + theme_dviz_vgrid() + theme(axis.title.y=element_blank()) # remove the unnecessary space generated by an empty label stamp_good(perc_plot) @@ -123,7 +123,7 @@ iris %>% scale_x_continuous(limits=c(1.95, 4.6), expand=c(0, 0)) + scale_y_continuous(limits=c(3.95, 8.1), expand=c(0, 0)) -> iris_plot -stamp_good(plot_grid(NULL, iris_plot + theme_pf_grid(), NULL, nrow=1, rel_widths=c(0.02, 1, 0.04))) +stamp_good(plot_grid(NULL, iris_plot + theme_dviz_grid(), NULL, nrow=1, rel_widths=c(0.02, 1, 0.04))) ``` For figures where the relevant comparison is the $x=y$ line, I prefer to draw a diagonal line rather than a grid. For example, consider the following figure, adapted from @Echave-et-al-2016, which compares two sets of correlations for 209 protein structures. By drawing the diagonal line, we can see immediately which correlations are systematically stronger: diff --git a/index.Rmd b/index.Rmd index 57ea215a..b6c482ac 100644 --- a/index.Rmd +++ b/index.Rmd @@ -1,5 +1,5 @@ --- -title: "Professional Figures" +title: "Fundamentals of data visualization" author: "Claus O. Wilke" description: "A guide for making figures that look professional and are publication-ready." github-repo: clauswilke/professional_figures @@ -15,6 +15,6 @@ output: This book is meant as a guide for making figures that look professional and are publication-ready. It has grown out of my experience of having to repeatedly give my trainees the same kinds of advice---use larger fonts, pay attention to the aspect ratio of your figure, use solid colors rather than outlines, and so on. 
Now, I can just aks them to read the appropriate chapters in this book. -The entire book was written in R Markdown, using RStudio as my text editor and the bookdown package to turn a collection of markdown documents into a coherent whole. The book's source code is hosted on GitHub, at https://github.com/clauswilke/professional_figures. If you would like to fix typos or other issues, feel free to send me pull requests through GitHub. In your commit message, please add the sentence "I assign the copyright of this contribution to Claus O. Wilke," so that I can maintain the option of publishing this book in other forms. For comments, questions, or requests for additional chapters, please open an issue on GitHub. +The entire book was written in R Markdown, using RStudio as my text editor and the bookdown package to turn a collection of markdown documents into a coherent whole. The book's source code is hosted on GitHub, at https://github.com/clauswilke/dataviz. If you would like to fix typos or other issues, feel free to send me pull requests through GitHub. In your commit message, please add the sentence "I assign the copyright of this contribution to Claus O. Wilke," so that I can maintain the option of publishing this book in other forms. For comments, questions, or requests for additional chapters, please open an issue on GitHub. This work is licensed under the [Creative Commons Attribution-NonCommercial-NoDerivs 3.0](http://creativecommons.org/licenses/by-nc-nd/3.0/us/) United States License. diff --git a/introduction.Rmd b/introduction.Rmd index 088fb980..e48660ba 100644 --- a/introduction.Rmd +++ b/introduction.Rmd @@ -1,6 +1,6 @@ # Introduction {-} -If you are a scientist, an analyst, a consultant, or anybody else who has to prepare technical documents or reports, one of the most important skills you need to have is the ability to make compelling figures. Figures will typically carry the weight of your arguments. 
They need to be clear, attractive, and compelling. The difference between good and bad figures can be the difference between a highly influential or a obscure paper, a grant or contract won or lost, a job interview gone well or poorly. And yet, there are surprisingly few resources to teach you how to make compelling figures. There are no college courses on figure making, and no extensive collection of books you can read.^[One notable exception is the works of Edward Tufte. His books and seminars are excellent, and much of what I do and say has been inspired by him.] Tutorials for plotting sofware typically focus on how to achieve specific visual effects rather than explaining why certain choices are preferred and others not. In your day-to-day work, you are simply expected to know how to make good figures, and if you're lucky you have a patient adviser who teaches you a few tricks as you're writing your first scientific papers. +If you are a scientist, an analyst, a consultant, or anybody else who has to prepare technical documents or reports, one of the most important skills you need to have is the ability to make compelling data visualizations, generally in the form of figures. Figures will typically carry the weight of your arguments. They need to be clear, attractive, and convincing. The difference between good and bad figures can be the difference between a highly influential or an obscure paper, a grant or contract won or lost, a job interview gone well or poorly. And yet, there are surprisingly few resources to teach you how to make compelling data visualizations. There are no college courses on this topic, and no extensive collection of books you can read.^[One notable exception is the works of Edward Tufte. His books and seminars are excellent, and much of what I do and say has been inspired by him.] 
Tutorials for plotting software typically focus on how to achieve specific visual effects rather than explaining why certain choices are preferred and others not. In your day-to-day work, you are simply expected to know how to make good figures, and if you're lucky you have a patient adviser who teaches you a few tricks as you're writing your first scientific papers. In the context of writing, experienced editors talk about "ear", the ability to hear (internally, as you read a piece of prose) whether the writing is any good. I think that when it comes to figures and other visualizations, we similarly need "eye", the ability to look at a figure and see whether it is balanced, clear, and compelling. And just as is the case with writing, the ability to see whether a figure works or not can be learned. Having eye means primarily that you are aware of a larger collection of simple rules and principles of good visualization, and that you pay attention to little details that other people might not. diff --git a/notes.Rmd b/notes.Rmd index 08502166..1cb9fe10 100644 --- a/notes.Rmd +++ b/notes.Rmd @@ -1,6 +1,7 @@ # Notes A few easy chapters to write soon: + - visualizing paired data (use protein correlation data from Echave et al?) - visualizing distribution (use iris?) diff --git a/overlapping_points.Rmd b/overlapping_points.Rmd index 7421d10a..dbbdc76f 100644 --- a/overlapping_points.Rmd +++ b/overlapping_points.Rmd @@ -10,7 +10,7 @@ p <- ggplot(mpg, aes(y=cty, x=displ, color=drv)) + name="drive train", breaks=c("f", "r", "4"), labels=c("FWD", "RWD", "4WD")) + - theme_pf(19) + theme_dviz(19) stamp_ugly(p) ``` I have labeled this figure "ugly" here because the points overlap and partly obscure each other. 
A simple way to ameliorate this issue is to use partial transparency: @@ -23,7 +23,7 @@ p2 <- ggplot(mpg, aes(y=cty, x=displ, color=drv)) + name="drive train", breaks=c("f", "r", "4"), labels=c("FWD", "RWD", "4WD")) + - theme_pf(19) + theme_dviz(19) stamp_good(p2) ``` diff --git a/professional_figures.Rproj b/professional_figures.Rproj deleted file mode 100644 index e4c1c67d..00000000 --- a/professional_figures.Rproj +++ /dev/null @@ -1,18 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: knitr -LaTeX: pdfLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes - -BuildType: Website diff --git a/small_axis_labels.Rmd b/small_axis_labels.Rmd index b5e1c419..c91646e6 100644 --- a/small_axis_labels.Rmd +++ b/small_axis_labels.Rmd @@ -12,13 +12,13 @@ p <- ggplot(mpg, aes(y=cty, x=displ, color=drv)) + name="drive train", breaks=c("f", "r", "4"), labels=c("FWD", "RWD", "4WD")) -stamp_bad(p + theme_pf(7)) +stamp_bad(p + theme_dviz(7)) ``` The axis labels, axis tick labels, and legend labels are all incredibly small. We can barely see them, and we may have to zoom into the page to distinguish FWD from RWD in the figure legend. A somewhat better version of this figure would be this one: ```{r} -stamp_ugly(p + theme_pf(11)) +stamp_ugly(p + theme_dviz(11)) ``` I think the fonts are still too small, and that's why I have labeled it "ugly". However, we are moving in the right direction, and this figure might be passable under some circumstances. My main criticism with this figure is not so much that the labels aren't legible as that it is not balanced; the text elements are too small compared to the rest of the figure. 
@@ -29,7 +29,7 @@ stamp_good(p) Importantly, we can overdo it and make the labels too big: ```{r} -stamp_ugly(p + theme_pf(19)) +stamp_ugly(p + theme_dviz(19)) ``` Sometimes we need big labels, in particular if the figure is meant to be reduced in size, but the various elements of the figure (in particular, label text and plot symbols) need to fit together. In the above example, the points used to visualize the data are too small relative to the text. Once we fix this, the figure becomes acceptable again: @@ -42,7 +42,7 @@ p2 <- ggplot(mpg, aes(y=cty, x=displ, color=drv)) + name="drive train", breaks=c("f", "r", "4"), labels=c("FWD", "RWD", "4WD")) + - theme_pf(19) + theme_dviz(19) stamp_good(p2) ``` You may look at this figure and find everything too big. However, keep in mind that it is meant to be scaled down. Scale it down so that it is only an inch or two in width, and the figure looks just fine. In fact, at that scaling this is the only figure in this chapter that looks good.