Re: [Pgfplots-features] automatically selecting some non-overlapping tick labels

Neal H. Walfield Thu, 21 Mar 2013 06:48:21 -0700

FWIW, I've attached the R code that I'm using to choose ticks and tick
labels.  The algorithms are sound, but the lack of context means that
I have to make assumptions about the text size and available space.


Examples:

> source('axis.R')
> axis.ticks.generate(1:100)
 [1]   0  10  20  30  40  50  60  70  80  90 100
> axis.ticks.generate(1:100, log=TRUE)
 [1]   1.0   1.8   3.0   6.0  10.0  18.0  32.0  56.0 100.0
> axis.labels.select(axis.ticks.generate(1:100))
 [1] "0"   "10"  "20"  "30"  "40"  "50"  "60"  "70"  "80"  "90"  "100"
> axis.labels.select(axis.ticks.generate(1:100), available.space=20)
 [1] "0"   ""    "20"  ""    "40"  ""    "60"  ""    "80"  ""    "100"

Perhaps this code will be useful to someone.

Neal


make.multiple.of <- function(x, multiple,
                             towards.zero=FALSE,
                             away.from.zero=FALSE,
                             towards.positive.infinity=FALSE,
                             towards.negative.infinity=FALSE) {
  if (sum(sapply(list(towards.zero,
                      away.from.zero,
                      towards.positive.infinity,
                      towards.negative.infinity),
                 function (x) identical(x, FALSE)))
      != 3) {
    browser()
    stop("You must set exactly one of towards.zero, away.from.zero, 
towards.positive.infinity or towards.negative.infinity to TRUE")
  }

  if (towards.zero)
    return (sign(x) * (abs(x) - abs(x) %% multiple))
  if (away.from.zero)
    return (sign(x) * (abs(x) + (-abs(x) %% multiple)))

  if (towards.positive.infinity) {
    mask = x < 0
  } else {
    stopifnot(towards.negative.infinity)
    mask = x > 0
  }

  x[mask] = make.multiple.of(x[mask], multiple=multiple, towards.zero=TRUE)
  x[!mask] = make.multiple.of(x[!mask], multiple=multiple, away.from.zero=TRUE)

  return (x)
}

axis.ticks.generate <- function(values,
                                log=FALSE,
                                ticks=10,
                                verbose=FALSE) {
  if (log)
    values = base::log(values, 10)

  # Compute the difference between the extremes.

  min.value = min(values)
  max.value = max(values)
  value.range = max.value - min.value

  if (value.range > 10 && log) {
    min.value = floor(min.value)
    max.value = ceiling(max.value)
    value.range = max.value - min.value
  }

  orders.of.magnitude = max(floor(log(abs(value.range), base=10)))

  if (verbose)
    cat(sprintf("%g - %g => width of %g (spans %d orders of magnitude)\n",
                min.value, max.value, value.range, orders.of.magnitude))

  generate.ticks <- function(step.size.multiple) {
    step.size = step.size.multiple * 10^(orders.of.magnitude - 1)
    min.axis = make.multiple.of(min.value, step.size,
      towards.negative.infinity=TRUE)
    max.axis = make.multiple.of(max.value, step.size,
      towards.positive.infinity=TRUE)

    ticks = seq(min.axis, max.axis, step.size)
    if (verbose)
      cat(sprintf("Step size of %g (multiple: %g) results in %d ticks:\n %s\n",
                  step.size, step.size.multiple, length(ticks),
                  paste(sprintf("%g", ticks), collapse=", ")))

    # Trim the ticks, if appropriate.

    # In steps.
    max.margin = 0.5

    separation = (min.value - ticks[1]) / step.size
    if (separation > max.margin) {
      if (verbose)
        cat(sprintf("trimming left: (%g of a step >= %g of a step)\n",
                    separation, max.margin))

      ticks[1] = min.value
    }

    separation = (ticks[length(ticks)] - max.value) / step.size
    if (separation > max.margin) {
      if (verbose)
        cat(sprintf("trimming right: (%g of a step >= %g of a step)\n",
                    separation, max.margin))

      ticks[length(ticks)] = max.value
    }

    return (ticks)
  }

  # Choose the step size that is as close to 10 ticks as possible.
  step.size.multiples = c(1, 1.25, 2.5, 5, 10)
  # We prefer multiples of 10.
  step.size.penalties = c(0,    2, 1.5, 1,  0)
  step.size.multiple = step.size.multiples[
    which.min(abs(ticks
                  - (sapply(step.size.multiples,
                           function (m) {
                             length(generate.ticks(m))
                           })
                     + step.size.penalties)))]

  if (verbose)
    cat(sprintf("Optimal step size multiple for %d ticks is %g\n",
                ticks, step.size.multiple))

  ticks = generate.ticks(step.size.multiple)

  if (log) {
    ticks = 10^ticks
    if (10^value.range < 2)
      ticks = signif(ticks, 2)
    else {
      small = abs(ticks) < 2
      large = abs(ticks) >= 100
      medium = !(small | large)

      ticks[small] = round(ticks[small], 1)
      ticks[medium] = round(ticks[medium])
      ticks[large] = signif(ticks[large], 2)

      ticks = unique(ticks)
    }
  }

  return (ticks)
}

axis.labels.select <- function(ticks, labels=NULL,
                               axis='x',
                               available.space=NULL,
                               log=FALSE, log.base=NULL,
                               verbose=FALSE) {
  stopifnot(is.numeric(ticks))
  if (length(ticks) <= 1)
    return (as.character(ticks))

  if (is.null(labels))
    labels = ticks
  if (is.numeric(labels)) {
    if (max(abs(ticks)) >= 10)
      # We have single and double digit numbers.  Make the single
      # digit numbers single digit and the wider numbers significant
      # to 2 places.
      labels = ifelse(abs(ticks) >= 10, signif(ticks, 2), round(ticks))
    else
      # Use 2 significant digits, except for the last tick.
      labels = signif(ticks, 2)

    labels[length(labels)] = ticks[length(ticks)]
    labels = sprintf("%g", labels)
  }

  stopifnot(length(ticks) == length(labels))
  stopifnot(is.character(labels))

  if (log) {
    if (is.null(log.base))
      log = base::log
    else {
      log = function(x) { base::log(x, base=log.base) }
      exp = function(x) { log.base^x }
    }
  } else {
    log = identity
    exp = identity
  }

  stopifnot(axis %in% c('x', 'y'))

  # Assuming labels are evenly spaced:
  #
  # If all labels are 2 characters wide, we can have 11 labels without
  # overlap.  That's 22 characters plus 10 inter-label spaces.
  #
  # If all labels are 3 characters, then we can have 8 labels
  # without overlap.  That's 24 characters plus 7 interlabel spaces.
  #
  # If all labels are 4 characters, then we can have 6 labels
  # without overlap.  That's 24 characters plus 5 interlabel spaces.
  #
  # Thus, we have space for about 37 characters (digits or spaces).
  #
  # This means that we can have a density of one character every
  # 1/37th of the x-axis.

  if (is.null(available.space))
    available.space = if (axis == 'x') 37 else 20


  # Here's our approach: for each pair of adjacent, non-empty
  # labels, we compute the number of characters from the middle of
  # the left label to the middle of the right label and the amount
  # of available space.  If the character density exceeds the
  # threshold, then we kill one of the labels.

  # Compute the position of each tick on a linear scale from 0 to 1.
  pos = (log(ticks) - log(ticks[1])) / (log(ticks[length(ticks)]) - 
log(ticks[1]))

  label.space <- function(label) {
    if (axis == 'y')
      # XXX: It's hard to compute the height.  Most likely, it is just
      # one, however.
      return (1)

    label = gsub("[\\][a-zA-Z]*", "x", label)
    label = gsub("$", "", label, fixed=TRUE)

    return (nchar(label)
       # Count punctuation as less than a character.
       # - .25 * sapply(strsplit(label, '[.,]'), length)
     )
  }

  stopifnot(length(ticks) >= 2)
  for (i in 2:(length(ticks))) {
    # Find the first label (largest x, such that x <= i - 1) that
    # is not the empty string.
    not.empty = which(labels[(i - 1):1] != "")
    if (length(not.empty) == 0)
      next
    left = ((i - 1):1)[not.empty[1]]

    right = i
    if (labels[right] == "")
      next

    used.chars =
      (sum(label.space(labels[c(left, right)])) / 2
       # The interlabel space.
       + 1)

    space = pos[right] - pos[left]
    available.chars = available.space * space

    if (verbose)
      cat(sprintf("Considering %s: space: %.2g => %.1f characters; have %g 
characters.\n",
                  paste(sprintf("'%s'", labels[c(left, right)]),
                        collapse=", "),
                  space, available.chars, used.chars))

    if (used.chars > available.chars) {
      # Kill the current label, unless this is the last label
      # (which we always want to keep).
      kill = if (right == length(ticks)) left else right
      if (verbose)
        cat(sprintf("Killing '%s' (index: %d)\n", labels[kill], kill))
      labels[kill] = ""
    }
  }

  labels
}

------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_mar
_______________________________________________
Pgfplots-features mailing list
Pgfplots-features@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/pgfplots-features

Re: [Pgfplots-features] automatically selecting some non-overlapping tick labels

Reply via email to