Also add an example notebook that shows the learner running and
updating a live plot.
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,169 @@ |
1 |
+{ |
|
2 |
+ "cells": [ |
|
3 |
+ { |
|
4 |
+ "cell_type": "markdown", |
|
5 |
+ "metadata": { |
|
6 |
+ "deletable": true, |
|
7 |
+ "editable": true |
|
8 |
+ }, |
|
9 |
+ "source": [ |
|
10 |
+ "# Adaptive" |
|
11 |
+ ] |
|
12 |
+ }, |
|
13 |
+ { |
|
14 |
+ "cell_type": "code", |
|
15 |
+ "execution_count": null, |
|
16 |
+ "metadata": { |
|
17 |
+ "collapsed": false, |
|
18 |
+ "deletable": true, |
|
19 |
+ "editable": true |
|
20 |
+ }, |
|
21 |
+ "outputs": [], |
|
22 |
+ "source": [ |
|
23 |
+ "import holoviews as hv\n", |
|
24 |
+ "from holoviews.streams import Stream, param\n", |
|
25 |
+ "hv.notebook_extension('bokeh')" |
|
26 |
+ ] |
|
27 |
+ }, |
|
28 |
+ { |
|
29 |
+ "cell_type": "code", |
|
30 |
+ "execution_count": null, |
|
31 |
+ "metadata": { |
|
32 |
+ "collapsed": false, |
|
33 |
+ "deletable": true, |
|
34 |
+ "editable": true |
|
35 |
+ }, |
|
36 |
+ "outputs": [], |
|
37 |
+ "source": [ |
|
38 |
+ "import numpy as np\n", |
|
39 |
+ "import learner1D\n", |
|
40 |
+ "from time import sleep\n", |
|
41 |
+ "from random import randint\n", |
|
42 |
+ "from functools import partial\n", |
|
43 |
+ "import importlib\n", |
|
44 |
+ "importlib.reload(learner1D)" |
|
45 |
+ ] |
|
46 |
+ }, |
|
47 |
+ { |
|
48 |
+ "cell_type": "code", |
|
49 |
+ "execution_count": null, |
|
50 |
+ "metadata": { |
|
51 |
+ "collapsed": true |
|
52 |
+ }, |
|
53 |
+ "outputs": [], |
|
54 |
+ "source": [ |
|
55 |
+ "def func(x, wait=True):\n", |
|
56 |
+ " x = np.asarray(x)\n", |
|
57 |
+ " a = 10\n", |
|
58 |
+ " if wait:\n", |
|
59 |
+ " sleep(randint(1, 3))\n", |
|
60 |
+ " return np.sin(x * a)" |
|
61 |
+ ] |
|
62 |
+ }, |
|
63 |
+ { |
|
64 |
+ "cell_type": "markdown", |
|
65 |
+ "metadata": { |
|
66 |
+ "deletable": true, |
|
67 |
+ "editable": true |
|
68 |
+ }, |
|
69 |
+ "source": [ |
|
70 |
+ "# Parallel" |
|
71 |
+ ] |
|
72 |
+ }, |
|
73 |
+ { |
|
74 |
+ "cell_type": "code", |
|
75 |
+ "execution_count": null, |
|
76 |
+ "metadata": { |
|
77 |
+ "collapsed": false, |
|
78 |
+ "deletable": true, |
|
79 |
+ "editable": true |
|
80 |
+ }, |
|
81 |
+ "outputs": [], |
|
82 |
+ "source": [ |
|
83 |
+ "import tornado\n", |
|
84 |
+ "from distributed import Client\n", |
|
85 |
+ "\n", |
|
86 |
+ "io = tornado.ioloop.IOLoop.current()\n", |
|
87 |
+ "\n", |
|
88 |
+ "# Initialize the learner\n", |
|
89 |
+ "learner = learner1D.Learner1D()\n", |
|
90 |
+ "learner.add_point(-1, func(-1))\n", |
|
91 |
+ "learner.add_point(1, func(1))" |
|
92 |
+ ] |
|
93 |
+ }, |
|
94 |
+ { |
|
95 |
+ "cell_type": "code", |
|
96 |
+ "execution_count": null, |
|
97 |
+ "metadata": { |
|
98 |
+ "collapsed": false, |
|
99 |
+ "deletable": true, |
|
100 |
+ "editable": true |
|
101 |
+ }, |
|
102 |
+ "outputs": [], |
|
103 |
+ "source": [ |
|
104 |
+ "async def dask_run(learner):\n", |
|
105 |
+ " async with Client(asynchronous=True) as client:\n", |
|
106 |
+ "        await learner1D.run(func, client, learner, goal=lambda learner: learner.loss() < 0.1)\n", |
|
107 |
+ "\n", |
|
108 |
+ "def plot(data):\n", |
|
109 |
+ " xy = [(k, v) for k, v in sorted(data.items()) if v is not None]\n", |
|
110 |
+ " if not xy:\n", |
|
111 |
+ " return hv.Scatter([])\n", |
|
112 |
+ " x, y = np.array(xy, dtype=float).T\n", |
|
113 |
+ " return hv.Scatter((x, y))" |
|
114 |
+ ] |
|
115 |
+ }, |
|
116 |
+ { |
|
117 |
+ "cell_type": "code", |
|
118 |
+ "execution_count": null, |
|
119 |
+ "metadata": { |
|
120 |
+ "collapsed": false, |
|
121 |
+ "deletable": true, |
|
122 |
+ "editable": true |
|
123 |
+ }, |
|
124 |
+ "outputs": [], |
|
125 |
+ "source": [ |
|
126 |
+ "data_stream = Stream.define('data', data=param.ObjectSelector(default=dict()))\n", |
|
127 |
+ "dm = hv.DynamicMap(plot, streams=[data_stream()])\n", |
|
128 |
+ "dm" |
|
129 |
+ ] |
|
130 |
+ }, |
|
131 |
+ { |
|
132 |
+ "cell_type": "code", |
|
133 |
+ "execution_count": null, |
|
134 |
+ "metadata": { |
|
135 |
+ "collapsed": false, |
|
136 |
+ "deletable": true, |
|
137 |
+ "editable": true |
|
138 |
+ }, |
|
139 |
+ "outputs": [], |
|
140 |
+ "source": [ |
|
141 |
+ "pc = tornado.ioloop.PeriodicCallback(lambda: dm.event(data=learner.data), 100)\n", |
|
142 |
+ "pc.start()\n", |
|
143 |
+ "io.add_callback(dask_run, learner)" |
|
144 |
+ ] |
|
145 |
+ } |
|
146 |
+ ], |
|
147 |
+ "metadata": { |
|
148 |
+ "anaconda-cloud": {}, |
|
149 |
+ "kernelspec": { |
|
150 |
+ "display_name": "Python 3", |
|
151 |
+ "language": "python", |
|
152 |
+ "name": "python3" |
|
153 |
+ }, |
|
154 |
+ "language_info": { |
|
155 |
+ "codemirror_mode": { |
|
156 |
+ "name": "ipython", |
|
157 |
+ "version": 3 |
|
158 |
+ }, |
|
159 |
+ "file_extension": ".py", |
|
160 |
+ "mimetype": "text/x-python", |
|
161 |
+ "name": "python", |
|
162 |
+ "nbconvert_exporter": "python", |
|
163 |
+ "pygments_lexer": "ipython3", |
|
164 |
+ "version": "3.5.3" |
|
165 |
+ } |
|
166 |
+ }, |
|
167 |
+ "nbformat": 4, |
|
168 |
+ "nbformat_minor": 1 |
|
169 |
+} |
... | ... |
@@ -6,8 +6,10 @@ |
6 | 6 |
import heapq |
7 | 7 |
from math import sqrt |
8 | 8 |
import itertools |
9 |
+import multiprocessing |
|
10 |
+ |
|
9 | 11 |
import numpy as np |
10 |
-from wrapt import synchronized |
|
12 |
+import tornado |
|
11 | 13 |
|
12 | 14 |
|
13 | 15 |
def add_arg(func): |
... | ... |
@@ -30,7 +32,7 @@ class Learner1D(object): |
30 | 32 |
|
31 | 33 |
""" |
32 | 34 |
|
33 |
- def __init__(self, xdata=None, ydata=None, client=None): |
|
35 |
+ def __init__(self, xdata=None, ydata=None): |
|
34 | 36 |
"""Initialize the learner. |
35 | 37 |
|
36 | 38 |
Parameters |
... | ... |
@@ -62,15 +64,7 @@ class Learner1D(object): |
62 | 64 |
if xdata is not None: |
63 | 65 |
self.add_data(xdata, ydata) |
64 | 66 |
|
65 |
- self.client = client |
|
66 |
- |
|
67 |
- self.smallest_interval = np.inf |
|
68 |
- |
|
69 |
- self.num_done = 0 |
|
70 |
- |
|
71 |
- self.futures = {} |
|
72 |
- |
|
73 |
- def loss(self, x_left, x_right): |
|
67 |
+ def interval_loss(self, x_left, x_right): |
|
74 | 68 |
"""Calculate loss in the interval x_left, x_right. |
75 | 69 |
|
76 | 70 |
Currently returns the rescaled length of the interval. If one of the |
... | ... |
@@ -81,6 +75,12 @@ class Learner1D(object): |
81 | 75 |
return sqrt(((x_right - x_left) / self._scale[0])**2 + |
82 | 76 |
((y_right - y_left) / self._scale[1])**2) |
83 | 77 |
|
78 |
def loss(self):
    """Return the current global loss estimate.

    This is the largest loss over all known intervals, or infinity
    when no interval losses have been computed yet (e.g. before any
    data has been added), so that any goal of the form
    ``learner.loss() < tol`` keeps the learner running.
    """
    # Empty dict is falsy: no intervals yet -> loss is unbounded.
    if not self.losses:
        return float('inf')
    return max(self.losses.values())
|
84 | 84 |
def add_data(self, xvalues, yvalues): |
85 | 85 |
"""Add data to the intervals. |
86 | 86 |
|
... | ... |
@@ -112,7 +112,7 @@ class Learner1D(object): |
112 | 112 |
self._scale = [self._bbox[0][1] - self._bbox[0][0], |
113 | 113 |
self._bbox[1][1] - self._bbox[1][0]] |
114 | 114 |
|
115 |
- def choose_points(self, n=10, add_to_data=True): |
|
115 |
+ def choose_points(self, n=10): |
|
116 | 116 |
"""Return n points that are expected to maximally reduce the loss.""" |
117 | 117 |
# Find out how to divide the n points over the intervals |
118 | 118 |
# by finding positive integer n_i that minimize max(L_i / n_i) subject |
... | ... |
@@ -121,7 +121,6 @@ class Learner1D(object): |
121 | 121 |
|
122 | 122 |
# Return equally spaced points within each interval to which points |
123 | 123 |
# will be added. |
124 |
- # self.get_results() # Insert finished results into self.data |
|
125 | 124 |
self.interpolate() # Apply new interpolation step if new results |
126 | 125 |
|
127 | 126 |
def points(x, n): |
... | ... |
@@ -140,19 +139,16 @@ class Learner1D(object): |
140 | 139 |
|
141 | 140 |
# Add `None`s to data because then the same point will not be returned |
142 | 141 |
# upon a next request. This can be used for parallelization. |
143 |
- if add_to_data: |
|
144 |
- self.add_data(xs, itertools.repeat(None)) |
|
142 |
+ self.add_data(xs, itertools.repeat(None)) |
|
145 | 143 |
|
146 | 144 |
return xs |
147 | 145 |
|
148 | 146 |
def get_largest_interval(self): |
149 | 147 |
xs = sorted(x for x, y in self.data.items() if y is not None) |
150 |
- |
|
151 | 148 |
if len(xs) < 2: |
152 | 149 |
return np.inf |
153 | 150 |
else: |
154 |
- self.largest_interval = np.diff(xs).max() |
|
155 |
- return self.largest_interval |
|
151 |
+ return np.diff(xs).max() |
|
156 | 152 |
|
157 | 153 |
def interpolate(self): |
158 | 154 |
xdata = [] |
... | ... |
@@ -189,40 +185,51 @@ class Learner1D(object): |
189 | 185 |
self.losses = {} |
190 | 186 |
for x, (x_left, x_right) in self.neighbors.items(): |
191 | 187 |
if x_left is not None: |
192 |
- self.losses[(x_left, x)] = self.loss(x_left, x) |
|
188 |
+ self.losses[(x_left, x)] = self.interval_loss(x_left, x) |
|
193 | 189 |
if x_right is not None: |
194 |
- self.losses[x, x_right] = self.loss(x, x_right) |
|
190 |
+ self.losses[x, x_right] = self.interval_loss(x, x_right) |
|
195 | 191 |
try: |
196 | 192 |
del self.losses[x_left, x_right] |
197 | 193 |
except KeyError: |
198 | 194 |
pass |
199 | 195 |
|
200 |
- def get_done(self): |
|
201 |
- done = {x: y for x, y in self.data.items() if y is not None} |
|
202 |
- return done |
|
203 | 196 |
|
204 |
- def add_futures(self, xs, ys): |
|
205 |
- """Add concurrent.futures to the self.futures dict.""" |
|
206 |
- try: |
|
207 |
- for x, y in zip(xs, ys): |
|
208 |
- self.futures[x] = y |
|
209 |
- except TypeError: |
|
210 |
- self.futures[xs] = ys |
|
211 |
- |
|
212 |
- def done_callback(self, n, tol): |
|
213 |
- @synchronized |
|
214 |
- def wrapped(future): |
|
215 |
- x, y = future.result() |
|
216 |
- self.futures.pop(x) |
|
217 |
- return self.add_data(x, y) |
|
218 |
- return wrapped |
|
219 |
- |
|
220 |
- def map(self, func, xs, n=1, tol=0.01): |
|
221 |
- ys = self.client.map(add_arg(func), xs) |
|
222 |
- for y in ys: |
|
223 |
- y.add_done_callback(self.done_callback(tol, n)) |
|
224 |
- self.add_futures(xs, ys) |
|
225 |
- |
|
226 |
- def initialize(self, func, xmin, xmax): |
|
227 |
- self.map(func, [xmin, xmax]) |
|
228 |
- self.add_data([xmin, xmax], [None, None]) |
|
197 |
# We try not to depend on API that is specific to any particular
# asynchronous framework, so we roll our own utility functions; only
# 'add_done_callback' is required of the futures we are given.

async def any_complete(futures):
    """Block until at least one future in *futures* is done.

    Returns the list of all futures that are complete at that moment
    (several may finish before the event loop wakes us up).

    NOTE(review): done-callbacks of executor futures may fire in a
    worker thread; resolving the trigger future from another thread is
    not strictly thread-safe — same caveat as the original code.
    """
    import asyncio  # local import: keeps the module's imports unchanged

    trigger = asyncio.get_running_loop().create_future()

    def _wake(_finished):
        # More than one future can complete; only the first call may
        # resolve the trigger, otherwise set_result would raise
        # InvalidStateError on an already-resolved future.
        if not trigger.done():
            trigger.set_result(None)

    for future in futures:
        future.add_done_callback(_wake)
    await trigger
    return [future for future in futures if future.done()]
|
207 |
+ |
|
208 |
async def run(f, executor, learner, goal, ncores=None):
    """Drive *learner* by evaluating *f* on *executor* until *goal* holds.

    Parameters
    ----------
    f : callable
        Function of a single point, evaluated remotely via
        ``executor.submit(f, x)``.
    executor : executor-like
        Anything with a ``submit`` method returning futures
        (``concurrent.futures`` executors, ``distributed.Client``, ...).
    learner : learner object
        Must provide ``choose_points(n)`` and ``add_point(x, y)``.
    goal : callable
        Called with the learner; returns True when learning should stop.
    ncores : int or None
        Number of concurrent evaluations.  Defaults to the CPU count,
        resolved at call time rather than at import time.
    """
    if ncores is None:
        ncores = multiprocessing.cpu_count()

    xs = dict()
    done = [None] * ncores

    while not goal(learner):
        # Launch tasks to replace the ones that completed
        # on the last iteration.
        for x in learner.choose_points(len(done)):
            xs[executor.submit(f, x)] = x

        # Collect results and add them to the learner.
        futures = list(xs.keys())
        await any_complete(futures)
        done = [fut for fut in futures if fut.done()]
        for fut in done:
            x = xs.pop(fut)
            # Need to explicitly await the future (even though we know
            # the result is there) to be compatible with Dask, whose
            # futures' 'result' method returns a future itself.
            y = await fut
            learner.add_point(x, y)

    # Cancel any outstanding tasks.
    for fut in xs.keys():
        fut.cancel()
    # XXX: we should introduce an API for removing data points, and remove all
    #      the data points with a 'None' value from the learner, or add a
    #      method to simply remove all "unfinished" points from the learner.