{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# First we start with some magic ..." ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Your erp5_url is http://10.0.180.166:2222/erp5/Base_executeJupyter. \n", "Please proceed" ] } ], "source": [ "%erp5_url http://10.0.180.166:2222/erp5/Base_executeJupyter" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Your notebook_set_reference is NB-OTHERZ. \n", "Please proceed" ] } ], "source": [ "%notebook_set_reference NB-OTHERZ" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Your erp5_user is zope. \n", "Please proceed" ] } ], "source": [ "%erp5_user zope" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Your erp5_password is insecure. 
\n", "Please proceed" ] } ], "source": [ "%erp5_password insecure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## \"context\" gives you the connection to Wendelin" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Get the ERP5 object\n", "context" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get a Data Stream by its ID" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Get the Data Stream the wav file was uploaded to\n", "# \"209\" is the object's ID in ERP5\n", "context.data_stream_module[\"209\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Let's search by reference in the Catalog instead" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Query the catalog for the Data Stream with reference \"wavdemo\"\n", "result = context.portal_catalog.getResultValue(\n", " portal_type=\"Data Stream\", \n", " reference='wavdemo')\n", "\n", "# We don't receive the object directly, which is why we call\n", "# getObject() here.\n", "data_stream = result.getObject()\n", "\n", "data_stream" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## \"On core\" vs \"Out-of-Core\"" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Total is 10339336\n", "\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# BAD BAD BAD BAD!!\n", "# Not out of core: getData() returns all the data as one string, so you should avoid doing this.\n", "datastream_as_string = data_stream.getData()\n", "\n", "print \"Total is %s\" % 
len(datastream_as_string)\n", "print type(datastream_as_string)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Total is 10339336\n", "\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Now we try it out of core!\n", "stream = data_stream.data \n", "\n", "l = 0\n", "for chunk in stream.iterate(): # out of core\n", " l += len(chunk)\n", "print \"Total is %s\" % l\n", "print type(stream)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## A few needed imports" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the libraries we will use later\n", "import matplotlib.pyplot as plt\n", "from scipy.fftpack import fft\n", "\n", "# Also import scipy to read the audio file\n", "import scipy\n", "import scipy.io\n", "from scipy.io.wavfile import read" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Calculate a simple FFT" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "#############################################################\n", "# Create an class for wrapper the file api\n", "# So we can pass a out-of-core objects that behave like average file.\n", "# This also don't guarantee that \"scipy.io.wavfile.read\" will try to\n", "# keep things on core.\n", "#############################################################\n", "\n", "class BigFileReader:\n", "\n", " def __init__(self, bigfile):\n", " self.bigfile = bigfile\n", " self.pos = 0\n", "\n", "\n", " def tell(self):\n", " return self.pos\n", "\n", " def seek(self, pos): # TODO whence\n", " # TODO check for out of range\n", " self.pos = pos\n", "\n", " def read(self, n):\n", " chunkv = []\n", " for chunk in self.bigfile.iterate(self.pos, n):\n", " chunkv.append(chunk)\n", " data = ''.join(chunkv)\n", " self.pos += len(data)\n", " return data\n", "\n", "\n", "# Simple call read()\n", "fs, data = read(BigFileReader(data_stream.data)) \n", "\n", "# Get only channel\n", "array = data.T[0] # not out of core\n", "\n", "\n", "###############################################################\n", "# Create and save the array to wendelin to make it out-of-core\n", "# Using this persistent API you can save and append little by little \n", "# one array that continuously grow.\n", "\n", "out_of_core_array = context.data_array_module.newContent(\n", " array=array, \n", " portal_type=\"Data Array\", \n", " title=\"pydata-wav2\")\n", "\n", "cmplx = fft(out_of_core_array.getArray()) \n", "spectrum = abs(cmplx[:(len(cmplx)/2)-1]) # not out of core\n", "\n", "##### Also save spectrum array.\n", "# Save the spectrum array to make it now out of core\n", "out_of_core_spectrum_array = context.data_array_module.newContent(\n", " array=spectrum, \n", " portal_type=\"Data Array\", \n", " title=\"pydata-spectrum2\")\n", "\n", "# Let us just Plot it as usual\n", "figure = plt.figure() \n", "\n", "ax1 = figure.add_subplot(211)\n", 
"ax2 = figure.add_subplot(212)\n", "\n", "ax1.plot(out_of_core_array.getArray()) \n", "ax2.plot(out_of_core_spectrum_array.getArray())\n", "\n", "# figure.show() don't present inline rendering....\n", "# So we use instead:\n", "context.Base_renderAsHtml(plt)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## You can save the out put image for later too." ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# now quick save on image_module un a dummy way.\n", "figure.savefig(\"/tmp/somenamec.png\")\n", "\n", "context.image_module.newContent(title=\"plot\", portal_type=\"Image\", data=open(\"/tmp/somenamec.png\").read())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Replot using the array" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# We can recover an array saved into and re-replot the chart\n", "# Query is assincronous, we must wait the object be catalogued\n", "# So it can take a while to work in this example.\n", "array_to_replot = context.portal_catalog.getResultValue(\n", " title=\"pydata-wav2\",\n", " portal_type=\"Data Array\").getObject()\n", "\n", "figure = plt.figure() \n", "\n", "ax1 = figure.add_subplot(211)\n", "\n", "ax1.plot(array_to_replot.getArray()) \n", "\n", "# figure.show() don't present inline rendering....\n", "# So we use instead:\n", "context.Base_renderAsHtml(plt)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "ERP5", "language": "python", "name": "erp5" }, "language_info": { "mimetype": "text/plain", "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }