{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pystencils.session import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Tutorial 02: Basic Kernel generation with *pystencils*\n", "\n", "Now that you have an [overview of pystencils](01_tutorial_getting_started.ipynb), \n", "this tutorial shows in more detail how to formulate, optimize and run stencil kernels.\n", "\n", "## 1) Kernel Definition\n", "\n", "### a) Defining kernels with assignment lists and the `kernel` decorator \n", "\n", "*pystencils* gets a symbolic formulation of the kernel. This can be either an `Assignment` or a sequence of `Assignment`s that follow a set of restrictions. \n", "\n", "Lets first create a kernel that consists of multiple assignments:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "src_arr = np.zeros([20, 30])\n", "dst_arr = np.zeros_like(src_arr)\n", "\n", "dst, src = ps.fields(dst=dst_arr, src=src_arr)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxkAAAAnCAYAAABje4W/AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAULUlEQVR4Ae2d67XdtBLHd846BSShggsd5EIFhA4CqSDQASw+hW8s6CBQQRI6gFtBAh1ABwmng9z/z8fykbXtbUmW36O1vPXw6PUfeTyjh/e958+fPzmdTq91+e6bH3744Rc/wcLTISCsf1Lp7+oaPpH/ndJuiMt/JO+prkcKf6HrY4W/08X9vxU/45PS/PKge6u0v+SbCxAIsDLsA3wsul0ESo7toCyTKReGRYCVyZQLWNktQ8AQ2AcCknsv1JOvg948uKoTfhPBPe86U1yDjBYthIAw/11FvZT/M5fCGHwYCc49VTpGxUk+TMQA/Ka+6dNx/2Ndf+veG/muvP8q3qKr8x7eE0aG/eFHwT4BKDW2VY7JlIQhUgp3qjTsE4A3UkPAEFgUAckrdNPGjlBjqolyZ2Qs2rijVs5LRH1/LN9fZcCg+BNM6vsYDbiHuqCtDA6F7+sKjUGUZgzG3+Q795UCpLecaJ7U5bfSS0VU9relypqinLrvu8Q+B6+18yunT0fNU3hsm0yJHEiFcafW1WAfCUGLzGRKCw6LGAKHROD6kL1eWacljHmZsILxh8JfeM3DkHhVx9k25VYwTqJrwtxXnFUOjJYfiTun9Acu7HylsaT1j/x/vDTqwiDBGmX1Y9CJzq2QsNXrI8WdAXRSmJWUF7pa7RwsdGYCtW9x7HO6rHYfkl85WB01z9ixrfxjZUqvfOjjyaVxrXsmU+LleTL2fTy5lL4Hfl3qn90zBAyBcQhcjctuuccgIAGNks8Wqce6eKFzxsK9HE4K/6XrRhf3cc7guI21fzEQMFJu2sntmO6jnHK24w93R2EMGAwP7nENOuWB/p18zo/Qh9/lo7D77rXSmv74N5YOq12rwD4HB7X9cPzKwemoeQqO7TEyJUY+tFgUOa5NpnioCbMueZ6MvVdkdHDr/IruqBEaAoZANgJmZGRDVyajBDVK+j2VxgoGiv+39YvDr+BLRVh5uPETXbim52UTKvmOxPdR+jFoGqf8GDMYCs3KRnOzP0A5vqFCmO1HrKZUTmGXFmW4uHxz+Wrf4tjn9FXtPiS/crA6ap6xY1v5eWazZYryDsqHkDcx41o0JlPawJ3J8xzs20XGxfbAr7ieGpUhYAjkImBGRi5yI/NJQP+ky523OCkcbpXya2Alwz9n4d8j702d4PzwvlsJIR1DoDEOWoSREeXHkEABCY0S6meW3XeVUuAnLB1W+zeLfQ52W+dXTp+PmqfU2FY5TpY4vwWp7vfKlMTx1io3MmIy5Q6oljyfAfu7muNDq+NXfNON0hAwBMYgYEbGGPTG5eUlHa4o8DnhX7wX/Kl+aaDUD61ScAicFY+WU35mut6TWJfVqTS0Mg1H7veQUM/D4N4bxf1zJsHtRaJbxj4HsK3zK6fPR81TcmznypSU8ZbDJ5MpQq1Hnk+N/V74ldMPy2MIGAKJCFwn0ht5OQSeqShmodxZCErm/EV4UBoDg61SF1cfyKeLGXqMCrdCwgvnR6XdyMdRVmVwVLHyPxgY4UuOuql3TW6P2OfguxV+5fTtqHmKje0JZErXeMvhk8mUW9RS5Hkp7PfCr5x+WB5DwBBIRMCMjETASpHrBc5na7kuOtFhXPCHToNOtM3XnXqIMQB4QY91fWVQfriFakqjJqsfa8JebXmkTmAYDjmMyBDboTzu/qb55Tph/jACpce2ysuRKSnjbbhT5xQmU24xQd6GWIdxhx60vfJjYjm0On45UMw3BAyBaRHYnJFRC8NfBQuzOK8UD2f+p0Vs26XzkuFlM8oJc3cIHR6EhlIYh6b35TaqIdvK3Im9sASvSbeTGb+GB4owwtgzuTIMVUhxNq4Tx1tYXkzcZMotSsWwn1gOLcYve65jHqd90hjv8/laErur/GYskxNhqIv/cUBZHjqnsEwjV1or2KlpCPwkp3z3dYWz7fwfB/u/K6f7hPkjwNCgoD63feuW+IC/wiUL+xyoVJfxKxE4+KPL5EoGbsrSJVMuyoeeMRpbu8kUIcWYXQD7WB75dIvxC4x0jXqulR95yuflu8a5308LrwgB8Ws071fUnVmbUhK7q1lbXqgyAeCU24vnFApVt7diWIVoCUviujAivtdVhRX3/7Eb+q+VxsH0yinMJ2/5Az7OgUDLoXP2g4eOWfreL2OFxDuPn2Gf01/hbfzKAW4gj3A1uTKAUc/ts3EtLIfkQ5dMGRrXrnqTKQ6J2/N6oTxPxv6uuPhQhBxyhS3KrwLPNXIBjN+7Dpm/DQQK8H6wo9Shi7O1u3KlsLveKCoILbdlp3gX6gHzVj4zRXtzGBNsMWv2WqufrD408bDDNQ4P5DdGBjSK9+ap71dbs0QXrm6EVRwlfoZ9TsdrPHux133GrfErHVyTK+mYkaNzXF+SD11jdGhcU5FoTKYAxJ0rgv1dcfGhDfFr7HM9Nn88qBNSil8ownvVa/qQm4N3yKRKLvU1YqPpRbC72mjnmVmYchUDJXyXirEEDbix/Nua/RoaB6LPwdy9AIeKP8T9XOxzwDF+5aB2yhnjKRXtUq7kjmsboylDp5t2Zuy7G3E5dQ3vgLHPNfn3sBq/S/lzefhNLtMHqt/07bHPTdX561gIJMyYxf5M17s6Dw8dy0T8rwOHJp/qeqTwF7pQYJlpvdHFXka+t964+j7CB0We8ijridJZ5m25DlqUZOr7sUVYKKL6AHayVZJCzRxVjProPnfrf952qEy2MkQbdqJl1uS1/F0aa0Ng9d3PxL6vuEvpm+CX8DC5comLG7mXOa43MUbXzoI5sM/BQO2a/R2gOtE9fN3ior4gemidTsNXHL9T2o0u9ACUcmaoKRPd5rX8N/LP9BSlr9rV/ZlFr1Fdh5DppRl+CTfq0v1N6tlXMUCpczyI38vnAeQBwyj4U5f787en3FP8JJ8/mEOJ5QHFkbdxSufhJS9lufLIw97+1pJTD60rL1rhbSofCNT1U/6zAdLN3wb7lE6IvmUoRuTly1/FeRRR7+pJUrHP6dAW+KU2mlzJYe5K86SO6y2M0ZVCfdasGbA/qzMiYdZ3gDDo0i169QXR8+GYl/J/5lIYI6KiV/wPXeg31R/mKszk6Ze6tmhgoFfNotcIn0PIdOFZ1EXgRn2b1LMHjQx1HquUg72f00uc0pidZuD+rjBWvvt60EOFWd1wCiw0jXKqdOI8yBgX/gw34ZaVPUDLVwNulKeYU3lYif/T9ax02cUaWbigKfs5ZdmFYVikuLXhM3d7VJ/JlUVG3rSVTjmOpix7WlTmKX1t+MzZHtV1Sbc40xdEj96CruKfu0RvYQLUd+xL92n8e1VYZbALg/ImcSrb/whLUh3KO5teAw5q3O51xSQGRBAP4UYRomF8bVLPvo7AgG/H82nSRqlXmIGLY0WDh/sVETnS3QoGwDTh6u6tNc15gMbwqNOZgaAs32ERx9L6+ZLDag918XDQR7b4yMtyGE9hP7IKOnom4ci4wujDj3XMNF18IcQWZHSTI2ByJR5ikyvxWPVSmkzphWYPN7L0BY0JVjOY+GTlAoMidF26SUOjPGwJY4K0mTRVmLbg2IbFFxjdpGuV2PVzKY/usdLyQleoT3UV1aTVZc6p1+xapsMDgct4CN1DEnr4g4HrdvyE+Vx8CDfoNqtnX7tedvkCB6OBzr0M7gM0+xb9B8uB7wyOIEsV/Uq/rS00KoPysdJ42H2XQuvnSw6rDbzEsRJ5iD9X+Ca5kIQMKv9DAvkuSYXBvUsdq3nA982LOsM+D84hfqWUqrJMrqQAFklrY7t60ffKFeGDXDeZEjmepiYrKVPU1iR9QXVjGLD1CSW80l2I62oMAoX7dJMKmvp+tY2qStCP0jA63smvtlXJf6yLHR9dBowrJyYPk59sKW/a5+rs86HVNYteo3p2L9PVx04jT+ms4HC+rOJ5Hz+60mNxE101eSp/c3r2dVfHvbRP63A4O8wD0zIWFMdaa215qvNWnsDhgeXqMiZOut+Ul0Lr1zEmrDo5wP5WZTB7XvxF5LdN9fS+CH06C5dHwLAvj2lGiSZXMkAbymJjewihae4b7tPgGluq8I/WLfwylQ+FHUUcxY3wtwr7H0NxxkdLNxHNTV0OKxbVmY06jkeav7WcFZJqW7n8ZlLWo4/Ko7yUU51ble/qD4o5j4p2Lr3mMDL9HOVRKSm4UdHm9OwhI6NCTwO1eTgU5oHm4QutOtJitgo1ZVWFn07NnkeV/URpGDTv63uDtK5t8pmRwPGFCB50yqrCjkbxi050LG2xLJk0Y3CxULtpCOwMAT0fbGOMftH1dV9lNM83ZYpuVXJFbaI9zNB9pnCz5K0ws4pNvK9/Ll20JlccGOYbAj0I6DkZK1caeVJXcaZbqA4mQiv9QD76wUk+RgRKfLjDoMlf06FTUIebdGWVotGDFGZHBnIsbAeyEjkSpp8S89DOWD1LpLdOdcwmf1RX00eFVyfTQUTtGq0r1tAW8yJxo75Y/jd8qBvZjGXVlaVnK1/W+/BqAKVq65MK5+Fxjv1juOoegfo+NOEqBbcrJxoeNB6Spiyl0VkePlYQcCw9utWQKFoyKQ+DhnMjLFc91IWhQPhTXZQf7ZSP8yK0y5whYAgECOj54MXxr/zwgGRAeTG6erlS95PPViJHONhJv0/ykScI2ySnfCZXkhAz4iMhUD9fWXJFeaN1ixpTnt/WCoTK4J3PrD9lOYcuUSlrdfvYElMZGPLRY3xa8lQygkDgmDSlrC6XkueNCkBZTHZq79TyZ/UyHdCEQzFdMZkJ3RmicKvbzphbRM8WbozTrPfhdXe/b1NVMOcuGNTM7DPAP9LFA+MMAQUrR8dJwzC45Jj9+1V0MJqyXupy5ZPmP/gptP75EMBwxg5finKzDkqOdhgsjyP6E12gERoCe0BAz4R71ph15LpJ7Vddhnvu1ypXUApQOlA+fHmHguImRVK7bnIlFTGjPwQCtUxAoc+VKyn6wjPVw/udsxBOyUeuNasSNehsoWLCErqTfCYcnEPnQReKccgSV08MPTRdeZC11JvrJpM/wgb81i7TwY12ull+eDJWV8zlRZUvATfo4f1Senb2+/De8+fPeYk+VWejlv9FxwzmW/nhA6nk5Z3axZLnJ/LdQFq+UdYCQ2BnCOj5Qm6wxYAX32inclYpV9QuvjyDYK8OXNbxTf4h12gmWQGGwMQI6PkqKlemam7dzpbepDSUQA5aP1C4kYsKo5Pw5cOz7eQpeUT7SOUw4Ysyv3qndq5Spjvg1L4iuqLKYcxmHfx2bfH9NeOmtkW/D0X7r/r1n96VDBEwoL/XxWpA9cDUaaQzE7A6V7evsVQVh/FmbIhTYCGvUpTkf6qLWRgOveWs9CirubEIbJwnnFM4e2kOYaI8W5MrtNdfYWUl48ehfh7l/sbH8C7ZtHGeZMmVBRhZrbr49Qp3t+LJuzZ8r4bxKmtinmom269zDWH1YWsy/VS3uZSuiH7cGJWxPNkibupb8vvw6gIgGBi8UCsnQO4rwHmMVSmmaheGBBYT7qku36hwSnV186g/YKS+V9/ZVph/Y+frWQi9PxVueHxUfJbo95Z5orYjC95l4rYJuRL07T3x+lm5L79TYQjy7D4qHEyurIzLW+aJ2j5GrszKiVoGMP5DxwRE804VHWG2KVV6CX3UxcFz313M4xFSHysla3ObkOnCfRJdUeWyoh/+91sMjzaBW0dHkt6HVx0FuKQXCnDG4uv6oeDBwMDw9yU62sV8tYeH95V8znSwvw6G8yk64mZkCAQ5eNfa3iZ8wAbrm+Uvc/MjsGWeIBNy5cAm5Io3HHhOMMzZl03YDIw7cLY8hu96sa/QlnkyRq4swUVWLlqGRi0X+QM+tjWhg7AN3d/5AT39ZItN5SLyOFK2SSWvHrvME/qbkOnCeW264iZwC8ZN8vsw+UxGUKFFN4CAHi5WepglCveKYmAg7OwMi0CY0xlP5kQ7ry7xiGfmYf1yOslnEoPP1+bMWuU1YsW5hIPJlZXxx3gyH0OENasUfBETxSvJKQ9frIs2GESLLEL2bOI8RhIYRrwJBOoxGP0+FH11JuPSSsYmOm6NjEKAFSm3X7QrAwLM3LwIGE/mxTunNraHVjOOEpgoFAhYMzDukLQxfIfFWkLGk5k4IVkA1mx/aq1mDFVfyxLyprizFaqUzEZrCBRAIOt9eF2gYiti5QhIqPV9OYxDPCfdty0gM/PQeDIz4HnVsZzNPl62PbAFgrNM5moEbAyvbygYT+blifBmKyVbo/x/Ch9qBDIl2sgQLVs1WcXwz5sO1WH3DYHSCGS9D83IKM2GjZQngYWBwQxM8lLvRrq4uWYaT9bFMvEjWhFYV8uXa42N4eWw76vZeNKHTJl04ctZ1ejdAKJNXQ3lzOlNmdZaKYZAHgIag1nvQ9sulYf3HnJxHoOvXuQe4N0DBmvrg/FkbRyx9qQiYGM4FbHp6Y0nE2M8pREwZdkTw2LFGwInMzIOOAgktFj24oxG3zaqA6KybJeNJ8vib7WPR8DG8HgMS5dgPCmNqJVnCBgCKQiYkZGC1g5o9dJhfycHWO0rFSvhp/FkJYywZmQjYGM4G7rJMhpPJoPWCjYEDIFIBMzIiARqD2R66bjP1TYrGErjENrHe+jfFvtgPNki16zNPgI2hn001hE2nqyDD9YKQ+DoCLiD33yz+YMHBl9MSD2c5GW34NoQED856P2Z/PCgN4aH8XoBhhlPFgDdqiyKgI3honAWKcx4UgRGK8QQMAQSEJDcYRs+O2Va7t6HD75t0bpnkZ0gIOazUlH9G3pHlx7r/icd6ZY0IQLGkwnBtaJnQcDG8CwwJ1ViPEmCy4gNAUNgYgTcSsbE1VjxCyOAgYGhcWZlKs3+I2MZ5hhPlsHdai2HgI3hcliWKsl4UgpJK8cQMARGI/B/iUsndZCxSrQAAAAASUVORK5CYII=\n", "text/latex": [ "$\\displaystyle \\left[ grad_{x} \\leftarrow \\frac{{src}_{(1,0)}}{2} - \\frac{{src}_{(-1,0)}}{2}, \\ grad_{y} \\leftarrow \\frac{{src}_{(0,1)}}{2} - \\frac{{src}_{(0,-1)}}{2}, \\ {dst}_{(0,0)} \\leftarrow grad_{x} + grad_{y}\\right]$" ], "text/plain": [ "⎡ src_E src_W src_N src_S ⎤\n", "⎢gradₓ := ───── - ─────, grad_y := ───── - ─────, dst_C := gradₓ + grad_y⎥\n", "⎣ 2 2 2 2 ⎦" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grad_x, grad_y = sp.symbols(\"grad_x, grad_y\")\n", "\n", "symbolic_description = [\n", " ps.Assignment(grad_x, (src[1, 0] - src[-1, 0]) / 2),\n", " ps.Assignment(grad_y, (src[0, 1] - src[0, -1]) / 2),\n", " ps.Assignment(dst[0, 0], grad_x + grad_y),\n", "]\n", "kernel = ps.create_kernel(symbolic_description)\n", "symbolic_description" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We created subexpressions, using standard sympy symbols on the left hand side, to split the kernel into multiple assignments. Defining a kernel using a list of `Assignment`s is quite tedious and hard to read. \n", "To simplify the formulation of a kernel, *pystencils* offers the `kernel` decorator, that transforms a normal Python function with `@=` assignments into an assignment list that can be passed to `create_kernel`. " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxkAAAAnCAYAAABje4W/AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAULUlEQVR4Ae2d67XdtBLHd846BSShggsd5EIFhA4CqSDQASw+hW8s6CBQQRI6gFtBAh1ABwmng9z/z8fykbXtbUmW36O1vPXw6PUfeTyjh/e958+fPzmdTq91+e6bH3744Rc/wcLTISCsf1Lp7+oaPpH/ndJuiMt/JO+prkcKf6HrY4W/08X9vxU/45PS/PKge6u0v+SbCxAIsDLsA3wsul0ESo7toCyTKReGRYCVyZQLWNktQ8AQ2AcCknsv1JOvg948uKoTfhPBPe86U1yDjBYthIAw/11FvZT/M5fCGHwYCc49VTpGxUk+TMQA/Ka+6dNx/2Ndf+veG/muvP8q3qKr8x7eE0aG/eFHwT4BKDW2VY7JlIQhUgp3qjTsE4A3UkPAEFgUAckrdNPGjlBjqolyZ2Qs2rijVs5LRH1/LN9fZcCg+BNM6vsYDbiHuqCtDA6F7+sKjUGUZgzG3+Q795UCpLecaJ7U5bfSS0VU9relypqinLrvu8Q+B6+18yunT0fNU3hsm0yJHEiFcafW1WAfCUGLzGRKCw6LGAKHROD6kL1eWacljHmZsILxh8JfeM3DkHhVx9k25VYwTqJrwtxXnFUOjJYfiTun9Acu7HylsaT1j/x/vDTqwiDBGmX1Y9CJzq2QsNXrI8WdAXRSmJWUF7pa7RwsdGYCtW9x7HO6rHYfkl85WB01z9ixrfxjZUqvfOjjyaVxrXsmU+LleTL2fTy5lL4Hfl3qn90zBAyBcQhcjctuuccgIAGNks8Wqce6eKFzxsK9HE4K/6XrRhf3cc7guI21fzEQMFJu2sntmO6jnHK24w93R2EMGAwP7nENOuWB/p18zo/Qh9/lo7D77rXSmv74N5YOq12rwD4HB7X9cPzKwemoeQqO7TEyJUY+tFgUOa5NpnioCbMueZ6MvVdkdHDr/IruqBEaAoZANgJmZGRDVyajBDVK+j2VxgoGiv+39YvDr+BLRVh5uPETXbim52UTKvmOxPdR+jFoGqf8GDMYCs3KRnOzP0A5vqFCmO1HrKZUTmGXFmW4uHxz+Wrf4tjn9FXtPiS/crA6ap6xY1v5eWazZYryDsqHkDcx41o0JlPawJ3J8xzs20XGxfbAr7ieGpUhYAjkImBGRi5yI/NJQP+ky523OCkcbpXya2Alwz9n4d8j702d4PzwvlsJIR1DoDEOWoSREeXHkEABCY0S6meW3XeVUuAnLB1W+zeLfQ52W+dXTp+PmqfU2FY5TpY4vwWp7vfKlMTx1io3MmIy5Q6oljyfAfu7muNDq+NXfNON0hAwBMYgYEbGGPTG5eUlHa4o8DnhX7wX/Kl+aaDUD61ScAicFY+WU35mut6TWJfVqTS0Mg1H7veQUM/D4N4bxf1zJsHtRaJbxj4HsK3zK6fPR81TcmznypSU8ZbDJ5MpQq1Hnk+N/V74ldMPy2MIGAKJCFwn0ht5OQSeqShmodxZCErm/EV4UBoDg61SF1cfyKeLGXqMCrdCwgvnR6XdyMdRVmVwVLHyPxgY4UuOuql3TW6P2OfguxV+5fTtqHmKje0JZErXeMvhk8mUW9RS5Hkp7PfCr5x+WB5DwBBIRMCMjETASpHrBc5na7kuOtFhXPCHToNOtM3XnXqIMQB4QY91fWVQfriFakqjJqsfa8JebXmkTmAYDjmMyBDboTzu/qb55Tph/jACpce2ysuRKSnjbbhT5xQmU24xQd6GWIdxhx60vfJjYjm0On45UMw3BAyBaRHYnJFRC8NfBQuzOK8UD2f+p0Vs26XzkuFlM8oJc3cIHR6EhlIYh6b35TaqIdvK3Im9sASvSbeTGb+GB4owwtgzuTIMVUhxNq4Tx1tYXkzcZMotSsWwn1gOLcYve65jHqd90hjv8/laErur/GYskxNhqIv/cUBZHjqnsEwjV1or2KlpCPwkp3z3dYWz7fwfB/u/K6f7hPkjwNCgoD63feuW+IC/wiUL+xyoVJfxKxE4+KPL5EoGbsrSJVMuyoeeMRpbu8kUIcWYXQD7WB75dIvxC4x0jXqulR95yuflu8a5308LrwgB8Ws071fUnVmbUhK7q1lbXqgyAeCU24vnFApVt7diWIVoCUviujAivtdVhRX3/7Eb+q+VxsH0yinMJ2/5Az7OgUDLoXP2g4eOWfreL2OFxDuPn2Gf01/hbfzKAW4gj3A1uTKAUc/ts3EtLIfkQ5dMGRrXrnqTKQ6J2/N6oTxPxv6uuPhQhBxyhS3KrwLPNXIBjN+7Dpm/DQQK8H6wo9Shi7O1u3KlsLveKCoILbdlp3gX6gHzVj4zRXtzGBNsMWv2WqufrD408bDDNQ4P5DdGBjSK9+ap71dbs0QXrm6EVRwlfoZ9TsdrPHux133GrfErHVyTK+mYkaNzXF+SD11jdGhcU5FoTKYAxJ0rgv1dcfGhDfFr7HM9Nn88qBNSil8ownvVa/qQm4N3yKRKLvU1YqPpRbC72mjnmVmYchUDJXyXirEEDbix/Nua/RoaB6LPwdy9AIeKP8T9XOxzwDF+5aB2yhnjKRXtUq7kjmsboylDp5t2Zuy7G3E5dQ3vgLHPNfn3sBq/S/lzefhNLtMHqt/07bHPTdX561gIJMyYxf5M17s6Dw8dy0T8rwOHJp/qeqTwF7pQYJlpvdHFXka+t964+j7CB0We8ijridJZ5m25DlqUZOr7sUVYKKL6AHayVZJCzRxVjProPnfrf952qEy2MkQbdqJl1uS1/F0aa0Ng9d3PxL6vuEvpm+CX8DC5comLG7mXOa43MUbXzoI5sM/BQO2a/R2gOtE9fN3ior4gemidTsNXHL9T2o0u9ACUcmaoKRPd5rX8N/LP9BSlr9rV/ZlFr1Fdh5DppRl+CTfq0v1N6tlXMUCpczyI38vnAeQBwyj4U5f787en3FP8JJ8/mEOJ5QHFkbdxSufhJS9lufLIw97+1pJTD60rL1rhbSofCNT1U/6zAdLN3wb7lE6IvmUoRuTly1/FeRRR7+pJUrHP6dAW+KU2mlzJYe5K86SO6y2M0ZVCfdasGbA/qzMiYdZ3gDDo0i169QXR8+GYl/J/5lIYI6KiV/wPXeg31R/mKszk6Ze6tmhgoFfNotcIn0PIdOFZ1EXgRn2b1LMHjQx1HquUg72f00uc0pidZuD+rjBWvvt60EOFWd1wCiw0jXKqdOI8yBgX/gw34ZaVPUDLVwNulKeYU3lYif/T9ax02cUaWbigKfs5ZdmFYVikuLXhM3d7VJ/JlUVG3rSVTjmOpix7WlTmKX1t+MzZHtV1Sbc40xdEj96CruKfu0RvYQLUd+xL92n8e1VYZbALg/ImcSrb/whLUh3KO5teAw5q3O51xSQGRBAP4UYRomF8bVLPvo7AgG/H82nSRqlXmIGLY0WDh/sVETnS3QoGwDTh6u6tNc15gMbwqNOZgaAs32ERx9L6+ZLDag918XDQR7b4yMtyGE9hP7IKOnom4ci4wujDj3XMNF18IcQWZHSTI2ByJR5ikyvxWPVSmkzphWYPN7L0BY0JVjOY+GTlAoMidF26SUOjPGwJY4K0mTRVmLbg2IbFFxjdpGuV2PVzKY/usdLyQleoT3UV1aTVZc6p1+xapsMDgct4CN1DEnr4g4HrdvyE+Vx8CDfoNqtnX7tedvkCB6OBzr0M7gM0+xb9B8uB7wyOIEsV/Uq/rS00KoPysdJ42H2XQuvnSw6rDbzEsRJ5iD9X+Ca5kIQMKv9DAvkuSYXBvUsdq3nA982LOsM+D84hfqWUqrJMrqQAFklrY7t60ffKFeGDXDeZEjmepiYrKVPU1iR9QXVjGLD1CSW80l2I62oMAoX7dJMKmvp+tY2qStCP0jA63smvtlXJf6yLHR9dBowrJyYPk59sKW/a5+rs86HVNYteo3p2L9PVx04jT+ms4HC+rOJ5Hz+60mNxE101eSp/c3r2dVfHvbRP63A4O8wD0zIWFMdaa215qvNWnsDhgeXqMiZOut+Ul0Lr1zEmrDo5wP5WZTB7XvxF5LdN9fS+CH06C5dHwLAvj2lGiSZXMkAbymJjewihae4b7tPgGluq8I/WLfwylQ+FHUUcxY3wtwr7H0NxxkdLNxHNTV0OKxbVmY06jkeav7WcFZJqW7n8ZlLWo4/Ko7yUU51ble/qD4o5j4p2Lr3mMDL9HOVRKSm4UdHm9OwhI6NCTwO1eTgU5oHm4QutOtJitgo1ZVWFn07NnkeV/URpGDTv63uDtK5t8pmRwPGFCB50yqrCjkbxi050LG2xLJk0Y3CxULtpCOwMAT0fbGOMftH1dV9lNM83ZYpuVXJFbaI9zNB9pnCz5K0ws4pNvK9/Ll20JlccGOYbAj0I6DkZK1caeVJXcaZbqA4mQiv9QD76wUk+RgRKfLjDoMlf06FTUIebdGWVotGDFGZHBnIsbAeyEjkSpp8S89DOWD1LpLdOdcwmf1RX00eFVyfTQUTtGq0r1tAW8yJxo75Y/jd8qBvZjGXVlaVnK1/W+/BqAKVq65MK5+Fxjv1juOoegfo+NOEqBbcrJxoeNB6Spiyl0VkePlYQcCw9utWQKFoyKQ+DhnMjLFc91IWhQPhTXZQf7ZSP8yK0y5whYAgECOj54MXxr/zwgGRAeTG6erlS95PPViJHONhJv0/ykScI2ySnfCZXkhAz4iMhUD9fWXJFeaN1ixpTnt/WCoTK4J3PrD9lOYcuUSlrdfvYElMZGPLRY3xa8lQygkDgmDSlrC6XkueNCkBZTHZq79TyZ/UyHdCEQzFdMZkJ3RmicKvbzphbRM8WbozTrPfhdXe/b1NVMOcuGNTM7DPAP9LFA+MMAQUrR8dJwzC45Jj9+1V0MJqyXupy5ZPmP/gptP75EMBwxg5finKzDkqOdhgsjyP6E12gERoCe0BAz4R71ph15LpJ7Vddhnvu1ypXUApQOlA+fHmHguImRVK7bnIlFTGjPwQCtUxAoc+VKyn6wjPVw/udsxBOyUeuNasSNehsoWLCErqTfCYcnEPnQReKccgSV08MPTRdeZC11JvrJpM/wgb81i7TwY12ull+eDJWV8zlRZUvATfo4f1Senb2+/De8+fPeYk+VWejlv9FxwzmW/nhA6nk5Z3axZLnJ/LdQFq+UdYCQ2BnCOj5Qm6wxYAX32inclYpV9QuvjyDYK8OXNbxTf4h12gmWQGGwMQI6PkqKlemam7dzpbepDSUQA5aP1C4kYsKo5Pw5cOz7eQpeUT7SOUw4Ysyv3qndq5Spjvg1L4iuqLKYcxmHfx2bfH9NeOmtkW/D0X7r/r1n96VDBEwoL/XxWpA9cDUaaQzE7A6V7evsVQVh/FmbIhTYCGvUpTkf6qLWRgOveWs9CirubEIbJwnnFM4e2kOYaI8W5MrtNdfYWUl48ehfh7l/sbH8C7ZtHGeZMmVBRhZrbr49Qp3t+LJuzZ8r4bxKmtinmom269zDWH1YWsy/VS3uZSuiH7cGJWxPNkibupb8vvw6gIgGBi8UCsnQO4rwHmMVSmmaheGBBYT7qku36hwSnV186g/YKS+V9/ZVph/Y+frWQi9PxVueHxUfJbo95Z5orYjC95l4rYJuRL07T3x+lm5L79TYQjy7D4qHEyurIzLW+aJ2j5GrszKiVoGMP5DxwRE804VHWG2KVV6CX3UxcFz313M4xFSHysla3ObkOnCfRJdUeWyoh/+91sMjzaBW0dHkt6HVx0FuKQXCnDG4uv6oeDBwMDw9yU62sV8tYeH95V8znSwvw6G8yk64mZkCAQ5eNfa3iZ8wAbrm+Uvc/MjsGWeIBNy5cAm5Io3HHhOMMzZl03YDIw7cLY8hu96sa/QlnkyRq4swUVWLlqGRi0X+QM+tjWhg7AN3d/5AT39ZItN5SLyOFK2SSWvHrvME/qbkOnCeW264iZwC8ZN8vsw+UxGUKFFN4CAHi5WepglCveKYmAg7OwMi0CY0xlP5kQ7ry7xiGfmYf1yOslnEoPP1+bMWuU1YsW5hIPJlZXxx3gyH0OENasUfBETxSvJKQ9frIs2GESLLEL2bOI8RhIYRrwJBOoxGP0+FH11JuPSSsYmOm6NjEKAFSm3X7QrAwLM3LwIGE/mxTunNraHVjOOEpgoFAhYMzDukLQxfIfFWkLGk5k4IVkA1mx/aq1mDFVfyxLyprizFaqUzEZrCBRAIOt9eF2gYiti5QhIqPV9OYxDPCfdty0gM/PQeDIz4HnVsZzNPl62PbAFgrNM5moEbAyvbygYT+blifBmKyVbo/x/Ch9qBDIl2sgQLVs1WcXwz5sO1WH3DYHSCGS9D83IKM2GjZQngYWBwQxM8lLvRrq4uWYaT9bFMvEjWhFYV8uXa42N4eWw76vZeNKHTJl04ctZ1ejdAKJNXQ3lzOlNmdZaKYZAHgIag1nvQ9sulYf3HnJxHoOvXuQe4N0DBmvrg/FkbRyx9qQiYGM4FbHp6Y0nE2M8pREwZdkTw2LFGwInMzIOOAgktFj24oxG3zaqA6KybJeNJ8vib7WPR8DG8HgMS5dgPCmNqJVnCBgCKQiYkZGC1g5o9dJhfycHWO0rFSvhp/FkJYywZmQjYGM4G7rJMhpPJoPWCjYEDIFIBMzIiARqD2R66bjP1TYrGErjENrHe+jfFvtgPNki16zNPgI2hn001hE2nqyDD9YKQ+DoCLiD33yz+YMHBl9MSD2c5GW34NoQED856P2Z/PCgN4aH8XoBhhlPFgDdqiyKgI3honAWKcx4UgRGK8QQMAQSEJDcYRs+O2Va7t6HD75t0bpnkZ0gIOazUlH9G3pHlx7r/icd6ZY0IQLGkwnBtaJnQcDG8CwwJ1ViPEmCy4gNAUNgYgTcSsbE1VjxCyOAgYGhcWZlKs3+I2MZ5hhPlsHdai2HgI3hcliWKsl4UgpJK8cQMARGI/B/iUsndZCxSrQAAAAASUVORK5CYII=\n", "text/latex": [ "$\\displaystyle \\left[ grad_{x} \\leftarrow \\frac{{src}_{(1,0)}}{2} - \\frac{{src}_{(-1,0)}}{2}, \\ grad_{y} \\leftarrow \\frac{{src}_{(0,1)}}{2} - \\frac{{src}_{(0,-1)}}{2}, \\ {dst}_{(0,0)} \\leftarrow grad_{x} + grad_{y}\\right]$" ], "text/plain": [ "⎡ src_E src_W src_N src_S ⎤\n", "⎢gradₓ := ───── - ─────, grad_y := ───── - ─────, dst_C := gradₓ + grad_y⎥\n", "⎣ 2 2 2 2 ⎦" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "@ps.kernel\n", "def symbolic_description_using_function():\n", " grad_x @= (src[1, 0] - src[-1, 0]) / 2\n", " grad_y @= (src[0, 1] - src[0, -1]) / 2\n", " dst[0, 0] @= grad_x + grad_y\n", "symbolic_description_using_function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The decorated function can contain any Python code, only the `@=` operator, and the ternary inline `if-else` operator have different meaning. \n", "\n", "### b) Ternary 'if' with `Piecewise`\n", "\n", "The ternary operator maps to `sympy.Piecewise` functions, that can be used to introduce branching into the kernel. Piecewise defined functions must give a value for every input, i.e. there must be a 'otherwise' clause in the end that is indicated by the condition `True`. Piecewise objects are standard sympy terms that can be integrated into bigger expressions:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAA/CAYAAAAyu3fMAAAACXBIWXMAAA7EAAAOxAGVKw4bAAARp0lEQVR4Ae2d7ZXdNBPHL/dsASGp4IEOIFQQ6ACyFRA6gJNv+bYHOoBUkEAHQAUJdEA6IGwHef4/RaMja/0iX9vX9s3MOVq9j8Zj6e/RWL770bt37w5O29HAs2fPPpE0vyq8Ufqb7UjmkrgGpmvgOJ2Fc5hLAwKYJ+L1j8JbhW/n4ut8XANb0cDVVgT50OUQ2HwvHfyo8IvS333o+vDrv0wNfORbqvVvrADmS0nxu8LfSn++vkQugWtgGQ34lmoZvY7l+nPs4NuosZrz9rvSgAPOyrdLFs1nEgFHMdbN3yuL48O7BhbVgAPOouqtYn4dW72oau2NXAM71oA7jde/eVg30Jv30eX/lSXH27h7Clh1f1z+FfsVmgYccEwTO4rjNoyzOp8rfTtGdLXnTRj0r8KnCj+q7Gxgp7H+0pg3Cq8V/lL+KwUHHSnjUkj3s3OO+ZZqJ3dZN/Gewq8KOJifK5hlVH0F6stif6X4B4WflP5B4XelR/OqHjRrqHGCZaP4NxUDlMQAj9OFaED3tneOuYWzkxutG8kCDSePlebMDs7malKffLGHfvBUYNEDYl9VMzu9IWMEx7jGxaryk9Sn63JzPWvmmFs4m7ttiwnE4m57C/ZK5V9qsuBTOQdxitppRQ1wrxXYzvIQmpMG55gDzpzq3jYvDhe2LXbz31Dv9AFoQECDtfxI4VOl/1HAYp6DBueYb6nmUPPGeWhC1Vgv95e6DI3P9u+pQgA15cNYim2LiHw4GvmODMKZjb8qOJMV0496fE04nAHJLxQo/0b1BprKtpPa0B9HOQR//FgsvINi5ON4wmdK48RmHPxb1LMgf1HcIJXl/Gj3WmVtFmSj31YykhWZuUb08L0Cumd7fRPrlKwn9blX0fq+WzgVWrqAJgYmTLIuqpkwXX17yzUZef0NuAAgf5CO+YNiFjuORt6W/RQD35KFhaCYNvSxTz4ACwi/E30DiFHQRerLZyMvFAf+SvOGD8AwulYdi++gGL7fKbbv2fJ21H+iwOLE+W78kK3RTvndULwO9Mp1sdX6WWHsfKiaYw44u5kWiwv6YPER2gdg8f+mCV5aKQAAIASoGLElxN9k7T9W+o71YY2JVY+1Qp/c+oA3IGf1ZlmxaGgbwEdpFl3JH/BifKwBo8dKUN4gtflagfEXIfGeaysU5BM/PhwGeLiWP5UGeOaU/8HVIppwplvTQJvvxmS0J5NtN6x88ViTGTBhQuO4bpDqsIoou1YwsMBCS8CkevJVpLYsIsANayl/IweovIxMkMcsm4PapTT1ymP9IO8NeSOVf2xpi1WGQ5bfNMrlNSsIXT9QnQGbdbsTqw3yAWhYXWblhXbKY2EBCg057zAZWSB+gCmgivXI1pZr+FZxn76r5thxpCzefIcayCYKk7ckK0sLo2ywYN6enn0TGRDIqW9i5+1COi4WzhyxeAAMfDK28A9KA2wcD6AeMvB5n2v+ZeEDWH3yHlSPThsHGlUGAP2rmK0i8nD+6Y5VpPJEqufaw3EGxXafUn1MAAjpesrKmfJdYyf2ksF00tbWyt444CSVXXwC/4kt8Pxi78fMGqd9DeRsQuZyWdraWN4mtuUHYy0GFvlHaohlw3XiJC3HxMeERdLKP7anTy9IqB4CAAC3nChLOhY/0mzf2u5J6Kc6wBBwKnUQ6vmjeuNTXk9qMzYhnmwF2XJiOeFvAzxb9VLwRpa260lz7Fh08OzlaoDtxMOWy8NMD0/5lrpFizSJ2SoxkfMtThhTdWZxIPdJJB74gMw/c1C63E7lfBkv98vkdfS1BWdxWW/yUg6Q5ODCIrynUAIHvEoLTkWjKYDO6F5FB8n8RAF9cT+CY1/5UuaiVyM7OMcccBr62k3GHLz25EiCa4JwqOudQnCKWoXyOD/fKv46K2MRsE341soWjhmPkNMjZR5LrnLhYRHgo0gLV/m2/jmvMg0INCyNeP04R2+tsdIAAmHIekGH4VW+9SVWf2QNW73IK/GO7cprjsWhz517aJUj4ldqewe0a/tLZnstjsOY7/PwF40BmjCU+gzOsataobzd+hrQDbWnvT1NzaGHPyC8TVGMP4LJ8rpFYqwZnvqcYcFxSfxIeSyNxUj8AZOnClhYACLXYa+Vsa6Qi22PLVQWP/kANoq5Xsx7ygEnFimvuTstEtVDACnWhvlBKEM/pZMVvmyncnCjbYPop4D+ABiznACTG5Xlstf6mbiOLjBqjD2QYWyuoZokL+OiF3SB4xmwmYN655gDzhwqPhMPTYo7T9e2obsmj8qZmOVia2Mxa5nGBdA6ZVc9ANkpl+oBgl4waBM4jjsIppF/1YJT26E3Syxk9JxTmbc62o62JKxzFtcCXOiia2BcLGDAs+q6s7F6k+LHtXbeSwecXvWdpZKb73Q5GgBAGvdUi9Cc0VghJQCW+VM0ESy02o4RFGYFmtqxj7UNvV3Yq5t5Pqc6bA8/x8SbUy7ndYIGtJi5j23bmxuV21b4oHak7QAjebaabNVOIcazLd4p/c/WxwFnnKp5cjWeXuO6t7bGv2F+l9YGXrg7DWDRNEBHeV5vc9iPbQwnhNli5s562vOWKHfq8xkFIIT/K6RjX2UbhMN4yJ/V6LBWxrdUa2le42ry2BNvtH9iRbF96GENABL4MRr+Ht3vRj5nozosIz7VyAGH7VlnH/qrfXgAKp7DFwTLRakBOBIaRfH2AmKPl94cqI4n8bXCB/NFra51aTITOn/SLT2m819YA1ornPfhHAtWSTUQqC0PoLEPHwO3ha9qHvZpS6WL5QwCrxrtC1heXdqCYLRr1QW0VczZBl4Rmjc6b3dQ+cV9UYsC5iTpiFeSgDh6vJ2Tt/NaXwO6p6wN7u2YLTjrpnouqC1zyI5GrH/RFRIEwJHg7B9xiOaOS8AlHB6L9eaUwslJWzP1UGg4A6LYCPC6yC9q7QKnxNIdZjOgjQVZ6m4Ka++7IQ1ka6RKqhPmwkv1GWsRVcmyVKPwr34lNIADoCA8lg0mYTIFleZJHF7tKf1OaZC7daGonIUE8rIf7UVr1dOuceBKZQBY69exKm8l9TELi+3gna9wVT/qi1rai4/5V/IxAVvo7fuo8ZcDbJ1nTaxllJXr5uvbXTj6THaPXQNTNZD+t3hcCHjPjdhamRUTypRnEWK9dIKJ2vyn+teK8Zx3kuoBlueK0yJVGmCzhQ6oDZ4VUBsWL68UeQtwUEx/LIc0fizjA7TG9dB+DKk/lglmbxhrTF9rq77ohyeTbUetymPXwMVr4GhXqAXAIl31i1qNj5XQ+3WsyZvFWDfJrFR/0mz5sNoCZWWA3Nr0PwnwUDLxy2pJxrWF8vFdA+fQwFGTnnMB5p85KL3KF7WnXKxkZcECImn7F/ncKsZayikAUV6wRloyc+aG700gQGcLIPheGv/rGlhYA0fxZwuCzyKRFgFbh3N/UZvGH5HoWqz4WMzfYuxeKZG2WVa4YvxIYyP/nyvK4EO7Bs6qgSuNxhkQtiDBFxJH5ylc+hiwJhoO3ti2EdFPAauJrY5ZTiysG5XdxsbwanO8xurJEWBTghFjM+4mCF0osH3kpwH4wSN3IG/izrgQS2rgShOdV+GEXlI7tiSDTlyYqO2QcxYwMPChy6nUxQP+5TZrSYA7VX4sSxz1gLMDzqla9H670cBxJUkBA0BhEgnY4APotFkuJYjSpgShseMzFmEWyuVXerI+ZhHKmbgGFtQAW6qzkxYXb6PaQKJXlrgonyrOLagbdcIPFQBGdaTTV7gZQ8azLV5WXJ8U7/Q2rL7XYMvXaoHMDxUG+UuG/MwRFifb12ogndpf461Gkp0XAc8VuJe7OFogmZGVA7R+yFNKOCqsRW1f1HLGhQX1VCGklc/PBnHznqgMp3YgpfGD9H2FG1sGh/EWty1mMQ1aOLpWJi6/lMfk5boBXn7tD70M0tT+gwPM2ECyGrAmriqzXwdke1y+FEjtNpbg3nBv7c3kxsQ7rzirWDjxEplQOKaTtaIJxZM65WO7FDHhlGl8UUulyjv7xPqwmCN/inZHkt0OOCbQVBmOZ/L4gnrfwE3tf06FSVZbpF3DVlt0XQzOVa5r4ZhJ50HZc8mxlXGOawnCjdDY9+LkqhZD7dl+DG49CoYGbkXxrrKcyC79UlwAr/t5yzhkIU3tz1jnomTBnmvAJcfRvbldkv+eeK8GOChJNwILh9foQ4sl1ylbreobqLZYBrv6oja/2CwN0LKVKMme9tT30dT+fbxnq9P9wk9zZzs12wDOaFUNXK06ugbXBMMfUQ04atv60WjPdeBcrAaoHj6rVVXqp9OnMbX/XBce5QBMzHmP05uHQbBYFWPZXMfxsNr4kBjiZ1PSVvJ9UZg7bL3MGvpCafxb+LYapDLase1m3AcKWNbhnJligBiZaMMLCAAcXpRjFRLTl3rb8qNr+gOOlPHQZOuEvMjDfKMP10UZfamHXyCl6Qtv2tr8v1Z58vUo3Sl3YLLDP1dbkFmKXQwQluR9Rt0ZmPTpySZtm1hT+7fxHFWm+8ACY/HxEa1ZZQelcXrzo258LAyo8IYRYGCBBlBQuo24XkApAIxi+POpSOMNZSz/U3X8v6UwrmIOW9KWMkCBmI9qAUDa2Pko+HPi/qXKqOdXBwLwZWUBbFR3UBk/usXHzZTZNcI7/MwLbSDlkZ23iw2/m/LpepXmejrlhs8e6bhHoV3mVg3w5J5CU/sPjQ3YNMAgdsASYPGxwMYQH8Ama1dp829hNeTEuFi5BgAHpQEpQM6sI9qzXQVgTEYcvYG/YoAeoLlWCBTLKE9WS6ziTVoaK5aVW+GHKkd+gCcngM6oVm5rv4vYAWcXt6nVd2OS348Jfguoi8oJn7er6Z+3H53WwgJMPlF4VXZWnQFFWsxlm478647ytIizcRsWRuzHuGydjAAPk+WgvuRzeqEMIMV1UA9YAUj4CAOpDLDDwukltcOq4p78pzQWHhYXvsncWmOcGrl7x9pa5Sa2VFtTytbk0UTk9TdipcWUyWhl5VM1NZnaPzE6PREWqbqXizjnONbCyft2pW1ctjUJGGJjrIkStACBVlJ/LB/kZ9uDVfaF8vgfnyjYt3BjfnMJX81TBYALHxJWHts3+I+VW132QQ44+7hPSMlT0SZiLrVZKNT30dT+fbyH6gwMDRzb2lubO3VahCxGFvlYMp5YEcH3MsCgDxDpii8HcLlRbBYlZYBEDX81C9YR9/FtvKZwXUoDiPiIAMKxcqvLPui4DzFdSmmAPT17/5J4UuI3GFosU/uX41bnJRtbFeRrOElhoDq2IRDyGQ1di7XrjbNxW7dr2di9fLJKwACw4PMKAxjK8P1wIp50DWHNNSwu9Wd7Bk98O6avueSukeksbRxwzqLm6YPECclTERM8kNL3lHiswE+MBKJM4Z1CY/+vPBN6sH9ks0T0SEwfS45y68R2gjdUuYVGug1cTS6z6izfFzMugGDAFtoqz7hmSVCGLgmdpD4AAX3wt4S+WRmvtHN+JZ+SN98ElmXkTQ+1cpfjbDqfftN401JesHCadDzZARFeqdpTs/WK4wRlodwqYNLj9LxROQshkfKtr5Vr+ydGMyc0PtYBWwjkh8izjbBFFgr5ozKzeFjEXCN+LMDqqQL6ggf92N7Ah20NoBLK1Ta9PVLaxlV1OovDuHzPRx/65jxfqLz1XqgcS4Z+qV7pYK0oDm+1VB9IeZPXwA55eTgAplh7ti1TMpwPYuuXdKF0p9x02CM54Kx81zSpqgFnZVF9eNfAZA0cJ3NwBq4B14BroFIDDjiVivJmrgHXwHQNOOBM1+FUDm8jgzGO0Kljen/XwCoacMBZRe2NQe1t0p1Xxo1WnnENXIAGHHBWvolyGtubDd6SOLkGLloDDjjbuL32DU3+c6rbkMylcA3MqAF/LT6jMqewkqXD1opzG3z30zhXM4Wv93UNbEkDbuFs525wspRDbvxOix0U2450LolrYAYNuIUzgxLnZCGwsYOA/IiT+XfmHMJ5uQZW04BbOKupvn1ggQxH8vkgs++7nPbOXuoa2LgG/g+UwvemfwkDYgAAAABJRU5ErkJggg==\n", "text/latex": [ "$\\displaystyle {src}_{(1,0)} + \\begin{cases} 1.0 & \\text{for}\\: {src}_{(0,1)} > 0 \\\\0.0 & \\text{otherwise} \\end{cases}$" ], "text/plain": [ " ⎛⎧1.0 for src_N > 0⎞\n", "src_E + ⎜⎨ ⎟\n", " ⎝⎩0.0 otherwise ⎠" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sp.Piecewise((1.0, src[0,1] > 0), (0.0, True)) + src[1, 0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Piecewise objects are created by the `kernel` decorator for ternary if-else statements." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbcAAAA/CAYAAABjA4bqAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAWrUlEQVR4Ae2d7ZUctRKGhz0bgDERXJMB2BHYZIC9EdhkAGd/mX8+kAEQwQIZgCPwQgaQAYsz4L6PViWrNf2hnu6Z6Z6pOqdH31LpbbVKVVL3fPT69etnm83mN10l/f3tt99+WkZ6+HAICP9Hau0XXdyL54dr2VtyBBwBR2D5CGhe/ENcftbC6fPLLPKLzI/3rgh78IAI6Ka9UnM/6Ppd18sDNu1NOQKOgCOwFgSYGx8WzAZlLQk3TaZMok4LQED34mux8Z2uH+X/agEsOQuOgCPgCCwOAc2Pf5ZMKe49cUm4lRk8fBwEdGMwEyPY/hwj2JSXMn9Fru8U/hW/3C/loJWzmmGF80AX5ubvlPa33EDyU/4fXQyMBwp/HxLO+Cdi4pie8Rjwrq8XgYv1sn6ynGOKhKpNkZqEKXMj90e5t7qudBkh0Ni3+4kI5UFoPdaV7NSKYwL/LaZR/lrXWZOwcEzPegR459eOgAu3Bd1BTagIHA6RoLVtqds9rFLmJ5Vhny4dPon1YW4mnXiEH/RSftPsmMRJM7M02txTMuWk9Ge6kkDM06b6Ve/XutAol0SrxnQMkAvFf0wXPK8jsIWAmyW3IDlqhGlcNyO5+Eb53+pCUGFu/FwXAisISLmYJVOdFq846IWudBJTaZglG4JVcUHoyjUBSLmNwggA2nwuP+U6KebFRIrwpNyvigtmUbnf66KeJe0vHgVTYVBNEdNTxb8aB8/oCLQhcNEW6XFHQ4BJH0p7YffB7t84wSHIPlYu9tLudCFEcmIfryGwskQ0pqH2EJim9YWiCiPwTNPMquv0YhrlgAxCDdMo4Zx+UTya59FJfHAfDo7pmI6Lx5PBn/uuC+2dcerkCMyCgAu3WWA8aiUcFGEPbaPJASGFuTFpWIpjouaASIpTOCeEngnVEK+8HC4xP2lbglF5MJ0G02bM2ukoHwL0kdxcuyNMfCD54Q8Ncwl0FEzHdFx4nQT+6gfvKbEg4/6z7+sCbsxAOOG8GgssqrlY+PygqzFPDXXdzZJDCC0/HUGE8DKt5538uTBCgPQJIUyS32QDh/xvdBmhIdiJQYsb6z5UASawnAiXgjMJuzzjEfxrwHQMLIvEP45Zxi7aPGOBcXo7pmOe9zQR0Hhg0fOGsUEP5TI3/CH3C11DliaKbFy4BRjW+6Mb3TAXlj1ROoIu7am1pDNQ+va62ibGspqhMIKMenIi/D6PkL/MUyQfJngsTNUuEzx7fV2E6bZLA+8qQ/xS8UdTZ3xu1C/GYec4JY/TeSCgscBCPSx6rMeKe68LQccec5WFx4WboeduFwKspEebijQQGZxBeMWBWa622NMq45iEz4FaMY149C00qrFRXWvB/1zuefW9W0tGxph4DQfZ5O9dZI/sE4ucsOgpyr1TOJysVnvlwrjI6prbFiAe0URAg4i9netmbFhpf6Y4hB7a1rXyYA41EwIaCGYFDrkYcaKSL68g0CjTWKUrjfraBrSiT4vU11ZMx/Qy4uX4jwHN886KgMYg2hSvDfH8s3XBvtgcH39gXLcJS1sMkx7mGrmddNmZ4gmOwAcEwka/Bm4yicmPIOLaGsxKQyt7+aF4Mjtt5c3yfKUys2gtWZ1L9m5hOobZNeMv3lnIsGBiktoozGIHNyx45KIRsO9pe72cAk4mWaVTjnQWUewPM+k90UU8iyibBBXsJuWjDr7KA9EGe89mbYBHXs35THHs85jJmPS/FN6afBWX10e+W8Wd9IJN/aOfmNLBDq2Ke4bgYb8sYCl/NanMg4rMYbwM5bsYyuDpjoAGHA8yDzkPeC1VDUAqU728uoAt/WxoR0zH4LNY/NV3NFcEGYul3/HH8EYuQgWtn1NyvP/IxaInTJ5yNwpT5nP8IoQSxPihbBCYRPSRynMi9kZuaEN+Xk1BOBldKS3sf8ql7nzxlefbKJ2Tv0zqWC+sPvhr5LOKT9WNfed+gAWHP9DkaoRVDomN2z7BWFWnC7ccVvd3IsDAVeJdZ4YiQfm3VrZFljzIZHXSK9y8s+Yfi6mVq3FXjD9CJr3gn/UVQYPAQ4AZMR6fKc7yf1zTb+VhkUa5fMxRP0J1E9NNa2SyJW8QdPIzsZZjG0EJD7mpjI8jEJ9I6V/GulPcsT3ih62CWUl18j4rQo7+v5UfIQfmc9EnNRVd1mTyPMtEQAPmv0Nzpjb30mRZr8If9TWkdCaZt7qqVnGxLjSEfELbakLpJ4PpVud6ItTvRqrCvfg3Ms8UUJsILiZBDg40SGloe8Rd6bJ7+F7+ZIJUOuFqUn4mX4Qpi6v8BB5j6udYETwlc7nyJT/pCqPVwTPm0USKz/ebyWefxkv8pswjPKoH3hCcaJKmvfbWoHymQWKC/URhE9Qb+dE0ET6NfvVWWJmoOhH2CP1ncjEr03c+/dd3n1iwdBELDchMyfehjt/LjniPXgECGiQHn4CWAkt8QKoe7jE8nzOmY3DaU15b3fdNfgibnPomwzxf8usesyeMJQKthYl3Q1hXmPTlBuEpN6Qp2QQdWUtC0CAcO3lWGgKJfbvGIaqyoqGwytN344k6B0llEKr/yKW/G7looez35sIcwYNWnITeYMW7ZRjkWTxwSIXa2/JaXNUC4WI3Hr2UI+AIOAKzI2CTlk1ibQ1YHkt7b54xLhO5LhaHTPLs/YUj5kUdCCMEYWsbiodProb5saiDIJrT5D1ltYf2ipAqMaCNLqLt/CAYfgScLSQ28lscfZmNVC9mWEy9aIVYTRDwrVgWjcJP4i9LM80t9SdL2/JebsV4hCOQIaDByIC/jlE24IZMC1kN7q1BwHEOWgWTN5MfAiffv9oo3jQWzIg7k+phsmfSDQdR5DJRonm1maNps8FH3rDKmJbROmErHSFigmN2s1/OS5tfbfO88vyWwhB+0QLz+MCn4jr7q7QqUrtoi2iB1Fl9ejWrnHtsptQseoOlxsZIHt/qd+HWCotHZghgrkgPpvysQFmN2Sm1LKt7JyBwjjgz8Zb0VBF2CMH21sjDZIfpkAnTiPJtdVh6m4vAamhRqpPTuhyCYNIPJD+CgWtIK+NwCRpe45CJysPvTawn1au4Q1IXNphyTQsyftjn3FpUWGKNq75i5mWuQEDyzyQ79VvluBdo1ixCgrCVS18wATM+qsiFWxVMZ53plQZWesdISPDQEserAfnkc9YgzdD5s8GZsSO8sAbwwW++pMJK3Y7RszJnhc7kZpMjQoZwEGxyEVBMosS/UJiJGkFSo3W8VF40qvCJJ/khNLC0gLuPCnVjksyFaUz64FBOFwsTngs7YclEHN7zUjy8IkyWROAFjzmBNXiOIvWPesAS/DiYMteilzEArk/kcoAE96nC1XPOaoWbOskD8pMubsjPCpeDU9FOMyAArrcz1ONV9CNwNjjHCQptp5WUjrms83lWOgKnV+i0VqzI2PbgBBnbqJqolbfvIAaTP4KjQSrD/IVAHCKEZ24+HMqfp2+1GxPhqaxztAAWX9SDFQchVIVVbH/QUX3w3jkGBitQhjULNwYoqi+28iHTQQ0WS8jDYFkUCd+GuUXMMeBY0Q5OEIvqyMKZcZwXfoN2Zw8hsvVcx+cHM+DeSG3YYRgUgPJ5LcPkKQVeL2+qHwE0q1DrbXBk4sXI/IvKLnBR+aGdVnH3RRf1i7kAKgfefeyRf4U3q032J2Y/gn/kri2qecd5UbdjEjO6lzzLCI69k9rCxFtqg2/UsM2TG6Xjt5fec57g0cyqefxq/asWbkKdlY+tTma/CRoIr3QxoR+KaAv7/6gV1CGYE08Mfh6cnTeKD8Hn2ttwnNd+B1v5Z46aLOCoQxfPIPuVwa8whziMaIM5iwVoIPl5dYAXtzEdkhdzMPuOJTGX1uxZluUWG16tWTIiyipkn1obJri+Fzhnu7EaeLa62md/duJXvPHQsKEfzCgxvJG7OCG8UwcXUiji6jgv5H7MyAYCibmkb29usLn4vHXWoXS0RD5BloQblSrcWSamB7Op8p3U8zxKuEXQ7PQKuCDpOXkU9mXkonlc6ar6krbyM2ly4wGVEzHUx/HP8Da9/Ila8iIEaA+1e3ZSewibvWmFLQyDA9S2qrpPOcJvxJ2j00y6psVOflCP0JVFN+k4L/r2TGJO95b36HjfC21rrwJE9e+y4DfhO6mfSyt8UcuQQAOAa7lMcggfBBEnZfJTT1ekK24jlwmRkz5MhJBN3iGgeG4C5anP6qQM6nNjA7Yjr9U3u6YT26f+gwgatcdRWgQHeLFJuyTiHtm9ws/1aoF8LgmzXXhxnHdBbSVl9LwwD/J8N+a2PbCPAK2eQ5SXuce++7gHdo5X5WVN0wIANRd7bfoYqOLQarhR4aSi/GhhtiH5UH5AtpM05Eun7mI53m1BqOUrGfwNbWkgb/Xb6qq3itQeQoZXDA7yFQ61B7amGSWMqpg9QCbxl+75AZo72yYc59O/9brHzHd7FW6qf+wcwmtU1cJwTXepSripQ0z2nLBJIMhvJirbhOSm2f5U0EIMCOU17c2i0Io42VPeCDQEq2+XvFZmJ1f8wBdCnH6ympGzEzGIy35sVRTbY+WEyWIw/1YFHuEIOAKrQkDPeZpDl8D40viZE5NB4abOI6gQXDdFwwiidLJP+cLxdbnEQybo7kPN3xcKNsyJKkcbaH/lO2tj8jZbGRkSDwgltE+EMW/D73sgIthYOblgG3mvPLsj4Ag4An0IDAo3FX4cKyjfveLkXENAxXzswTVMizE+OJrIEWJcbUJso/RU55i81obKoHlBmETRxDD7Bb/SchOoordJefiu2a1S3ura9/tc/6Mdtcd+yy4fGFUxJ0fAEXAEHIESgTEHSpJg0GSMcEJDKwUU9RNfo4mk+igkQlia9seJSbQ4o6q8KoNgw3zKgZeHuvjWGX4EtJlR5e0nlYEPyiIc90aqH83XBCh/yw6uTo6AI+AIOAITEagRbsG8qIk3FzbswUEN02PMQ742oRcKKM97edDOUn2KQ7tC+KAxQfzvj2l/VXnvi30wkyqca4ccDqkRuLGacNqT/UD4OgQ9VSPwi7bo5Ag4Ao6AIzARgUHhFoURWlV4w11htJk7XSZ8chYQWMQn02KemPkxXT5RPv4gkPrQzGjjIXFyOT1oVJ1XZfMDKkmDVHxpUrW6h1z7i/ShfJPSxR8CHw2T9wMPJVAn8eyFHQFHwBFYMgIfvX79OpgXNanyr7RVpLzsEd3KLU9BVpXfdybxhRbIXlY4xi537y9PTu0TPKoODrOwOLBXKKZW6+UdAUfAETgrBDR//qsOv+zV3JQJTYIj8ZjMAhEnD1euXd0nHvFXfCHA6BTEV1Lyfbrez8+EEkf+Ee/wiwZHPxLeR2bLm3cEHAFHYJUIXA5wfa10NLtAcdJlv40j87ua+mJt8zoIB10cq8esGfb8op+GFi/cIhrsOYI3B2CGTLuhiPpoh17+UUT1qdBQWD9Ty1s97joCh0ZAY5dFNvMRVo9V/KejeIZXLF/Mofk2iqKc5kRgSLiZdvZKN+ITNYxGwU2pmnjnZLSmLvGVm0kXyeNAP9DcoCrNTf3lIXkjNxyWkUs5Tl2GAzlU1EdTy/fV7WmOwJwIaKyy599YpCrMApt/qcCc/3DO9vZYF8KN59ROSe+xqfOuule4adAgINYoJE7+rure8AI4X3lJp0Dl59UCwixKOKDTSVPLd1bsCY7AzAhorJpA6Ko534LoyrOIePWFjyjz5X5byC6Cr1Nk4uIUO3UmfeIUaZtp+J3i+aeGIe1vavkzgdm7uQAETuoEsQu2w4woF26HwXkfrbA3d9dSsa1iSe+jqeX76vY0R2AWBCQI2FezfeVZ6vRKzgOBXrPkeUCwvl5WaGV0qnMPYmr59SHmHC8RgTgOEVzsmUEciOJ0dtgKkYvGxslnCGsE/yQC3cifzPH3UeFw1CP5Tct7Iv875eP90QYpjnzs39FuOEuguLBfL5dFHzyR540uFovURTzWDlzKkk4afp41yiOIieOvbTA/wi/8YIIkH/0ijrLhP97kBlJeylI3ec3qcqX4tDcnfyffoRL/aSBw2Qh5YC0ImODiQegie0Da0qeWb6vT4xyBagTiZM5E3zj8pPjfSNP1vS4EGB9SQAghDPIDY2VbjHcEYBBmchEWHK6ivFkzNjH+rdI4iBLi5fIxCfIShwDC5bUihC152MPmFDb18+1ZvsxEOp/oC0I2iwuCTWkbxfG9WE5uE2c8UDcHwRIpDO8cmGnskyuc+is//enkO1XmnoTARfK559QQYEU6haaWn9K2lz19BBBsDcETu4yGw0TPZD6GHqtMOlovv+1How3lRLu8NmDCZiN/29eBMPkjzIxHDoGE+uWyqESoXekKFOOIR7vLif+cTG3FhHI74bHi4R8hl5OdVieulu+8/Fn7Xbit8/aXD0feC9PKeO+ti6aW76rX4x2BQQQ0iSO4Hul6V2ZW2p8xLgmOMk9H+LYjPgmMrN2G5hTL0S7mRyMElfGyUVnCOd0ogIZJP0jH/Ijw4xRzIMUhWMM7tzGq1VE+tEWeyX/lR3NFk+RjDrkWSjs1fLe2cY6RbpZc4V3XoOfIP5ynBzfrhsWVq8WUZWr5VJF7HIHdEAgCQUVLgZHXNlZzy8t2+a1dTINJCMXMaEmlgETgtJLKo9HBP6ZDtE2+lcs7wK908a8maHaYXEmrIfbWrnUhJNnzQ3vFBEr9Y/lWEScXbusdA6z2bNDnvTDNjfQ+mlq+r25PcwT6ELCFly3E2vJanq00TfhM/LVCIy9vdaIdIXyGqE/4Upa9NwTZG7lmKSEOgVRTv7IFrY/n+C72KfRLfoQve3oI3bF8q4jThUOwWgSwwWOrL4kVIHb+oQdzavmyXQ87AlUIaGxi7mN8Ng5QUFhpmPIgxqfR0Fi2fL1u1u5VW8as7bbktjgED4KJT4CZMCOOvbry300U3UloqQ1NUuUxcVIne3GG11x8dzJySgku3FZ6N+PgZ7WHGSOQ/A/keaHr5X1MmCz4isl/uhr2eoV5eAbLWz3uOgIzI/BU9b3QOCzNj5jkOCmZWx7wty3kjCWzVli4z6VdhI8J0ZBXYdo1DYk4niWuTlIZhA5l0r+OZHEc48/rK+sp675W/jKOsOFQy3fZztmGd/rLm7NFa88d1+BmtYqw4gixrQQ7W40PAw/le12YRdgQ51uTPHSJFG49Sl1bPlXkHkdgRgQ0/tB6MMMxfiHCmOJsQg+R/CjONDkEBmOcfWcEo+1TUQflMBFSD6ZBBFiIV950ilF+a1fJ6V032uXj65ShLM+h1Xmj+NbnUfFoaJRL6fIHLUxuOF2p9EAKG78mWOGXhSiCGy3WTJvyhvfvMJ8mLOTv5JsCTvcICKfwlzcu3BY0InRTRgm3BbHurDgCjoAjsAgETLhdLIIbZ8IRcAQcAUfAEZgRARduM4LpVTkCjoAj4AgsAwEXbsu4D8bFXfSM2SC3su46Ao6AI+AIRARcuC1rKNiJxq0j0sti07lxBBwBR2DZCLhwW9D90Uaona7ipJaTI+AIOAKOwI4IuHDbEbg9FrPvyXHE2MkRcAQcAUdgBwRcuO0A2j6LSHvjvR/eU9vly+j7ZM3rdgQcAUdgNQi4cFvmreJrBLysyn9M2Qufy+TUuXIEHAFHYIEIpJe4W3jjrftPW+I96kAICH97qZs/O7T9uAO17s04Ao6AI7BsBDQvcgiPL7+U9PxSMbe62k7n2bH0spCHD4SAbhyf4eLG+asBB8Lcm3EEHIFVIcDny9rmx9v/AwbVQyLFDNTSAAAAAElFTkSuQmCC\n", "text/latex": [ "$\\displaystyle \\left[ grad_{x} \\leftarrow \\begin{cases} \\frac{{src}_{(1,0)}}{2} - \\frac{{src}_{(-1,0)}}{2} & \\text{for}\\: {src}_{(-1,0)} > 0 \\\\0.0 & \\text{otherwise} \\end{cases}\\right]$" ], "text/plain": [ "⎡ ⎧src_E src_W ⎤\n", "⎢ ⎪───── - ───── for src_W > 0⎥\n", "⎢gradₓ := ⎨ 2 2 ⎥\n", "⎢ ⎪ ⎥\n", "⎣ ⎩ 0.0 otherwise ⎦" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "@ps.kernel\n", "def kernel_with_piecewise():\n", " grad_x @= (src[1, 0] - src[-1, 0]) / 2 if src[-1, 0] > 0 else 0.0\n", "kernel_with_piecewise" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### c) Assignment level optimizations using `AssignmentCollection`\n", "\n", "When the kernels get larger and more complex, it is helpful to organize the list of assignment into a more structured way. The `AssignmentCollection` offers optimizating transformation on a list of assignments. It holds two assignment lists, one for subexpressions and one for the main assignments. Main assignments are typically those that write to an array." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Subexpressions:
$$a \\leftarrow {src}_{(0,1)} + {src}_{(-1,0)}$$
$$b \\leftarrow 2 {src}_{(1,0)} + {src}_{(0,-1)}$$
$$c \\leftarrow - {src}_{(0,0)} + 2 {src}_{(1,0)} + {src}_{(0,1)} + {src}_{(0,-1)} + {src}_{(-1,0)}$$
Main Assignments:
$${dst}_{(0,0)} \\leftarrow a + b + c$$
" ], "text/plain": [ "AssignmentCollection: dst_C, <- f(src_N, src_E, src_W, src_C, src_S)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "@ps.kernel\n", "def somewhat_longer_dummy_kernel(s):\n", " s.a @= src[0, 1] + src[-1, 0]\n", " s.b @= 2 * src[1, 0] + src[0, -1]\n", " s.c @= src[0, 1] + 2 * src[1, 0] + src[-1, 0] + src[0, -1] - src[0,0]\n", " dst[0, 0] @= s.a + s.b + s.c\n", " \n", "ac = ps.AssignmentCollection(main_assignments=somewhat_longer_dummy_kernel[-1:], \n", " subexpressions=somewhat_longer_dummy_kernel[:-1])\n", "ac" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'adds': 8,\n", " 'muls': 2,\n", " 'divs': 0,\n", " 'sqrts': 0,\n", " 'fast_sqrts': 0,\n", " 'fast_inv_sqrts': 0,\n", " 'fast_div': 0}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ac.operation_count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The `pystencils.simp` submodule offers several functions to optimize a collection of assignments.\n", "It also offers functionality to group optimization into strategies and evaluate them. \n", "In this example we reduce the number of operations by reusing existing subexpressions to get rid of two unnecessary floating point additions. For more information about assignment collections and simplifications see the [demo notebook](demo_assignment_collection.ipynb)." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Subexpressions:
$$a \\leftarrow {src}_{(0,1)} + {src}_{(-1,0)}$$
$$b \\leftarrow 2 {src}_{(1,0)} + {src}_{(0,-1)}$$
$$c \\leftarrow - {src}_{(0,0)} + a + b$$
Main Assignments:
$${dst}_{(0,0)} \\leftarrow a + b + c$$
" ], "text/plain": [ "AssignmentCollection: dst_C, <- f(src_N, src_E, src_W, src_C, src_S)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opt_ac = ps.simp.subexpression_substitution_in_existing_subexpressions(ac)\n", "opt_ac" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'adds': 6,\n", " 'muls': 1,\n", " 'divs': 0,\n", " 'sqrts': 0,\n", " 'fast_sqrts': 0,\n", " 'fast_inv_sqrts': 0,\n", " 'fast_div': 0}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opt_ac.operation_count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### d) Ghost layers and iteration region\n", "\n", "When creating a kernel with neighbor accesses, *pystencils* automatically restricts the iteration region, such that all accesses are safe. " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)\n",
       "{\n",
       "   for (int64_t ctr_0 = 2; ctr_0 < 18; ctr_0 += 1)\n",
       "   {\n",
       "      double * RESTRICT  _data_dst_00 = _data_dst + 30*ctr_0;\n",
       "      double * RESTRICT _data_src_02 = _data_src + 30*ctr_0 + 60;\n",
       "      double * RESTRICT _data_src_0m1 = _data_src + 30*ctr_0 - 30;\n",
       "      for (int64_t ctr_1 = 2; ctr_1 < 28; ctr_1 += 1)\n",
       "      {\n",
       "         _data_dst_00[ctr_1] = _data_src_02[ctr_1] + _data_src_0m1[ctr_1];\n",
       "      }\n",
       "   }\n",
       "}\n",
       "
\n" ], "text/plain": [ "FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)\n", "{\n", " for (int64_t ctr_0 = 2; ctr_0 < 18; ctr_0 += 1)\n", " {\n", " double * RESTRICT _data_dst_00 = _data_dst + 30*ctr_0;\n", " double * RESTRICT _data_src_02 = _data_src + 30*ctr_0 + 60;\n", " double * RESTRICT _data_src_0m1 = _data_src + 30*ctr_0 - 30;\n", " for (int64_t ctr_1 = 2; ctr_1 < 28; ctr_1 += 1)\n", " {\n", " _data_dst_00[ctr_1] = _data_src_02[ctr_1] + _data_src_0m1[ctr_1];\n", " }\n", " }\n", "}" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "kernel = ps.create_kernel(ps.Assignment(dst[0,0], src[2, 0] + src[-1, 0]))\n", "ps.show_code(kernel)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "When no additional ghost layer information is given, *pystencils* looks at all neighboring field accesses and introduces the required number of ghost layers **for all directions**. In the example above the largest neighbor accesses was ``src[2, 0]``, so theoretically we would need 2 ghost layers only the the end of the x coordinate. \n", "By default *pystencils* introduces 2 ghost layers at all borders of the domain. The next cell shows how to change this behavior. Be careful with manual ghost layer specification, wrong values may lead to SEGFAULTs." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)\n",
       "{\n",
       "   for (int64_t ctr_0 = 0; ctr_0 < 18; ctr_0 += 1)\n",
       "   {\n",
       "      double * RESTRICT  _data_dst_00 = _data_dst + 30*ctr_0;\n",
       "      double * RESTRICT _data_src_02 = _data_src + 30*ctr_0 + 60;\n",
       "      double * RESTRICT _data_src_0m1 = _data_src + 30*ctr_0 - 30;\n",
       "      for (int64_t ctr_1 = 1; ctr_1 < 30; ctr_1 += 1)\n",
       "      {\n",
       "         _data_dst_00[ctr_1] = _data_src_02[ctr_1] + _data_src_0m1[ctr_1];\n",
       "      }\n",
       "   }\n",
       "}\n",
       "
\n" ], "text/plain": [ "FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)\n", "{\n", " for (int64_t ctr_0 = 0; ctr_0 < 18; ctr_0 += 1)\n", " {\n", " double * RESTRICT _data_dst_00 = _data_dst + 30*ctr_0;\n", " double * RESTRICT _data_src_02 = _data_src + 30*ctr_0 + 60;\n", " double * RESTRICT _data_src_0m1 = _data_src + 30*ctr_0 - 30;\n", " for (int64_t ctr_1 = 1; ctr_1 < 30; ctr_1 += 1)\n", " {\n", " _data_dst_00[ctr_1] = _data_src_02[ctr_1] + _data_src_0m1[ctr_1];\n", " }\n", " }\n", "}" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "gl_spec = [(0, 2), # 0 ghost layers at the left, 2 at the right border\n", " (1, 0)] # 1 ghost layer at the lower y, one at the upper y coordinate\n", "kernel = ps.create_kernel(ps.Assignment(dst[0,0], src[2, 0] + src[-1, 0]), ghost_layers=gl_spec)\n", "ps.show_code(kernel)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2 ) Restrictions\n", "\n", "\n", "### a) Independence Restriction\n", "\n", "*pystencils* only works for kernels where each array element can be updated independently from all other elements. This restriction ensures that the kernels can be easily parallelized and also be run on the GPU. Trying to define kernels where the results depends on the iteration order, leads to a ValueError." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Field dst is written at two different locations\n" ] } ], "source": [ "invalid_description = [\n", " ps.Assignment(dst[1, 0], src[1, 0] + src[-1, 0]),\n", " ps.Assignment(dst[0, 0], src[1, 0] - src[-1, 0]),\n", "]\n", "try:\n", " invalid_kernel = ps.create_kernel(invalid_description)\n", " assert False, \"Should never be executed\"\n", "except ValueError as e:\n", " print(e)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The independence restriction makes sure that the kernel can be safely parallelized by checking the following conditions: If a field is modified inside the kernel, it may only be modified at a single spatial position. In that case the field may also only be read at this position. Fields that are not modified may be read at multiple neighboring positions.\n", "\n", "Specifically, this rule allows for in-place updates that don't access neighbors." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "valid_kernel = ps.create_kernel(ps.Assignment(src[0,0], 2*src[0,0] + 42))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If a field stores multiple values per cell, as in the next example, this restriction only applies for accesses with the same index." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "v = ps.fields(\"v(2): double[2D]\")\n", "valid_kernel = ps.create_kernel([ps.Assignment(v[0,0](1), 2*v[0,0](1) + 42),\n", " ps.Assignment(v[0,1](0), 2*v[1,0](0) + 42)])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### b) Static Single Assignment Form\n", "\n", "All assignments that don't write to a field must be in SSA form\n", "1. Each sympy symbol may only occur once as a left-hand-side (fields can be written multiple times)\n", "2. A symbol has to be defined before it is used. If it is never defined it is introduced as function parameter\n", "\n", "The next cell demonstrates the first SSA restriction:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Assignments not in SSA form, multiple assignments to a\n" ] } ], "source": [ "@ps.kernel\n", "def not_allowed():\n", " a, b = sp.symbols(\"a b\")\n", " a @= src[0, 0]\n", " b @= a + 3\n", " a @= src[-1, 0]\n", " dst[0, 0] @= a + b\n", "try:\n", " ps.create_kernel(not_allowed)\n", " assert False\n", "except ValueError as e:\n", " print(e)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Also it is not allowed to write a field at the same location" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Field dst is written twice at the same location\n" ] } ], "source": [ "@ps.kernel\n", "def not_allowed():\n", " dst[0, 0] @= src[0, 1] + src[1, 0]\n", " dst[0, 0] @= 2 * dst[0, 0]\n", "\n", "try:\n", " ps.create_kernel(not_allowed)\n", " assert False\n", "except ValueError as e:\n", " print(e)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This situation should be resolved by introducing temporary variables" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)\n",
       "{\n",
       "   for (int64_t ctr_0 = 1; ctr_0 < 19; ctr_0 += 1)\n",
       "   {\n",
       "      double * RESTRICT _data_src_01 = _data_src + 30*ctr_0 + 30;\n",
       "      double * RESTRICT _data_src_00 = _data_src + 30*ctr_0;\n",
       "      double * RESTRICT  _data_dst_00 = _data_dst + 30*ctr_0;\n",
       "      for (int64_t ctr_1 = 1; ctr_1 < 29; ctr_1 += 1)\n",
       "      {\n",
       "         const double a = _data_src_00[ctr_1 + 1] + _data_src_01[ctr_1];\n",
       "         _data_dst_00[ctr_1] = a*2.0;\n",
       "      }\n",
       "   }\n",
       "}\n",
       "
\n" ], "text/plain": [ "FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)\n", "{\n", " for (int64_t ctr_0 = 1; ctr_0 < 19; ctr_0 += 1)\n", " {\n", " double * RESTRICT _data_src_01 = _data_src + 30*ctr_0 + 30;\n", " double * RESTRICT _data_src_00 = _data_src + 30*ctr_0;\n", " double * RESTRICT _data_dst_00 = _data_dst + 30*ctr_0;\n", " for (int64_t ctr_1 = 1; ctr_1 < 29; ctr_1 += 1)\n", " {\n", " const double a = _data_src_00[ctr_1 + 1] + _data_src_01[ctr_1];\n", " _data_dst_00[ctr_1] = a*2.0;\n", " }\n", " }\n", "}" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tmp_var = sp.Symbol(\"a\")\n", "\n", "@ps.kernel\n", "def allowed():\n", " tmp_var @= src[0, 1] + src[1, 0]\n", " dst[0, 0] @= 2 * tmp_var\n", "\n", "\n", "ast = ps.create_kernel(allowed)\n", "ps.show_code(ast)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.9" } }, "nbformat": 4, "nbformat_minor": 2 }