{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f4979f1f67df37f0",
   "metadata": {},
   "source": [
    "A notebook to convince myself of things: quick NumPy experiments on shapes, indexing, datatypes, array math and broadcasting."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d14941f5bc6047c1",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T02:55:10.538317Z",
     "start_time": "2024-09-23T02:55:08.686469Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-2.0000000000000004\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "a = np.array([[1, 2], [3, 4]])\n",
    "# exact determinant is 1*4 - 2*3 = -2; the printed value carries floating-point round-off\n",
    "print(np.linalg.det(a))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b15c2be187dc45fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# a column vector: four rows, one column.\n",
    "# every row must have the same length: a ragged list such as\n",
    "# [[1], [2, 3], [3], [4]] raises ValueError (inhomogeneous shape).\n",
    "X = np.array([[1],\n",
    "              [2],\n",
    "              [3],\n",
    "              [4]])\n",
    "print(X.shape)  # (4, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "64c4d2162b06932c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-24T10:49:56.925142Z",
     "start_time": "2024-08-24T10:49:56.922132Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 2 3 4 5 6 7 8 9]\n",
      "[[0 1]\n",
      " [2 3]\n",
      " [4 5]\n",
      " [6 7]\n",
      " [8 9]]\n",
      "[[0 1 2 3 4]\n",
      " [5 6 7 8 9]]\n",
      "[[0 1 2 3 4]\n",
      " [5 6 7 8 9]]\n"
     ]
    }
   ],
   "source": [
    "b = np.arange(10)\n",
    "print(b)\n",
    "print(b.reshape(-1, 2))  # -1 lets NumPy infer that dimension: 10 / 2 = 5 rows\n",
    "print(b.reshape(-1, 5))  # inferred as 2 rows ...\n",
    "print(b.reshape(2, 5))   # ... so this is the same as the line above"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "8e5bcead13143584",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-24T10:57:39.343410Z",
     "start_time": "2024-08-24T10:57:39.339753Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 3)\n",
      "[0 1 2 3 4]\n",
      "(5, 1)\n",
      "[[0 1 2 3 4]]\n"
     ]
    }
   ],
   "source": [
    "c = np.arange(5)\n",
    "print(np.array([1, 2, 3]).reshape(1, -1).shape)  # row vector: (1, 3)\n",
    "print(c)\n",
    "print(c.reshape(-1, 1).shape)  # column vector: (5, 1)\n",
    "print(c.reshape(1, -1))        # row vector; note the double brackets"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "83f3c74d2a06d157",
   "metadata": {},
   "source": [
    "# COMP9444 Refresher"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a976b698cc07ba7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T03:01:05.245803Z",
     "start_time": "2024-09-23T03:01:05.206547Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'numpy.ndarray'>\n",
      "(3,)\n",
      "(2, 3)\n",
      "[0. 0. 0. 0.]\n",
      "[[0. 0.]\n",
      " [0. 0.]]\n",
      "[[0. 0.]]\n",
      "[[7 7]\n",
      " [7 7]]\n",
      "[[1. 0.]\n",
      " [0. 1.]]\n",
      "[[0.09309175 0.59335963]\n",
      " [0.61022966 0.57365187]]\n"
     ]
    }
   ],
   "source": [
    "a = np.array([1, 2, 3])\n",
    "print(type(a))  # <class 'numpy.ndarray'>\n",
    "print(a.shape)  # (3,) -- I guessed (1, 3), but a flat list gives a rank 1 array\n",
    "b = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "print(b.shape)  # (2, 3) -- I guessed (3, 2); shape is (rows, cols)\n",
    "\n",
    "a = np.zeros(4)\n",
    "print(a)  # [0. 0. 0. 0.] -- not array(0,0,0,0) as I guessed\n",
    "a = np.zeros((2, 2))\n",
    "print(a)\n",
    "b = np.zeros((1, 2))\n",
    "print(b)  # [[0. 0.]] -- I guessed [0,0]; a (1, 2) shape keeps both brackets\n",
    "c = np.full((2, 2), 7)\n",
    "print(c)\n",
    "d = np.eye(2)\n",
    "print(d)\n",
    "\n",
    "e = np.random.random((2, 2))  # unseeded, so these values change on every run\n",
    "print(e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "9463bf09d8552f80",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T03:58:34.026569Z",
     "start_time": "2024-09-23T03:58:34.022294Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3, 4)\n",
      "2\n",
      "77\n"
     ]
    }
   ],
   "source": [
    "# Create the following rank 2 array with shape (3, 4)\n",
    "# [[ 1  2  3  4]\n",
    "#  [ 5  6  7  8]\n",
    "#  [ 9 10 11 12]]\n",
    "\n",
    "a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])\n",
    "print(a.shape)  # (3, 4) -- (rows, cols)\n",
    "\n",
    "# want [[2, 3], [6, 7]]: rows 0-1, columns 1-2\n",
    "b = a[:2, 1:3]\n",
    "print(a[0, 1])  # 2\n",
    "b[0, 0] = 77    # this slice is the same data as a ...\n",
    "print(a[0, 1])  # ... so a[0, 1] is now 77"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18860e15ee58f3b7",
   "metadata": {},
   "source": [
    "You can also mix integer indexing with slice indexing. However, doing so will yield an array of lower rank than the original array."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "e3aa33c104e7f03e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:03:56.739941Z",
     "start_time": "2024-09-23T04:03:56.733783Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[5 6 7 8] (4,)\n",
      "[[5 6 7 8]] (1, 4)\n",
      "[ 2  6 10] (3,)\n",
      "[[ 2]\n",
      " [ 6]\n",
      " [10]] (3, 1)\n"
     ]
    }
   ],
   "source": [
    "a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])\n",
    "row_r1 = a[1, :]    # integer index drops the axis -> rank 1\n",
    "row_r2 = a[1:2, :]  # slice keeps the axis -> rank 2\n",
    "print(row_r1, row_r1.shape)\n",
    "print(row_r2, row_r2.shape)\n",
    "\n",
    "# we can make the same distinction when accessing columns of an array\n",
    "col_r1 = a[:, 1]\n",
    "col_r2 = a[:, 1:2]\n",
    "print(col_r1, col_r1.shape)\n",
    "print(col_r2, col_r2.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59629144a13f273e",
   "metadata": {},
   "source": [
    "I personally find indexing quite confusing; it has always been a problem for me.\n",
    "Study: when you index into NumPy arrays using slicing, the resulting array view will ALWAYS be a subarray of the original array.\n",
    "This is not the case when you use *integer array indexing*, which can pick out arbitrary elements."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "1d3997bab8e00090",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:14:00.022731Z",
     "start_time": "2024-09-23T04:14:00.011018Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3, 2)\n",
      "[1 4 5]\n",
      "[1 4 5]\n",
      "[2 2]\n",
      "[2 2]\n"
     ]
    }
   ],
   "source": [
    "a = np.array([[1, 2], [3, 4], [5, 6]])\n",
    "print(a.shape)\n",
    "# integer array indexing: row indices are paired up with column indices\n",
    "print(a[[0, 1, 2], [0, 1, 0]])\n",
    "# which is equivalent to:\n",
    "print(np.array([a[0, 0], a[1, 1], a[2, 0]]))\n",
    "\n",
    "# the same element can be picked more than once; to get [2 2] use either of:\n",
    "print(a[[0, 0], [1, 1]])\n",
    "print(np.array([a[0, 1], a[0, 1]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "2271b6f064543c77",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:19:37.009173Z",
     "start_time": "2024-09-23T04:19:36.990691Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1  2  3]\n",
      " [ 4  5  6]\n",
      " [ 7  8  9]\n",
      " [10 11 12]]\n",
      "[ 1  6  7 11]\n",
      "[0 1 2 3]\n",
      "[[11  2  3]\n",
      " [ 4  5 16]\n",
      " [17  8  9]\n",
      " [10 21 12]]\n"
     ]
    }
   ],
   "source": [
    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])\n",
    "print(a)\n",
    "b = np.array([0, 2, 0, 1])  # one column index per row of a\n",
    "\n",
    "# select one element from each row; same as a[[0, 1, 2, 3], [0, 2, 0, 1]]\n",
    "print(a[np.arange(4), b])\n",
    "print(np.arange(4))\n",
    "\n",
    "# we can even mutate those same elements in place\n",
    "a[np.arange(4), b] += 10\n",
    "print(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "af10d47889b2716a",
   "metadata": {},
   "source": [
    "We can also do **boolean array indexing**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "3dab5117b7e3e7f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:24:27.224053Z",
     "start_time": "2024-09-23T04:24:27.220032Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[False False]\n",
      " [ True  True]\n",
      " [ True  True]]\n",
      "[3 4 5 6]\n",
      "[3 4 5 6]\n"
     ]
    }
   ],
   "source": [
    "# this type of indexing is used to select the elements that satisfy some condition\n",
    "\n",
    "a = np.array([[1, 2], [3, 4], [5, 6]])\n",
    "bool_idx = (a > 2)  # boolean array with the same shape as a\n",
    "\n",
    "print(bool_idx)\n",
    "print(a[bool_idx])  # rank 1 array of the values where bool_idx is True\n",
    "\n",
    "print(a[a > 2])  # the same thing compressed into 1 statement"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d90e4fc7f5c43edd",
   "metadata": {},
   "source": [
    "# Datatypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "b82739a6cc1094a4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:29:33.540490Z",
     "start_time": "2024-09-23T04:29:33.536518Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 2)\n",
      "[[1 2]]\n",
      "(2,)\n",
      "[1 2]\n",
      "int64\n",
      "<class 'numpy.ndarray'>\n",
      "float64\n",
      "int64\n"
     ]
    }
   ],
   "source": [
    "x = np.array([[1, 2]])\n",
    "print(x.shape)  # (1, 2)\n",
    "print(x)\n",
    "\n",
    "# but things look different without the outer brackets: rank 1, not a (1, 2) row\n",
    "x = np.array([1, 2])\n",
    "print(x.shape)  # (2,)\n",
    "print(x)\n",
    "\n",
    "print(x.dtype)  # dtype is inferred from the values (int64 in the recorded run)\n",
    "print(type(x))\n",
    "\n",
    "x = np.array([1.0, 2.0])\n",
    "print(x.dtype)\n",
    "\n",
    "x = np.array([1, 2], dtype=np.int64)  # or force a dtype explicitly\n",
    "print(x.dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc1e5b68e8ab76b6",
   "metadata": {},
   "source": [
    "# Array Math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cebd389719f11fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([[1, 2], [3, 4]], dtype=np.float64)\n",
    "y = np.array([[5, 6], [7, 8]], dtype=np.float64)\n",
    "\n",
    "# Elementwise sum; both produce the array\n",
    "# [[ 6.0  8.0]\n",
    "#  [10.0 12.0]]\n",
    "print(x + y)\n",
    "print(np.add(x, y))\n",
    "\n",
    "# Elementwise difference; both produce the array\n",
    "# [[-4.0 -4.0]\n",
    "#  [-4.0 -4.0]]\n",
    "print(x - y)\n",
    "print(np.subtract(x, y))\n",
    "\n",
    "# Elementwise product; both produce the array\n",
    "# [[ 5.0 12.0]\n",
    "#  [21.0 32.0]]\n",
    "print(x * y)\n",
    "print(np.multiply(x, y))\n",
    "\n",
    "# Elementwise division; both produce the array\n",
    "# [[ 0.2         0.33333333]\n",
    "#  [ 0.42857143  0.5       ]]\n",
    "print(x / y)\n",
    "print(np.divide(x, y))\n",
    "\n",
    "# Elementwise square root; produces the array\n",
    "# [[ 1.          1.41421356]\n",
    "#  [ 1.73205081  2.        ]]\n",
    "print(np.sqrt(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bcf911f95960a3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([[1, 2], [3, 4]])\n",
    "y = np.array([[5, 6], [7, 8]])\n",
    "\n",
    "v = np.array([9, 10])\n",
    "w = np.array([11, 12])\n",
    "\n",
    "# Inner product of vectors; both produce 219\n",
    "print(v.dot(w))\n",
    "print(np.dot(v, w))\n",
    "\n",
    "# Matrix / vector product; both produce the rank 1 array [29 67]\n",
    "print(x.dot(v))\n",
    "print(np.dot(x, v))\n",
    "\n",
    "# Matrix / matrix product; both produce the rank 2 array\n",
    "# [[19 22]\n",
    "#  [43 50]]\n",
    "print(x.dot(y))\n",
    "print(np.dot(x, y))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e88d5a30bc0cf395",
   "metadata": {},
   "source": [
    "The above 2 snippets are from Alan Blair. I understand them, so I don't really want to retype them.\n",
    "But one thing I did not know was the equivalence of `x.dot(v)` and `np.dot(x, v)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "3614859160eeb5d3",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:35:28.148897Z",
     "start_time": "2024-09-23T04:35:28.139896Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2]\n",
      " [3 4]]\n",
      "10\n",
      "10\n",
      "[4 6]\n",
      "[3 7]\n"
     ]
    }
   ],
   "source": [
    "# the sum function\n",
    "\n",
    "x = np.array([[1, 2], [3, 4]])\n",
    "print(x)\n",
    "print(np.sum(x))\n",
    "print(x.sum())        # interesting, this and the line above are equivalent\n",
    "print(x.sum(axis=0))  # column sums [4 6]; also rank 1\n",
    "print(x.sum(axis=1))  # row sums [3 7]; interesting, this is still rank 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "f7a01ba28f44a249",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:36:21.510388Z",
     "start_time": "2024-09-23T04:36:21.507443Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 2 3]\n",
      "[1 2 3]\n"
     ]
    }
   ],
   "source": [
    "# Note that taking the transpose of a rank 1 array does nothing:\n",
    "v = np.array([1, 2, 3])\n",
    "print(v)    # Prints \"[1 2 3]\"\n",
    "print(v.T)  # Prints \"[1 2 3]\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0a6f7ad2c3d0226",
   "metadata": {},
   "source": [
    "# Broadcasting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "f59b00a924514a5a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-23T04:59:17.831868Z",
     "start_time": "2024-09-23T04:59:17.826459Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 2  2  4]\n",
      " [ 5  5  7]\n",
      " [ 8  8 10]\n",
      " [11 11 13]]\n",
      "[[ 2  2  4]\n",
      " [ 5  5  7]\n",
      " [ 8  8 10]\n",
      " [11 11 13]]\n",
      "[[ 2  2  4]\n",
      " [ 5  5  7]\n",
      " [ 8  8 10]\n",
      " [11 11 13]]\n"
     ]
    }
   ],
   "source": [
    "x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])\n",
    "v = np.array([1, 0, 1])\n",
    "y = np.empty_like(x)  # uninitialised matrix with the same shape as x\n",
    "\n",
    "# add v to each row of x, one row at a time.\n",
    "# index with [i, :] (row i only), not [i:] (row i and every row after it)\n",
    "for i in range(4):\n",
    "    y[i, :] = x[i, :] + v\n",
    "\n",
    "print(y)\n",
    "\n",
    "# we then realise this is equivalent to stacking v's and computing explicit summation:\n",
    "vv = np.tile(v, (4, 1))\n",
    "y = x + vv\n",
    "print(y)\n",
    "\n",
    "# or finally we could just leverage broadcasting:\n",
    "y = x + v\n",
    "print(y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}