diff --git a/docs/notebooks/Linalg_operations_walkthrough.ipynb b/docs/notebooks/Linalg_operations_walkthrough.ipynb
index 745481d5..5ce9da8b 100644
--- a/docs/notebooks/Linalg_operations_walkthrough.ipynb
+++ b/docs/notebooks/Linalg_operations_walkthrough.ipynb
@@ -30,13 +30,13 @@
    "import cola\n",
    "\n",
    "N=2000\n",
-   "U = cola.lazify(torch.randn(N,5))\n",
-   "V = cola.lazify(torch.randn(N,5))\n",
-   "D = cola.diag(torch.linspace(1,100,N))\n",
+   "U = cola.lazify(torch.randn(N, 5))\n",
+   "V = cola.lazify(torch.randn(N, 5))\n",
+   "D = cola.diag(torch.linspace(1, 100, N))\n",
    "\n",
-   "A = U@U.T + D # a PSD matrix\n",
-   "B = U@V.T + D # a generic square matrix\n",
-   "C = V.T@D # a generic rectangular matrix\n",
+   "A = U @ U.T + D # a PSD matrix\n",
+   "B = U @ V.T + D # a generic square matrix\n",
+   "C = V.T @ D # a generic rectangular matrix\n",
    "\n",
    "x = torch.ones(N) # test vector x"
   ]
  },
  {
@@ -130,7 +130,7 @@
   "id": "6e3b54b3",
   "metadata": {},
   "source": [
-   "`cola.linalg.inverse(A)` represents the linear operator $A^{-1}$ that, when applied to a vector $b$, it solves the linear system $A x = b$ and then outputs $x$.\n",
+   "`cola.linalg.inv(A)` represents the linear operator $A^{-1}$ that, when applied to a vector $b$, it solves the linear system $A x = b$ and then outputs $x$.\n",
    "It does not, however, compute the inverse densely $A^{-1}$ and then multiply it by the vector $b$, and is exactly equivalent to calling `cola.linalg.solve` as shown below.\n"
   ]
  },
  {
@@ -150,8 +150,8 @@
   "outputs": [],
   "source": [
    "# these two are exactly equivalent in CoLA\n",
-   "y = cola.linalg.solve(A,x)\n",
-   "y = cola.linalg.inv(A)@x"
+   "y = cola.linalg.solve(A, x)\n",
+   "y = cola.linalg.inv(A) @ x"
   ]
  },
  {
@@ -261,7 +261,7 @@
    "    if method == 'iterative':\n",
    "        print(f\"Computed inverse in {Binv.info['iterations']} iters with error {Binv.info['errors'][-1]:.1e}\")\n",
    "\n",
-   "print(\"Actual residual error:\", torch.linalg.norm(B@y-x)/torch.linalg.norm(x))"
+   "print(\"Actual residual error:\", torch.linalg.norm(B @ y - x) / torch.linalg.norm(x))"
   ]
  },
  {
@@ -301,7 +301,7 @@
   "source": [
    "### SelfAdjoint\n",
    "\n",
-   "Whether CoLA uses the more efficient Lanczos process or more costly version using Arnoldi depends on whether or not the matrix is `SelfAdjoint` (which guarantees an orthogonal eigenbasis). `SelfAdjoint` is considered a superset of `PSD`."
+   "Whether CoLA uses the more efficient Lanczos process or more costly version using Arnoldi depends on whether or not the matrix is `SelfAdjoint` (which guarantees an orthogonal eigenbasis). `SelfAdjoint` is considered a superset of `PSD`."
   ]
  },
  {
@@ -354,7 +354,7 @@
    "for method in ['dense', 'iterative']:\n",
    "    S = cola.linalg.sqrt(A,tol=1e-4, max_iters=100, method=method)\n",
    "    print(f\"S with method={method} is of type {type(S)}\")\n",
-   "    print(\"error in sqrt:\", torch.linalg.norm(S@(S@x)-A@x)/torch.linalg.norm(A@x))"
+   "    print(\"error in sqrt:\", torch.linalg.norm(S @ (S @ x) - A @ x) / torch.linalg.norm(A @ x))"
   ]
  },
  {
@@ -390,8 +390,8 @@
    "expA = cola.linalg.exp(-A)\n",
    "logA = cola.linalg.log(A)\n",
    "Apow10 = cola.linalg.pow(A,10)\n",
-   "resolvent = cola.linalg.apply_unary(lambda x: 1/(x-1), A)\n",
-   "[print(op[:2,:2].to_dense()) for op in [expA, logA, Apow10, resolvent]];"
+   "resolvent = cola.linalg.apply_unary(lambda x: 1 / (x - 1), A)\n",
+   "[print(op[:2, :2].to_dense()) for op in [expA, logA, Apow10, resolvent]];"
   ]
  },
  {
@@ -423,8 +423,8 @@
    }
   ],
   "source": [
-   "ops = [cola.linalg.sqrt(B), cola.linalg.exp(-B), cola.linalg.pow(B,10)]\n",
-   "[print(op[:2,:2].to_dense()) for op in ops];"
+   "ops = [cola.linalg.sqrt(B), cola.linalg.exp(-B), cola.linalg.pow(B, 10)]\n",
+   "[print(op[:2, :2].to_dense()) for op in ops];"
   ]
  },
  {
@@ -483,7 +483,7 @@
   ],
   "source": [
    "diag = cola.linalg.diag(A)\n",
-   "off_diag = cola.linalg.diag(A,k=1)\n",
+   "off_diag = cola.linalg.diag(A, k=1)\n",
    "print(f\"diagonal: {diag}\")\n",
    "print(f\"off-diagonal: {off_diag}\")"
   ]
@@ -493,7 +493,7 @@
   "id": "d321cde0",
   "metadata": {},
   "source": [
-   "## log determinants\n",
+   "## Log Determinants\n",
    "\n",
    "In the dense case, we compute log determinants from the Cholesky or LU decompositions depending on whether the matrix is PSD or not, and this runs in time $O(n^3)$.\n",
    "\n",
@@ -523,8 +523,8 @@
    }
   ],
   "source": [
-   "print(\"Tr(log(A))\",cola.linalg.logdet(A, method='iterative', vtol=1e-1,tol=1e-4, max_iters=30, key=42))\n",
-   "print(\"SLQ:\", cola.linalg.logdet(A, method='iterative', tol=1e-3,max_iters=10))\n",
+   "print(\"Tr(log(A))\",cola.linalg.logdet(A, method='iterative', vtol=1e-1, tol=1e-4, max_iters=30, key=42))\n",
+   "print(\"SLQ:\", cola.linalg.logdet(A, method='iterative', tol=1e-3, max_iters=10))\n",
    "print(\"Dense:\", cola.linalg.logdet(A, method='dense'))"
   ]
  },
diff --git a/docs/notebooks/colabs/Linalg_operations_walkthrough.ipynb b/docs/notebooks/colabs/Linalg_operations_walkthrough.ipynb
index d0610605..f7b96af7 100644
--- a/docs/notebooks/colabs/Linalg_operations_walkthrough.ipynb
+++ b/docs/notebooks/colabs/Linalg_operations_walkthrough.ipynb
@@ -47,13 +47,13 @@
    "import cola\n",
    "\n",
    "N=2000\n",
-   "U = cola.lazify(torch.randn(N,5))\n",
-   "V = cola.lazify(torch.randn(N,5))\n",
-   "D = cola.diag(torch.linspace(1,100,N))\n",
+   "U = cola.lazify(torch.randn(N, 5))\n",
+   "V = cola.lazify(torch.randn(N, 5))\n",
+   "D = cola.diag(torch.linspace(1, 100, N))\n",
    "\n",
-   "A = U@U.T + D # a PSD matrix\n",
-   "B = U@V.T + D # a generic square matrix\n",
-   "C = V.T@D # a generic rectangular matrix\n",
+   "A = U @ U.T + D # a PSD matrix\n",
+   "B = U @ V.T + D # a generic square matrix\n",
+   "C = V.T @ D # a generic rectangular matrix\n",
    "\n",
    "x = torch.ones(N) # test vector x"
   ]
  },
  {
@@ -147,7 +147,7 @@
   "id": "6e3b54b3",
   "metadata": {},
   "source": [
-   "`cola.linalg.inverse(A)` represents the linear operator $A^{-1}$ that, when applied to a vector $b$, it solves the linear system $A x = b$ and then outputs $x$.\n",
+   "`cola.linalg.inv(A)` represents the linear operator $A^{-1}$ that, when applied to a vector $b$, it solves the linear system $A x = b$ and then outputs $x$.\n",
    "It does not, however, compute the inverse densely $A^{-1}$ and then multiply it by the vector $b$, and is exactly equivalent to calling `cola.linalg.solve` as shown below.\n"
   ]
  },
  {
@@ -167,8 +167,8 @@
   "outputs": [],
   "source": [
    "# these two are exactly equivalent in CoLA\n",
-   "y = cola.linalg.solve(A,x)\n",
-   "y = cola.linalg.inv(A)@x"
+   "y = cola.linalg.solve(A, x)\n",
+   "y = cola.linalg.inv(A) @ x"
   ]
  },
  {
@@ -278,7 +278,7 @@
    "    if method == 'iterative':\n",
    "        print(f\"Computed inverse in {Binv.info['iterations']} iters with error {Binv.info['errors'][-1]:.1e}\")\n",
    "\n",
-   "print(\"Actual residual error:\", torch.linalg.norm(B@y-x)/torch.linalg.norm(x))"
+   "print(\"Actual residual error:\", torch.linalg.norm(B @ y - x) / torch.linalg.norm(x))"
   ]
  },
  {
@@ -371,7 +371,7 @@
    "for method in ['dense', 'iterative']:\n",
    "    S = cola.linalg.sqrt(A,tol=1e-4, max_iters=100, method=method)\n",
    "    print(f\"S with method={method} is of type {type(S)}\")\n",
-   "    print(\"error in sqrt:\", torch.linalg.norm(S@(S@x)-A@x)/torch.linalg.norm(A@x))"
+   "    print(\"error in sqrt:\", torch.linalg.norm(S @ (S @ x) - A @ x) / torch.linalg.norm(A @ x))"
   ]
  },
  {
@@ -407,8 +407,8 @@
    "expA = cola.linalg.exp(-A)\n",
    "logA = cola.linalg.log(A)\n",
    "Apow10 = cola.linalg.pow(A,10)\n",
-   "resolvent = cola.linalg.apply_unary(lambda x: 1/(x-1), A)\n",
-   "[print(op[:2,:2].to_dense()) for op in [expA, logA, Apow10, resolvent]];"
+   "resolvent = cola.linalg.apply_unary(lambda x: 1 / (x - 1), A)\n",
+   "[print(op[:2, :2].to_dense()) for op in [expA, logA, Apow10, resolvent]];"
   ]
  },
  {
@@ -440,8 +440,8 @@
    }
   ],
   "source": [
-   "ops = [cola.linalg.sqrt(B), cola.linalg.exp(-B), cola.linalg.pow(B,10)]\n",
-   "[print(op[:2,:2].to_dense()) for op in ops];"
+   "ops = [cola.linalg.sqrt(B), cola.linalg.exp(-B), cola.linalg.pow(B, 10)]\n",
+   "[print(op[:2, :2].to_dense()) for op in ops];"
   ]
  },
  {
@@ -500,7 +500,7 @@
   ],
   "source": [
    "diag = cola.linalg.diag(A)\n",
-   "off_diag = cola.linalg.diag(A,k=1)\n",
+   "off_diag = cola.linalg.diag(A, k=1)\n",
    "print(f\"diagonal: {diag}\")\n",
    "print(f\"off-diagonal: {off_diag}\")"
   ]
@@ -510,7 +510,7 @@
   "id": "d321cde0",
   "metadata": {},
   "source": [
-   "## log determinants\n",
+   "## Log Determinants\n",
    "\n",
    "In the dense case, we compute log determinants from the Cholesky or LU decompositions depending on whether the matrix is PSD or not, and this runs in time $O(n^3)$.\n",
    "\n",
@@ -540,8 +540,8 @@
    }
   ],
   "source": [
-   "print(\"Tr(log(A))\",cola.linalg.logdet(A, method='iterative', vtol=1e-1,tol=1e-4, max_iters=30, key=42))\n",
-   "print(\"SLQ:\", cola.linalg.logdet(A, method='iterative', tol=1e-3,max_iters=10))\n",
+   "print(\"Tr(log(A))\",cola.linalg.logdet(A, method='iterative', vtol=1e-1, tol=1e-4, max_iters=30, key=42))\n",
+   "print(\"SLQ:\", cola.linalg.logdet(A, method='iterative', tol=1e-3, max_iters=10))\n",
    "print(\"Dense:\", cola.linalg.logdet(A, method='dense'))"
   ]
  },
@@ -685,9 +685,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/docs/notebooks/colabs/worksheet.ipynb b/docs/notebooks/colabs/worksheet.ipynb
index a614ac38..bd7343b9 100644
--- a/docs/notebooks/colabs/worksheet.ipynb
+++ b/docs/notebooks/colabs/worksheet.ipynb
@@ -246,7 +246,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-   "Having a PSD `LinearOperator` opens up the possiblity in CoLA to dispatch our favorite algorithm: CG. As we all know, CG has a couple of hyperparameters like the tolerance (set it to $10^{-10}$) and the max number of iterations (set it to 10K). _Hint_: Pass `method`, `tol` and `max_iters` to the function that you used in the previous exercise. To see what is going on under the hood, take a look at the [source code](https://github.com/wilson-labs/cola/blob/main/cola/linalg/inverse.py#L67)."
+   "Having a PSD `LinearOperator` opens up the possibility in CoLA to dispatch our favorite algorithm: CG. As we all know, CG has a couple of hyperparameters like the tolerance (set it to $10^{-10}$) and the max number of iterations (set it to 10K). _Hint_: Pass `method`, `tol` and `max_iters` to the function that you used in the previous exercise. To see what is going on under the hood, take a look at the [source code](https://github.com/wilson-labs/cola/blob/main/cola/linalg/inv.py#L80)."
   ]
  },
  {
diff --git a/docs/notebooks/worksheet.ipynb b/docs/notebooks/worksheet.ipynb
index d51a22ce..fdf7a7b2 100644
--- a/docs/notebooks/worksheet.ipynb
+++ b/docs/notebooks/worksheet.ipynb
@@ -229,7 +229,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-   "Having a PSD `LinearOperator` opens up the possibility in CoLA to dispatch our favorite algorithm: CG. As we all know, CG has a couple of hyperparameters like the tolerance (set it to $10^{-10}$) and the max number of iterations (set it to 10K). _Hint_: Pass `method`, `tol` and `max_iters` to the function that you used in the previous exercise. To see what is going on under the hood, take a look at the [source code](https://github.com/wilson-labs/cola/blob/main/cola/linalg/inverse.py#L67)."
+   "Having a PSD `LinearOperator` opens up the possibility in CoLA to dispatch our favorite algorithm: CG. As we all know, CG has a couple of hyperparameters like the tolerance (set it to $10^{-10}$) and the max number of iterations (set it to 10K). _Hint_: Pass `method`, `tol` and `max_iters` to the function that you used in the previous exercise. To see what is going on under the hood, take a look at the [source code](https://github.com/wilson-labs/cola/blob/main/cola/linalg/inv.py#L80)."
   ]
  },
  {