diff --git a/examples/BabelStream/functor/babel_stream.py b/examples/BabelStream/functor/babel_stream.py
index 8041e4ee..4109d709 100644
--- a/examples/BabelStream/functor/babel_stream.py
+++ b/examples/BabelStream/functor/babel_stream.py
@@ -43,7 +43,7 @@ def dot(self, index: int, acc: pk.Acc[float]):
         acc += self.a[index] * self.b[index]
 
 
-if __name__ == "__main__":
+def run() -> None:
     array_size: int = 2**25 # 100000
     startA: float = 0.1
     startB: float = 0.2
@@ -92,7 +92,7 @@ def dot(self, index: int, acc: pk.Acc[float]):
         timings[4].append(timer.seconds())
         timer.reset()
 
-    goldA = startA 
+    goldA = startA
     goldB = startB
     goldC = startC
 
@@ -108,9 +108,9 @@ def dot(self, index: int, acc: pk.Acc[float]):
     errB /= len(w.b)
     errC = reduce(lambda s, val: s + abs(val - goldC), w.c)
     errC /= len(w.c)
-    
-    # epsi = sys.float_info.epsilon * 100 
-    epsi = 1e-8 
+
+    # epsi = sys.float_info.epsilon * 100
+    epsi = 1e-8
     if (errA > epsi):
         print(f"Validation failed on a[]. Average error {errA}")
     if (errB > epsi):
@@ -143,3 +143,6 @@ def dot(self, index: int, acc: pk.Acc[float]):
     # bandwidth = 1.0e-9 * (total_bytes / runtime)
     # print(f"Runtime (seconds): {runtime}")
     # print(f"Bandwidth (GB/s): {bandwidth}")
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/BabelStream/standalone/babel_stream.py b/examples/BabelStream/standalone/babel_stream.py
index 13c96e3a..9ba6d5a1 100644
--- a/examples/BabelStream/standalone/babel_stream.py
+++ b/examples/BabelStream/standalone/babel_stream.py
@@ -31,7 +31,7 @@ def dot(index, acc, a_view, b_view):
     acc += a_view[index] * b_view[index]
 
 
-if __name__ == "__main__":
+def run() -> None:
     array_size: int = 2**25 # 100000
     startA: float = 0.1
     startB: float = 0.2
@@ -85,7 +85,7 @@ def dot(index, acc, a_view, b_view):
         timings[4].append(timer.seconds())
         timer.reset()
 
-    goldA = startA 
+    goldA = startA
     goldB = startB
     goldC = startC
 
@@ -101,9 +101,9 @@ def dot(index, acc, a_view, b_view):
     errB /= len(b)
     errC = reduce(lambda s, val: s + abs(val - goldC), c)
     errC /= len(c)
-    
-    # epsi = sys.float_info.epsilon * 100 
-    epsi = 1e-8 
+
+    # epsi = sys.float_info.epsilon * 100
+    epsi = 1e-8
     if (errA > epsi):
         print(f"Validation failed on a[]. Average error {errA}")
     if (errB > epsi):
@@ -136,3 +136,6 @@ def dot(index, acc, a_view, b_view):
     # bandwidth = 1.0e-9 * (total_bytes / runtime)
     # print(f"Runtime (seconds): {runtime}")
     # print(f"Bandwidth (GB/s): {bandwidth}")
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/BabelStream/workload/babel_stream.py b/examples/BabelStream/workload/babel_stream.py
index 97ce320c..30631468 100644
--- a/examples/BabelStream/workload/babel_stream.py
+++ b/examples/BabelStream/workload/babel_stream.py
@@ -6,7 +6,7 @@
 
 @pk.workload
 class KokkosStream:
-    def __init__(self, ARRAY_SIZE: int, initA: float, initB: float, initC: float, 
+    def __init__(self, ARRAY_SIZE: int, initA: float, initB: float, initC: float,
             scalar: float, num_times: int):
         self.array_size: int = ARRAY_SIZE
 
@@ -18,7 +18,7 @@ def __init__(self, ARRAY_SIZE: int, initA: float, initB: float, initC: float,
         self.initB: pk.double = initB
         self.initC: pk.double = initC
         self.scalar: pk.double = scalar
-        self.num_times: int = num_times 
+        self.num_times: int = num_times
         self.sum: pk.double = 0
 
         self.runtime: float = 0
@@ -48,38 +48,38 @@ def run(self):
 
         self.runtime = timer.seconds()
 
-    # @pk.callback
-    # def results(self):
-    #     goldA = self.initA
-    #     goldB = self.initB
-    #     goldC = self.initC
-
-    #     for i in range(self.num_times):
-    #         goldC = goldA
-    #         goldB = self.scalar * goldC
-    #         goldC = goldA + goldB
-    #         goldA = goldB + self.scalar * goldC
-
-    #     errA = reduce(lambda s, val: s + abs(val - goldA), self.a)
-    #     errA /= len(self.a)
-    #     errB = reduce(lambda s, val: s + abs(val - goldB), self.b)
-    #     errB /= len(self.b)
-    #     errC = reduce(lambda s, val: s + abs(val - goldC), self.c)
-    #     errC /= len(self.c)
-        
-    #     # epsi = sys.float_info.epsilon * 100 
-    #     epsi = 1e-8 
-    #     if (errA > epsi):
-    #         print(f"Validation failed on a[]. Average error {errA}")
-    #     if (errB > epsi):
-    #         print(f"Validation failed on b[]. Average error {errB}")
-    #     if (errC > epsi):
-    #         print(f"Validation failed on c[]. Average error {errC}")
-
-    #     goldSum = goldA * goldB * self.array_size
-    #     errSum = self.sum - goldSum
-    #     if (abs(errSum) > 1e-8):
-    #         print(f"Validation failed on sum. Error {errSum}")
+    @pk.callback
+    def results(self):
+        # Check a, b, c and self.sum against scalar reference values; print one message per failure.
+        goldA = self.initA
+        goldB = self.initB
+        goldC = self.initC
+        for i in range(self.num_times):
+            goldC = goldA
+            goldB = self.scalar * goldC
+            goldC = goldA + goldB
+            goldA = goldB + self.scalar * goldC
+        # Seed each reduce with 0.0: without an initial value the raw first element becomes the accumulator.
+        errA = reduce(lambda s, val: s + abs(val - goldA), self.a, 0.0)
+        errA /= len(self.a)
+        errB = reduce(lambda s, val: s + abs(val - goldB), self.b, 0.0)
+        errB /= len(self.b)
+        errC = reduce(lambda s, val: s + abs(val - goldC), self.c, 0.0)
+        errC /= len(self.c)
+
+        # epsi = sys.float_info.epsilon * 100
+        epsi = 1e-8  # absolute tolerance on the mean per-element error
+        if (errA > epsi):
+            print(f"Validation failed on a[]. Average error {errA}")
+        if (errB > epsi):
+            print(f"Validation failed on b[]. Average error {errB}")
+        if (errC > epsi):
+            print(f"Validation failed on c[]. Average error {errC}")
+        # Expected self.sum if it holds sum(a[i] * b[i]) -- presumably set by run(); TODO confirm.
+        goldSum = goldA * goldB * self.array_size
+        errSum = self.sum - goldSum
+        if (abs(errSum) > 1e-8):
+            print(f"Validation failed on sum. Error {errSum}")
 
     #     total_bytes = 3 * sys.getsizeof(0.0) * self.array_size * num_times;
     #     bandwidth = 1.0e-9 * (total_bytes / self.runtime)
@@ -114,7 +114,7 @@ def dot(self, index: int, acc: pk.Acc[float]):
         acc += self.a[index] * self.b[index]
 
 
-if __name__ == "__main__":
+def run() -> None:
     array_size: int = 2**25 # 100000
     startA: float = 0.1
     startB: float = 0.2
@@ -138,3 +138,6 @@ def dot(self, index: int, acc: pk.Acc[float]):
 
     pk.set_default_space(space)
     pk.execute(space, KokkosStream(array_size, startA, startB, startC, startScalar, num_times))
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/ParRes/workload/nstream.py b/examples/ParRes/workload/nstream.py
index ef6ce4e1..18f62d9e 100644
--- a/examples/ParRes/workload/nstream.py
+++ b/examples/ParRes/workload/nstream.py
@@ -16,7 +16,7 @@ def __init__(self, iterations, length, offset):
         self.scalar: float = 3
         self.asum: float = 0
 
-        self.nstream_time: float = 0 
+        self.nstream_time: float = 0
 
     @pk.main
     def run(self):
@@ -66,7 +66,7 @@ def init(self, i: int):
         self.B[i] = 2
         self.C[i] = 2
 
-if __name__ == "__main__":
+def run() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument('iterations', type=int)
     parser.add_argument('length', type=int)
@@ -100,3 +100,5 @@ def init(self, i: int):
     print("Offset               = " , offset)
     pk.execute(pk.ExecutionSpace.Default, main(iterations, length, offset))
 
+if __name__ == "__main__":
+    run()
diff --git a/examples/ParRes/workload/stencil.py b/examples/ParRes/workload/stencil.py
index a92c4424..0bfd33bc 100644
--- a/examples/ParRes/workload/stencil.py
+++ b/examples/ParRes/workload/stencil.py
@@ -18,14 +18,14 @@ def __init__(self, iterations, n, tile_size, star, radius):
         self.out: pk.View2D[pk.double] = pk.View([self.n, self.n], pk.double, layout=pk.Layout.LayoutRight)
         self.norm: float = 0
 
-        self.stencil_time: float = 0 
+        self.stencil_time: float = 0
 
     @pk.main
     def run(self):
         t: int = tile_size
         r: int = radius
 
-        pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]), 
+        pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]),
             self.init)
         pk.fence()
 
@@ -34,7 +34,7 @@ def run(self):
         for i in range(iterations):
             if (i == 1):
                 pk.fence()
-            
+
             if r == 1:
                 # star1 stencil
                 pk.parallel_for("stencil", pk.MDRangePolicy([r,r], [n-r, n-r], [t, t]), self.star1)
@@ -45,8 +45,8 @@ def run(self):
                 # star3 stencil
                 pk.parallel_for("stencil", pk.MDRangePolicy([r,r], [n-r, n-r], [t, t]), self.star3)
 
-            
-            pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]), 
+
+            pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]),
                 self.increment)
 
         pk.fence()
@@ -55,7 +55,7 @@ def run(self):
         active_points: int = (n-2*r)*(n-2*r)
 
         # verify correctness
-        self.norm = pk.parallel_reduce(pk.MDRangePolicy([r, r], [n-r, n-r], [t, t]), 
+        self.norm = pk.parallel_reduce(pk.MDRangePolicy([r, r], [n-r, n-r], [t, t]),
                 self.norm_reduce)
         pk.fence()
         self.norm /= active_points
@@ -78,7 +78,7 @@ def increment(self, i: int, j: int):
 
     @pk.workunit
     def norm_reduce(self, i: int, j: int, acc: pk.Acc[pk.double]):
-        acc += abs(self.out[i][j]) 
+        acc += abs(self.out[i][j])
 
     # @pk.callback
     # def print_result(self):
@@ -121,7 +121,7 @@ def star3(self, i: int, j: int):
             +self.inp[i][j+2] * 0.08333333333333333 \
             +self.inp[i][j+3] * 0.05555555555555555
 
-if __name__ == "__main__":
+def run() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument('iterations', type=int)
     parser.add_argument('n', type=int)
@@ -169,9 +169,11 @@ def star3(self, i: int, j: int):
 
     n = 2 ** n
     print("Number of iterations = ", iterations)
-    print("Grid size            = ", n) 
+    print("Grid size            = ", n)
     print("Tile size            = ", tile_size)
     print("Type of stencil      = ", "star" if star else "grid")
     print("Radius of stencil    = ", radius)
     pk.execute(pk.ExecutionSpace.Default, main(iterations, n, tile_size, star, radius))
 
+if __name__ == "__main__":
+    run()
diff --git a/examples/ParRes/workload/transpose.py b/examples/ParRes/workload/transpose.py
index b1e74646..7b57edca 100644
--- a/examples/ParRes/workload/transpose.py
+++ b/examples/ParRes/workload/transpose.py
@@ -11,19 +11,19 @@ def __init__(self, iterations, order, tile_size, permute):
         self.iterations: int = iterations
         self.order: int = order
         self.tile_size: int = tile_size
-        self.permute: int = permute 
+        self.permute: int = permute
 
         self.A: pk.View2D[pk.double] = pk.View([self.order, self.order], pk.double, layout=pk.LayoutRight)
         self.B: pk.View2D[pk.double] = pk.View([self.order, self.order], pk.double, layout=pk.LayoutRight)
 
         self.abserr: float = 0
-        self.transpose_time: float = 0 
+        self.transpose_time: float = 0
         self.addit: float = (self.iterations) * (0.5 * (self.iterations - 1))
 
     @pk.main
     def run(self):
         pk.parallel_for(
-            pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]), self.init) 
+            pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]), self.init)
         pk.fence()
 
         timer = pk.Timer()
@@ -39,7 +39,7 @@ def run(self):
         self.transpose_time = timer.seconds()
 
         self.abserr = pk.parallel_reduce(
-            pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]), 
+            pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]),
             self.abserr_reduce)
 
         pk.printf("%f\n", self.abserr)
@@ -69,9 +69,9 @@ def abserr_reduce(self, i: int, j: int, acc: pk.Acc[pk.double]):
     def tranpose(self, i: int, j: int):
         self.B[i][j] += self.A[j][i]
         self.A[j][i] += 1
-        
 
-if __name__ == "__main__":
+
+def run() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument('iterations', type=int)
     parser.add_argument('order', type=int)
@@ -112,3 +112,6 @@ def tranpose(self, i: int, j: int):
     print("Tile size            = " , tile_size)
     print("Permute loops        = " , "yes" if permute else "no")
     pk.execute(pk.ExecutionSpace.Default, main(iterations, order, tile_size, permute))
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos-benchmarks/functor/bytes_and_flops.py b/examples/kokkos-benchmarks/functor/bytes_and_flops.py
index b7defa76..7a982e2a 100644
--- a/examples/kokkos-benchmarks/functor/bytes_and_flops.py
+++ b/examples/kokkos-benchmarks/functor/bytes_and_flops.py
@@ -26,7 +26,7 @@ def benchmark(self, team: pk.TeamMember):
         n: int = team.league_rank()
         for r in range(self.R):
             def team_for(i: int):
-                a1: pk.double = self.A[n][i][0] 
+                a1: pk.double = self.A[n][i][0]
                 b: pk.double = self.B[n][i][0]
                 a2: pk.double = a1 * 1.3
                 a3: pk.double = a2 * 1.1
@@ -51,13 +51,13 @@ def team_for(i: int):
 
             pk.parallel_for(pk.TeamThreadRange(team, self.K), team_for)
 
-if __name__ == "__main__":
+def run() -> None:
     # example args
-    # Bandwidth Bound : 2 100000 1024 1 1 1 8 256 0 
-    # Cache Bound     : 2 100000 1024 64 1 1 8 512 0 
-    # Compute Bound   : 2 100000 1024 1 1 8 64 256 0 
-    # Load Slots Used : 2 20000 256 32 16 8 1 256 0 
-    # Inefficient Load: 2 20000 256 32 2 8 1 256 0 
+    # Bandwidth Bound : 2 100000 1024 1 1 1 8 256 0
+    # Cache Bound     : 2 100000 1024 64 1 1 8 512 0
+    # Compute Bound   : 2 100000 1024 1 1 8 64 256 0
+    # Load Slots Used : 2 20000 256 32 16 8 1 256 0
+    # Inefficient Load: 2 20000 256 32 2 8 1 256 0
     # NOTE P and U are hard coded to double and 8 because otherwise we would have a lot of duplicates
     parser = argparse.ArgumentParser()
     parser.add_argument("P", type=int, help="Precision (1==float, 2==double)")
@@ -84,7 +84,7 @@ def team_for(i: int):
         exit(1)
     if args.S != 0:
         print("S must be 0 (shared scratch memory not supported)")
-        exit(1) 
+        exit(1)
 
     space = pk.ExecutionSpace.OpenMP
     if args.execution_space:
@@ -98,7 +98,7 @@ def team_for(i: int):
     T = args.T
     S = args.S
     scalar_size = 8
-    
+
     pk.set_default_space(space)
 
     r = pk.TeamPolicy(N, T)
@@ -113,3 +113,7 @@ def team_for(i: int):
     print(f"NKRUFTS: {N} {K} {R} {U} {F} {T} {S} Time: {seconds} " +
             f"Bandwidth: {1.0 * num_bytes / seconds / (1024**3)} GiB/s GFlop/s: {1e-9 * flops / seconds}")
     print(w.C)
+
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos-benchmarks/functor/gather.py b/examples/kokkos-benchmarks/functor/gather.py
index a06664b9..e4e94ac7 100644
--- a/examples/kokkos-benchmarks/functor/gather.py
+++ b/examples/kokkos-benchmarks/functor/gather.py
@@ -23,7 +23,7 @@ def __init__(self, N: int, K: int, D: int, R: int, F: int):
 
         self.A.fill(1.5)
         self.B.fill(2.0)
-        
+
         #TODO use kokkos to init in parallel
         random.seed(12313)
         for i in range(N):
@@ -60,7 +60,7 @@ def benchmark(self, i: int):
         self.C[i] = c
 
 
-if __name__ == "__main__":
+def run() -> None:
     # example args 2 100000 32 512 1000 8 8
     # NOTE S and U are hard coded to double and 8 because otherwise we would have a lot of duplicates
     parser = argparse.ArgumentParser()
@@ -87,7 +87,7 @@ def benchmark(self, i: int):
     space = pk.ExecutionSpace.OpenMP
     if args.execution_space:
         space = pk.ExecutionSpace(args.execution_space)
-    
+
     pk.set_default_space(space)
 
     N = args.N
@@ -115,3 +115,5 @@ def benchmark(self, i: int):
     print(f"SNKDRUF: {scalar_size/4} {N} {K} {D} {R} {U} {F} Time: {seconds} " +
             f"Bandwidth: {1.0 * num_bytes / seconds / (1024**3)} GiB/s GFlop/s: {1e-9 * flops / seconds} GGather/s: {1e-9 * gather_ops / seconds}")
 
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos-tutorials/functor/subview.py b/examples/kokkos-tutorials/functor/subview.py
index afa29141..79dd1cb6 100644
--- a/examples/kokkos-tutorials/functor/subview.py
+++ b/examples/kokkos-tutorials/functor/subview.py
@@ -40,7 +40,7 @@ def yAx(self, j: int, acc: pk.Acc[float]):
         acc += self.y[j] * temp2
 
 
-if __name__ == "__main__":
+def run() -> None:
     values: Tuple[int, int, int, int, int, bool] = parse_args()
     N: int = values[0]
     M: int = values[1]
@@ -75,3 +75,6 @@ def yAx(self, j: int, acc: pk.Acc[float]):
 
     print(f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")
 
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos-tutorials/standalone/subview.py b/examples/kokkos-tutorials/standalone/subview.py
index a1dd4d76..a07e7035 100644
--- a/examples/kokkos-tutorials/standalone/subview.py
+++ b/examples/kokkos-tutorials/standalone/subview.py
@@ -15,7 +15,7 @@ def yAx(j, acc, cols, y_view, x_view, A_view):
     acc += y_view[j] * temp2
 
 
-if __name__ == "__main__":
+def run() -> None:
     values: Tuple[int, int, int, int, int, bool] = parse_args()
     N: int = values[0]
     M: int = values[1]
@@ -68,3 +68,6 @@ def yAx(j, acc, cols, y_view, x_view, A_view):
 
     print(f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")
 
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos-tutorials/workload/subview.py b/examples/kokkos-tutorials/workload/subview.py
index 9a60ebe5..66176d9b 100644
--- a/examples/kokkos-tutorials/workload/subview.py
+++ b/examples/kokkos-tutorials/workload/subview.py
@@ -63,7 +63,7 @@ def yAx(self, j: int, acc: pk.Acc[float]):
         acc += self.y[j] * temp2
 
 
-if __name__ == "__main__":
+def run() -> None:
     values: Tuple[int, int, int, int, int, bool] = parse_args()
     N: int = values[0]
     M: int = values[1]
@@ -80,3 +80,6 @@ def yAx(self, j: int, acc: pk.Acc[float]):
 
     print(f"Total size S = {N * M} N = {N} M = {M}")
     pk.execute(pk.get_default_space(), Workload(N, M, nrepeat, fill))
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos/scan_functor.py b/examples/kokkos/scan_functor.py
index 07d18877..ed362b17 100644
--- a/examples/kokkos/scan_functor.py
+++ b/examples/kokkos/scan_functor.py
@@ -15,8 +15,7 @@ def scan(self, i: int, acc: pk.Acc[pk.double], last_pass: bool):
         if last_pass:
             self.A[i] = acc
 
-
-if __name__ == "__main__":
+def run() -> None:
     N = 10
     w = Workload(N)
     p = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, N)
@@ -27,3 +26,6 @@ def scan(self, i: int, acc: pk.Acc[pk.double], last_pass: bool):
     timer_result = timer.seconds()
 
     print(f"{w.A} total={result} time({timer_result})")
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos/scan_standalone.py b/examples/kokkos/scan_standalone.py
index 276e5f0f..30a3b9fd 100644
--- a/examples/kokkos/scan_standalone.py
+++ b/examples/kokkos/scan_standalone.py
@@ -10,7 +10,7 @@ def scan(i, acc, last_pass, view):
     if last_pass:
         view[i] = acc
 
-if __name__ == "__main__":
+def run() -> None:
     N = 10
 
     A: pk.View1D[pk.int32] = pk.View([N], pk.int32)
@@ -22,3 +22,6 @@ def scan(i, acc, last_pass, view):
     timer_result = timer.seconds()
 
     print(f"{A} total={result} time({timer_result})")
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/kokkos/scan_workload.py b/examples/kokkos/scan_workload.py
index a331426b..3e4e1138 100644
--- a/examples/kokkos/scan_workload.py
+++ b/examples/kokkos/scan_workload.py
@@ -29,6 +29,8 @@ def scan(self, i: int, acc: pk.Acc[pk.double], last_pass: bool):
         if last_pass:
             self.A[i] = acc
 
+def run() -> None:  # module-level entry point; invoked by the __main__ guard below
+    pk.execute(pk.ExecutionSpace.OpenMP, Workload(10))
 
 if __name__ == "__main__":
-    pk.execute(pk.ExecutionSpace.OpenMP, Workload(10))
+    run()
diff --git a/examples/pykokkos/binsort.py b/examples/pykokkos/binsort.py
index f9ea59e4..2c2b6646 100644
--- a/examples/pykokkos/binsort.py
+++ b/examples/pykokkos/binsort.py
@@ -43,7 +43,7 @@ def results(self) -> None:
             print(f"{self.view[i]} ")
 
 
-def run():
+def run() -> None:
     workload = Workload(10)
     pk.execute(pk.ExecutionSpace.Default, workload)
     print(workload.view)
@@ -51,8 +51,5 @@ def run():
     print(workload.bin_offsets)
     print(workload.bin_count)
 
-
 if __name__ == "__main__":
-    pk.kokkos_manager.initialize()
     run()
-    pk.kokkos_manager.finalize()
diff --git a/examples/pykokkos/streams.py b/examples/pykokkos/streams.py
index 4fc29590..d437b308 100644
--- a/examples/pykokkos/streams.py
+++ b/examples/pykokkos/streams.py
@@ -15,7 +15,7 @@ def print_stream(i, x, id):
     elif x == 4:
         pk.printf("Stream 3 GPU %d\n", id)
 
-if __name__ == "__main__":
+def run() -> None:
     space = pk.Cuda
 
     # Create streams on GPU 0 (default GPU)
@@ -48,3 +48,7 @@ def print_stream(i, x, id):
         pk.parallel_for(pk.RangePolicy(instance2, 0, 2), print_stream, x=2, id=cp.cuda.runtime.getDevice())
 
     print("Done launching kernels")
+
+
+if __name__ == "__main__":
+    run()
diff --git a/examples/pykokkos/subviews.py b/examples/pykokkos/subviews.py
index d3ec7333..77993c83 100644
--- a/examples/pykokkos/subviews.py
+++ b/examples/pykokkos/subviews.py
@@ -19,6 +19,8 @@ def work(self, i: int):
     def callback(self) -> None:
         print(self.view)
 
+def run() -> None:  # module-level entry point; invoked by the __main__ guard below
+    pk.execute(pk.ExecutionSpace.Default, Workload())
 
 if __name__ == "__main__":
-    pk.execute(pk.ExecutionSpace.Default, Workload())
+    run()