Skip to content

Commit

Permalink
Update predicates for k
Browse files Browse the repository at this point in the history
  • Loading branch information
davschneller committed Oct 30, 2024
1 parent 87d410e commit 9118704
Showing 1 changed file with 3 additions and 11 deletions.
14 changes: 3 additions & 11 deletions pspamm/codegen/architectures/arm_sve/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,22 +379,14 @@ def make_microkernel(self,
 divider = 1
 elem128 = 1
 vk = bk
-preg = 'p7/z'
-preg_last = 'p7/z'
 else:
 max_offs = 127
 divider = 16
 elem128 = 16 // self.get_precision().size()
 vk = -(bk // -elem128)

-#if isinstance(B, DenseCursor):
-# preg = 'p1/z' if self.has_bk_overhead else 'p7/z'
-# preg_last = 'p2/z' if self.has_k_overhead else preg
-#else:
-# preg = 'p7/z'
-# preg_last = 'p7/z'
-preg = 'p7/z'
-preg_last = 'p7/z'
+preg = self.pred_n_trues(elem128, elem128, 'z')
+preg_last = preg if bk % elem128 == 0 else self.pred_n_trues(bk % elem128, elem128, 'z')
 for Vmi in range(Vm):
 # set to all v_size predicates to true, we want to replicate a B element into a whole vector
 for bni in range(bn): # inside this n-block
Expand All @@ -404,7 +396,7 @@ def make_microkernel(self,
 if B.has_nonzero_cell(B_ptr, to_B_block, to_cell):
 B_cell_addr, B_comment = B.look(B_ptr, to_B_block, to_cell)
 if B_regs[bki_reg, bni] not in bs:
-p_zeroing = Register_ARM(AsmType.p64x8, preg_last) if bki_reg + 1 == vk else Register_ARM(AsmType.p64x8, preg)
+p_zeroing = preg_last if bki_reg + 1 == vk else preg

 # max_offs is the maximum allowed immediate offset when using ld1rd/ld1rw to broadcast a scalar value
 if B_cell_addr.disp > max_offs or B_cell_addr.disp % divider != 0:
Expand Down

0 comments on commit 9118704

Please sign in to comment.