Skip to content

Commit

Permalink
Bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
davschneller committed Oct 27, 2024
1 parent aa95d58 commit 6193c09
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
9 changes: 6 additions & 3 deletions pspamm/codegen/architectures/arm_sve/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,15 @@ def move_register_block(self,
addr, comment = cursor.look(cursor_ptr, block_offset, cell_offset)
addr.disp += self.precision.size() * load_offset

+ offset = addr.disp - prev_disp
+
# count how many elements we have processed between last step and this step
- cont_counter = ((addr.disp - prev_disp) // mul_vl)
+ cont_counter = (offset // mul_vl)
larger_max_offset = cont_counter > max_mem_ins_mult
+ non_dividing_offset = offset % mul_vl != 0

- if larger_max_offset or (prev_overhead and addr.disp > 0):
- offset_comment = "disp > {}".format(max_offset) if larger_max_offset else "previous mem. instr. used p0"
+ if larger_max_offset or (prev_overhead and addr.disp > 0) or non_dividing_offset:
+ offset_comment = f"disp > {max_offset}" if larger_max_offset else ("disp % VL != 0" if non_dividing_offset else "previous mem. instr. used p0")
asm.add(add(addr.disp, additional_regs[0], offset_comment, addr.base))
prev_disp = addr.disp
addr.base = additional_regs[0]
Expand Down
1 change: 0 additions & 1 deletion pspamm/matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,6 @@ def kernelK(asm, Bki, A_ptr, B_ptr):
store_block.add(fma(regs[ir, x + ic], self.alpha_reg[1], A_regs_cut[ir, ic], "C = C + alpha * AB", None, pred=pred_m))
store_block.add(self.generator.move_register_block(self.C, C_ptr, Coords(), A_regs_cut, self.v_size, self.additional_regs, None, True, self.prefetching, self.ldc * x))
asm.add(store_block)
-
else:
asm.add(self.generator.move_register_block(self.C, C_ptr, Coords(), regs, self.v_size, self.additional_regs, None, True, self.prefetching))

Expand Down

0 comments on commit 6193c09

Please sign in to comment.