Skip to content

Commit

Permalink
Merge pull request OpenMathLib#4560 from martin-frbg/issue4551-3
Browse files (browse the repository at this point in the history)
Add support for negative increments to the ?NRM2 kernels for RISC-V RVV targets
  • Loading branch information
martin-frbg authored Mar 13, 2024
2 parents 855bbdd + cf80bd8 commit dc0338a
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 9 deletions.
6 changes: 3 additions & 3 deletions kernel/riscv64/nrm2_rvv.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,7 +101,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;

if (n <= 0 || inc_x <= 0) return(0.0);
if (n <= 0 || inc_x == 0) return(0.0);
if(n == 1) return (ABS(x[0]));

unsigned int gvl = 0;
Expand All @@ -119,7 +119,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
unsigned int stride_x = inc_x * sizeof(FLOAT);
int idx = 0;

if( n >= gvl ) // don't pay overheads if we're not doing useful work
if( n >= gvl && inc_x > 0 ) // don't pay overheads if we're not doing useful work
{
for(i=0; i<n/gvl; i++){
v0 = VLSEV_FLOAT( &x[idx], stride_x, gvl );
Expand Down Expand Up @@ -190,7 +190,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
//finish any tail using scalar ops
i*=gvl*inc_x;
n*=inc_x;
while(i < n){
while(abs(i) < abs(n)){
if ( x[i] != 0.0 ){
FLOAT absxi = ABS( x[i] );
if ( scale < absxi ){
Expand Down
6 changes: 3 additions & 3 deletions kernel/riscv64/nrm2_vector.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,7 +104,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;

if (n <= 0 || inc_x <= 0) return(0.0);
if (n <= 0 || inc_x == 0) return(0.0);
if(n == 1) return (ABS(x[0]));

unsigned int gvl = 0;
Expand All @@ -122,7 +122,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
unsigned int stride_x = inc_x * sizeof(FLOAT);
int idx = 0;

if( n >= gvl ) // don't pay overheads if we're not doing useful work
if( n >= gvl && inc_x > 0) // don't pay overheads if we're not doing useful work
{
for(i=0; i<n/gvl; i++){
v0 = VLSEV_FLOAT( &x[idx], stride_x, gvl );
Expand Down Expand Up @@ -193,7 +193,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
//finish any tail using scalar ops
i*=gvl*inc_x;
n*=inc_x;
while(i < n){
while(abs(i)< abs(n)){
if ( x[i] != 0.0 ){
FLOAT absxi = ABS( x[i] );
if ( scale < absxi ){
Expand Down
2 changes: 1 addition & 1 deletion kernel/riscv64/znrm2_rvv.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -69,7 +69,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0, j=0;

if (n <= 0 || inc_x <= 0) return(0.0);
if (n <= 0 || inc_x == 0) return(0.0);

FLOAT_V_T vr, v0, v_zero;
unsigned int gvl = 0;
Expand Down
4 changes: 2 additions & 2 deletions kernel/riscv64/znrm2_vector.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,7 +96,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;

if (n <= 0 || inc_x <= 0) return(0.0);
if (n <= 0 || inc_x == 0) return(0.0);

FLOAT_V_T v_ssq, v_scale, v0, v1, v_zero;
unsigned int gvl = 0;
Expand Down Expand Up @@ -176,7 +176,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
}

i += inc_x*2;
}while(i<n);
}while(abs(i)<abs(n));
}

return(scale * sqrt(ssq));
Expand Down

0 comments on commit dc0338a

Please sign in to comment.