Skip to content

Commit

Permalink
Do framebuffer read bandwidth tests in Linux builds by default
Browse files Browse the repository at this point in the history
There is no need for the -DBENCH_FRAMEBUFFER hack anymore.
  • Loading branch information
ssvb committed Mar 30, 2016
1 parent 21b4a91 commit f45e61b
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 12 deletions.
69 changes: 69 additions & 0 deletions asm-opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,15 @@ static bench_info x86_sse2[] =
{ NULL, 0, NULL }
};

/*
 * Framebuffer-read benchmark table for x86 CPUs with SSE2; this is what
 * get_asm_framebuffer_benchmarks() returns when check_sse2_support()
 * reports success.
 *
 * Each row is { label, flag, routine }.  The flag is 1 on the rows whose
 * label says "2-pass" and 0 on the others.
 * NOTE(review): the bench_info field names are declared in asm-opt.h and
 * are not visible here -- confirm the exact meaning of the flag there.
 *
 * The final all-NULL row closes the table (presumably the end-of-list
 * sentinel that consumers stop on).
 */
static bench_info x86_sse2_fb[] =
{
{ "MOVSD copy (from framebuffer)", 0, aligned_block_copy_movsd },
{ "MOVSD 2-pass copy (from framebuffer)", 1, aligned_block_copy_movsd },
{ "SSE2 copy (from framebuffer)", 0, aligned_block_copy_sse2 },
{ "SSE2 2-pass copy (from framebuffer)", 1, aligned_block_copy_sse2 },
{ NULL, 0, NULL }
};

static int check_sse2_support(void)
{
#ifdef __amd64__
Expand Down Expand Up @@ -185,6 +194,14 @@ bench_info *get_asm_benchmarks(void)
return empty;
}

/*
 * Pick the framebuffer-read benchmark table for x86.
 *
 * Returns x86_sse2_fb when check_sse2_support() reports a non-zero
 * result, and the shared `empty` table otherwise.
 */
bench_info *get_asm_framebuffer_benchmarks(void)
{
    /* Without SSE2 there is nothing to run from the table below. */
    if (!check_sse2_support())
        return empty;

    return x86_sse2_fb;
}

#elif defined(__arm__)

#include "arm-neon.h"
Expand Down Expand Up @@ -271,6 +288,48 @@ bench_info *get_asm_benchmarks(void)
return arm_v4;
}

/*
 * Framebuffer-read benchmark table for ARM CPUs that report the "neon"
 * feature; returned by get_asm_framebuffer_benchmarks() in that case.
 * It lists the NEON routines (one read and several copies) followed by
 * the VFP and ARMv5TE copy routines.
 *
 * Each row is { label, flag, routine }.  The flag is 1 on the rows whose
 * label says "2-pass" and 0 on the others.
 * NOTE(review): the bench_info field names are declared in asm-opt.h and
 * are not visible here -- confirm the exact meaning of the flag there.
 *
 * The final all-NULL row closes the table (presumably the end-of-list
 * sentinel that consumers stop on).
 */
static bench_info arm_neon_fb[] =
{
{ "NEON read (from framebuffer)", 0, aligned_block_read_neon },
{ "NEON copy (from framebuffer)", 0, aligned_block_copy_neon },
{ "NEON 2-pass copy (from framebuffer)", 1, aligned_block_copy_neon },
{ "NEON unrolled copy (from framebuffer)", 0, aligned_block_copy_unrolled_neon },
{ "NEON 2-pass unrolled copy (from framebuffer)", 1, aligned_block_copy_unrolled_neon },
{ "VFP copy (from framebuffer)", 0, aligned_block_copy_vfp },
{ "VFP 2-pass copy (from framebuffer)", 1, aligned_block_copy_vfp },
{ "ARM copy (from framebuffer)", 0, aligned_block_copy_incr_armv5te },
{ "ARM 2-pass copy (from framebuffer)", 1, aligned_block_copy_incr_armv5te },
{ NULL, 0, NULL }
};

/*
 * Framebuffer-read benchmark table for ARM CPUs without "neon" that
 * report both "edsp" and "vfp"; returned by
 * get_asm_framebuffer_benchmarks() in that case.  It holds the VFP and
 * ARMv5TE copy routines only.
 *
 * Rows are { label, flag, routine }; the flag is 1 on the "2-pass" rows.
 * The final all-NULL row closes the table (presumably the end-of-list
 * sentinel).
 */
static bench_info arm_v5te_vfp_fb[] =
{
{ "VFP copy (from framebuffer)", 0, aligned_block_copy_vfp },
{ "VFP 2-pass copy (from framebuffer)", 1, aligned_block_copy_vfp },
{ "ARM copy (from framebuffer)", 0, aligned_block_copy_incr_armv5te },
{ "ARM 2-pass copy (from framebuffer)", 1, aligned_block_copy_incr_armv5te },
{ NULL, 0, NULL }
};

/*
 * Framebuffer-read benchmark table for ARM CPUs that report "edsp" but
 * neither "neon" nor "vfp"; returned by get_asm_framebuffer_benchmarks()
 * in that case.  It holds the ARMv5TE copy routine in its plain and
 * "2-pass" (flag = 1) variants.
 *
 * The final all-NULL row closes the table (presumably the end-of-list
 * sentinel).
 */
static bench_info arm_v5te_fb[] =
{
{ "ARM copy (from framebuffer)", 0, aligned_block_copy_incr_armv5te },
{ "ARM 2-pass copy (from framebuffer)", 1, aligned_block_copy_incr_armv5te },
{ NULL, 0, NULL }
};

/*
 * Pick the framebuffer-read benchmark table for ARM, trying the most
 * capable feature set first:
 *
 *   "neon"            -> arm_neon_fb
 *   "edsp" and "vfp"  -> arm_v5te_vfp_fb
 *   "edsp" only       -> arm_v5te_fb
 *   none of the above -> the shared `empty` table
 *
 * Features are probed through check_cpu_feature() in exactly that order.
 */
bench_info *get_asm_framebuffer_benchmarks(void)
{
    if (check_cpu_feature("neon"))
        return arm_neon_fb;

    if (check_cpu_feature("edsp") && check_cpu_feature("vfp"))
        return arm_v5te_vfp_fb;

    if (check_cpu_feature("edsp"))
        return arm_v5te_fb;

    /* No usable instruction set extension was reported. */
    return empty;
}

#elif defined(__mips__) && defined(_ABIO32)

#include "mips-32.h"
Expand Down Expand Up @@ -307,11 +366,21 @@ bench_info *get_asm_benchmarks(void)
}
}

/*
 * MIPS (o32) branch: no framebuffer-read benchmark table is defined for
 * this architecture, so the shared `empty` table is always returned.
 */
bench_info *get_asm_framebuffer_benchmarks(void)
{
return empty;
}

#else

/*
 * Fallback for architectures not matched by any of the preceding
 * #if/#elif branches: there are no assembly benchmarks to offer, so the
 * shared `empty` table is returned.
 */
bench_info *get_asm_benchmarks(void)
{
return empty;
}

/*
 * Fallback for architectures not matched by any of the preceding
 * #if/#elif branches: there are no framebuffer-read benchmarks to offer,
 * so the shared `empty` table is returned.
 */
bench_info *get_asm_framebuffer_benchmarks(void)
{
return empty;
}

#endif
1 change: 1 addition & 0 deletions asm-opt.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ typedef struct
} bench_info;

/*
 * Returns the table of architecture-specific assembly benchmarks chosen
 * for the running CPU.  The implementation is selected at compile time
 * per architecture in asm-opt.c.
 */
bench_info *get_asm_benchmarks(void);
/*
 * Returns the table of benchmarks intended for reading from a
 * framebuffer mapping.  On architectures or CPUs with no suitable
 * routines, a table with no runnable entry is returned rather than NULL
 * (NOTE(review): callers appear to test the first entry's function
 * pointer -- confirm against main.c).
 */
bench_info *get_asm_framebuffer_benchmarks(void);

#endif
47 changes: 35 additions & 12 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#define BLOCKSIZE 2048
#define MAXREPEATS 10

#ifdef BENCH_FRAMEBUFFER
#ifdef __linux__
static void *mmap_framebuffer(size_t *fbsize)
{
int fd;
Expand Down Expand Up @@ -475,7 +475,7 @@ int main(void)
int64_t *srcbuf, *dstbuf, *tmpbuf;
void *poolbuf;
size_t bufsize = SIZE;
#ifdef BENCH_FRAMEBUFFER
#ifdef __linux__
size_t fbsize;
int64_t *fbbuf = mmap_framebuffer(&fbsize);
fbsize = (fbsize / BLOCKSIZE) * BLOCKSIZE;
Expand All @@ -488,16 +488,6 @@ int main(void)
(void **)&dstbuf, bufsize,
(void **)&tmpbuf, BLOCKSIZE,
NULL, 0);
#ifdef BENCH_FRAMEBUFFER
if (fbbuf)
{
printf("(*) using framebuffer as the source buffer (size=%d)\n", (int)fbsize);
srcbuf = fbbuf;
if (bufsize > fbsize)
bufsize = fbsize;
}
#endif

printf("\n");
printf("==========================================================================\n");
printf("== Memory bandwidth tests ==\n");
Expand All @@ -521,6 +511,39 @@ int main(void)
printf(" ---\n");
bandwidth_bench(dstbuf, srcbuf, tmpbuf, bufsize, BLOCKSIZE, " ", bi);
}

#ifdef __linux__
bi = get_asm_framebuffer_benchmarks();
if (bi->f && fbbuf)
{
printf("\n");
printf("==========================================================================\n");
printf("== Framebuffer read tests. ==\n");
printf("== ==\n");
printf("== Many ARM devices use a part of the system memory as the framebuffer, ==\n");
printf("== typically mapped as uncached but with write-combining enabled. ==\n");
printf("== Writes to such framebuffers are quite fast, but reads are much ==\n");
printf("== slower and very sensitive to the alignment and the selection of ==\n");
printf("== CPU instructions which are used for accessing memory. ==\n");
printf("== ==\n");
printf("== Many x86 systems allocate the framebuffer in the GPU memory, ==\n");
printf("== accessible for the CPU via a relatively slow PCI-E bus. Moreover, ==\n");
printf("== PCI-E is asymmetric and handles reads a lot worse than writes. ==\n");
printf("== ==\n");
printf("== If uncached framebuffer reads are reasonably fast (at least 100 MB/s ==\n");
printf("== or preferably >300 MB/s), then using the shadow framebuffer layer ==\n");
printf("== is not necessary in Xorg DDX drivers, resulting in a nice overall ==\n");
printf("== performance improvement. For example, the xf86-video-fbturbo DDX ==\n");
printf("== uses this trick. ==\n");
printf("==========================================================================\n\n");

srcbuf = fbbuf;
if (bufsize > fbsize)
bufsize = fbsize;
bandwidth_bench(dstbuf, srcbuf, tmpbuf, bufsize, BLOCKSIZE, " ", bi);
}
#endif

free(poolbuf);

printf("\n");
Expand Down

0 comments on commit f45e61b

Please sign in to comment.