Skip to content

Commit

Permalink
Merge pull request #1 from fireice-uk/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Spudz76 authored Apr 17, 2018
2 parents 9158460 + 9df1d4d commit 70c3191
Show file tree
Hide file tree
Showing 23 changed files with 129 additions and 57 deletions.
30 changes: 22 additions & 8 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -675,11 +675,18 @@ std::vector<GpuContext> getAMDDevices(int index)
}

std::string devVendor(devVendorVec.data());
if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos)

bool isAMDDevice = devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos;
bool isNVIDIADevice = devVendor.find("NVIDIA Corporation") != std::string::npos || devVendor.find("NVIDIA") != std::string::npos;

std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
if((isAMDDevice && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
{
GpuContext ctx;
std::vector<char> devNameVec(1024);
size_t maxMem;
if( devVendor.find("NVIDIA Corporation") != std::string::npos)
ctx.isNVIDIA = true;

if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS)
{
Expand All @@ -699,6 +706,10 @@ std::vector<GpuContext> getAMDDevices(int index)
continue;
}

// the allocation for NVIDIA OpenCL is not limited to 1/4 of the GPU memory per allocation
if(ctx.isNVIDIA)
maxMem = ctx.freeMem;

if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
Expand Down Expand Up @@ -747,13 +758,15 @@ int getAMDPlatformIdx()

clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
std::string platformName(platformNameVec.data());
if( platformName.find("Advanced Micro Devices") != std::string::npos ||

bool isAMDOpenCL = platformName.find("Advanced Micro Devices") != std::string::npos ||
platformName.find("Apple") != std::string::npos ||
platformName.find("Mesa") != std::string::npos
)
platformName.find("Mesa") != std::string::npos;
bool isNVIDIADevice = platformName.find("NVIDIA Corporation") != std::string::npos || platformName.find("NVIDIA") != std::string::npos;
std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
if((isAMDOpenCL && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
{

printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str());
printer::inst()->print_msg(L0,"Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i , platformName.c_str());
if(platformName.find("Mesa") != std::string::npos)
mesaPlatform = i;
else
Expand Down Expand Up @@ -819,7 +832,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
std::vector<char> platformNameVec(infoSize);
clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
std::string platformName(platformNameVec.data());
if( platformName.find("Advanced Micro Devices") == std::string::npos)
if(xmrstak::params::inst().openCLVendor == "AMD" && platformName.find("Advanced Micro Devices") == std::string::npos)
{
printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
}
Expand Down Expand Up @@ -907,7 +920,8 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
{
size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize;
ctx[i].rawIntensity = reduced_intensity;
printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity));
const std::string backendName = xmrstak::params::inst().openCLVendor;
printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx[i].deviceIdx, int(reduced_intensity));
}

if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS)
Expand Down
1 change: 1 addition & 0 deletions xmrstak/backend/amd/amd_gpu/gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct GpuContext
size_t workSize;
int stridedIndex;
int memChunk;
bool isNVIDIA = false;
int compMode;

/*Output vars*/
Expand Down
23 changes: 20 additions & 3 deletions xmrstak/backend/amd/autoAdjust.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class autoAdjust
std::string conf;
for(auto& ctx : devVec)
{
size_t minFreeMem = 128u * byteToMiB;
/* 1000 is a magic selected limit, the reason is that more than 2GiB memory
* slows down the memory performance because of TLB cache misses
*/
Expand All @@ -112,12 +113,26 @@ class autoAdjust
*/
maxThreads = 2024u;
}

// NVIDIA optimizations
if(
ctx.isNVIDIA && (
ctx.name.find("P100") != std::string::npos ||
ctx.name.find("V100") != std::string::npos
)
)
{
// do not limit the number of threads
maxThreads = 40000u;
minFreeMem = 512u * byteToMiB;
}

// double all intensity limits for aeon
if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite)
maxThreads *= 2u;

// keep minFreeMem of memory free (128MiB by default; that value is randomly chosen)
size_t availableMem = ctx.freeMem - (128u * byteToMiB);
size_t availableMem = ctx.freeMem - minFreeMem;
// 224byte extra memory is used per thread for meta data
size_t perThread = hashMemSize + 224u;
size_t maxIntensity = availableMem / perThread;
Expand All @@ -138,7 +153,7 @@ class autoAdjust
// set 8 threads per block (this is a good value for most GPUs)
conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
" \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
" \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 2,\n"
" \"affine_to_cpu\" : false, \"strided_index\" : " + (ctx.isNVIDIA ? "0" : "1") + ", \"mem_chunk\" : 2,\n"
" \"comp_mode\" : true\n" +
" },\n";
}
Expand All @@ -151,7 +166,9 @@ class autoAdjust
configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex));
configTpl.replace("GPUCONFIG",conf);
configTpl.write(params::inst().configFileAMD);
printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", params::inst().configFileAMD.c_str());

const std::string backendName = xmrstak::params::inst().openCLVendor;
printer::inst()->print_msg(L0, "%s: GPU (OpenCL) configuration stored in file '%s'", backendName.c_str(), params::inst().configFileAMD.c_str());
}

std::vector<GpuContext> devVec;
Expand Down
6 changes: 4 additions & 2 deletions xmrstak/backend/amd/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,19 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
for (i = 0; i < n; i++)
{
jconf::inst()->GetThreadConfig(i, cfg);

const std::string backendName = xmrstak::params::inst().openCLVendor;

if(cfg.cpu_aff >= 0)
{
#if defined(__APPLE__)
printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory.");
#endif

printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff);
printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, affinity: %d.", backendName.c_str(), i, (int)cfg.cpu_aff);
}
else
printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i);
printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, no affinity.", backendName.c_str(), i);

minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg);
pvThreads->push_back(thd);
Expand Down
5 changes: 3 additions & 2 deletions xmrstak/backend/backendConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,12 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
#ifndef CONF_NO_OPENCL
if(params::inst().useAMD)
{
plugin amdplugin("AMD", "xmrstak_opencl_backend");
const std::string backendName = xmrstak::params::inst().openCLVendor;
plugin amdplugin(backendName, "xmrstak_opencl_backend");
std::vector<iBackend*>* amdThreads = amdplugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, environment::inst());
pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads));
if(amdThreads->size() == 0)
printer::inst()->print_msg(L0, "WARNING: backend AMD disabled.");
printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str());
}
#endif

Expand Down
16 changes: 8 additions & 8 deletions xmrstak/backend/cpu/crypto/c_blake256.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ void blake224_init(state *S) {
}

// datalen = number of bits
void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
void blake256_update(state *S, const uint8_t *data, uint32_t datalen) {
int left = S->buflen >> 3;
int fill = 64 - left;

Expand Down Expand Up @@ -155,7 +155,7 @@ void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
}

// datalen = number of bits
void blake224_update(state *S, const uint8_t *data, uint64_t datalen) {
void blake224_update(state *S, const uint8_t *data, uint32_t datalen) {
blake256_update(S, data, datalen);
}

Expand Down Expand Up @@ -206,15 +206,15 @@ void blake224_final(state *S, uint8_t *digest) {
}

// inlen = number of bytes
void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
void blake256_hash(uint8_t *out, const uint8_t *in, uint32_t inlen) {
state S;
blake256_init(&S);
blake256_update(&S, in, inlen * 8);
blake256_final(&S, out);
}

// inlen = number of bytes
void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
void blake224_hash(uint8_t *out, const uint8_t *in, uint32_t inlen) {
state S;
blake224_init(&S);
blake224_update(&S, in, inlen * 8);
Expand Down Expand Up @@ -282,13 +282,13 @@ void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
}

// datalen = number of bits
void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint32_t datalen) {
// update the inner state
blake256_update(&S->inner, data, datalen);
}

// datalen = number of bits
void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint32_t datalen) {
// update the inner state
blake224_update(&S->inner, data, datalen);
}
Expand All @@ -310,15 +310,15 @@ void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
}

// keylen = number of bytes; inlen = number of bytes
void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint32_t inlen) {
hmac_state S;
hmac_blake256_init(&S, key, keylen);
hmac_blake256_update(&S, in, inlen * 8);
hmac_blake256_final(&S, out);
}

// keylen = number of bytes; inlen = number of bytes
void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint32_t inlen) {
hmac_state S;
hmac_blake224_init(&S, key, keylen);
hmac_blake224_update(&S, in, inlen * 8);
Expand Down
16 changes: 8 additions & 8 deletions xmrstak/backend/cpu/crypto/c_blake256.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,27 @@ typedef struct {
void blake256_init(state *);
void blake224_init(state *);

void blake256_update(state *, const uint8_t *, uint64_t);
void blake224_update(state *, const uint8_t *, uint64_t);
void blake256_update(state *, const uint8_t *, uint32_t);
void blake224_update(state *, const uint8_t *, uint32_t);

void blake256_final(state *, uint8_t *);
void blake224_final(state *, uint8_t *);

void blake256_hash(uint8_t *, const uint8_t *, uint64_t);
void blake224_hash(uint8_t *, const uint8_t *, uint64_t);
void blake256_hash(uint8_t *, const uint8_t *, uint32_t);
void blake224_hash(uint8_t *, const uint8_t *, uint32_t);

/* HMAC functions: */

void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);

void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake256_update(hmac_state *, const uint8_t *, uint32_t);
void hmac_blake224_update(hmac_state *, const uint8_t *, uint32_t);

void hmac_blake256_final(hmac_state *, uint8_t *);
void hmac_blake224_final(hmac_state *, uint8_t *);

void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint32_t);
void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint32_t);

#endif /* _BLAKE256_H_ */
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/crypto/c_skein.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ typedef enum
}
SkeinHashReturn;

typedef size_t SkeinDataLength; /* bit count type */
typedef uint32_t SkeinDataLength; /* bit count type */
typedef u08b_t SkeinBitSequence; /* bit stream type */

/* "all-in-one" call */
Expand Down
4 changes: 2 additions & 2 deletions xmrstak/backend/cpu/crypto/cryptonight_aesni.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ extern "C"
{
void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
void keccakf(uint64_t st[25], int rounds);
extern void(*const extra_hashes[4])(const void *, size_t, char *);
extern void(*const extra_hashes[4])(const void *, uint32_t, char *);
}

// This will shift and xor tmp1 into itself as 4 32-bit vals such as
Expand Down Expand Up @@ -429,7 +429,7 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
uint64_t vh = _mm_cvtsi128_si64(tmp);

uint8_t x = vh >> 24;
uint8_t x = static_cast<uint8_t>(vh >> 24);
static const uint16_t table = 0x7531;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
Expand Down
10 changes: 5 additions & 5 deletions xmrstak/backend/cpu/crypto/cryptonight_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,23 +56,23 @@ extern "C"
#include <string.h>
#endif // _WIN32

void do_blake_hash(const void* input, size_t len, char* output) {
void do_blake_hash(const void* input, uint32_t len, char* output) {
blake256_hash((uint8_t*)output, (const uint8_t*)input, len);
}

void do_groestl_hash(const void* input, size_t len, char* output) {
void do_groestl_hash(const void* input, uint32_t len, char* output) {
groestl((const uint8_t*)input, len * 8, (uint8_t*)output);
}

void do_jh_hash(const void* input, size_t len, char* output) {
void do_jh_hash(const void* input, uint32_t len, char* output) {
jh_hash(32 * 8, (const uint8_t*)input, 8 * len, (uint8_t*)output);
}

void do_skein_hash(const void* input, size_t len, char* output) {
void do_skein_hash(const void* input, uint32_t len, char* output) {
skein_hash(8 * 32, (const uint8_t*)input, 8 * len, (uint8_t*)output);
}

void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
void (* const extra_hashes[4])(const void *, uint32_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};

#ifdef _WIN32
#include "xmrstak/misc/uac.hpp"
Expand Down
4 changes: 3 additions & 1 deletion xmrstak/backend/cpu/crypto/hash.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <stdint.h>

typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
typedef uint32_t DataLength;
typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn;
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/hwlocMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void bindMemoryToNUMANode( size_t puId )

depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);

for( size_t i = 0;
for( uint32_t i = 0;
i < hwloc_get_nbobjs_by_depth(topology, depth);
i++ )
{
Expand Down
10 changes: 5 additions & 5 deletions xmrstak/backend/cpu/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,22 +627,22 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes,

void minethd::double_work_main()
{
multiway_work_main<2>();
multiway_work_main<2u>();
}

void minethd::triple_work_main()
{
multiway_work_main<3>();
multiway_work_main<3u>();
}

void minethd::quad_work_main()
{
multiway_work_main<4>();
multiway_work_main<4u>();
}

void minethd::penta_work_main()
{
multiway_work_main<5>();
multiway_work_main<5u>();
}

template<size_t N>
Expand All @@ -656,7 +656,7 @@ void minethd::prep_multiway_work(uint8_t *bWorkBlob, uint32_t **piNonce)
}
}

template<size_t N>
template<uint32_t N>
void minethd::multiway_work_main()
{
if(affinity >= 0) //-1 means no affinity
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/minethd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class minethd : public iBackend

minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity);

template<size_t N>
template<uint32_t N>
void multiway_work_main();

template<size_t N>
Expand Down
Loading

0 comments on commit 70c3191

Please sign in to comment.