Skip to content

Commit

Permalink
rpcsx-gpu: add multiprocess support
Browse files Browse the repository at this point in the history
  • Loading branch information
DHrpcs3 committed Sep 1, 2024
1 parent f77376c commit 2c78162
Show file tree
Hide file tree
Showing 14 changed files with 747 additions and 451 deletions.
90 changes: 57 additions & 33 deletions hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
#pragma once

#include <orbis/utils/SharedMutex.hpp>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <orbis/utils/SharedMutex.hpp>

namespace amdgpu::bridge {
extern std::uint32_t expGpuPid;

struct PadState {
std::uint64_t timestamp;
std::uint32_t unk;
Expand Down Expand Up @@ -47,7 +45,9 @@ enum class CommandId : std::uint32_t {
ProtectMemory,
CommandBuffer,
Flip,
MapDmem,
MapMemory,
MapProcess,
UnmapProcess,
};

struct CmdMemoryProt {
Expand Down Expand Up @@ -79,15 +79,25 @@ struct CmdFlip {
std::uint64_t arg;
};

struct CmdMapDmem {
std::uint64_t offset;
// Payload of a CommandId::MapMemory command.
// The sender (sendMapMemory) and the puller's decode switch both order the
// command arguments as: pid, memoryType, dmemIndex, address, size, prot,
// offset. That order is independent of the member order declared here.
// NOTE(review): offset is signed here while sendMapMemory takes it as
// std::uint64_t, and memoryType is signed here while the sender takes
// std::uint32_t - confirm that the implicit conversions are intended.
struct CmdMapMemory {
std::int64_t offset;
std::uint64_t address;
std::uint64_t size;
std::uint32_t prot;
std::uint32_t pid;
std::int32_t memoryType;
std::uint32_t dmemIndex;
};

// Payload of a CommandId::MapProcess command: pid is decoded from the first
// command argument and vmId from the second.
// NOTE(review): sendMapProcess passes pid as std::uint32_t and vmId as
// unsigned, whereas the fields here are std::uint64_t and int - confirm the
// width/signedness differences are intentional.
struct CmdMapProcess {
std::uint64_t pid;
int vmId;
};

// Payload of a CommandId::UnmapProcess command: pid is decoded from the
// command's only argument.
// NOTE(review): sendUnmapProcess passes pid as std::uint32_t; the field here
// is 64-bit - confirm.
struct CmdUnmapProcess {
std::uint64_t pid;
};

enum {
kPageWriteWatch = 1 << 0,
kPageReadWriteLock = 1 << 1,
Expand All @@ -112,17 +122,15 @@ struct BridgeHeader {
volatile std::uint64_t flipArg;
volatile std::uint64_t flipCount;
volatile std::uint64_t bufferInUseAddress;
std::uint32_t memoryAreaCount;
std::uint32_t commandBufferCount;
std::uint32_t bufferCount;
CmdMemoryProt memoryAreas[512];
CmdCommandBuffer commandBuffers[32];
CmdBuffer buffers[10];
// orbis::shared_mutex cacheCommandMtx;
// orbis::shared_cv cacheCommandCv;
std::atomic<std::uint64_t> cacheCommands[4];
std::atomic<std::uint32_t> gpuCacheCommand;
std::atomic<std::uint8_t> cachePages[0x100'0000'0000 / kHostPageSize];
std::atomic<std::uint64_t> cacheCommands[6][4];
std::atomic<std::uint32_t> gpuCacheCommand[6];
std::atomic<std::uint8_t> cachePages[6][0x100'0000'0000 / kHostPageSize];

volatile std::uint64_t pull;
volatile std::uint64_t push;
Expand All @@ -137,7 +145,9 @@ struct Command {
CmdCommandBuffer commandBuffer;
CmdBuffer buffer;
CmdFlip flip;
CmdMapDmem mapDmem;
CmdMapMemory mapMemory;
CmdMapProcess mapProcess;
CmdUnmapProcess unmapProcess;
};
};

Expand All @@ -160,29 +170,32 @@ struct BridgePusher {

void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
std::uint64_t size, std::uint32_t prot) {
if (pid == expGpuPid) {
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}

void sendMapDmem(std::uint32_t pid, std::uint32_t dmemIndex, std::uint64_t address, std::uint64_t size, std::uint32_t prot, std::uint64_t offset) {
// if (pid == expGpuPid) {
sendCommand(CommandId::MapDmem, {pid, dmemIndex, address, size, prot, offset});
// }
// Forwards a MapMemory request to sendCommand. Every value is widened to
// std::uint64_t and packed in the order: pid, memoryType, dmemIndex, address,
// size, prot, offset.
void sendMapMemory(std::uint32_t pid, std::uint32_t memoryType,
                   std::uint32_t dmemIndex, std::uint64_t address,
                   std::uint64_t size, std::uint32_t prot,
                   std::uint64_t offset) {
  const std::initializer_list<std::uint64_t> payload = {
      pid, memoryType, dmemIndex, address, size, prot, offset};
  sendCommand(CommandId::MapMemory, payload);
}

void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
std::uint64_t address, std::uint64_t size) {
// if (pid == expGpuPid) {
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
// }
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
}

void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
std::uint64_t arg) {
// if (pid == expGpuPid) {
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
// }
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
}

// Forwards a MapProcess request to sendCommand with two arguments: the
// process id followed by the VM id.
void sendMapProcess(std::uint32_t pid, unsigned vmId) {
  const std::initializer_list<std::uint64_t> payload = {pid, vmId};
  sendCommand(CommandId::MapProcess, payload);
}
// Forwards an UnmapProcess request to sendCommand; the process id is the
// single argument.
void sendUnmapProcess(std::uint32_t pid) {
  const std::initializer_list<std::uint64_t> payload = {pid};
  sendCommand(CommandId::UnmapProcess, payload);
}

void wait() {
Expand All @@ -198,7 +211,8 @@ struct BridgePusher {

void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
std::uint64_t exp = 0;
while (!header->lock.compare_exchange_weak(exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
while (!header->lock.compare_exchange_weak(
exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
exp = 0;
}

Expand Down Expand Up @@ -303,13 +317,23 @@ struct BridgePuller {
result.flip.arg = args[2];
return result;

case CommandId::MapDmem:
result.mapDmem.pid = args[0];
result.mapDmem.dmemIndex = args[1];
result.mapDmem.address = args[2];
result.mapDmem.size = args[3];
result.mapDmem.prot = args[4];
result.mapDmem.offset = args[5];
case CommandId::MapMemory:
result.mapMemory.pid = args[0];
result.mapMemory.memoryType = args[1];
result.mapMemory.dmemIndex = args[2];
result.mapMemory.address = args[3];
result.mapMemory.size = args[4];
result.mapMemory.prot = args[5];
result.mapMemory.offset = args[6];
return result;

case CommandId::MapProcess:
result.mapProcess.pid = args[0];
result.mapProcess.vmId = args[1];
return result;

case CommandId::UnmapProcess:
result.unmapProcess.pid = args[0];
return result;
}

Expand Down
2 changes: 0 additions & 2 deletions hw/amdgpu/bridge/src/bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
static int gShmFd = -1;
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
(sizeof(std::uint64_t) * 256);
std::uint32_t amdgpu::bridge::expGpuPid = 0;

amdgpu::bridge::BridgeHeader *
amdgpu::bridge::createShmCommandBuffer(const char *name) {
if (gShmFd != -1) {
Expand Down
47 changes: 42 additions & 5 deletions hw/amdgpu/device/include/amdgpu/device/device.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include "amdgpu/RemoteMemory.hpp"
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/shader/Instruction.hpp"
#include "gpu-scheduler.hpp"
Expand Down Expand Up @@ -1259,18 +1260,54 @@ struct GnmTBuffer {

static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);

// Bit-field view of a sampler descriptor. The declared widths add up to
// 4 x 32 bits, and the file asserts sizeof(GnmSSampler) == 16 right after
// this definition. Declaration order and widths define the layout, so do not
// reorder or resize fields.
// NOTE(review): every field is declared as (signed) int32_t, so values read
// back are sign-extended - e.g. a 1-bit field reads as 0 or -1. Confirm that
// consumers expect this.
struct GnmSSampler {
// First group: 32 bits in total (including 1 unnamed padding bit).
int32_t clamp_x : 3;
int32_t clamp_y : 3;
int32_t clamp_z : 3;
int32_t max_aniso_ratio : 3;
int32_t depth_compare_func : 3;
int32_t force_unorm_coords : 1;
int32_t aniso_threshold : 3;
int32_t mc_coord_trunc : 1;
int32_t force_degamma : 1;
int32_t aniso_bias : 6;
int32_t trunc_coord : 1;
int32_t disable_cube_wrap : 1;
int32_t filter_mode : 2;
int32_t : 1;
// Second group: 32 bits in total.
int32_t min_lod : 12;
int32_t max_lod : 12;
int32_t perf_mip : 4;
int32_t perf_z : 4;
// Third group: 32 bits in total (including 4 unnamed padding bits).
int32_t lod_bias : 14;
int32_t lod_bias_sec : 6;
int32_t xy_mag_filter : 2;
int32_t xy_min_filter : 2;
int32_t z_filter : 2;
int32_t mip_filter : 2;
int32_t : 4;
// Fourth group: 32 bits in total (including 18 unnamed padding bits).
int32_t border_color_ptr : 12;
int32_t : 18;
int32_t border_color_type : 2;

// Member-wise ordering and equality, both compiler-generated.
auto operator<=>(const GnmSSampler &) const = default;
bool operator==(const GnmSSampler &) const = default;
};

static_assert(sizeof(GnmSSampler) == sizeof(std::uint32_t) * 4);

constexpr auto kPageSize = 0x4000;

void setVkDevice(VkDevice device,
VkPhysicalDeviceMemoryProperties memProperties,
VkPhysicalDeviceProperties devProperties);

struct AmdgpuDevice {
void handleProtectMemory(std::uint64_t address, std::uint64_t size,
std::uint32_t prot);
void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address,
std::uint64_t size);
bool handleFlip(VkQueue queue, VkCommandBuffer cmdBuffer,
void handleProtectMemory(RemoteMemory memory, std::uint64_t address,
std::uint64_t size, std::uint32_t prot);
void handleCommandBuffer(RemoteMemory memory, std::uint64_t queueId,
std::uint64_t address, std::uint64_t size);
bool handleFlip(RemoteMemory memory, VkQueue queue, VkCommandBuffer cmdBuffer,
TaskChain &initTaskChain, std::uint32_t bufferIndex,
std::uint64_t arg, VkImage targetImage,
VkExtent2D targetExtent, VkSemaphore waitSemaphore,
Expand Down
Loading

0 comments on commit 2c78162

Please sign in to comment.