rpcsx-gpu: add multiprocess support

RPCSX · Sep 1, 2024 · 2c78162 · 2c78162
1 parent f77376c
commit 2c78162
Show file tree

Hide file tree

Showing 14 changed files with 747 additions and 451 deletions.
diff --git a/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp b/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp
@@ -1,14 +1,12 @@
 #pragma once
 
-#include <orbis/utils/SharedMutex.hpp>
 #include <atomic>
 #include <cstdint>
 #include <cstring>
 #include <initializer_list>
+#include <orbis/utils/SharedMutex.hpp>
 
 namespace amdgpu::bridge {
-extern std::uint32_t expGpuPid;
-
 struct PadState {
   std::uint64_t timestamp;
   std::uint32_t unk;
@@ -47,7 +45,9 @@ enum class CommandId : std::uint32_t {
   ProtectMemory,
   CommandBuffer,
   Flip,
-  MapDmem,
+  MapMemory,
+  MapProcess,
+  UnmapProcess,
 };
 
 struct CmdMemoryProt {
@@ -79,15 +79,25 @@ struct CmdFlip {
   std::uint64_t arg;
 };
 
-struct CmdMapDmem {
-  std::uint64_t offset;
+struct CmdMapMemory {
+  std::int64_t offset;
   std::uint64_t address;
   std::uint64_t size;
   std::uint32_t prot;
   std::uint32_t pid;
+  std::int32_t memoryType;
   std::uint32_t dmemIndex;
 };
 
+struct CmdMapProcess {
+  std::uint64_t pid;
+  int vmId;
+};
+
+struct CmdUnmapProcess {
+  std::uint64_t pid;
+};
+
 enum {
   kPageWriteWatch = 1 << 0,
   kPageReadWriteLock = 1 << 1,
@@ -112,17 +122,15 @@ struct BridgeHeader {
   volatile std::uint64_t flipArg;
   volatile std::uint64_t flipCount;
   volatile std::uint64_t bufferInUseAddress;
-  std::uint32_t memoryAreaCount;
   std::uint32_t commandBufferCount;
   std::uint32_t bufferCount;
-  CmdMemoryProt memoryAreas[512];
   CmdCommandBuffer commandBuffers[32];
   CmdBuffer buffers[10];
   // orbis::shared_mutex cacheCommandMtx;
   // orbis::shared_cv cacheCommandCv;
-  std::atomic<std::uint64_t> cacheCommands[4];
-  std::atomic<std::uint32_t> gpuCacheCommand;
-  std::atomic<std::uint8_t> cachePages[0x100'0000'0000 / kHostPageSize];
+  std::atomic<std::uint64_t> cacheCommands[6][4];
+  std::atomic<std::uint32_t> gpuCacheCommand[6];
+  std::atomic<std::uint8_t> cachePages[6][0x100'0000'0000 / kHostPageSize];
 
   volatile std::uint64_t pull;
   volatile std::uint64_t push;
@@ -137,7 +145,9 @@ struct Command {
     CmdCommandBuffer commandBuffer;
     CmdBuffer buffer;
     CmdFlip flip;
-    CmdMapDmem mapDmem;
+    CmdMapMemory mapMemory;
+    CmdMapProcess mapProcess;
+    CmdUnmapProcess unmapProcess;
   };
 };
 
@@ -160,29 +170,32 @@ struct BridgePusher {
 
   void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
                          std::uint64_t size, std::uint32_t prot) {
-    if (pid == expGpuPid) {
-      sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
-    }
+    sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
   }
 
-  void sendMapDmem(std::uint32_t pid, std::uint32_t dmemIndex, std::uint64_t address, std::uint64_t size, std::uint32_t prot, std::uint64_t offset) {
-    // if (pid == expGpuPid) {
-      sendCommand(CommandId::MapDmem, {pid, dmemIndex, address, size, prot, offset});
-    // }
+  void sendMapMemory(std::uint32_t pid, std::uint32_t memoryType,
+                     std::uint32_t dmemIndex, std::uint64_t address,
+                     std::uint64_t size, std::uint32_t prot,
+                     std::uint64_t offset) {
+    sendCommand(CommandId::MapMemory,
+                {pid, memoryType, dmemIndex, address, size, prot, offset});
   }
 
   void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
                          std::uint64_t address, std::uint64_t size) {
-    // if (pid == expGpuPid) {
-      sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
-    // }
+    sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
   }
 
   void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
                 std::uint64_t arg) {
-    // if (pid == expGpuPid) {
-      sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
-    // }
+    sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
+  }
+
+  void sendMapProcess(std::uint32_t pid, unsigned vmId) {
+    sendCommand(CommandId::MapProcess, {pid, vmId});
+  }
+  void sendUnmapProcess(std::uint32_t pid) {
+    sendCommand(CommandId::UnmapProcess, {pid});
   }
 
   void wait() {
@@ -198,7 +211,8 @@ struct BridgePusher {
 
   void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
     std::uint64_t exp = 0;
-    while (!header->lock.compare_exchange_weak(exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
+    while (!header->lock.compare_exchange_weak(
+        exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
       exp = 0;
     }
 
@@ -303,13 +317,23 @@ struct BridgePuller {
       result.flip.arg = args[2];
       return result;
 
-    case CommandId::MapDmem:
-      result.mapDmem.pid = args[0];
-      result.mapDmem.dmemIndex = args[1];
-      result.mapDmem.address = args[2];
-      result.mapDmem.size = args[3];
-      result.mapDmem.prot = args[4];
-      result.mapDmem.offset = args[5];
+    case CommandId::MapMemory:
+      result.mapMemory.pid = args[0];
+      result.mapMemory.memoryType = args[1];
+      result.mapMemory.dmemIndex = args[2];
+      result.mapMemory.address = args[3];
+      result.mapMemory.size = args[4];
+      result.mapMemory.prot = args[5];
+      result.mapMemory.offset = args[6];
+      return result;
+
+    case CommandId::MapProcess:
+      result.mapProcess.pid = args[0];
+      result.mapProcess.vmId = args[1];
+      return result;
+
+    case CommandId::UnmapProcess:
+      result.unmapProcess.pid = args[0];
       return result;
     }
 

diff --git a/hw/amdgpu/bridge/src/bridge.cpp b/hw/amdgpu/bridge/src/bridge.cpp
@@ -8,8 +8,6 @@
 static int gShmFd = -1;
 static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
                                         (sizeof(std::uint64_t) * 256);
-std::uint32_t amdgpu::bridge::expGpuPid = 0;
-
 amdgpu::bridge::BridgeHeader *
 amdgpu::bridge::createShmCommandBuffer(const char *name) {
   if (gShmFd != -1) {

diff --git a/hw/amdgpu/device/include/amdgpu/device/device.hpp b/hw/amdgpu/device/include/amdgpu/device/device.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "amdgpu/RemoteMemory.hpp"
 #include "amdgpu/bridge/bridge.hpp"
 #include "amdgpu/shader/Instruction.hpp"
 #include "gpu-scheduler.hpp"
@@ -1259,18 +1260,54 @@ struct GnmTBuffer {
 
 static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
 
+struct GnmSSampler {
+  int32_t clamp_x : 3;
+  int32_t clamp_y : 3;
+  int32_t clamp_z : 3;
+  int32_t max_aniso_ratio : 3;
+  int32_t depth_compare_func : 3;
+  int32_t force_unorm_coords : 1;
+  int32_t aniso_threshold : 3;
+  int32_t mc_coord_trunc : 1;
+  int32_t force_degamma : 1;
+  int32_t aniso_bias : 6;
+  int32_t trunc_coord : 1;
+  int32_t disable_cube_wrap : 1;
+  int32_t filter_mode : 2;
+  int32_t : 1;
+  int32_t min_lod : 12;
+  int32_t max_lod : 12;
+  int32_t perf_mip : 4;
+  int32_t perf_z : 4;
+  int32_t lod_bias : 14;
+  int32_t lod_bias_sec : 6;
+  int32_t xy_mag_filter : 2;
+  int32_t xy_min_filter : 2;
+  int32_t z_filter : 2;
+  int32_t mip_filter : 2;
+  int32_t : 4;
+  int32_t border_color_ptr : 12;
+  int32_t : 18;
+  int32_t border_color_type : 2;
+
+  auto operator<=>(const GnmSSampler &) const = default;
+  bool operator==(const GnmSSampler &) const = default;
+};
+
+static_assert(sizeof(GnmSSampler) == sizeof(std::uint32_t) * 4);
+
 constexpr auto kPageSize = 0x4000;
 
 void setVkDevice(VkDevice device,
                  VkPhysicalDeviceMemoryProperties memProperties,
                  VkPhysicalDeviceProperties devProperties);
 
 struct AmdgpuDevice {
-  void handleProtectMemory(std::uint64_t address, std::uint64_t size,
-                           std::uint32_t prot);
-  void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address,
-                           std::uint64_t size);
-  bool handleFlip(VkQueue queue, VkCommandBuffer cmdBuffer,
+  void handleProtectMemory(RemoteMemory memory, std::uint64_t address,
+                           std::uint64_t size, std::uint32_t prot);
+  void handleCommandBuffer(RemoteMemory memory, std::uint64_t queueId,
+                           std::uint64_t address, std::uint64_t size);
+  bool handleFlip(RemoteMemory memory, VkQueue queue, VkCommandBuffer cmdBuffer,
                   TaskChain &initTaskChain, std::uint32_t bufferIndex,
                   std::uint64_t arg, VkImage targetImage,
                   VkExtent2D targetExtent, VkSemaphore waitSemaphore,