diff --git a/common/rfb/CConnection.cxx b/common/rfb/CConnection.cxx
index 675b49d616..d432cc4ce7 100644
--- a/common/rfb/CConnection.cxx
+++ b/common/rfb/CConnection.cxx
@@ -49,7 +49,7 @@ static LogWriter vlog("CConnection");
 CConnection::CConnection()
   : csecurity(0),
     supportsLocalCursor(false), supportsCursorPosition(false),
-    supportsDesktopResize(false), supportsLEDState(false),
+    supportsDesktopResize(false), supportsLEDState(false), supportsAudio(false),
     is(0), os(0), reader_(0), writer_(0),
     shared(false),
     state_(RFBSTATE_UNINITIALISED), serverName(strDup("")),
@@ -524,6 +524,18 @@ void CConnection::framebufferUpdateEnd()
 
     firstUpdate = false;
   }
+
+  if (server.awaitsQEMUAudioFormatMsg) {
+    if (supportsAudio) {
+      rdr::U8 sampleFormat, channels;
+      rdr::U32 samplingFreq;
+      if (audioInitAndGetFormat(&sampleFormat, &channels, &samplingFreq)) {
+        writer()->writeQemuAudioSetFormat(sampleFormat, channels, samplingFreq);
+        writer()->writeQemuAudioEnableOrDisable(true /* enable */);
+      }
+    }
+    server.awaitsQEMUAudioFormatMsg = false;
+  }
 }
 
 bool CConnection::dataRect(const Rect& r, int encoding)
@@ -608,6 +620,13 @@ void CConnection::handleClipboardProvide(rdr::U32 flags,
   handleClipboardData(serverClipboard);
 }
 
+bool CConnection::audioInitAndGetFormat(rdr::U8* sampleFormat,   // out: not written by this default
+                                        rdr::U8* channels,       // out: not written by this default
+                                        rdr::U32* samplingFreq)  // out: not written by this default
+{
+  return false;  // default: no audio output, so framebufferUpdateEnd() sends no audio messages
+}
+
 void CConnection::authSuccess()
 {
 }
@@ -828,6 +847,9 @@ void CConnection::updateEncodings()
     encodings.push_back(pseudoEncodingLEDState);
     encodings.push_back(pseudoEncodingVMwareLEDState);
   }
+  if (supportsAudio) {
+    encodings.push_back(pseudoEncodingQEMUAudio);
+  }
 
   encodings.push_back(pseudoEncodingDesktopName);
   encodings.push_back(pseudoEncodingLastRect);
diff --git a/common/rfb/CConnection.h b/common/rfb/CConnection.h
index 68554b59c5..5c6d482e1a 100644
--- a/common/rfb/CConnection.h
+++ b/common/rfb/CConnection.h
@@ -125,6 +125,9 @@ namespace rfb {
                                         const size_t* lengths,
                                         const rdr::U8* const* data);
 
+    virtual bool audioInitAndGetFormat(rdr::U8* sampleFormat,
+                                       rdr::U8* channels,
+                                       rdr::U32* samplingFreq);
 
     // Methods to be overridden in a derived class
 
@@ -242,6 +245,7 @@ namespace rfb {
     bool supportsCursorPosition;
     bool supportsDesktopResize;
     bool supportsLEDState;
+    bool supportsAudio;
 
   private:
     // This is a default implementation of fences that automatically
diff --git a/common/rfb/CMsgHandler.cxx b/common/rfb/CMsgHandler.cxx
index 8cdfc451ff..e9168a08db 100644
--- a/common/rfb/CMsgHandler.cxx
+++ b/common/rfb/CMsgHandler.cxx
@@ -83,6 +83,14 @@ void CMsgHandler::supportsQEMUKeyEvent()
   server.supportsQEMUKeyEvent = true;
 }
 
+void CMsgHandler::supportsQEMUAudioAndAwaitsFormatMsgOnce()
+{
+  if (server.supportsQEMUAudio)
+    return;  // already recorded: do not re-arm the one-time format message
+  server.supportsQEMUAudio        = true;
+  server.awaitsQEMUAudioFormatMsg = true;  // consumed by CConnection::framebufferUpdateEnd()
+}
+
 void CMsgHandler::serverInit(int width, int height,
                              const PixelFormat& pf,
                              const char* name)
@@ -167,3 +175,22 @@ void CMsgHandler::handleClipboardProvide(rdr::U32 flags,
                                          const rdr::U8* const* data)
 {
 }
+
+size_t CMsgHandler::audioSampleSize()  // size in bytes that audio payload lengths must be a multiple of
+{
+  return 1;  // default: any payload length passes the check in CMsgReader::readQemuServerMessage()
+}
+
+void CMsgHandler::audioNotifyStreamingStartStop(bool isStart)  // called for QEMU audio begin (true) / end (false)
+{  // default: the notification is ignored
+}
+
+size_t CMsgHandler::audioAddSamples(const rdr::U8* data, size_t size)  // returns the number of bytes consumed
+{
+  return size;  // default: report everything as consumed so CMsgReader::readAudioData() skips the payload
+}
+
+bool CMsgHandler::audioSubmitSamples()  // called by CMsgReader::readAudioData() once a data message is fully read
+{
+  return false;  // default: nothing to submit
+}
diff --git a/common/rfb/CMsgHandler.h b/common/rfb/CMsgHandler.h
index 43d8df246a..dcfa8d0742 100644
--- a/common/rfb/CMsgHandler.h
+++ b/common/rfb/CMsgHandler.h
@@ -58,6 +58,7 @@ namespace rfb {
     virtual void fence(rdr::U32 flags, unsigned len, const char data[]);
     virtual void endOfContinuousUpdates();
     virtual void supportsQEMUKeyEvent();
+    virtual void supportsQEMUAudioAndAwaitsFormatMsgOnce();
     virtual void serverInit(int width, int height,
                             const PixelFormat& pf,
                             const char* name) = 0;
@@ -85,6 +86,11 @@ namespace rfb {
                                         const size_t* lengths,
                                         const rdr::U8* const* data);
 
+    virtual size_t audioSampleSize();
+    virtual void   audioNotifyStreamingStartStop(bool isStart);
+    virtual size_t audioAddSamples(const rdr::U8* data, size_t size);
+    virtual bool   audioSubmitSamples();
+
     ServerParams server;
   };
 }
diff --git a/common/rfb/CMsgReader.cxx b/common/rfb/CMsgReader.cxx
index 1ca993aa2a..a68478e411 100644
--- a/common/rfb/CMsgReader.cxx
+++ b/common/rfb/CMsgReader.cxx
@@ -28,6 +28,7 @@
 #include <rdr/ZlibInStream.h>
 
 #include <rfb/msgTypes.h>
+#include <rfb/qemuTypes.h>
 #include <rfb/clipboardTypes.h>
 #include <rfb/Exception.h>
 #include <rfb/LogWriter.h>
@@ -43,7 +44,7 @@ using namespace rfb;
 
 CMsgReader::CMsgReader(CMsgHandler* handler_, rdr::InStream* is_)
   : imageBufIdealSize(0), handler(handler_), is(is_),
-    state(MSGSTATE_IDLE), cursorEncoding(-1)
+    state(MSGSTATE_IDLE), cursorEncoding(-1), nAudioBytesLeft(0)
 {
 }
 
@@ -81,6 +82,13 @@ bool CMsgReader::readServerInit()
 
 bool CMsgReader::readMsg()
 {
+  if (state == MSGSTATE_AUDIO_DATA) {
+    if (readAudioData())
+      state = MSGSTATE_IDLE;
+    else
+      return false;
+  }
+
   if (state == MSGSTATE_IDLE) {
     if (!is->hasData(1))
       return false;
@@ -111,6 +119,9 @@ bool CMsgReader::readMsg()
     case msgTypeEndOfContinuousUpdates:
       ret = readEndOfContinuousUpdates();
       break;
+    case msgTypeQEMUServerMessage:
+      ret = readQemuServerMessage();
+      break;
     default:
       throw Exception("Unknown message type %d", currentMsgType);
     }
@@ -195,6 +206,10 @@ bool CMsgReader::readMsg()
       handler->supportsQEMUKeyEvent();
       ret = true;
       break;
+    case pseudoEncodingQEMUAudio:
+      handler->supportsQEMUAudioAndAwaitsFormatMsgOnce();
+      ret = true;
+      break;
     default:
       ret = readRect(dataRect, rectEncoding);
       break;
@@ -443,6 +458,68 @@ bool CMsgReader::readEndOfContinuousUpdates()
   return true;
 }
 
+bool CMsgReader::readQemuServerMessage()  // parses one QEMU server message; returns true once it is fully consumed
+{
+  if (!is->hasData(1 + 2))  // submessage type (U8) + operation (U16)
+    return false;
+
+  is->setRestorePoint();  // the audio-data case below may still find its length field missing
+  rdr::U8  subMsgType = is->readU8();
+  rdr::U16 operation  = is->readU16();
+
+  if (subMsgType != qemuAudio) {  // audio is the only submessage type handled here
+    is->clearRestorePoint();
+    throw Exception("Invalid QEMU submessage type");
+  }
+
+  switch (operation) {
+    case msgFromQemuAudioBegin:
+      is->clearRestorePoint();
+      handler->audioNotifyStreamingStartStop(true /* isStart */);
+      return true;
+
+    case msgFromQemuAudioEnd:
+      is->clearRestorePoint();
+      handler->audioNotifyStreamingStartStop(false /* isStart */);
+      return true;
+
+    case msgFromQemuAudioData:
+      if (!is->hasDataOrRestore(4))  // U32 payload length; presumably rewinds to the restore point on failure - confirm
+        return false;
+      is->clearRestorePoint();
+      nAudioBytesLeft = is->readU32();
+      if (nAudioBytesLeft == 0)  // empty payload: nothing is handed to the handler
+        return true;
+      if ((nAudioBytesLeft % handler->audioSampleSize()) != 0)  // NOTE(review): assumes audioSampleSize() never returns 0
+        throw Exception("QEMU audio protocol error: sample torn apart");
+      if (readAudioData())
+        return true;
+      state = MSGSTATE_AUDIO_DATA;  // payload incomplete: readMsg() calls readAudioData() again in this state
+      return false;
+
+    default:
+      is->clearRestorePoint();
+      throw Exception("Invalid QEMU audio operation");
+  }
+}
+
+bool CMsgReader::readAudioData()  // hands buffered audio payload to the handler; true once nAudioBytesLeft is 0
+{
+  while (nAudioBytesLeft != 0) {
+    is->hasData(__rfbmin(maxBufferedAudioBytes, nAudioBytesLeft));  // request as much as possible
+    size_t available = __rfbmin(is->avail(), nAudioBytesLeft);      // see how many we've got
+    if (available == 0)
+      return false;  // nothing buffered yet: the caller has to come back later
+    size_t consumed = handler->audioAddSamples(is->getptr(available), available);
+    if (consumed == 0)
+      return false;  // handler took nothing (Win32AudioOutput does so for less than one whole sample)
+    is->skip(consumed);
+    nAudioBytesLeft -= consumed;
+  }
+  handler->audioSubmitSamples();  // end of the data message; the return value is not checked
+  return true;
+}
+
 bool CMsgReader::readFramebufferUpdate()
 {
   if (!is->hasData(1 + 2))
diff --git a/common/rfb/CMsgReader.h b/common/rfb/CMsgReader.h
index ab55aed8b3..08ff125561 100644
--- a/common/rfb/CMsgReader.h
+++ b/common/rfb/CMsgReader.h
@@ -56,6 +56,8 @@ namespace rfb {
     bool readExtendedClipboard(rdr::S32 len);
     bool readFence();
     bool readEndOfContinuousUpdates();
+    bool readQemuServerMessage();
+    bool readAudioData();
 
     bool readFramebufferUpdate();
 
@@ -79,6 +81,7 @@ namespace rfb {
       MSGSTATE_MESSAGE,
       MSGSTATE_RECT_HEADER,
       MSGSTATE_RECT_DATA,
+      MSGSTATE_AUDIO_DATA,
     };
 
     stateEnum state;
@@ -90,7 +93,10 @@ namespace rfb {
 
     int cursorEncoding;
 
+    size_t nAudioBytesLeft;
+
     static const int maxCursorSize = 256;
+    static const size_t maxBufferedAudioBytes = 32768;
   };
 }
 #endif
diff --git a/common/rfb/CMsgWriter.cxx b/common/rfb/CMsgWriter.cxx
index 0ac1bd73bd..8097cea2af 100644
--- a/common/rfb/CMsgWriter.cxx
+++ b/common/rfb/CMsgWriter.cxx
@@ -204,6 +204,25 @@ void CMsgWriter::writeClientCutText(const char* str)
   endMsg();
 }
 
+void CMsgWriter::writeQemuAudioEnableOrDisable(bool enable)  // sends the QEMU audio enable (true) or disable (false) message
+{
+  startMsg(msgTypeQEMUClientMessage);
+  os->writeU8(qemuAudio);  // QEMU submessage type: audio
+  os->writeU16(enable ? msgToQemuEnableAudio : msgToQemuDisableAudio);  // audio message ID
+  endMsg();
+}
+
+void CMsgWriter::writeQemuAudioSetFormat(rdr::U8 fmt, rdr::U8 channels, rdr::U32 frequency)  // sends the QEMU "set audio format" message
+{
+  startMsg(msgTypeQEMUClientMessage);
+  os->writeU8(qemuAudio);  // QEMU submessage type: audio
+  os->writeU16(msgToQemuSetAudioFormat);  // audio message ID
+  os->writeU8(fmt);  // sample format code as supplied by the caller
+  os->writeU8(channels);
+  os->writeU32(frequency);
+  endMsg();
+}
+
 void CMsgWriter::writeClipboardCaps(rdr::U32 caps,
                                     const rdr::U32* lengths)
 {
diff --git a/common/rfb/CMsgWriter.h b/common/rfb/CMsgWriter.h
index 7b83939383..10cd71043d 100644
--- a/common/rfb/CMsgWriter.h
+++ b/common/rfb/CMsgWriter.h
@@ -58,6 +58,9 @@ namespace rfb {
 
     void writeClientCutText(const char* str);
 
+    void writeQemuAudioEnableOrDisable(bool enable);
+    void writeQemuAudioSetFormat(rdr::U8 fmt, rdr::U8 channels, rdr::U32 frequency);
+
     void writeClipboardCaps(rdr::U32 caps, const rdr::U32* lengths);
     void writeClipboardRequest(rdr::U32 flags);
     void writeClipboardPeek(rdr::U32 flags);
diff --git a/common/rfb/ServerParams.cxx b/common/rfb/ServerParams.cxx
index 729b3cfb24..50da3296d0 100644
--- a/common/rfb/ServerParams.cxx
+++ b/common/rfb/ServerParams.cxx
@@ -30,9 +30,9 @@ using namespace rfb;
 
 ServerParams::ServerParams()
   : majorVersion(0), minorVersion(0),
-    supportsQEMUKeyEvent(false),
+    supportsQEMUKeyEvent(false), supportsQEMUAudio(false),
     supportsSetDesktopSize(false), supportsFence(false),
-    supportsContinuousUpdates(false),
+    supportsContinuousUpdates(false), awaitsQEMUAudioFormatMsg(false),
     width_(0), height_(0), name_(0),
     ledState_(ledUnknown)
 {
diff --git a/common/rfb/ServerParams.h b/common/rfb/ServerParams.h
index ce0c722f17..1cd68b87e7 100644
--- a/common/rfb/ServerParams.h
+++ b/common/rfb/ServerParams.h
@@ -74,10 +74,13 @@ namespace rfb {
     void setClipboardCaps(rdr::U32 flags, const rdr::U32* lengths);
 
     bool supportsQEMUKeyEvent;
+    bool supportsQEMUAudio;
     bool supportsSetDesktopSize;
     bool supportsFence;
     bool supportsContinuousUpdates;
 
+    bool awaitsQEMUAudioFormatMsg;
+
   private:
 
     int width_;
diff --git a/common/rfb/encodings.h b/common/rfb/encodings.h
index e427572f6f..b4d024d5e0 100644
--- a/common/rfb/encodings.h
+++ b/common/rfb/encodings.h
@@ -44,6 +44,7 @@ namespace rfb {
   const int pseudoEncodingContinuousUpdates = -313;
   const int pseudoEncodingCursorWithAlpha = -314;
   const int pseudoEncodingQEMUKeyEvent = -258;
+  const int pseudoEncodingQEMUAudio = -259;
 
   // TightVNC-specific
   const int pseudoEncodingLastRect = -224;
diff --git a/common/rfb/msgTypes.h b/common/rfb/msgTypes.h
index a17493cd80..d4245d99a7 100644
--- a/common/rfb/msgTypes.h
+++ b/common/rfb/msgTypes.h
@@ -30,6 +30,8 @@ namespace rfb {
 
   const int msgTypeServerFence = 248;
 
+  const int msgTypeQEMUServerMessage = 255;
+
   // client to server
 
   const int msgTypeSetPixelFormat = 0;
diff --git a/common/rfb/qemuTypes.h b/common/rfb/qemuTypes.h
index 6a67f78103..fe7fde4059 100644
--- a/common/rfb/qemuTypes.h
+++ b/common/rfb/qemuTypes.h
@@ -21,5 +21,15 @@
 namespace rfb {
   const int qemuExtendedKeyEvent = 0;
   const int qemuAudio = 1;
+
+  // VNC client -> QEMU server audio message IDs
+  const int msgToQemuEnableAudio    = 0;  // no payload (see CMsgWriter::writeQemuAudioEnableOrDisable)
+  const int msgToQemuDisableAudio   = 1;  // no payload
+  const int msgToQemuSetAudioFormat = 2;  // followed by U8 format, U8 channels, U32 frequency
+
+  // QEMU server -> VNC client audio message IDs
+  const int msgFromQemuAudioEnd   = 0;  // no payload read by CMsgReader::readQemuServerMessage()
+  const int msgFromQemuAudioBegin = 1;  // no payload read by CMsgReader::readQemuServerMessage()
+  const int msgFromQemuAudioData  = 2;  // followed by U32 byte count and that many bytes of samples
 }
 #endif
diff --git a/vncviewer/CConn.cxx b/vncviewer/CConn.cxx
index c184fda0a2..240b0baa09 100644
--- a/vncviewer/CConn.cxx
+++ b/vncviewer/CConn.cxx
@@ -54,6 +54,7 @@
 
 #ifdef WIN32
 #include "win32.h"
+#include "Win32AudioOutput.h"
 #endif
 
 using namespace rdr;
@@ -77,6 +78,9 @@ static const unsigned bpsEstimateWindow = 1000;
 
 CConn::CConn(const char* vncServerName, network::Socket* socket=NULL)
   : serverHost(0), serverPort(0), desktop(NULL),
+#ifdef WIN32
+    win32AudioOutput(NULL),
+#endif
     updateCount(0), pixelCount(0),
     lastServerEncoding((unsigned int)-1), bpsEstimate(20000000)
 {
@@ -122,6 +126,17 @@ CConn::CConn(const char* vncServerName, network::Socket* socket=NULL)
   setServerName(serverHost);
   setStreams(&sock->inStream(), &sock->outStream());
 
+#ifndef WIN32
+  supportsAudio = false;
+#else
+  win32AudioOutput = new Win32AudioOutput();
+  supportsAudio = win32AudioOutput->isAvailable();
+  if (!supportsAudio) {
+    delete win32AudioOutput;
+    win32AudioOutput = NULL;
+  }
+#endif
+
   initialiseProtocol();
 
   OptionsDialog::addCallback(handleOptions, this);
@@ -134,6 +149,11 @@ CConn::~CConn()
   OptionsDialog::removeCallback(handleOptions);
   Fl::remove_timeout(handleUpdateTimeout, this);
 
+#ifdef WIN32
+  if (win32AudioOutput)
+    delete win32AudioOutput;
+#endif
+
   if (desktop)
     delete desktop;
 
@@ -470,6 +490,60 @@ void CConn::handleClipboardData(const char* data)
   desktop->handleClipboardData(data);
 }
 
+bool CConn::audioInitAndGetFormat(rdr::U8* sampleFormat,   // out: sample format code of the output
+                                  rdr::U8* channels,       // out: number of channels
+                                  rdr::U32* samplingFreq)  // out: sampling frequency
+{  // Opens the audio output on first use; returns true and fills the out-params when it is usable.
+#ifdef WIN32
+  if (win32AudioOutput) {
+    if (win32AudioOutput->isOpened() || win32AudioOutput->openAndAllocateBuffer()) {
+      (*sampleFormat) = win32AudioOutput->getSampleFormat();
+      (*channels)     = win32AudioOutput->getNumberOfChannels();
+      (*samplingFreq) = win32AudioOutput->getSamplingFreq();
+      return true;
+    }
+    delete win32AudioOutput;  // the device could not be opened: give the output up for this connection
+    win32AudioOutput = NULL;
+    supportsAudio = false;  // keep flag and pointer in step (as the constructor does) so updateEncodings() stops advertising audio
+  }
+#endif
+  return false;  // out-params are left untouched on failure
+}
+
+size_t CConn::audioSampleSize()  // size in bytes that incoming audio payload lengths must be a multiple of
+{
+#ifdef WIN32
+  if (win32AudioOutput)
+    return win32AudioOutput->getSampleSize();
+#endif
+  return 1;  // no audio output: same value as the CMsgHandler default
+}
+
+void CConn::audioNotifyStreamingStartStop(bool isStart)  // forwards stream begin/end to the audio output, if any
+{
+#ifdef WIN32
+  if (win32AudioOutput != NULL)
+    win32AudioOutput->notifyStreamingStartStop(isStart);
+#endif
+}
+
+size_t CConn::audioAddSamples(const rdr::U8* data, size_t size)  // returns the number of bytes consumed from data
+{
+#ifdef WIN32
+  if (win32AudioOutput)
+    return win32AudioOutput->addSamples(data, size);
+#endif
+  return size;  // no audio output: report everything as consumed so the reader skips the payload
+}
+
+bool CConn::audioSubmitSamples()  // queues the samples added so far for playback
+{
+#ifdef WIN32
+  if (win32AudioOutput)
+    return win32AudioOutput->submitSamples();
+#endif
+  return false;  // no audio output: nothing was submitted
+}
 
 ////////////////////// Internal methods //////////////////////
 
diff --git a/vncviewer/CConn.h b/vncviewer/CConn.h
index e662ec8726..df40b1f8fe 100644
--- a/vncviewer/CConn.h
+++ b/vncviewer/CConn.h
@@ -29,6 +29,10 @@ namespace network { class Socket; }
 
 class DesktopWindow;
 
+#ifdef WIN32
+class Win32AudioOutput;
+#endif
+
 class CConn : public rfb::CConnection
 {
 public:
@@ -73,6 +77,14 @@ class CConn : public rfb::CConnection
   virtual void handleClipboardAnnounce(bool available);
   virtual void handleClipboardData(const char* data);
 
+  virtual bool   audioInitAndGetFormat(rdr::U8* sampleFormat,
+                                       rdr::U8* channels,
+                                       rdr::U32* samplingFreq);
+  virtual size_t audioSampleSize();
+  virtual void   audioNotifyStreamingStartStop(bool isStart);
+  virtual size_t audioAddSamples(const rdr::U8* data, size_t size);
+  virtual bool   audioSubmitSamples();
+
 private:
 
   void resizeFramebuffer();
@@ -90,6 +102,9 @@ class CConn : public rfb::CConnection
   network::Socket* sock;
 
   DesktopWindow *desktop;
+#ifdef WIN32
+  Win32AudioOutput *win32AudioOutput;
+#endif
 
   unsigned updateCount;
   unsigned pixelCount;
diff --git a/vncviewer/CMakeLists.txt b/vncviewer/CMakeLists.txt
index 15eac66150..bbad5c38a0 100644
--- a/vncviewer/CMakeLists.txt
+++ b/vncviewer/CMakeLists.txt
@@ -35,7 +35,7 @@ if(WIN32)
 endif()
 
 if(WIN32)
-  target_sources(vncviewer PRIVATE Win32TouchHandler.cxx win32.c)
+  target_sources(vncviewer PRIVATE Win32TouchHandler.cxx Win32AudioOutput.cxx win32.c)
 elseif(APPLE)
   target_sources(vncviewer PRIVATE cocoa.mm osx_to_qnum.c)
 else()
@@ -53,7 +53,7 @@ endif()
 target_link_libraries(vncviewer rfb network rdr os ${FLTK_LIBRARIES} ${GETTEXT_LIBRARIES})
 
 if(WIN32)
-  target_link_libraries(vncviewer msimg32)
+  target_link_libraries(vncviewer msimg32 winmm)
 elseif(APPLE)
   target_link_libraries(vncviewer "-framework Cocoa")
   target_link_libraries(vncviewer "-framework Carbon")
diff --git a/vncviewer/Win32AudioOutput.cxx b/vncviewer/Win32AudioOutput.cxx
new file mode 100644
index 0000000000..b83b0ad285
--- /dev/null
+++ b/vncviewer/Win32AudioOutput.cxx
@@ -0,0 +1,337 @@
+/* Copyright 2022 Mikhail Kupchik
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#include "Win32AudioOutput.h"
+
+//
+// Lifecycle of audio samples is as follows:
+//
+// 1) The caller of this class must provide a pointer to and the length of a contiguous chunk
+//    of source audio samples, e.g. in the socket input buffer. Win32AudioOutput::addSamples()
+//    may be invoked multiple times per audio data message (for partial chunks of audio data).
+//
+// 2) Until the end of an audio data message arrives, samples are copied from the socket input
+//    buffer to the circular audio playback buffer. We prefer not to leave audio samples in the
+//    socket input buffer, and move them to the circular audio playback buffer as soon as they
+//    arrive. If a badly behaved VNC server sends an unreasonably large audio data message, we clamp
+//    it here: samples which do not fit into the circular audio playback buffer are de facto discarded,
+//    but reported as consumed to the caller, so the caller can release this space in the socket
+//    input buffer, and continue to do so until the end of the audio data message.
+//
+// 3) When the end of an audio data message arrives, Win32AudioOutput::submitSamples() should be
+//    called. It submits the already accumulated samples for asynchronous playback to the NT kernel
+//    device driver, the audio mixer. Asynchronous I/O requests in flight refer to the audio samples
+//    in the circular buffer owned by this class. So while an audio playback I/O request is still in
+//    flight, the socket input buffer may contain different data, may be reallocated, may be empty etc.
+//
+// 4) When the kernel completes an asynchronous I/O request, it wakes an internal worker thread
+//    started by winmm.dll / wdmaud.drv in the address space of this process. This thread invokes
+//    Win32AudioOutput::waveOutCallback(). There we link the asynchronous I/O request header into
+//    a linked list to dispose of it later, during the next call to Win32AudioOutput::submitSamples()
+//    or in the destructor of this class.
+//
+// 5) There may be multiple audio I/O requests in flight, they are played in the order of submission.
+//
+
+Win32AudioOutput::Win32AudioOutput()  // probes for a supported PCM format; the device itself is not opened here
+  : haveWO(false), openedWO(false), sampleFormat(sampleFormatU8), numberOfChannels(0),
+    samplingFreq(0), currentStreamId(0), handleWO(NULL), bufPtr(NULL), bufTotalSize(0),
+    bufFreeSize(0), bufUnsubmittedSize(0), bufSubmittedHead(0), bufUnsubmittedHead(0),
+    doneHdrsSlist(NULL), hdrsInFlight(0), extraDelayInMillisec(0)
+{
+  static const rdr::U32 freqTable[4] = {48000, 44100, 22050, 11025};  // candidate rates, tried in this order
+
+  // Default format (16-bit stereo, 48000 Hz) is tried first and usually succeeds,
+  // but we also try other formats before giving up
+  for (rdr::U8 bits_per_sample = 16; bits_per_sample != 0; bits_per_sample -= 8) {  // 16, then 8
+    for (rdr::U8 n_channels = 2; n_channels != 0; n_channels--) {                   // stereo, then mono
+      for (rdr::U8 freq_index = 0; freq_index < 4; freq_index++) {
+
+        WAVEFORMATEX wfx;
+        memset(&wfx, 0, sizeof(wfx));
+        wfx.wFormatTag      = WAVE_FORMAT_PCM;
+        wfx.nChannels       = n_channels;
+        wfx.nSamplesPerSec  = freqTable[freq_index];
+        wfx.nBlockAlign     = n_channels * (bits_per_sample / 8);  // bytes per sample across all channels
+        wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;
+        wfx.wBitsPerSample  = bits_per_sample;
+        wfx.cbSize          = 0;
+
+        MMRESULT mmr = waveOutOpen(NULL, WAVE_MAPPER, &wfx, 0, 0,
+                                   CALLBACK_NULL | WAVE_FORMAT_QUERY);  // query only: no handle is requested
+        if (mmr == MMSYSERR_NOERROR) {
+          sampleFormat     = ((bits_per_sample == 8) ? sampleFormatU8 : sampleFormatS16);
+          numberOfChannels = n_channels;
+          samplingFreq     = freqTable[freq_index];
+          haveWO           = true;
+          return;  // first accepted combination wins; openAndAllocateBuffer() opens it later
+        }
+      }
+    }
+  }
+}  // reached only when no combination was accepted: haveWO stays false
+
+bool Win32AudioOutput::openAndAllocateBuffer()  // opens the device and allocates the ring buffer; true when ready
+{
+  if (!haveWO)  // the constructor found no supported format
+    return false;
+
+  if (!openedWO) {  // already opened: nothing to do, report success below
+    // open wave output
+    WAVEFORMATEX wfx;
+    memset(&wfx, 0, sizeof(wfx));
+    wfx.wFormatTag      = WAVE_FORMAT_PCM;
+    wfx.nChannels       = numberOfChannels;
+    wfx.nSamplesPerSec  = samplingFreq;
+    wfx.nBlockAlign     = WORD(getSampleSize());
+    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;
+    wfx.wBitsPerSample  = 8 << (sampleFormat >> 1);  // 8 for format codes 0-1, 16 for codes 2-3
+    wfx.cbSize          = 0;
+
+    MMRESULT mmr = waveOutOpen(&handleWO, WAVE_MAPPER, &wfx,
+                               DWORD_PTR(&Win32AudioOutput::waveOutCallback),
+                               DWORD_PTR(this), CALLBACK_FUNCTION);  // 'this' comes back as the callback's instance argument
+    if (mmr == MMSYSERR_NOERROR)
+      openedWO = true;
+    else
+      return false;
+
+    // allocate buffer
+    size_t buf_estim_size = (4 * maxNetworkJitterInMillisec * samplingFreq) / 1000;  // in samples: 4x the jitter limit
+
+    size_t buf_alloc_size = 1;
+    while (buf_alloc_size < buf_estim_size)  // round up to a power of two: offsets are wrapped with a bit mask
+      buf_alloc_size <<= 1;
+
+    size_t sample_size = getSampleSize();  // 1, 2 or 4 for the formats the constructor selects
+
+    bufPtr = ((rdr::U8*)( calloc(buf_alloc_size, sample_size) ));
+    if (bufPtr == NULL) {  // undo the open so the object is back in the "not opened" state
+      waveOutClose(handleWO);
+      handleWO = NULL;
+      openedWO = false;
+      return false;
+    }
+
+    bufTotalSize = bufFreeSize = buf_alloc_size * sample_size;  // sizes are kept in bytes from here on
+    bufUnsubmittedSize = bufSubmittedHead = bufUnsubmittedHead = 0;
+
+    // try to change scheduling of this process (use minimum time slice)
+    timeBeginPeriod(1);  // paired with timeEndPeriod(1) in the destructor
+  }
+
+  return true;
+}
+
+void Win32AudioOutput::addSilentSamples(size_t numberOfSamples)  // appends silence to the ring buffer; no-op until opened
+{
+  if (openedWO) {
+    size_t bytes_left_to_add = numberOfSamples * getSampleSize();
+    while (bytes_left_to_add != 0) {
+      size_t bytes_to_add = bytes_left_to_add;
+      if (bytes_to_add > bufFreeSize)  // never overwrite samples that are still queued or unsubmitted
+        bytes_to_add = bufFreeSize;
+      if (bytes_to_add + bufUnsubmittedHead > bufTotalSize)  // stop at the end of the buffer; the next pass wraps
+        bytes_to_add = bufTotalSize - bufUnsubmittedHead;
+      if (bytes_to_add == 0)  // buffer full: the remaining silence is dropped
+        break;
+
+      memset(bufPtr + bufUnsubmittedHead, ((sampleFormat == sampleFormatU8) ? 0x80 : 0), bytes_to_add);  // unsigned 8-bit silence is 0x80
+      bufUnsubmittedHead  = ((bufUnsubmittedHead + bytes_to_add) & (bufTotalSize - 1));  // wrap; bufTotalSize is a power of two
+      bufFreeSize        -= bytes_to_add;
+      bufUnsubmittedSize += bytes_to_add;
+      bytes_left_to_add  -= bytes_to_add;
+    }
+  }
+}
+
+size_t Win32AudioOutput::addSamples(const rdr::U8* data, size_t size)  // copies samples into the ring buffer; returns bytes consumed
+{
+  if (haveWO) {
+    size_t sample_size = getSampleSize();
+    size -= (size & (sample_size - 1));  // only whole samples; the mask relies on sample_size being a power of two
+  }
+
+  if (openedWO) {  // not opened: nothing is stored, but the bytes are still reported as consumed
+    size_t bytes_left_to_copy = size;
+    while (bytes_left_to_copy != 0) {
+      size_t bytes_to_copy = bytes_left_to_copy;
+      if (bytes_to_copy > bufFreeSize)  // never overwrite samples that are still queued or unsubmitted
+        bytes_to_copy = bufFreeSize;
+      if (bytes_to_copy + bufUnsubmittedHead > bufTotalSize)  // stop at the end of the buffer; the next pass wraps
+        bytes_to_copy = bufTotalSize - bufUnsubmittedHead;
+      if (bytes_to_copy == 0)  // buffer full: the rest is discarded
+        break;
+
+      memcpy(bufPtr + bufUnsubmittedHead, data, bytes_to_copy);
+      bufUnsubmittedHead  = ((bufUnsubmittedHead + bytes_to_copy) & (bufTotalSize - 1));  // wrap; bufTotalSize is a power of two
+      bufFreeSize        -= bytes_to_copy;
+      bufUnsubmittedSize += bytes_to_copy;
+      data               += bytes_to_copy;
+      bytes_left_to_copy -= bytes_to_copy;
+    }
+  }
+
+  return size;  // whole-sample byte count, including anything discarded above
+}
+
+ULONGLONG Win32AudioOutput::getCurrentTimestamp()  // current system time as one 64-bit FILETIME value
+{
+  FILETIME fileTime;
+  GetSystemTimeAsFileTime(&fileTime);
+
+  ULARGE_INTEGER ticks;  // glues the two 32-bit halves together
+  ticks.HighPart = fileTime.dwHighDateTime;
+  ticks.LowPart  = fileTime.dwLowDateTime;
+
+  return ticks.QuadPart;  // submitSamples() treats differences of this value as 100 ns units
+}
+
+void CALLBACK Win32AudioOutput::waveOutCallback(HWAVEOUT hwo, UINT msg, DWORD_PTR instance,  // instance: 'this' as passed to waveOutOpen()
+                                                DWORD_PTR param1, DWORD_PTR param2)         // param1: header of the finished chunk
+{
+  if (msg == WOM_DONE) {  // all other notifications are ignored
+    Win32AudioOutput* p_this = ((Win32AudioOutput*)instance);
+    HdrInSlist*       hdr    = ((HdrInSlist*)param1);  // valid because whdr is the first member of HdrInSlist
+
+    if (p_this->openedWO && (p_this->handleWO == hwo) && (0 != (hdr->whdr.dwFlags & WHDR_DONE))) {
+      if (0 == InterlockedDecrement(&(p_this->hdrsInFlight))) {  // this was the last chunk in flight
+        hdr->starvedWhenDone     = TRUE;
+        hdr->starvationTimestamp = getCurrentTimestamp();  // submitSamples() measures the gap from here
+      }
+
+      PVOID next_hdr_ptr = p_this->doneHdrsSlist;
+      while (true) {  // push hdr onto doneHdrsSlist, retrying when the list head changed meanwhile
+        InterlockedExchangePointer(&(hdr->volatileNext), next_hdr_ptr);
+        PVOID xchg_initial_value = InterlockedCompareExchangePointer(
+          &(p_this->doneHdrsSlist), hdr, next_hdr_ptr
+        );
+        if (xchg_initial_value == next_hdr_ptr)  // the head was still what we linked to: push done
+          break;
+        next_hdr_ptr = xchg_initial_value;
+      }
+    }
+  }
+}
+
+bool Win32AudioOutput::submitSamples()  // recycles finished headers, then queues all unsubmitted samples; true if none are left
+{
+  if (!openedWO)
+    return false;
+
+  HdrInSlist* spare_hdrs = ((HdrInSlist*)(InterlockedExchangePointer(&doneHdrsSlist, NULL)));  // take the whole done list at once
+  for (HdrInSlist* hdr = spare_hdrs; hdr != NULL; hdr = hdr->next) {
+    bufFreeSize += hdr->whdr.dwBufferLength;  // the chunk's part of the ring buffer can be reused
+    waveOutUnprepareHeader(handleWO, &(hdr->whdr), sizeof(WAVEHDR));
+    if (hdr->starvedWhenDone && (hdr->streamId == currentStreamId)) {  // playback ran dry within the current stream
+      ULONGLONG now = getCurrentTimestamp();
+      if (now > hdr->starvationTimestamp) {
+        ULONGLONG delay_in_100nsec  = now - hdr->starvationTimestamp;     // delay in 100ns intervals
+        ULONGLONG delay_in_millisec = (delay_in_100nsec + 9999) / 10000;  // convert to delay in milliseconds
+        if (delay_in_millisec > maxNetworkJitterInMillisec)               // and clamp at a reasonable limit
+          delay_in_millisec = maxNetworkJitterInMillisec;
+        if (extraDelayInMillisec < ((rdr::U32)delay_in_millisec))  // only ever grows; used at the next stream start
+          extraDelayInMillisec = ((rdr::U32)delay_in_millisec);
+      }
+    }
+  }
+
+  while (bufUnsubmittedSize != 0) {
+    size_t io_bytes = bufUnsubmittedSize;
+    if (io_bytes + bufSubmittedHead > bufTotalSize)  // a chunk must be contiguous: split at the end of the buffer
+      io_bytes = bufTotalSize - bufSubmittedHead;
+
+    HdrInSlist* hdr = NULL;
+    if (spare_hdrs != NULL) {  // reuse a finished header when one is available
+      hdr = spare_hdrs;
+      spare_hdrs = hdr->next;
+    } else {
+      hdr = ((HdrInSlist*)(malloc(sizeof(HdrInSlist))));
+      if (!hdr)  // out of memory: leave the samples unsubmitted
+        break;
+    }
+    memset(hdr, 0, sizeof(HdrInSlist));
+    hdr->whdr.lpData         = LPSTR(bufPtr + bufSubmittedHead);  // points into the ring buffer, no copy
+    hdr->whdr.dwBufferLength = io_bytes;  // added back to bufFreeSize when the header is recycled
+    hdr->streamId            = currentStreamId;
+
+    MMRESULT mmr = waveOutPrepareHeader(handleWO, &(hdr->whdr), sizeof(WAVEHDR));
+    if (mmr != MMSYSERR_NOERROR) {  // put the header on the spare list so it is freed below
+      hdr->next  = spare_hdrs;
+      spare_hdrs = hdr;
+      break;
+    }
+
+    InterlockedIncrement(&hdrsInFlight);  // counted before the write, as the callback decrements it
+    mmr = waveOutWrite(handleWO, &(hdr->whdr), sizeof(WAVEHDR));
+    if (mmr != MMSYSERR_NOERROR) {  // undo the count and the prepare, then free the header below
+      InterlockedDecrement(&hdrsInFlight);
+      waveOutUnprepareHeader(handleWO, &(hdr->whdr), sizeof(WAVEHDR));
+      hdr->next  = spare_hdrs;
+      spare_hdrs = hdr;
+      break;
+    }
+
+    bufSubmittedHead = ((bufSubmittedHead + io_bytes) & (bufTotalSize - 1));  // wrap; bufTotalSize is a power of two
+    bufUnsubmittedSize -= io_bytes;
+  }
+
+  while (spare_hdrs != NULL) {  // free the headers that were not reused
+    HdrInSlist* next_hdr = spare_hdrs->next;
+    free(spare_hdrs);
+    spare_hdrs = next_hdr;
+  }
+
+  return (bufUnsubmittedSize == 0);
+}
+
+void Win32AudioOutput::notifyStreamingStartStop(bool isStart)
+{
+  if (!isStart)
+    return;  // a stream end needs no action here
+  ++currentStreamId;  // headers of earlier streams no longer count towards starvation tracking
+
+  // Mask stuttering caused by network jitter by queueing a lead-in of silence
+  // (20 ms plus the extra delay learned so far) ahead of the real samples.
+  const size_t leadInMillisec = 20 + extraDelayInMillisec;
+  addSilentSamples(leadInMillisec * samplingFreq / 1000);
+  submitSamples();
+}
+
+Win32AudioOutput::~Win32AudioOutput()  // stops playback and releases the device, the headers and the buffer
+{
+  if (openedWO) {  // nothing was acquired unless openAndAllocateBuffer() succeeded
+    waveOutReset(handleWO);  // NOTE(review): presumably hands all queued headers back through waveOutCallback() - confirm
+
+    timeEndPeriod(1);  // pairs with timeBeginPeriod(1) in openAndAllocateBuffer()
+
+    HdrInSlist* spare_hdrs = ((HdrInSlist*)(InterlockedExchangePointer(&doneHdrsSlist, NULL)));  // take the whole done list
+    while (spare_hdrs != NULL) {
+      waveOutUnprepareHeader(handleWO, &(spare_hdrs->whdr), sizeof(WAVEHDR));
+      HdrInSlist* next_hdr = spare_hdrs->next;  // read before the node is freed
+      free(spare_hdrs);
+      spare_hdrs = next_hdr;
+    }
+
+    waveOutClose(handleWO);
+    handleWO = NULL;
+    openedWO = false;
+
+    free(bufPtr);  // headers pointing into it were released above
+  }
+}
diff --git a/vncviewer/Win32AudioOutput.h b/vncviewer/Win32AudioOutput.h
new file mode 100644
index 0000000000..31b104e773
--- /dev/null
+++ b/vncviewer/Win32AudioOutput.h
@@ -0,0 +1,94 @@
+/* Copyright 2022 Mikhail Kupchik
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifndef __WIN32AUDIOOUTPUT_H__
+#define __WIN32AUDIOOUTPUT_H__
+
+#include <rdr/types.h>
+
+#include <windows.h>
+
+// Audio playback through the WinMM waveOut API: samples are collected in a ring buffer
+// and queued for playback in chunks (see the overview at the top of Win32AudioOutput.cxx).
+class Win32AudioOutput {
+  private:
+    struct HdrInSlist {                      // one chunk queued for playback
+      WAVEHDR          whdr;                 // first member: waveOutCallback() casts the header pointer back to HdrInSlist*
+      ULONGLONG        streamId;             // currentStreamId at the time the chunk was submitted
+      BOOL             starvedWhenDone;      // set by the callback when no other chunk was in flight
+      ULONGLONG        starvationTimestamp;  // getCurrentTimestamp() at that moment
+      union {
+        HdrInSlist*    next;                 // typed view of the link, used by submitSamples() and the destructor
+        PVOID volatile volatileNext;         // same link, as passed to InterlockedExchangePointer() in the callback
+      };
+    };
+
+  public:
+    static const size_t maxNetworkJitterInMillisec = 1000;  // sizes the ring buffer and caps the extra delay
+
+    static const rdr::U8 sampleFormatU8  = 0;  // sample format codes; the constructor only ever
+    static const rdr::U8 sampleFormatS8  = 1;  // selects sampleFormatU8 or sampleFormatS16
+    static const rdr::U8 sampleFormatU16 = 2;
+    static const rdr::U8 sampleFormatS16 = 3;
+    static const rdr::U8 sampleFormatU32 = 4;
+    static const rdr::U8 sampleFormatS32 = 5;
+
+    Win32AudioOutput();   // probes for a supported format, does not open the device
+    ~Win32AudioOutput();  // stops playback and releases everything that was opened
+
+    bool isAvailable() const { return haveWO; }  // true when the constructor found a supported format
+    rdr::U8 getSampleFormat() const { return sampleFormat; }
+    rdr::U8 getNumberOfChannels() const { return numberOfChannels; }
+    rdr::U32 getSamplingFreq() const { return samplingFreq; }
+    size_t getSampleSize() const { return (numberOfChannels << (sampleFormat >> 1)); }  // bytes per sample, all channels
+
+    bool openAndAllocateBuffer();  // opens the device and allocates the ring buffer; true when ready
+    bool isOpened() const { return openedWO; }
+
+    void notifyStreamingStartStop(bool isStart);          // on start: queues a lead-in of silence
+    void addSilentSamples(size_t numberOfSamples);        // appends silence to the ring buffer
+    size_t addSamples(const rdr::U8* data, size_t size);  // returns the number of bytes consumed
+    bool submitSamples();                                 // queues unsubmitted samples; true if none are left
+
+  private:
+    // Not copyable: the object owns the waveOut handle, the ring buffer and the headers
+    // queued for playback. Declared but intentionally never defined.
+    Win32AudioOutput(const Win32AudioOutput&);
+    Win32AudioOutput& operator=(const Win32AudioOutput&);
+
+    static ULONGLONG getCurrentTimestamp();
+    static void CALLBACK waveOutCallback(HWAVEOUT hwo, UINT msg, DWORD_PTR instance,
+                                         DWORD_PTR param1, DWORD_PTR param2);
+
+    bool           haveWO, openedWO;                // format found by the constructor / device opened and buffer allocated
+    rdr::U8        sampleFormat, numberOfChannels;  // format selected by the constructor
+    rdr::U32       samplingFreq;
+    ULONGLONG      currentStreamId;                 // incremented on every stream start
+    HWAVEOUT       handleWO;
+    rdr::U8*       bufPtr;                          // ring buffer, allocated by openAndAllocateBuffer()
+    size_t         bufTotalSize;                    // ring buffer capacity in bytes (a power of two)
+    size_t         bufFreeSize;                     // bytes holding neither unsubmitted nor queued samples
+    size_t         bufUnsubmittedSize;              // bytes added but not yet passed to waveOutWrite()
+    size_t         bufSubmittedHead;                // offset at which the next submission starts
+    size_t         bufUnsubmittedHead;              // offset at which the next added sample is written
+    PVOID volatile doneHdrsSlist;                   // list of finished headers, pushed by waveOutCallback()
+    LONG volatile  hdrsInFlight;                    // headers written to the device and not yet finished
+    rdr::U32       extraDelayInMillisec;            // extra lead-in silence learned from starvation events
+};
+
+#endif // __WIN32AUDIOOUTPUT_H__