virtual reality - How to fix MSAA performance issue with Vulkan + OpenXR? (Custom game engine) - Stack Overflow

admin2025-04-18  0

I have 25 years of experience as a professional game dev, so not much stumps me anymore, but I just can't seem to figure this one out, and I need to fix it before I can demo the project (it looks bad without anti-aliasing).

I'm working on a custom game engine using Vulkan + OpenXR and have a scene running at the full 72 fps (refresh rate) on Quest 2 without anti-aliasing. However when I enable 4x hardware MSAA, the frame rate drops abnormally low to 15 fps when there should have been very little impact. The only difference between the two is the addition of the MSAA resolve attachment.

I know a better frame rate is possible because I've tested the same scene using 4x super-sampling instead of MSAA and got more than double the speed (36 fps), even though super-sampling is supposed to be significantly slower than MSAA. I've also tested the same code (minus OpenXR) running full screen on a regular Android device, and there MSAA works as expected with minimal impact on the frame rate.

I have Vulkan validation layers and XR debug messages turned on, but there are no errors or warnings. I am checking return codes throughout and none report errors (for simplicity, the error checking is not shown in the code below). I've also tested with validation layers disabled to see whether that affects the speed, but there was no change.

I've implemented recommended MSAA optimizations, including using VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT so the MSAA resolve happens entirely in tiled memory (which should make the MSAA almost free according to this article: https://medium.com/androiddevelopers/multisampled-anti-aliasing-for-almost-free-on-tile-based-rendering-hardware-21794c479cb9 ), but still the problem persists. I'm at a loss and would appreciate any pointers for resolving this.

Here's my code for creating the swapchain, MSAA buffer, and the relevant bits of the render pipeline. 'color_format' is selected from the enumerated formats, which on this device is VK_FORMAT_R8G8B8A8_SRGB. 'swapChainExtent' is the recommended size as obtained from OpenXR (1440 x 1584). There's no depth buffer currently (there was one initially but I removed it to narrow down the problem, and the problem persists with or without it).

// Describe and create the OpenXR swapchain.
// The swapchain is single-sampled (sampleCount = 1), has one face, one array
// layer and one mip level, is used as a color attachment, and is sized to
// swapChainExtent in the format held by color_format.
XrSwapchainCreateInfo swapchain_create_info =
{
    .type = XR_TYPE_SWAPCHAIN_CREATE_INFO,
    // NOTE(review): `null` is presumably a project alias for a null pointer — confirm.
    .next = null,
    .createFlags = 0,
    .usageFlags = XR_SWAPCHAIN_USAGE_COLOR_ATTACHMENT_BIT,
    .format = color_format,
    // Single-sampled: the 4x multisampled image is a separate Vulkan image.
    .sampleCount = 1,
    .width = swapChainExtent.width,
    .height = swapChainExtent.height,
    .faceCount = 1,
    .arraySize = 1,
    .mipCount = 1,
};
// The XrResult is captured here; the surrounding text says error checking is
// omitted from this excerpt for brevity.
XrResult result = xrCreateSwapchain(m_xrInfo->session, &swapchain_create_info, &m_xrSwapchain);
// Allocate the MSAA buffer: a 4-sample 2D image with the same format and
// extent as the swapchain, flagged as a transient color attachment.
VkImageCreateInfo imageInfo =
{
    .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
    .flags = 0,
    .imageType = VK_IMAGE_TYPE_2D,
    .format = color_format,
    .extent = { swapChainExtent.width, swapChainExtent.height, 1},
    .mipLevels = 1,
    .arrayLayers = 1,
    // 4x multisampling; this must match the sample count of the render pass
    // attachment this image is used with.
    .samples = VK_SAMPLE_COUNT_4_BIT,
    .tiling = VK_IMAGE_TILING_OPTIMAL,
    // TRANSIENT_ATTACHMENT is paired with the LAZILY_ALLOCATED memory request below.
    .usage = VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
    .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
vkCreateImage(m_device, &imageInfo, nullptr, &image);

// Query the size and the permitted memory types for the image.
VkMemoryRequirements memRequirements;
vkGetImageMemoryRequirements(m_device, image, &memRequirements);

// Request device-local, lazily allocated memory for the image.
// NOTE(review): FindMemoryType is defined elsewhere; presumably it returns the
// index of a memory type allowed by memoryTypeBits that has both property
// flags — verify what it does when no lazily allocated type exists.
VkMemoryAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex = FindMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT);
// NOTE(review): binding bufferMemory to the image is not shown in this excerpt.
vkAllocateMemory(m_device, &allocInfo, nullptr, &bufferMemory);
// Render pass setup: one 4x multisampled color attachment, one single-sample
// resolve attachment, and a single graphics subpass that uses both.

// Multisampled color attachment (4 samples). Its previous contents are not
// loaded (loadOp DONT_CARE, initialLayout UNDEFINED).
VkAttachmentDescription colorAttachment{};
colorAttachment.format = color_format;
colorAttachment.samples = VK_SAMPLE_COUNT_4_BIT;
colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
// NOTE(review): STORE asks for the unresolved 4x samples to be written back to
// memory. The answer further down this page identifies this as the likely
// cause of the slowdown and recommends DONT_CARE here.
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
colorAttachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments.Add(colorAttachment);

// Reference to the attachment just added (assumes Add appends, so its index is Count() - 1).
VkAttachmentReference colorAttachmentRef{};
colorAttachmentRef.attachment = attachments.Count() - 1;
colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
colorAttachments.Add(colorAttachmentRef);

// Single-sample resolve attachment that receives the resolved MSAA result.
VkAttachmentDescription msaaAttachmentResolve{};
// NOTE(review): spelled `colorFormat` here but `color_format` everywhere else
// in this excerpt — confirm both name the same format.
msaaAttachmentResolve.format = colorFormat;
msaaAttachmentResolve.samples = VK_SAMPLE_COUNT_1_BIT;
msaaAttachmentResolve.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
// NOTE(review): DONT_CARE does not guarantee the resolved image is kept after
// the render pass. The answer further down this page recommends STORE here.
msaaAttachmentResolve.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
msaaAttachmentResolve.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
msaaAttachmentResolve.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
msaaAttachmentResolve.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
msaaAttachmentResolve.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
attachments.Add(msaaAttachmentResolve);

// Reference to the resolve attachment just added. It is used in
// COLOR_ATTACHMENT_OPTIMAL layout during the subpass.
VkAttachmentReference msaaAttachmentResolveRef{};
msaaAttachmentResolveRef.attachment = attachments.Count() - 1;
msaaAttachmentResolveRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

// Single graphics subpass; no depth/stencil attachment is set.
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = colorAttachments.Count();
subpass.pColorAttachments = colorAttachments.GetPointer();
// NOTE(review): points at a single reference, so this assumes colorAttachments
// holds exactly one entry — confirm.
subpass.pResolveAttachments = &msaaAttachmentResolveRef;

// Render pass description: all attachments collected above, one subpass, and
// no explicit subpass dependencies.
VkRenderPassCreateInfo renderPassInfo{};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderPassInfo.attachmentCount = attachments.Count();
renderPassInfo.pAttachments = attachments.GetPointer();
renderPassInfo.subpassCount = 1;
renderPassInfo.pSubpasses = &subpass;

What can I try next?

I have 25 years of experience as a professional game dev, so not much stumps me anymore, but I just can't seem to figure this one out, and I need to fix it before I can demo the project (it looks bad without anti-aliasing).

I'm working on a custom game engine using Vulkan + OpenXR and have a scene running at the full 72 fps (refresh rate) on Quest 2 without anti-aliasing. However when I enable 4x hardware MSAA, the frame rate drops abnormally low to 15 fps when there should have been very little impact. The only difference between the two is the addition of the MSAA resolve attachment.

I know a better frame rate is possible because I've tested the same scene using 4x super-sampling instead of MSAA and got more than double the speed (36 fps), even though super-sampling is supposed to be significantly slower than MSAA. I've also tested the same code (minus OpenXR) running full screen on a regular Android device, and there MSAA works as expected with minimal impact on the frame rate.

I have Vulkan validation layers and XR debug messages turned on, but there are no errors or warnings. I am checking return codes throughout and none report errors (for simplicity, the error checking is not shown in the code below). I've also tested with validation layers disabled to see whether that affects the speed, but there was no change.

I've implemented recommended MSAA optimizations, including using VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT so the MSAA resolve happens entirely in tiled memory (which should make the MSAA almost free according to this article: https://medium.com/androiddevelopers/multisampled-anti-aliasing-for-almost-free-on-tile-based-rendering-hardware-21794c479cb9 ), but still the problem persists. I'm at a loss and would appreciate any pointers for resolving this.

Here's my code for creating the swapchain, MSAA buffer, and the relevant bits of the render pipeline. 'color_format' is selected from the enumerated formats, which on this device is VK_FORMAT_R8G8B8A8_SRGB. 'swapChainExtent' is the recommended size as obtained from OpenXR (1440 x 1584). There's no depth buffer currently (there was one initially but I removed it to narrow down the problem, and the problem persists with or without it).

// Describe and create the OpenXR swapchain.
// The swapchain is single-sampled (sampleCount = 1), has one face, one array
// layer and one mip level, is used as a color attachment, and is sized to
// swapChainExtent in the format held by color_format.
XrSwapchainCreateInfo swapchain_create_info =
{
    .type = XR_TYPE_SWAPCHAIN_CREATE_INFO,
    // NOTE(review): `null` is presumably a project alias for a null pointer — confirm.
    .next = null,
    .createFlags = 0,
    .usageFlags = XR_SWAPCHAIN_USAGE_COLOR_ATTACHMENT_BIT,
    .format = color_format,
    // Single-sampled: the 4x multisampled image is a separate Vulkan image.
    .sampleCount = 1,
    .width = swapChainExtent.width,
    .height = swapChainExtent.height,
    .faceCount = 1,
    .arraySize = 1,
    .mipCount = 1,
};
// The XrResult is captured here; the surrounding text says error checking is
// omitted from this excerpt for brevity.
XrResult result = xrCreateSwapchain(m_xrInfo->session, &swapchain_create_info, &m_xrSwapchain);
// Allocate the MSAA buffer: a 4-sample 2D image with the same format and
// extent as the swapchain, flagged as a transient color attachment.
VkImageCreateInfo imageInfo =
{
    .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
    .flags = 0,
    .imageType = VK_IMAGE_TYPE_2D,
    .format = color_format,
    .extent = { swapChainExtent.width, swapChainExtent.height, 1},
    .mipLevels = 1,
    .arrayLayers = 1,
    // 4x multisampling; this must match the sample count of the render pass
    // attachment this image is used with.
    .samples = VK_SAMPLE_COUNT_4_BIT,
    .tiling = VK_IMAGE_TILING_OPTIMAL,
    // TRANSIENT_ATTACHMENT is paired with the LAZILY_ALLOCATED memory request below.
    .usage = VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
    .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
vkCreateImage(m_device, &imageInfo, nullptr, &image);

// Query the size and the permitted memory types for the image.
VkMemoryRequirements memRequirements;
vkGetImageMemoryRequirements(m_device, image, &memRequirements);

// Request device-local, lazily allocated memory for the image.
// NOTE(review): FindMemoryType is defined elsewhere; presumably it returns the
// index of a memory type allowed by memoryTypeBits that has both property
// flags — verify what it does when no lazily allocated type exists.
VkMemoryAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex = FindMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT);
// NOTE(review): binding bufferMemory to the image is not shown in this excerpt.
vkAllocateMemory(m_device, &allocInfo, nullptr, &bufferMemory);
// Render pass setup: one 4x multisampled color attachment, one single-sample
// resolve attachment, and a single graphics subpass that uses both.

// Multisampled color attachment (4 samples). Its previous contents are not
// loaded (loadOp DONT_CARE, initialLayout UNDEFINED).
VkAttachmentDescription colorAttachment{};
colorAttachment.format = color_format;
colorAttachment.samples = VK_SAMPLE_COUNT_4_BIT;
colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
// NOTE(review): STORE asks for the unresolved 4x samples to be written back to
// memory. The answer further down this page identifies this as the likely
// cause of the slowdown and recommends DONT_CARE here.
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
colorAttachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments.Add(colorAttachment);

// Reference to the attachment just added (assumes Add appends, so its index is Count() - 1).
VkAttachmentReference colorAttachmentRef{};
colorAttachmentRef.attachment = attachments.Count() - 1;
colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
colorAttachments.Add(colorAttachmentRef);

// Single-sample resolve attachment that receives the resolved MSAA result.
VkAttachmentDescription msaaAttachmentResolve{};
// NOTE(review): spelled `colorFormat` here but `color_format` everywhere else
// in this excerpt — confirm both name the same format.
msaaAttachmentResolve.format = colorFormat;
msaaAttachmentResolve.samples = VK_SAMPLE_COUNT_1_BIT;
msaaAttachmentResolve.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
// NOTE(review): DONT_CARE does not guarantee the resolved image is kept after
// the render pass. The answer further down this page recommends STORE here.
msaaAttachmentResolve.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
msaaAttachmentResolve.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
msaaAttachmentResolve.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
msaaAttachmentResolve.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
msaaAttachmentResolve.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
attachments.Add(msaaAttachmentResolve);

// Reference to the resolve attachment just added. It is used in
// COLOR_ATTACHMENT_OPTIMAL layout during the subpass.
VkAttachmentReference msaaAttachmentResolveRef{};
msaaAttachmentResolveRef.attachment = attachments.Count() - 1;
msaaAttachmentResolveRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

// Single graphics subpass; no depth/stencil attachment is set.
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = colorAttachments.Count();
subpass.pColorAttachments = colorAttachments.GetPointer();
// NOTE(review): points at a single reference, so this assumes colorAttachments
// holds exactly one entry — confirm.
subpass.pResolveAttachments = &msaaAttachmentResolveRef;

// Render pass description: all attachments collected above, one subpass, and
// no explicit subpass dependencies.
VkRenderPassCreateInfo renderPassInfo{};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderPassInfo.attachmentCount = attachments.Count();
renderPassInfo.pAttachments = attachments.GetPointer();
renderPassInfo.subpassCount = 1;
renderPassInfo.pSubpasses = &subpass;

What can I try next?

Share Improve this question edited Mar 6 at 23:50 halfer 20.4k19 gold badges109 silver badges202 bronze badges asked Mar 6 at 15:37 JohnathanJohnathan 697 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 1

From the setup below, it looks like you are writing the multisampled data back to memory, rather than the resolved data?

colorAttachment.samples = VK_SAMPLE_COUNT_4_BIT;
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
...
msaaAttachmentResolve.samples = VK_SAMPLE_COUNT_1_BIT;
msaaAttachmentResolve.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;

This looks backwards - for efficient MSAA on a tiler, I'd expect this to be reversed. You should never be writing the unresolved MSAA data back to DRAM.

The multi-sampled data should be storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, and the single-sample resolve attachment should be storeOp = VK_ATTACHMENT_STORE_OP_STORE.

转载请注明原文地址:http://conceptsofalgorithm.com/Algorithm/1744965348a277165.html

最新回复(0)