Overview
通常,在学习OpenGL、D3D等图形API时,我们可以直接调用其DrawCall,直奔主题。但是,对于商业游戏引擎而言,需要以每秒数十帧渲染复杂的场景,其中可能产生成百上千个Draw Call,绘制数十万甚至数百万个三角形,直接采用简单的图形API调用显然太过低效。
因此,商业游戏引擎在真正调用图形API之前,会进行很多操作,如遮挡剔除、动态和静态合批、动态Instance、缓存状态和命令、生成中间指令再转译成图形API指令等等。
UE4.22对Mesh Drawing Pipeline进行了一次重构,使用PassMeshProcessor取代DrawingPolicy,在FMeshBatch和RHI命令之间增加了一个概念FMeshDrawCommand,以便更大程度更加可控地排序、缓存、合并绘制指令。
Mesh Rendering从FPrimitiveSceneProxy(游戏线程UPrimitiveComponent在渲染线程的代表)开始。FPrimitiveSceneProxy负责通过对GetDynamicMeshElements和DrawStaticElements的回调将FMeshBatch提交给渲染器。
FMeshBatch将FPrimitiveSceneProxy实现(用户代码)与Mesh Passes(私有渲染器模块)解耦。它包含了Pass确定最终着色器绑定和渲染状态所需的所有内容,因此Proxy永远不知道将在哪些通道中渲染。
下一步是将FMeshBatch转换为MeshPass特定的FMeshDrawCommand。
最后,SubmitMeshDrawCommands用于将FMeshDrawCommand转换为RHICommandList上设置的一系列RHI命令。
FPrimitiveSceneProxy的创建
UPrimitiveComponent的数据不会在渲染线程被直接使用,因此当渲染开始时,会先调用FScene::AddPrimitive对传入的UPrimitiveComponent创建FPrimitiveSceneProxy
// Engine\Source\Runtime\Renderer\Private\RendererScene.cpp
// Adds a single primitive component to the scene.
// Unless the component is flagged for bulk re-registration, it is wrapped in a
// one-element array view and forwarded to BatchAddPrimitivesInternal.
void FScene::AddPrimitive(UPrimitiveComponent* Primitive)
{
// If the bulk reregister flag is set, add / remove will be handled in bulk by the FStaticMeshComponentBulkReregisterContext
if (Primitive->bBulkReregister)
{
return;
}
BatchAddPrimitivesInternal(MakeArrayView(&Primitive, 1));
}
// Batched add path shared by the AddPrimitive entry points.
// For each primitive it obtains the FPrimitiveSceneProxy and, when CreateCommands is
// non-empty, enqueues a render command that finishes proxy setup.
// Regions marked (...) are elided in this excerpt.
template<class T>
void FScene::BatchAddPrimitivesInternal(TArrayView<T*> InPrimitives)
{
(...)
for (T* Primitive : InPrimitives)
{
(...)
FPrimitiveSceneProxy* PrimitiveSceneProxy = nullptr;
// Primitives exposing a primitive component interface get a newly created proxy;
// the checkf guards against adding the same primitive twice.
if (Primitive->GetPrimitiveComponentInterface())
{
checkf(!Primitive->GetSceneProxy(), TEXT("Primitive has already been added to the scene!"));
PrimitiveSceneProxy = Primitive->GetPrimitiveComponentInterface()->CreateSceneProxy();
check(SceneData.SceneProxy == PrimitiveSceneProxy); // CreateSceneProxy has access to the shared SceneData and should set it properly
}
else
{
// No component interface: use the proxy the primitive already returns from GetSceneProxy().
check(!Primitive->ShouldRecreateProxyOnUpdateTransform()); // recreating proxies when updating the transform requires a IPrimitiveComponentInterface
PrimitiveSceneProxy = Primitive->GetSceneProxy();
}
(...)
if (!CreateCommands.IsEmpty())
{
// The lambda captures `this` and takes ownership of CreateCommands via MoveTemp.
ENQUEUE_RENDER_COMMAND(AddPrimitiveCommand)(
[this, CreateCommands = MoveTemp(CreateCommands)](FRHICommandListBase& RHICmdList)
{
for (const FCreateCommand& Command : CreateCommands)
{
FScopeCycleCounter Context(Command.PrimitiveSceneProxy->GetStatId());
// Per command: set the proxy transform and bounds, create its render-thread
// resources, then hand the scene info to AddPrimitiveSceneInfo_RenderThread.
Command.PrimitiveSceneProxy->SetTransform(RHICmdList, Command.RenderMatrix, Command.WorldBounds, Command.LocalBounds, Command.AttachmentRootPosition);
Command.PrimitiveSceneProxy->CreateRenderThreadResources(RHICmdList);
AddPrimitiveSceneInfo_RenderThread(Command.PrimitiveSceneInfo, Command.PreviousTransform);
}
});
}
}
}
FPrimitiveSceneProxy创建完成后,会通过ENQUEUE_RENDER_COMMAND(AddPrimitiveCommand)将命令加入渲染线程的队列,后续执行时会对FPrimitiveSceneProxy设置Transform,创建渲染资源,并进行Primitive信息的收集,加入到AddedPrimitiveSceneInfos中。
生成MeshBatch
FMeshBatch
FMeshBatch记录了一组拥有相同材质和顶点工厂的FMeshBatchElement数据。其中FMeshBatchElement存储了FMeshBatch中单个Mesh所需的数据。也就是说,一个FMeshBatch拥有一组FMeshBatchElement、一个顶点工厂和一个材质实例,同一个FMeshBatch的所有FMeshBatchElement共享着相同的材质和Vertex Factory。但大多数情况下,FMeshBatch只会有一个FMeshBatchElement。
FMeshBatch的作用就是:将FPrimitiveSceneProxy和最终的渲染结构分离,FMeshBatch包含了需要渲染的全部信息,因此FPrimitiveSceneProxy不需要知道Pass将渲染什么。
收集FMeshBatches
FPrimitiveSceneProxy有两个生成FMeshBatch的路径:缓存路径和动态路径
FPrimitiveSceneProxy使用GetViewRelevance函数控制生成FMeshBatches所使用的路径。
缓存路径构建并重用FMeshBatch,用于不会每帧发生改变的绘制(比如静态网格体)。由DrawStaticElements函数实现,当一个Proxy被添加到场景中时会调用此函数。创建的FMeshBatches存储在FPrimitiveSceneInfo::StaticMeshes中,并且每一帧都被重用,直到从场景中删除Proxy为止。
动态路径每一帧重新创建FMeshBatch,用于在帧与帧之间经常会发生变化的绘制,例如粒子。由GetDynamicMeshElements实现。该函数每一帧都会在InitViews中被调用,并为每个View创建一个临时的FMeshBatch。
Static/Cached Mesh Batches
注:UE5是在FPrimitiveSceneInfo::AddStaticMeshes中嵌套调用FPrimitiveSceneProxy::DrawStaticElements,FPrimitiveSceneInfo::AddStaticMeshes执行完毕后,才调用FPrimitiveSceneInfo::CacheMeshDrawCommands
FScene::UpdateAllPrimitiveSceneInfos --> FScene::Update --> (FPrimitiveSceneInfo::AddToScene) --> FPrimitiveSceneInfo::AddStaticMeshes
// Engine\Source\Runtime\Renderer\Private\RendererScene.cpp
// Convenience wrapper around FScene::Update: builds an FUpdateParameters whose only
// explicitly set field is AsyncOps, then forwards to Update.
void FScene::UpdateAllPrimitiveSceneInfos(FRDGBuilder& GraphBuilder, EUpdateAllPrimitiveSceneInfosAsyncOps AsyncOps)
{
FUpdateParameters Parameters;
Parameters.AsyncOps = AsyncOps;
Update(GraphBuilder, Parameters);
}
// Scene update entry point (most of the body is elided with (...) in this excerpt).
// The part shown passes the pending scene infos to FPrimitiveSceneInfo::AddToScene.
void FScene::Update(FRDGBuilder& GraphBuilder, const FUpdateParameters& Parameters)
{
(...)
// Only call AddToScene when there is at least one scene info waiting to be added.
if (SceneInfosWithAddToScene.Num() > 0)
{
FPrimitiveSceneInfo::AddToScene(this, SceneInfosWithAddToScene);
}
(...)
}
// Engine\Source\Runtime\Renderer\Private\PrimitiveSceneInfo.cpp
// Adds the given scene infos to the scene. The body is fully elided in this excerpt;
// the surrounding article states that it leads to FPrimitiveSceneInfo::AddStaticMeshes.
void FPrimitiveSceneInfo::AddToScene(FScene* Scene, TArrayView<FPrimitiveSceneInfo*> SceneInfos)
{
(...)
}
// Collects the static mesh batches of the given scene infos and optionally caches
// their mesh draw commands. Regions marked (...) are elided in this excerpt.
//   bCacheMeshDrawCommands - when true, the caching functions at the end are run.
void FPrimitiveSceneInfo::AddStaticMeshes(FRHICommandListBase& RHICmdList, FScene* Scene, TArrayView<FPrimitiveSceneInfo*> SceneInfos, bool bCacheMeshDrawCommands)
{
(...)
{
// One invocation per scene info; each asks its proxy to emit static elements
// through the batching static primitive draw interface (BatchingSPDI).
ParallelForTemplate(SceneInfos.Num(), [Scene, &SceneInfos](int32 Index)
{
(...)
SceneInfo->Proxy->DrawStaticElements(&BatchingSPDI);
(...)
});
}
(...)
// Caching runs only after the static elements above have been gathered.
if (bCacheMeshDrawCommands)
{
CacheMeshDrawCommands(Scene, SceneInfos);
CacheNaniteMaterialBins(Scene, SceneInfos);
#if RHI_RAYTRACING
CacheRayTracingPrimitives(Scene, SceneInfos);
#endif
}
}
DrawStaticElements由FPrimitiveSceneProxy的子类各自实现,在其中传入FStaticPrimitiveDrawInterface实例并完成设置,最后调用FStaticPrimitiveDrawInterface::DrawMesh(该函数为纯虚函数,实际上调用的是其子类版本FBatchingSPDI::DrawMesh)
以FTextRenderSceneProxy::DrawStaticElements作为参考
// Engine\Source\Runtime\Engine\Private\Components\TextRenderComponent.cpp
// Static (cached) path of the text render proxy: emits one FMeshBatch per entry of
// TextBatches through the supplied static primitive draw interface.
//   PDI - receiver of the generated mesh batches.
void FTextRenderSceneProxy::DrawStaticElements(FStaticPrimitiveDrawInterface* PDI)
{
// Vertex factory will not been initialized when the font is invalid or the text string is empty.
if(VertexFactory.IsInitialized())
{
// Tell the interface up front how many meshes are about to be submitted.
PDI->ReserveMemoryForMeshes(TextBatches.Num());
for (const FTextBatch& TextBatch : TextBatches)
{
// Draw the mesh.
FMeshBatch Mesh;
// Only the first batch element is filled in; it shares the proxy's index buffer and vertex factory.
FMeshBatchElement& BatchElement = Mesh.Elements[0];
BatchElement.IndexBuffer = &IndexBuffer;
Mesh.VertexFactory = &VertexFactory;
Mesh.MaterialRenderProxy = TextBatch.Material->GetRenderProxy();
BatchElement.FirstIndex = TextBatch.IndexBufferOffset;
// The mesh is a triangle list (see Mesh.Type below), so three indices make one primitive.
BatchElement.NumPrimitives = TextBatch.IndexBufferCount / 3;
BatchElement.MinVertexIndex = TextBatch.VertexBufferOffset;
BatchElement.MaxVertexIndex = TextBatch.VertexBufferOffset + TextBatch.VertexBufferCount - 1;
Mesh.ReverseCulling = IsLocalToWorldDeterminantNegative();
Mesh.bDisableBackfaceCulling = false;
Mesh.Type = PT_TriangleList;
Mesh.DepthPriorityGroup = SDPG_World;
Mesh.LODIndex = 0;
// Second argument is the ScreenSize parameter of DrawMesh.
PDI->DrawMesh(Mesh, 1.0f);
}
}
}
// Engine\Source\Runtime\Renderer\Private\PrimitiveSceneInfo.cpp
// Static primitive draw interface used while gathering static meshes.
// DrawMesh stores each submitted FMeshBatch as an FStaticMeshBatch in
// PrimitiveSceneInfo->StaticMeshes and records a matching FStaticMeshBatchRelevance.
// Other members are elided with (...) in this excerpt.
class FBatchingSPDI : public FStaticPrimitiveDrawInterface
{
public:
(...)
// Converts Mesh into a cached static mesh batch plus its relevance record.
// Does nothing when the mesh has no draw calls or fails validation.
virtual void DrawMesh(const FMeshBatch& Mesh, float ScreenSize) final override
{
if (Mesh.HasAnyDrawCalls())
{
checkSlow(IsInParallelRenderingThread());
FPrimitiveSceneProxy* PrimitiveSceneProxy = PrimitiveSceneInfo->Proxy;
const ERHIFeatureLevel::Type FeatureLevel = PrimitiveSceneInfo->Scene->GetFeatureLevel();
if (!Mesh.Validate(PrimitiveSceneProxy, FeatureLevel))
{
return;
}
// Construct the static batch in place inside the scene info's StaticMeshes container.
// A default FHitProxyId is used when there is no current hit proxy.
FStaticMeshBatch* StaticMesh = new(PrimitiveSceneInfo->StaticMeshes) FStaticMeshBatch(
PrimitiveSceneInfo,
Mesh,
CurrentHitProxy ? CurrentHitProxy->Id : FHitProxyId()
);
StaticMesh->PreparePrimitiveUniformBuffer(PrimitiveSceneProxy, FeatureLevel);
// Volumetric self shadow mesh commands need to be generated every frame, as they depend on single frame uniform buffers with self shadow data.
const bool bSupportsCachingMeshDrawCommands = SupportsCachingMeshDrawCommands(*StaticMesh, FeatureLevel) && !PrimitiveSceneProxy->CastsVolumetricTranslucentShadow();
// Gather the per-batch flags stored in the relevance record below.
const FMaterial& Material = Mesh.MaterialRenderProxy->GetIncompleteMaterialWithFallback(FeatureLevel);
bool bUseSkyMaterial = Material.IsSky();
bool bUseSingleLayerWaterMaterial = Material.GetShadingModels().HasShadingModel(MSM_SingleLayerWater);
bool bUseAnisotropy = Material.GetShadingModels().HasAnyShadingModel({MSM_DefaultLit, MSM_ClearCoat}) && Material.MaterialUsesAnisotropy_RenderThread();
bool bSupportsNaniteRendering = SupportsNaniteRendering(StaticMesh->VertexFactory, PrimitiveSceneProxy, Mesh.MaterialRenderProxy, FeatureLevel);
bool bSupportsGPUScene = StaticMesh->VertexFactory->SupportsGPUScene(FeatureLevel);
bool bUseForWaterInfoTextureDepth = Mesh.bUseForWaterInfoTextureDepth;
bool bUseForLumenSceneCapture = Mesh.bUseForLumenSurfaceCacheCapture;
// Construct the relevance record in place inside StaticMeshRelevances.
FStaticMeshBatchRelevance* StaticMeshRelevance = new(PrimitiveSceneInfo->StaticMeshRelevances) FStaticMeshBatchRelevance(
*StaticMesh,
ScreenSize,
bSupportsCachingMeshDrawCommands,
bUseSkyMaterial,
bUseSingleLayerWaterMaterial,
bUseAnisotropy,
bSupportsNaniteRendering,
bSupportsGPUScene,
bUseForWaterInfoTextureDepth,
bUseForLumenSceneCapture,
FeatureLevel
);
}
}
(...)
}
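FBatchingSPDI::DrawMesh最主要作用是将传入的FMeshBatch转换成FStaticMeshBatch并存入PrimitiveSceneInfo->StaticMeshes,同时生成对应的FStaticMeshBatchRelevance存入PrimitiveSceneInfo->StaticMeshRelevances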
UWorld发生改变而调用其PropagateLightingScenarioChange函数时,会在其中调用UStaticMeshComponent, ULightComponent等类型的PropagateLightingScenarioChange函数,而其中则调用了FScene::UpdateAllPrimitiveSceneInfos,从而在改变发生时重新收集一次StaticMeshBatches
Dynamic Mesh Batches
上述CachedMeshBatch在帧间不发生变化,因此由FScene来收集并缓存即可,而DynamicMeshBatch在帧间会发生变化,应当每帧都要收集,因此交由FSceneRenderer负责。
下面以FMobileSceneRenderer为例简述收集DynamicMeshBatch的流程(因为FSceneRenderer::Render是纯虚函数,由其子类实现,每个子类的流程各不相同)
FMobileSceneRenderer
以下为FMobileSceneRenderer大致的调用流程:
FMobileSceneRenderer::Render --> FMobileSceneRenderer::InitViews --> FVisibilityTaskData::ProcessRenderThreadTasks --> FVisibilityTaskData::GatherDynamicMeshElements
FVisibilityTaskData::FinishGatherDynamicMeshElements --> FVisibilityTaskData::SetupMeshPasses --> ComputeDynamicMeshRelevance
// Engine\Source\Runtime\Renderer\Private\MobileShadingRenderer.cpp
// Mobile renderer frame entry point (body largely elided with (...) in this excerpt).
// The part shown is the call to InitViews, which is passed InitViewTaskDatas.
void FMobileSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
(...)
InitViews(GraphBuilder, SceneTexturesConfig, InstanceCullingManager, VirtualTextureUpdater.Get(), InitViewTaskDatas);
(...)
}
// View initialisation for the mobile renderer (body largely elided with (...)).
// The part shown drives the visibility task data: first the render-thread tasks,
// then FinishGatherDynamicMeshElements.
void FMobileSceneRenderer::InitViews(
FRDGBuilder& GraphBuilder,
FSceneTexturesConfig& SceneTexturesConfig,
FInstanceCullingManager& InstanceCullingManager,
FVirtualTextureUpdater* VirtualTextureUpdater,
FInitViewTaskDatas& TaskDatas)
{
(...)
TaskDatas.VisibilityTaskData->ProcessRenderThreadTasks();
TaskDatas.VisibilityTaskData->FinishGatherDynamicMeshElements(BasePassDepthStencilAccess, InstanceCullingManager, VirtualTextureUpdater);
(...)
}
// Engine\Source\Runtime\Renderer\Private\SceneVisibility.cpp
// Runs or waits for the visibility work that gathers dynamic mesh elements.
// Which branch runs depends on TaskConfig.Schedule and on whether a command pipe exists.
// Regions marked (...) are elided in this excerpt.
void FVisibilityTaskData::ProcessRenderThreadTasks()
{
(...)
// Render-thread schedule: gather the dynamic mesh elements directly.
if (TaskConfig.Schedule == EVisibilityTaskSchedule::RenderThread)
{
(...)
GatherDynamicMeshElements(*DynamicMeshElements.PrimitiveViewMasks);
}
else
{
// Any other schedule: with a command pipe, wait on the pipe.
if (DynamicMeshElements.CommandPipe)
{
SCOPED_NAMED_EVENT(WaitForGatherDynamicMeshElements, FColor::Magenta);
// Wait on the command pipe first as it will be continually updating the render thread event (and process tasks while we wait).
Tasks.DynamicMeshElementsPipe->Wait(ENamedThreads::GetRenderThread_Local());
}
else
{
// Without a command pipe: wait for the relevance task, then gather here.
Tasks.ComputeRelevance.Wait();
check(DynamicMeshElements.PrimitiveViewMasks);
GatherDynamicMeshElements(*DynamicMeshElements.PrimitiveViewMasks);
}
(...)
}
(...)
}
// Gathers dynamic mesh elements for the primitives in DynamicPrimitiveIndexList.
// The body is fully elided in this excerpt; see the article text following the code
// for a summary of what it does.
void FVisibilityTaskData::GatherDynamicMeshElements(FDynamicPrimitiveIndexList&& DynamicPrimitiveIndexList)
{
(...)
}
// Final step of dynamic mesh element gathering (leading part elided with (...)).
// Launches the mesh pass setup as a task and commits the dynamic read buffer used for InitViews.
void FVisibilityTaskData::FinishGatherDynamicMeshElements(FExclusiveDepthStencil::Type BasePassDepthStencilAccess, FInstanceCullingManager& InstanceCullingManager, FVirtualTextureUpdater* VirtualTextureUpdater)
{
(...)
// The task captures `this` and BasePassDepthStencilAccess by value and
// InstanceCullingManager by reference; the task handle is kept in Tasks.MeshPassSetup.
Tasks.MeshPassSetup = UE::Tasks::Launch(UE_SOURCE_LOCATION, [this, BasePassDepthStencilAccess, &InstanceCullingManager]
{
// Tag the task as a parallel rendering thread for the duration of the setup.
FOptionalTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
SetupMeshPasses(BasePassDepthStencilAccess, InstanceCullingManager);
}, TaskConfig.TaskPriority);
FSceneRenderer::DynamicReadBufferForInitViews.Commit(RHICmdList);
}
// Computes per-element pass relevance for the gathered dynamic meshes, then sets up
// the mesh passes of every view. Regions marked (...) are elided in this excerpt.
void FVisibilityTaskData::SetupMeshPasses(FExclusiveDepthStencil::Type BasePassDepthStencilAccess, FInstanceCullingManager& InstanceCullingManager)
{
DynamicMeshElements.ContextContainer.MergeContexts(DynamicMeshElements.DynamicPrimitives);
{
SCOPED_NAMED_EVENT(DynamicRelevance, FColor::Magenta);
// Size each view's pass-relevance array to match its dynamic mesh element count.
for (FViewInfo* View : Views)
{
View->DynamicMeshElementsPassRelevance.SetNum(View->DynamicMeshElements.Num());
}
for (FDynamicPrimitive DynamicPrimitive : DynamicMeshElements.DynamicPrimitives)
{
(...)
// Walk this primitive's element range [StartElementIndex, EndElementIndex) and
// compute the pass relevance of each element.
for (int32 ElementIndex = DynamicPrimitive.StartElementIndex; ElementIndex < DynamicPrimitive.EndElementIndex; ++ElementIndex)
{
const FMeshBatchAndRelevance& MeshBatch = View.DynamicMeshElements[ElementIndex];
FMeshPassMask& PassRelevance = View.DynamicMeshElementsPassRelevance[ElementIndex];
ComputeDynamicMeshRelevance(ShadingPath, bAddLightmapDensityCommands, ViewRelevance, MeshBatch, View, PassRelevance, PrimitiveSceneInfo, Bounds);
}
}
}
(...)
// Hand every view to the scene renderer to set up its mesh passes.
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
(...)
SceneRenderer.SetupMeshPass(View, BasePassDepthStencilAccess, ViewCommands, InstanceCullingManager);
}
}
其中的GatherDynamicMeshElements有很多嵌套调用,简而言之它将收集DynamicMeshBatch的任务加入队列,而其中加入队列的具体操作需要委托给FDynamicMeshElementContext类型的同名方法各自完成。
场景渲染器FSceneRenderer(Render方法由其子类实现,如FDeferredShadingSceneRenderer, FMobileSceneRenderer)在渲染之初,会执行可见性测试和剔除,以便剔除被遮挡和被隐藏的物体,在此阶段的末期会调用GatherDynamicMeshElements收集当前场景所有的FPrimitiveSceneProxy。
在GatherDynamicMeshElements中,会给每个FSceneRenderer创建一个FMeshElementCollector的对象,以便收集场景中所有可见的FPrimitiveSceneProxy的网格数据。FMeshElementCollector和FSceneRenderer是一一对应关系,每个FSceneRenderer拥有一个FMeshElementCollector。Collector通常拥有一组需要渲染的FMeshBatch列表,以及它们的管理数据和状态,在收集完对应View的可见MeshBatch后调用其AddMesh函数将MeshBatch加入当前View中。
在MeshBatch收集的最后,需要对记录当前MeshBatch会被哪些MeshPass引用的PassMask进行设置,这个操作在FVisibilityTaskData::FinishGatherDynamicMeshElements中调用FVisibilityTaskData::SetupMeshPasses,然后再嵌套调用ComputeDynamicMeshRelevance完成PassMask.Set
FDeferredShadingSceneRenderer
FDeferredShadingSceneRenderer与FMobileSceneRenderer的流程有些区别,其中没有InitViews函数,而是从BeginInitViews进入
FDeferredShadingSceneRenderer::BeginInitViews --> FVisibilityTaskData::ProcessRenderThreadTasks --> FVisibilityTaskData::GatherDynamicMeshElements
FVisibilityTaskData::FinishGatherDynamicMeshElements --> FVisibilityTaskData::SetupMeshPasses --> ComputeDynamicMeshRelevance
// Engine\Source\Runtime\Renderer\Private\DeferredShadingRenderer.cpp
// Deferred renderer frame entry point (body largely elided with (...) in this excerpt).
// The part shown calls BeginInitViews and, later, FinishGatherDynamicMeshElements
// on the visibility task data.
void FDeferredShadingSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
(...)
{
RDG_GPU_STAT_SCOPE(GraphBuilder, VisibilityCommands);
BeginInitViews(GraphBuilder, SceneTexturesConfig, InstanceCullingManager, ExternalAccessQueue, InitViewTaskDatas);
}
(...)
// Here the finish step is called from Render itself, after BeginInitViews.
InitViewTaskDatas.VisibilityTaskData->FinishGatherDynamicMeshElements(BasePassDepthStencilAccess, InstanceCullingManager, VirtualTextureUpdater.Get());
(...)
}
// First stage of view initialisation for the deferred renderer (body largely elided).
// The part shown runs the render-thread tasks of the visibility task data.
void FDeferredShadingSceneRenderer::BeginInitViews(
FRDGBuilder& GraphBuilder,
const FSceneTexturesConfig& SceneTexturesConfig,
FInstanceCullingManager& InstanceCullingManager,
FRDGExternalAccessQueue& ExternalAccessQueue,
FInitViewTaskDatas& TaskDatas)
{
(...)
TaskDatas.VisibilityTaskData->ProcessRenderThreadTasks();
(...)
}
FDeferredShadingSceneRenderer的入口与FMobileSceneRenderer不同,但是进入FVisibilityTaskData成员函数调用后就一致了,这里不再赘述。
生成MeshDrawCommand
基本类型
FMeshDrawCommand
FMeshDrawCommand是FMeshBatch和RHI之间的接口,是由MeshPass特定的FMeshPassProcessor根据FMeshBatch创建的。
FMeshDrawCommand是一种更加紧凑、更加简洁、更有利于CPU访问的数据结构,用于表示场景数据。它是一个完全无状态的绘制描述,存储了RHI需要知道的,关于网格体绘制的所有信息:
- 要使用的着色器
- 其资源绑定
- DrawCall参数
这也就是在RHI级别之上进行缓存并合并DrawCall。
FMeshPassProcessor
FMeshPassProcessor,负责将FMeshBatch转换为用于给定通道的网格体绘制命令。这是最终的绘制筛选发生的地方,会选择适当的着色器并收集着色器绑定。
自定义MeshPassProcessor必须派生自FMeshPassProcessor,并且需要覆盖AddMeshBatch函数。AddMeshBatch进行的工作:
- 绘制筛选。例如,如果一个材质具有半透明的绘制模式,那么就不应该在FDepthPassMeshProcessor中处理它。
- 选择着色器和管道状态(深度/模板/混合状态)
- 最后调用BuildMeshDrawCommands,为pass/material/vertex factory/primitive进行着色器绑定,并将新的绘制命令添加到相关列表中
UE的ShaderBindings可以是UniformBuffers, Samplers, Textures, ShaderResourceViews, LooseParameters(FShaderParameter)。FMeshPassProcessor并不是将Shader Bindings随RHICmdList.SetShaderParameter一起直接发送到RHI,而只将它们记录到FMeshDrawSingleShaderBindings类中。BuildMeshDrawCommands函数在所有通道之间共享代码,它将在通道着色器上调用GetShaderBindings。
FMeshPassProcessor常见的子类有:
- FDepthPassMeshProcessor:深度通道网格处理器,对应EMeshPass::DepthPass
- FBasePassMeshProcessor:几何通道网格处理器,对应EMeshPass::BasePass
- FCustomDepthPassMeshProcessor:自定义深度通道网格处理器,对应EMeshPass::CustomDepth
- FShadowDepthPassMeshProcessor:阴影通道网格处理器,对应EMeshPass::CSMShadowDepth
- FTranslucencyDepthPassMeshProcessor:透明深度通道网格处理器,没有对应的EMeshPass。
- FLightmapDensityMeshProcessor:光照图网格处理器,对应EMeshPass::LightmapDensity
FMeshDrawCommandPassSetupTask
- FMeshDrawCommandPassSetupTask: 并行设置MeshDrawCommand的任务。包含动态网格绘制命令的生成、排序、合并等。
- FMeshDrawCommandPassSetupTaskContext: FMeshDrawCommandPassSetupTask所需的上下文,其中包含View相关的数据、MeshPassProcessor、命令相关的数据、需在渲染线程预分配的资源、透明物体排序所需的信息。
- FMeshDrawCommandInitResourcesTask: MeshDrawCommand所需的预分配资源
FMeshBatch转换为FMeshDrawCommand
Static/Cached
FScene::Update经由FPrimitiveSceneInfo::AddToScene --> FPrimitiveSceneInfo::AddStaticMeshes,在AddStaticMeshes的最后调用FPrimitiveSceneInfo::CacheMeshDrawCommands,生成MeshDrawCommand
// Engine\Source\Runtime\Renderer\Private\PrimitiveSceneInfo.cpp
// Builds and caches mesh draw commands for the static meshes of the given scene infos.
// The scene infos are split into batches; each batch is processed by DoWorkLambda,
// either through ParallelForTemplate or on a single thread.
// Regions marked (...) are elided in this excerpt.
void FPrimitiveSceneInfo::CacheMeshDrawCommands(FScene* Scene, TArrayView<FPrimitiveSceneInfo*> SceneInfos)
{
SCOPED_NAMED_EVENT(FPrimitiveSceneInfo_CacheMeshDrawCommands, FColor::Emerald);
CSV_SCOPED_TIMING_STAT_EXCLUSIVE(FPrimitiveSceneInfo_CacheMeshDrawCommands);
QUICK_SCOPE_CYCLE_COUNTER(STAT_CacheMeshDrawCommands);
// Work out the batching: the batch size is 1 when WITH_EDITOR is set, otherwise
// GMeshDrawCommandsBatchSize; NumBatches is the rounded-up number of batches.
const int BATCH_SIZE = WITH_EDITOR ? 1 : GMeshDrawCommandsBatchSize;
const int NumBatches = (SceneInfos.Num() + BATCH_SIZE - 1) / BATCH_SIZE;
// Per-batch worker; Index selects which batch of scene infos to process.
auto DoWorkLambda = [Scene, SceneInfos, BATCH_SIZE](FCachedPassMeshDrawListContext& DrawListContext, int32 Index)
{
SCOPED_NAMED_EVENT(FPrimitiveSceneInfo_CacheMeshDrawCommand, FColor::Green);
// Identifies one static mesh: index of its scene info and index of the mesh within it.
struct FMeshInfoAndIndex
{
int32 InfoIndex;
int32 MeshIndex;
};
TArray<FMeshInfoAndIndex, SceneRenderingAllocator> MeshBatches;
MeshBatches.Reserve(3 * BATCH_SIZE);
// Walk this batch's range of scene infos, one PrimitiveSceneInfo at a time.
int LocalNum = FMath::Min((Index * BATCH_SIZE) + BATCH_SIZE, SceneInfos.Num());
for (int LocalIndex = (Index * BATCH_SIZE); LocalIndex < LocalNum; LocalIndex++)
{
FPrimitiveSceneInfo* SceneInfo = SceneInfos[LocalIndex];
check(SceneInfo->StaticMeshCommandInfos.Num() == 0);
// One command-info slot per (static mesh, mesh pass) pair.
SceneInfo->StaticMeshCommandInfos.AddDefaulted(EMeshPass::Num * SceneInfo->StaticMeshes.Num());
FPrimitiveSceneProxy* SceneProxy = SceneInfo->Proxy;
// Volumetric translucent shadows must be updated every frame, so they cannot be cached.
if (!SceneProxy->CastsVolumetricTranslucentShadow())
{
// Add every static mesh of this PrimitiveSceneInfo to the MeshBatches list.
for (int32 MeshIndex = 0; MeshIndex < SceneInfo->StaticMeshes.Num(); MeshIndex++)
{
FStaticMeshBatch& Mesh = SceneInfo->StaticMeshes[MeshIndex];
// Only meshes that support cached mesh draw commands are queued.
if (SupportsCachingMeshDrawCommands(Mesh))
{
MeshBatches.Add(FMeshInfoAndIndex{ LocalIndex, MeshIndex });
}
}
}
}
// For every mesh pass, add the mesh draw commands generated from each static element to that pass's cache list.
for (int32 PassIndex = 0; PassIndex < EMeshPass::Num; PassIndex++)
{
const EShadingPath ShadingPath = GetFeatureLevelShadingPath(Scene->GetFeatureLevel());
EMeshPass::Type PassType = (EMeshPass::Type)PassIndex;
// Skip passes that are not flagged CachedMeshCommands for this shading path.
if ((FPassProcessorManager::GetPassFlags(ShadingPath, PassType) & EMeshPassFlags::CachedMeshCommands) != EMeshPassFlags::None)
{
// Scope the FCachedPassMeshDrawListContext to this pass.
FCachedPassMeshDrawListContext::FMeshPassScope MeshPassScope(DrawListContext, PassType);
// Create the FMeshPassProcessor for this pass.
FMeshPassProcessor* PassMeshProcessor = FPassProcessorManager::CreateMeshPassProcessor(ShadingPath, PassType, Scene->GetFeatureLevel(), Scene, nullptr, &DrawListContext);
if (PassMeshProcessor != nullptr)
{
for (const FMeshInfoAndIndex& MeshAndInfo : MeshBatches)
{
FPrimitiveSceneInfo* SceneInfo = SceneInfos[MeshAndInfo.InfoIndex];
FStaticMeshBatch& Mesh = SceneInfo->StaticMeshes[MeshAndInfo.MeshIndex];
FStaticMeshBatchRelevance& MeshRelevance = SceneInfo->StaticMeshRelevances[MeshAndInfo.MeshIndex];
check(!MeshRelevance.CommandInfosMask.Get(PassType));
uint64 BatchElementMask = ~0ull;
// Feed the mesh batch to the pass processor, which converts the FMeshBatch into FMeshDrawCommand(s).
PassMeshProcessor->AddMeshBatch(Mesh, BatchElementMask, SceneInfo->Proxy);
FCachedMeshDrawCommandInfo CommandInfo = DrawListContext.GetCommandInfoAndReset();
// A command was produced if either index differs from -1.
if (CommandInfo.CommandIndex != -1 || CommandInfo.StateBucketId != -1)
{
static_assert(sizeof(MeshRelevance.CommandInfosMask) * 8 >= EMeshPass::Num, "CommandInfosMask is too small to contain all mesh passes.");
MeshRelevance.CommandInfosMask.Set(PassType);
MeshRelevance.CommandInfosBase++;
// Slot layout matches the AddDefaulted call above: mesh-major, pass-minor.
int CommandInfoIndex = MeshAndInfo.MeshIndex * EMeshPass::Num + PassType;
// Cache the CommandInfo in the PrimitiveSceneInfo.
FCachedMeshDrawCommandInfo& CurrentCommandInfo = SceneInfo->StaticMeshCommandInfos[CommandInfoIndex];
// NOTE(review): the last two format arguments look swapped relative to the message
// ("MeshPass is %d" receives EMeshPass::Num) - confirm against upstream.
checkf(CurrentCommandInfo.MeshPass == EMeshPass::Num,
TEXT("SceneInfo->StaticMeshCommandInfos[%d] is not expected to be initialized yet. MeshPass is %d, but expected EMeshPass::Num (%d)."),
CommandInfoIndex, (int32)EMeshPass::Num, CurrentCommandInfo.MeshPass);
CurrentCommandInfo = CommandInfo;
}
}
// Destroy the FMeshPassProcessor created for this pass.
delete PassMeshProcessor;
}
}
}
(...)
};
// Parallel path
bool bAnyLooseParameterBuffers = false;
if (GMeshDrawCommandsCacheMultithreaded && FApp::ShouldUseThreadingForPerformance())
{
(...)
// One invocation per batch, each with its own draw list context.
ParallelForTemplate(
NumBatches,
[&DrawListContexts, &DoWorkLambda](int32 Index)
{
FOptionalTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
DoWorkLambda(DrawListContexts[Index], Index);
},
EParallelForFlags::Unbalanced
);
(...)
}
// Single-threaded path
else
{
(...)
}
(...)
}
Dynamic
在上一阶段,FVisibilityTaskData::SetupMeshPasses中,在ComputeDynamicMeshRelevance调用中完成了PassMask.Set。
FVisibilityTaskData::SetupMeshPasses继续往下执行至最后,会调用FSceneRenderer::SetupMeshPass。
在FVisibilityTaskData::SetupMeshPasses中,会针对每个View,分别调用FSceneRenderer::SetupMeshPass处理View对应的FMeshPassProcessor(FMobileSceneRenderer::InitViews中调用ProcessRenderThreadTasks完成MeshBatch创建后会继续调用FinishGatherDynamicMeshElements函数进行MeshDrawCommand的收集,在其中先调用了FVisibilityTaskData::SetupMeshPasses函数,并在其中调用FSceneRenderer::SetupMeshPass)。
FSceneRenderer::SetupMeshPass中进行的操作是:遍历每种MeshPass,创建对应的FMeshPassProcessor,而后获取指定Pass的FParallelMeshDrawCommandPass对象,使用FParallelMeshDrawCommandPass::DispatchPassSetup并行地处理Pass,创建此Pass的所有FMeshDrawCommand。其中,创建的FMeshPassProcessor对象是DispatchPassSetup传入的参数之一。
需要注意的是,AddMeshBatch由FMeshPassProcessor子类各自实现,其中会完成GetShader(有些子类多封装了一个Process函数进行嵌套调用),并最后调用基类的BuildMeshDrawCommands函数(将MeshBatch, Material, Shader等作为参数传入),完成MeshDrawCommand的创建。
MeshDrawCommand Dispatch机制
需要注意的是,FSceneRenderer::SetupMeshPass中遍历了所有EMeshPass枚举类型,也正是在此时完成了对于不同MeshPass的Dispatch。具体而言,针对不同的MeshPass,创建其对应的MeshPassProcessor,并获取当前MeshPass对应的FParallelMeshDrawCommandPass对象,最后调用FParallelMeshDrawCommandPass对象的DispatchPassSetup方法(MeshPass类型和对应的MeshPassProcessor都是需要传入的参数),完成对不同MeshPass对应的MeshDrawCommand的分发。
重要函数
// Engine\Source\Runtime\Renderer\Private\SceneVisibility.cpp
// Final step of dynamic mesh element gathering (leading part elided with (...)).
// Launches SetupMeshPasses as a task, which is where dynamic mesh draw command
// generation is kicked off, then commits the dynamic read buffer used for InitViews.
void FVisibilityTaskData::FinishGatherDynamicMeshElements(FExclusiveDepthStencil::Type BasePassDepthStencilAccess, FInstanceCullingManager& InstanceCullingManager, FVirtualTextureUpdater* VirtualTextureUpdater)
{
(...)
// The task handle is stored in Tasks.MeshPassSetup; InstanceCullingManager is captured by reference.
Tasks.MeshPassSetup = UE::Tasks::Launch(UE_SOURCE_LOCATION, [this, BasePassDepthStencilAccess, &InstanceCullingManager]
{
FOptionalTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
SetupMeshPasses(BasePassDepthStencilAccess, InstanceCullingManager);
}, TaskConfig.TaskPriority);
FSceneRenderer::DynamicReadBufferForInitViews.Commit(RHICmdList);
}
// Shortened excerpt showing only the tail of SetupMeshPasses: every view is handed to
// FSceneRenderer::SetupMeshPass. Regions marked (...) are elided.
void FVisibilityTaskData::SetupMeshPasses(FExclusiveDepthStencil::Type BasePassDepthStencilAccess, FInstanceCullingManager& InstanceCullingManager)
{
(...)
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
(...)
SceneRenderer.SetupMeshPass(View, BasePassDepthStencilAccess, ViewCommands, InstanceCullingManager);
}
}
// Engine\Source\Runtime\Renderer\Private\SceneRendering.cpp
// Sets up the mesh passes of one view: for every EMeshPass flagged MainView it creates
// the pass processor and dispatches the pass setup on the view's
// FParallelMeshDrawCommandPass. Regions marked (...) are elided in this excerpt.
void FSceneRenderer::SetupMeshPass(FViewInfo& View, FExclusiveDepthStencil::Type BasePassDepthStencilAccess, FViewCommands& ViewCommands, FInstanceCullingManager& InstanceCullingManager)
{
(...)
for (int32 PassIndex = 0; PassIndex < EMeshPass::Num; PassIndex++)
{
const EMeshPass::Type PassType = (EMeshPass::Type)PassIndex;
// Only passes flagged MainView for this shading path are handled here.
if ((FPassProcessorManager::GetPassFlags(ShadingPath, PassType) & EMeshPassFlags::MainView) != EMeshPassFlags::None)
{
// Mobile: BasePass and MobileBasePassCSM lists need to be merged and sorted after shadow pass.
if (ShadingPath == EShadingPath::Mobile && (PassType == EMeshPass::BasePass || PassType == EMeshPass::MobileBasePassCSM))
{
continue;
}
// Debug view modes on the deferred path get per-pass special handling (elided).
if (ViewFamily.UseDebugViewPS() && ShadingPath == EShadingPath::Deferred)
{
switch (PassType)
{
(...)
}
}
// Create the processor for this pass; unlike the cached path, a view is supplied
// and the last argument is nullptr.
FMeshPassProcessor* MeshPassProcessor = FPassProcessorManager::CreateMeshPassProcessor(ShadingPath, PassType, Scene->GetFeatureLevel(), Scene, &View, nullptr);
// Each view keeps one FParallelMeshDrawCommandPass per mesh pass, indexed by PassIndex.
FParallelMeshDrawCommandPass& Pass = View.ParallelMeshDrawCommandPasses[PassIndex];
if (ShouldDumpMeshDrawCommandInstancingStats())
{
Pass.SetDumpInstancingStats(GetMeshPassName(PassType));
}
(...)
FName PassName(GetMeshPassName(PassType));
// Dispatch the setup for this pass, passing the processor, the view's dynamic mesh
// elements with their pass relevance, and this pass's build requests and commands.
Pass.DispatchPassSetup(
Scene,
View,
FInstanceCullingContext(PassName, ShaderPlatform, &InstanceCullingManager, ViewIds, View.PrevViewInfo.HZB, InstanceCullingMode, CullingFlags),
PassType,
BasePassDepthStencilAccess,
MeshPassProcessor,
View.DynamicMeshElements,
&View.DynamicMeshElementsPassRelevance,
View.NumVisibleDynamicMeshElements[PassType],
ViewCommands.DynamicMeshCommandBuildRequests[PassType],
ViewCommands.DynamicMeshCommandBuildFlags[PassType],
ViewCommands.NumDynamicMeshCommandBuildRequestElements[PassType],
ViewCommands.MeshCommands[PassIndex]);
}
}
}
// Engine\Source\Runtime\Renderer\Private\MeshDrawCommands.cpp
// Prepares the task context for one mesh pass and runs the pass setup, either as
// task-graph tasks or immediately on the calling thread.
// Regions marked (...) are elided in this excerpt.
void FParallelMeshDrawCommandPass::DispatchPassSetup(
FScene* Scene,
const FViewInfo& View,
FInstanceCullingContext&& InstanceCullingContext,
EMeshPass::Type PassType,
FExclusiveDepthStencil::Type BasePassDepthStencilAccess,
FMeshPassProcessor* MeshPassProcessor,
const TArray<FMeshBatchAndRelevance, SceneRenderingAllocator>& DynamicMeshElements,
const TArray<FMeshPassMask, SceneRenderingAllocator>* DynamicMeshElementsPassRelevance,
int32 NumDynamicMeshElements,
TArray<const FStaticMeshBatch*, SceneRenderingAllocator>& InOutDynamicMeshCommandBuildRequests,
TArray<EMeshDrawCommandCullingPayloadFlags, SceneRenderingAllocator> InOutDynamicMeshCommandBuildFlags,
int32 NumDynamicMeshCommandBuildRequestElements,
FMeshCommandOneFrameArray& InOutMeshDrawCommands,
FMeshPassProcessor* MobileBasePassCSMMeshPassProcessor,
FMeshCommandOneFrameArray* InOutMobileBasePassCSMMeshDrawCommands
)
{
TRACE_CPUPROFILER_EVENT_SCOPE(ParallelMdcDispatchPassSetup);
// Preconditions: no task dispatched yet, a processor was supplied, and no primitive id buffer data is set.
check(!TaskEventRef.IsValid() && MeshPassProcessor != nullptr && TaskContext.PrimitiveIdBufferData == nullptr);
// The pass relevance array must be null exactly when PassType is EMeshPass::Num.
check((PassType == EMeshPass::Num) == (DynamicMeshElementsPassRelevance == nullptr));
// Upper bound on draws: existing commands + dynamic mesh elements + build request elements.
MaxNumDraws = InOutMeshDrawCommands.Num() + NumDynamicMeshElements + NumDynamicMeshCommandBuildRequestElements;
// Fill in the TaskContext with the data needed to generate the mesh commands.
(...)
// Only apply instancing for ISR to main view passes
const bool bIsMainViewPass = PassType != EMeshPass::Num && (FPassProcessorManager::GetPassFlags(TaskContext.ShadingPath, TaskContext.PassType) & EMeshPassFlags::MainView) != EMeshPassFlags::None;
// GPUCULL_TODO: Note the InstanceFactor is ignored by the GPU-Scene supported instances, but is used for legacy primitives.
TaskContext.InstanceFactor = (bIsMainViewPass && View.IsInstancedStereoPass()) ? 2 : 1;
TaskContext.InstanceCullingContext = MoveTemp(InstanceCullingContext);
// Set up the view-based translucency sort key.
(...)
// Map translucency mesh passes to their ETranslucencyPass; other passes are left untouched by this switch.
switch (PassType)
{
case EMeshPass::TranslucencyStandard: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyStandard; break;
case EMeshPass::TranslucencyStandardModulate: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyStandardModulate; break;
case EMeshPass::TranslucencyAfterDOF: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyAfterDOF; break;
case EMeshPass::TranslucencyAfterDOFModulate: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyAfterDOFModulate; break;
case EMeshPass::TranslucencyAfterMotionBlur: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyAfterMotionBlur; break;
case EMeshPass::TranslucencyAll: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_AllTranslucency; break;
}
// Swap the command lists.
(...)
if (MaxNumDraws > 0)
{
// Preallocate resources on the render thread based on the maximum draw count (MaxNumDraws).
(...)
// In parallel mode, create the task instances and hand them to the task graph.
if (bExecuteInParallel)
{
// With on-demand shader creation only the setup task is dispatched.
if (IsOnDemandShaderCreationEnabled())
{
TaskEventRef = TGraphTask<FMeshDrawCommandPassSetupTask>::CreateTask().ConstructAndDispatchWhenReady(TaskContext);
}
else
{
// Otherwise an init-resources task is chained after the setup task as its prerequisite.
FGraphEventArray DependentGraphEvents;
DependentGraphEvents.Add(TGraphTask<FMeshDrawCommandPassSetupTask>::CreateTask().ConstructAndDispatchWhenReady(TaskContext));
TaskEventRef = TGraphTask<FMeshDrawCommandInitResourcesTask>::CreateTask(&DependentGraphEvents).ConstructAndDispatchWhenReady(TaskContext);
}
}
else
{
// Immediate mode: run the same task bodies inline, in the same order.
QUICK_SCOPE_CYCLE_COUNTER(STAT_MeshPassSetupImmediate);
FMeshDrawCommandPassSetupTask Task(TaskContext);
Task.AnyThreadTask();
if (!IsOnDemandShaderCreationEnabled())
{
FMeshDrawCommandInitResourcesTask DependentTask(TaskContext);
DependentTask.AnyThreadTask();
}
}
(...)
}
}
// Task that builds the visible mesh draw commands of one mesh pass from its task context.
// AnyThreadTask generates the dynamic commands, applies view overrides, updates sort
// keys, sorts the commands and, when GPU scene is in use, sets up instance culling.
// Other members are elided with (...) in this excerpt.
class FMeshDrawCommandPassSetupTask
{
public:
(...)
// Task body; also called directly by DispatchPassSetup in immediate mode.
void AnyThreadTask()
{
(...)
// Mobile base pass: merge the CSM command list first, then generate commands with
// both the regular and the CSM pass processors.
if (bMobileShadingBasePass)
{
MergeMobileBasePassMeshDrawCommands(
Context.View->MobileCSMVisibilityInfo,
Context.PrimitiveBounds->Num(),
Context.MeshDrawCommands,
Context.MobileBasePassCSMMeshDrawCommands
);
GenerateMobileBasePassDynamicMeshDrawCommands(
*Context.View,
Context.ShadingPath,
Context.PassType,
Context.MeshPassProcessor,
Context.MobileBasePassCSMMeshPassProcessor,
*Context.DynamicMeshElements,
Context.DynamicMeshElementsPassRelevance,
Context.NumDynamicMeshElements,
Context.DynamicMeshCommandBuildRequests,
Context.DynamicMeshCommandBuildFlags,
Context.NumDynamicMeshCommandBuildRequestElements,
Context.MeshDrawCommands,
Context.MeshDrawCommandStorage,
Context.MinimalPipelineStatePassSet,
Context.NeedsShaderInitialisation
);
}
else
{
// All other passes: generate commands with the single pass processor.
GenerateDynamicMeshDrawCommands(
*Context.View,
Context.ShadingPath,
Context.PassType,
Context.MeshPassProcessor,
*Context.DynamicMeshElements,
Context.DynamicMeshElementsPassRelevance,
Context.NumDynamicMeshElements,
Context.DynamicMeshCommandBuildRequests,
Context.DynamicMeshCommandBuildFlags,
Context.NumDynamicMeshCommandBuildRequestElements,
Context.MeshDrawCommands,
Context.MeshDrawCommandStorage,
Context.MinimalPipelineStatePassSet,
Context.NeedsShaderInitialisation
);
}
// Post-processing is only needed when at least one command exists.
if (Context.MeshDrawCommands.Num() > 0)
{
// View overrides are applied only when a concrete pass type is set.
if (Context.PassType != EMeshPass::Num)
{
ApplyViewOverridesToMeshDrawCommands(
Context.ShadingPath,
Context.PassType,
Context.bReverseCulling,
Context.bRenderSceneTwoSided,
Context.BasePassDepthStencilAccess,
Context.DefaultBasePassDepthStencilAccess,
Context.MeshDrawCommands,
Context.MeshDrawCommandStorage,
Context.MinimalPipelineStatePassSet,
Context.NeedsShaderInitialisation,
Context.TempVisibleMeshDrawCommands
);
}
// Update sort keys.
if (bNeedsUpdateMobilePassMeshSortKeys)
{
UpdateMobilePassMeshSortKeys(
Context.ViewOrigin,
*Context.PrimitiveBounds,
Context.MeshDrawCommands
);
}
// Translucency passes take a separate sort-key path (elided).
else if (Context.TranslucencyPass != ETranslucencyPass::TPT_MAX)
{
(...)
}
{
// Sort the visible commands with FCompareFMeshDrawCommands.
QUICK_SCOPE_CYCLE_COUNTER(STAT_SortVisibleMeshDrawCommands);
Context.MeshDrawCommands.Sort(FCompareFMeshDrawCommands());
}
// With GPU scene enabled, let the instance culling context set up the draw commands
// and collect pass statistics.
if (Context.bUseGPUScene)
{
Context.InstanceCullingContext.SetupDrawCommands(
Context.MeshDrawCommands,
true,
Context.Scene,
Context.MaxInstances,
Context.VisibleMeshDrawCommandsNum,
Context.NewPassVisibleMeshDrawCommandsNum);
CollectMeshDrawCommandPassStats(Context.MeshDrawCommands, Context.InstanceCullingContext);
}
}
}
(...)
}
/**
 * Builds the mesh draw commands of one mesh pass for one view from two sources:
 *  1. the view's dynamic mesh elements, and
 *  2. static mesh batches that were queued as dynamic build requests.
 *
 * @param View                               View handed to ApplyViewDependentMeshArguments for view-dependent static batches.
 * @param ShadingPath                        Not referenced in the visible portion of this excerpt.
 * @param PassType                           Mesh pass used to test each dynamic element's pass relevance.
 * @param PassMeshProcessor                  Processor that receives every mesh batch through AddMeshBatch.
 * @param DynamicMeshElements                Dynamic mesh batches gathered for the view.
 * @param DynamicMeshElementsPassRelevance   Optional per-element pass mask; when null every dynamic element is processed.
 * @param MaxNumDynamicMeshElements          Upper bound (asserted) on commands generated from DynamicMeshElements.
 * @param DynamicMeshCommandBuildRequests    Static mesh batches whose commands must be built here.
 * @param DynamicMeshCommandBuildFlags       Culling payload flags, indexed in parallel with the build requests;
 *                                           a missing entry is treated as NoScreenSizeCull.
 * @param MaxNumBuildRequestElements         Upper bound (asserted) on commands generated from the build requests.
 * @param VisibleCommands                    Command array whose growth is measured around the AddMeshBatch calls;
 *                                           presumably the processor appends to it (wired up outside this excerpt).
 * @param MeshDrawCommandStorage             Not referenced in the visible portion of this excerpt.
 * @param MinimalPipelineStatePassSet        Not referenced in the visible portion of this excerpt.
 * @param NeedsShaderInitialisation          Not referenced in the visible portion of this excerpt.
 */
void GenerateDynamicMeshDrawCommands(
const FViewInfo& View,
EShadingPath ShadingPath,
EMeshPass::Type PassType,
FMeshPassProcessor* PassMeshProcessor,
const TArray<FMeshBatchAndRelevance, SceneRenderingAllocator>& DynamicMeshElements,
const TArray<FMeshPassMask, SceneRenderingAllocator>* DynamicMeshElementsPassRelevance,
int32 MaxNumDynamicMeshElements,
const TArray<const FStaticMeshBatch*, SceneRenderingAllocator>& DynamicMeshCommandBuildRequests,
// NOTE(review): this array is taken by value, so it is copied on every call; a const reference would
// avoid the copy. Left untouched here so the signature keeps matching any declaration elsewhere.
const TArray<EMeshDrawCommandCullingPayloadFlags, SceneRenderingAllocator> DynamicMeshCommandBuildFlags,
int32 MaxNumBuildRequestElements,
FMeshCommandOneFrameArray& VisibleCommands,
FDynamicMeshDrawCommandStorage& MeshDrawCommandStorage,
FGraphicsMinimalPipelineStateSet& MinimalPipelineStatePassSet,
bool& NeedsShaderInitialisation
)
{
(...)
// Part 1: dynamic mesh elements that are relevant to this pass.
{
const int32 NumCommandsBefore = VisibleCommands.Num();
const int32 NumDynamicMeshBatches = DynamicMeshElements.Num();
for (int32 MeshIndex = 0; MeshIndex < NumDynamicMeshBatches; MeshIndex++)
{
// A null relevance array means "no filtering"; otherwise the element must be flagged for PassType.
if (!DynamicMeshElementsPassRelevance || (*DynamicMeshElementsPassRelevance)[MeshIndex].Get(PassType))
{
const FMeshBatchAndRelevance& MeshAndRelevance = DynamicMeshElements[MeshIndex];
// All 64 bits set: every element of the batch is submitted.
const uint64 BatchElementMask = ~0ull;
PassMeshProcessor->AddMeshBatch(*MeshAndRelevance.Mesh, BatchElementMask, MeshAndRelevance.PrimitiveSceneProxy);
}
}
// The caller preallocated resources for at most MaxNumDynamicMeshElements commands.
const int32 NumCommandsGenerated = VisibleCommands.Num() - NumCommandsBefore;
checkf(NumCommandsGenerated <= MaxNumDynamicMeshElements,
TEXT("Generated %d mesh draw commands for DynamicMeshElements, while preallocating resources only for %d of them."), NumCommandsGenerated, MaxNumDynamicMeshElements);
}
// Part 2: static mesh batches that requested their commands to be built dynamically.
{
const int32 NumCommandsBefore = VisibleCommands.Num();
const int32 NumStaticMeshBatches = DynamicMeshCommandBuildRequests.Num();
for (int32 MeshIndex = 0; MeshIndex < NumStaticMeshBatches; MeshIndex++)
{
const FStaticMeshBatch* StaticMeshBatch = DynamicMeshCommandBuildRequests[MeshIndex];
// Use the 64-bit literal: `~0ul` only sets the low 32 bits where `unsigned long` is 32-bit
// (e.g. Windows), which would silently mask out batch elements 32..63.
const uint64 DefaultBatchElementMask = ~0ull;
// Remember where this batch's commands start so their flags can be patched below.
const int32 StartCommandIndex = VisibleCommands.Num();
if (StaticMeshBatch->bViewDependentArguments)
{
// Work on a copy so the view-dependent arguments do not modify the shared static batch.
FMeshBatch ViewDepenedentMeshBatch(*StaticMeshBatch);
StaticMeshBatch->PrimitiveSceneInfo->Proxy->ApplyViewDependentMeshArguments(View, ViewDepenedentMeshBatch);
PassMeshProcessor->AddMeshBatch(ViewDepenedentMeshBatch, DefaultBatchElementMask, StaticMeshBatch->PrimitiveSceneInfo->Proxy, StaticMeshBatch->Id);
}
else
{
PassMeshProcessor->AddMeshBatch(*StaticMeshBatch, DefaultBatchElementMask, StaticMeshBatch->PrimitiveSceneInfo->Proxy, StaticMeshBatch->Id);
}
// Patch the culling payload flags for the generated visible mesh commands.
// Might be better to pass CullingPayloadFlags through AddMeshBatch() but that will involve a lot of plumbing.
const EMeshDrawCommandCullingPayloadFlags CullingPayloadFlags = DynamicMeshCommandBuildFlags.IsValidIndex(MeshIndex) ? DynamicMeshCommandBuildFlags[MeshIndex] : EMeshDrawCommandCullingPayloadFlags::NoScreenSizeCull;
// NoScreenSizeCull is the value used when no flag was supplied, so only other values are written.
if (CullingPayloadFlags != EMeshDrawCommandCullingPayloadFlags::NoScreenSizeCull)
{
const int32 EndCommandIndex = VisibleCommands.Num();
for (int32 CommandIndex = StartCommandIndex; CommandIndex < EndCommandIndex; ++CommandIndex)
{
VisibleCommands[CommandIndex].CullingPayloadFlags = CullingPayloadFlags;
}
}
}
// The caller preallocated resources for at most MaxNumBuildRequestElements commands.
const int32 NumCommandsGenerated = VisibleCommands.Num() - NumCommandsBefore;
checkf(NumCommandsGenerated <= MaxNumBuildRequestElements,
TEXT("Generated %d mesh draw commands for DynamicMeshCommandBuildRequests, while preallocating resources only for %d of them."), NumCommandsGenerated, MaxNumBuildRequestElements);
}
}
生成RHICommandList
FMeshBatch转换成FMeshDrawCommand后,每个Pass都对应一个FMeshPassProcessor,由它生成的该Pass需要绘制的所有FMeshDrawCommand保存在对应的FParallelMeshDrawCommandPass中(即TaskContext.MeshDrawCommands),以便渲染器在合适的时间触发并渲染。
以DeferredShadingRenderer的深度Pass为例,其调用链如下:
FDeferredShadingSceneRenderer::Render --> FDeferredShadingSceneRenderer::RenderPrePass --> FParallelMeshDrawCommandPass::DispatchDraw --> FDrawVisibleMeshCommandsAnyThreadTask::DoTask --> FInstanceCullingContext::SubmitDrawCommands --> FMeshDrawCommand::SubmitDraw
// Engine\Source\Runtime\Renderer\Private\DeferredShadingRenderer.cpp
// Excerpt of the deferred renderer's per-frame entry point; only the part that leads to the
// depth pre-pass is shown, everything else is elided with (...).
void FDeferredShadingSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
(...)
// Local helper that runs the pre-pass for the given set of views. It captures the surrounding
// state by reference (bNeedsPrePass, SceneTextures and InstanceCullingManager come from elided code).
auto RenderPrepassAndVelocity = [&](auto& InViews, auto& InNaniteBasePassVisibility, auto& NaniteRasterResults, auto& PrimaryNaniteViews)
{
// Passed by address to RenderPrePass, which may fill it in; stays null otherwise.
FRDGTextureRef FirstStageDepthBuffer = nullptr;
{
(...)
// Draw the scene pre-pass / early z pass, populating the scene depth buffer and HiZ
if (bNeedsPrePass)
{
RenderPrePass(GraphBuilder, InViews, SceneTextures.Depth.Target, InstanceCullingManager, &FirstStageDepthBuffer);
}
else
{
// We didn't do the prepass, but we still want the HMD mask if there is one
RenderPrePassHMD(GraphBuilder, InViews, SceneTextures.Depth.Target);
}
(...)
}
(...)
};
(...)
}
// Engine\Source\Runtime\Renderer\Private\DepthRendering.cpp
// Excerpt of the depth pre-pass. For every view that should render, it builds the rendering
// commands of the requested depth mesh pass and adds a render-graph pass whose lambda dispatches
// the draws, either through a parallel command list set or directly on the given command list.
void FDeferredShadingSceneRenderer::RenderPrePass(FRDGBuilder& GraphBuilder, TArrayView<FViewInfo> InViews, FRDGTextureRef SceneDepthTexture, FInstanceCullingManager& InstanceCullingManager, FRDGTextureRef* FirstStageDepthBuffer)
{
(...)
// Parallel recording requires both the RHI-level switch and the pre-pass console variable.
const bool bParallelDepthPass = GRHICommandList.UseParallelAlgorithms() && CVarParallelPrePass.GetValueOnRenderThread();
(...)
// Renders one depth mesh pass (DepthPass or SecondStageDepthPass) for all views.
auto RenderDepthPass = [&](uint8 DepthMeshPass)
{
check(DepthMeshPass == EMeshPass::DepthPass || DepthMeshPass == EMeshPass::SecondStageDepthPass);
const bool bSecondStageDepthPass = DepthMeshPass == EMeshPass::SecondStageDepthPass;
if (bParallelDepthPass)
{
RDG_WAIT_FOR_TASKS_CONDITIONAL(GraphBuilder, IsDepthPassWaitForTasksEnabled());
// Iterate over all views; the depth pass is rendered once per view.
for (int32 ViewIndex = 0; ViewIndex < InViews.Num(); ++ViewIndex)
{
FViewInfo& View = InViews[ViewIndex];
// Set up the render resources and state for the depth pass.
RDG_GPU_MASK_SCOPE(GraphBuilder, View.GPUMask);
RDG_EVENT_SCOPE_CONDITIONAL(GraphBuilder, InViews.Num() > 1, "View%d", ViewIndex);
FMeshPassProcessorRenderState DrawRenderState;
SetupDepthPassState(DrawRenderState);
// The second-stage pass is only rendered for views that opted into it.
const bool bShouldRenderView = View.ShouldRenderView() && (bSecondStageDepthPass ? View.bUsesSecondStageDepthPass : true);
if (bShouldRenderView)
{
View.BeginRenderView();
FDepthPassParameters* PassParameters = GetDepthPassParameters(GraphBuilder, View, SceneDepthTexture);
// Build the rendering commands before the pass is added; InstanceCullingDrawParams is handed in here
// and later passed to DispatchDraw.
View.ParallelMeshDrawCommandPasses[DepthMeshPass].BuildRenderingCommands(GraphBuilder, Scene->GPUScene, PassParameters->InstanceCullingDrawParams);
// The parallel variant adds SkipRenderPass to the pass flags and receives the FRDGPass in its lambda.
GraphBuilder.AddPass(
bSecondStageDepthPass ? RDG_EVENT_NAME("SecondStageDepthPassParallel") : RDG_EVENT_NAME("DepthPassParallel"),
PassParameters,
ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass,
[this, &View, PassParameters, DepthMeshPass](const FRDGPass* InPass, FRHICommandListImmediate& RHICmdList)
{
// Construct the container that holds the parallel command lists.
FRDGParallelCommandListSet ParallelCommandListSet(InPass, RHICmdList, GET_STATID(STAT_CLP_Prepass), View, FParallelCommandListBindings(PassParameters));
ParallelCommandListSet.SetHighPriority();
// Kick off the parallel draw.
View.ParallelMeshDrawCommandPasses[DepthMeshPass].DispatchDraw(&ParallelCommandListSet, RHICmdList, &PassParameters->InstanceCullingDrawParams);
});
RenderPrePassEditorPrimitives(GraphBuilder, View, PassParameters, DrawRenderState, DepthPass.EarlyZPassMode, InstanceCullingManager);
}
}
}
else
{
// Non-parallel path: same per-view setup, but the draws are dispatched directly on the pass's command list.
for (int32 ViewIndex = 0; ViewIndex < InViews.Num(); ++ViewIndex)
{
FViewInfo& View = InViews[ViewIndex];
RDG_GPU_MASK_SCOPE(GraphBuilder, View.GPUMask);
RDG_EVENT_SCOPE_CONDITIONAL(GraphBuilder, InViews.Num() > 1, "View%d", ViewIndex);
FMeshPassProcessorRenderState DrawRenderState;
SetupDepthPassState(DrawRenderState);
const bool bShouldRenderView = View.ShouldRenderView() && (bSecondStageDepthPass ? View.bUsesSecondStageDepthPass : true);
if (bShouldRenderView)
{
View.BeginRenderView();
FDepthPassParameters* PassParameters = GetDepthPassParameters(GraphBuilder, View, SceneDepthTexture);
View.ParallelMeshDrawCommandPasses[DepthMeshPass].BuildRenderingCommands(GraphBuilder, Scene->GPUScene, PassParameters->InstanceCullingDrawParams);
GraphBuilder.AddPass(
bSecondStageDepthPass ? RDG_EVENT_NAME("SecondStageDepthPass") : RDG_EVENT_NAME("DepthPass"),
PassParameters,
ERDGPassFlags::Raster,
[this, &View, PassParameters, DepthMeshPass](FRHICommandList& RHICmdList)
{
SetStereoViewport(RHICmdList, View, 1.0f);
// A null command list set selects the non-parallel branch of DispatchDraw.
View.ParallelMeshDrawCommandPasses[DepthMeshPass].DispatchDraw(nullptr, RHICmdList, &PassParameters->InstanceCullingDrawParams);
});
RenderPrePassEditorPrimitives(GraphBuilder, View, PassParameters, DrawRenderState, DepthPass.EarlyZPassMode, InstanceCullingManager);
}
}
}
};
(...)
}
// Engine\Source\Runtime\Renderer\Private\MeshDrawCommands.cpp
// Dispatches the draws of this mesh pass. When a parallel command list set is supplied, the draws
// are split across several FDrawVisibleMeshCommandsAnyThreadTask instances, each recording into its
// own command list; the non-parallel branch is elided in this excerpt.
void FParallelMeshDrawCommandPass::DispatchDraw(FParallelCommandListSet* ParallelCommandListSet, FRHICommandList& RHICmdList, const FInstanceCullingDrawParams* InstanceCullingDrawParams) const
{
(...)
if (ParallelCommandListSet)
{
const ENamedThreads::Type RenderThread = ENamedThreads::GetRenderThread();
// Gather the prerequisite events: those of the command list set plus this pass's own task event, if valid.
FGraphEventArray Prereqs;
if (ParallelCommandListSet->GetPrereqs())
{
Prereqs.Append(*ParallelCommandListSet->GetPrereqs());
}
if (TaskEventRef.IsValid())
{
Prereqs.Add(TaskEventRef);
}
// Choose the number of parallel draw tasks: bounded by the worker thread count and the set's Width,
// and never more than ceil(MaxNumDraws / MinDrawsPerCommandList).
// NOTE(review): NumTasks is 0 when MaxNumDraws is 0, which would make the DivideAndRoundUp below divide
// by zero - presumably the elided code above returns early in that case; confirm.
const int32 NumThreads = FMath::Min<int32>(FTaskGraphInterface::Get().GetNumWorkerThreads(), ParallelCommandListSet->Width);
const int32 NumTasks = FMath::Min<int32>(NumThreads, FMath::DivideAndRoundUp(MaxNumDraws, ParallelCommandListSet->MinDrawsPerCommandList));
const int32 NumDrawsPerTask = FMath::DivideAndRoundUp(MaxNumDraws, NumTasks);
// Loop NumTasks times, creating one draw task (FDrawVisibleMeshCommandsAnyThreadTask) per iteration.
for (int32 TaskIndex = 0; TaskIndex < NumTasks; TaskIndex++)
{
// NumDraws is only used for the check and for AddParallelCommandList; the task itself receives
// TaskIndex and NumTasks rather than this range.
const int32 StartIndex = TaskIndex * NumDrawsPerTask;
const int32 NumDraws = FMath::Min(NumDrawsPerTask, MaxNumDraws - StartIndex);
checkSlow(NumDraws > 0);
FRHICommandList* CmdList = ParallelCommandListSet->NewParallelCommandList();
// Construct the FDrawVisibleMeshCommandsAnyThreadTask and hand it to the task graph.
// TaskContext.MeshDrawCommands holds the commands generated earlier by the FMeshPassProcessor.
FGraphEventRef AnyThreadCompletionEvent = TGraphTask<FDrawVisibleMeshCommandsAnyThreadTask>::CreateTask(&Prereqs, RenderThread)
.ConstructAndDispatchWhenReady(*CmdList, TaskContext.InstanceCullingContext, TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet,
OverrideArgs,
TaskContext.InstanceFactor,
TaskIndex, NumTasks);
// Register the command list and its completion event with the set so that completion of the
// pass's parallel draws can be tracked.
ParallelCommandListSet->AddParallelCommandList(CmdList, AnyThreadCompletionEvent, NumDraws);
}
}
else
{
(...)
}
}
// Task that submits one slice of a pass's visible mesh draw commands into its own command list.
// The slice is derived from TaskIndex / TaskNum inside DoTask.
class FDrawVisibleMeshCommandsAnyThreadTask : public FRenderTask
{
// Command list this task records into.
FRHICommandList& RHICmdList;
// Context whose SubmitDrawCommands performs the actual submission.
const FInstanceCullingContext& InstanceCullingContext;
// Full command array of the pass; only a sub-range is submitted by this task.
const FMeshCommandOneFrameArray& VisibleMeshDrawCommands;
const FGraphicsMinimalPipelineStateSet& GraphicsMinimalPipelineStateSet;
// Held by value, unlike the reference members above.
const FMeshDrawCommandOverrideArgs OverrideArgs;
uint32 InstanceFactor;
// Index of this task and total number of tasks; together they select the sub-range.
int32 TaskIndex;
int32 TaskNum;
public:
(...)
// Task body: computes this task's draw range, submits it, then ends the render pass and
// finishes recording on the command list.
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
{
FOptionalTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
SCOPED_NAMED_EVENT_TEXT("DrawVisibleMeshCommandsAnyThreadTask", FColor::Magenta);
checkSlow(RHICmdList.IsInsideRenderPass());
// Compute the range of draws handled by this task. Tasks whose index is not below the command
// count get an empty range; for trailing tasks NumDraws can also come out <= 0.
const int32 DrawNum = VisibleMeshDrawCommands.Num();
const int32 NumDrawsPerTask = TaskIndex < DrawNum ? FMath::DivideAndRoundUp(DrawNum, TaskNum) : 0;
const int32 StartIndex = TaskIndex * NumDrawsPerTask;
const int32 NumDraws = FMath::Min(NumDrawsPerTask, DrawNum - StartIndex);
// Pass the data needed for drawing on to the submission interface.
InstanceCullingContext.SubmitDrawCommands(
VisibleMeshDrawCommands,
GraphicsMinimalPipelineStateSet,
OverrideArgs,
StartIndex,
NumDraws,
InstanceFactor,
RHICmdList);
RHICmdList.EndRenderPass();
RHICmdList.FinishRecording();
}
};
// Engine\Source\Runtime\Renderer\Private\InstanceCulling\InstanceCullingContext.cpp
// Submits the commands in [StartIndex, StartIndex + NumMeshDrawCommands) to RHICmdList.
// When the context is enabled, each command is submitted individually with per-command scene
// arguments; otherwise the range is forwarded to SubmitMeshDrawCommandsRange with default scene args.
void FInstanceCullingContext::SubmitDrawCommands(
const FMeshCommandOneFrameArray& VisibleMeshDrawCommands,
const FGraphicsMinimalPipelineStateSet& GraphicsMinimalPipelineStateSet,
const FMeshDrawCommandOverrideArgs& OverrideArgs,
int32 StartIndex,
int32 NumMeshDrawCommands,
uint32 InInstanceFactor,
FRHICommandList& RHICmdList) const
{
(...)
if (IsEnabled())
{
(...)
// Walk the given range of draw commands and submit them one by one.
// (SceneArgs, StateCache, DrawCommandInfo, VisibleMeshDrawCommand and InstanceFactor are declared in
// elided code.)
for (int32 DrawCommandIndex = StartIndex; DrawCommandIndex < StartIndex + NumMeshDrawCommands; DrawCommandIndex++)
{
(...)
SceneArgs.PrimitiveIdOffset = OverrideArgs.InstanceDataByteOffset + DrawCommandInfo.InstanceDataByteOffset;
// Submit a single mesh draw command.
FMeshDrawCommand::SubmitDraw(*VisibleMeshDrawCommand.MeshDrawCommand, GraphicsMinimalPipelineStateSet, SceneArgs, InstanceFactor, RHICmdList, StateCache);
// If MDC was split to a more than one batch, submit them without changing state
// (only SubmitDrawEnd is called; each extra batch advances the primitive-id offset by the batch stride
// and the indirect-args offset by one FRHIDrawIndexedIndirectParameters).
for (uint32 BatchIdx = 1; BatchIdx < DrawCommandInfo.NumBatches; ++BatchIdx)
{
SceneArgs.PrimitiveIdOffset += DrawCommandInfo.BatchDataStride;
SceneArgs.IndirectArgsByteOffset += sizeof(FRHIDrawIndexedIndirectParameters);
FMeshDrawCommand::SubmitDrawEnd(*VisibleMeshDrawCommand.MeshDrawCommand, SceneArgs, InstanceFactor, RHICmdList);
}
}
}
else
{
// Fallback when the context is not enabled: default-constructed scene args, same range.
FMeshDrawCommandSceneArgs SceneArgs;
SubmitMeshDrawCommandsRange(VisibleMeshDrawCommands, GraphicsMinimalPipelineStateSet, SceneArgs, 0, false, StartIndex, NumMeshDrawCommands, InInstanceFactor, RHICmdList);
}
}
// Engine\Source\Runtime\Renderer\Private\MeshPassProcessor.cpp
/**
 * Submits one mesh draw command to the command list.
 *
 * Optionally emits a breadcrumb / draw event for debugging, then binds the command's state through
 * SubmitDrawBegin and, only if that reports success, issues the draw through SubmitDrawEnd.
 *
 * @param MeshDrawCommand                  Command to submit.
 * @param GraphicsMinimalPipelineStateSet  Forwarded to SubmitDrawBegin.
 * @param SceneArgs                        Per-draw scene arguments forwarded to both stages.
 * @param InstanceFactor                   Forwarded to both stages and to the optional draw event.
 * @param RHICmdList                       Command list that receives the state changes and the draw.
 * @param StateCache                       State cache forwarded to SubmitDrawBegin.
 */
void FMeshDrawCommand::SubmitDraw(
    const FMeshDrawCommand& RESTRICT MeshDrawCommand,
    const FGraphicsMinimalPipelineStateSet& GraphicsMinimalPipelineStateSet,
    const FMeshDrawCommandSceneArgs& SceneArgs,
    uint32 InstanceFactor,
    FRHICommandList& RHICmdList,
    FMeshDrawCommandStateCache& RESTRICT StateCache)
{
#if MESH_DRAW_COMMAND_DEBUG_DATA && RHI_WANT_BREADCRUMB_EVENTS
    if (!MeshDrawCommand.DebugData.ResourceName.IsValid())
    {
        // No resource name available: the breadcrumb carries the material name only.
        BREADCRUMB_EVENTF(RHICmdList, MeshDrawCommand, TEXT("%s"), *MeshDrawCommand.DebugData.MaterialName);
    }
    else
    {
        // Material name followed by the resource name, written into a stack buffer first.
        TCHAR ResourceNameChars[FName::StringBufferSize];
        const uint32 ResourceNameLength = MeshDrawCommand.DebugData.ResourceName.ToString(ResourceNameChars);
        BREADCRUMB_EVENTF(RHICmdList, MeshDrawCommand, TEXT("%s %.*s"), *MeshDrawCommand.DebugData.MaterialName, ResourceNameLength, ResourceNameChars);
    }
#endif

#if WANTS_DRAW_MESH_EVENTS
    // Stays alive until the function returns, so it spans the whole submission.
    FMeshDrawEvent MeshEvent(MeshDrawCommand, InstanceFactor, RHICmdList);
#endif

    // This entry point always allows the begin stage to skip the command.
    const bool bAllowSkipDrawCommand = true;
    const bool bStateBound = SubmitDrawBegin(MeshDrawCommand, GraphicsMinimalPipelineStateSet, SceneArgs, InstanceFactor, RHICmdList, StateCache, bAllowSkipDrawCommand);
    if (!bStateBound)
    {
        return;
    }

    SubmitDrawEnd(MeshDrawCommand, SceneArgs, InstanceFactor, RHICmdList);
}
// UE4中SubmitDraw的工作被拆分到了FMeshDrawCommand::SubmitDrawBegin和FMeshDrawCommand::SubmitDrawEnd
// First half of a draw submission: binds pipeline state, stencil ref, vertex streams and shader
// bindings for MeshDrawCommand, using StateCache to skip state that is already set.
// The visible path returns true; any early-out (e.g. related to bAllowSkipDrawCommand) would live
// in the elided parts and cannot be confirmed from this excerpt.
bool FMeshDrawCommand::SubmitDrawBegin(
const FMeshDrawCommand& RESTRICT MeshDrawCommand,
const FGraphicsMinimalPipelineStateSet& GraphicsMinimalPipelineStateSet,
const FMeshDrawCommandSceneArgs& SceneArgs,
uint32 InstanceFactor,
FRHICommandList& RHICmdList,
FMeshDrawCommandStateCache& RESTRICT StateCache,
bool bAllowSkipDrawCommand)
{
(...)
// Set and cache the PSO, but only when the command's pipeline id differs from the cached one.
// (MeshPipelineState is declared in elided code.)
if (MeshDrawCommand.CachedPipelineId.GetId() != StateCache.PipelineId)
{
FGraphicsPipelineStateInitializer GraphicsPSOInit = MeshPipelineState.AsGraphicsPipelineStateInitializer();
RHICmdList.ApplyCachedRenderTargets(GraphicsPSOInit);
EPSOPrecacheResult PSOPrecacheResult = RetrieveAndCachePSOPrecacheResult(MeshPipelineState, GraphicsPSOInit, bAllowSkipDrawCommand);
(...)
// We can set the new StencilRef here to avoid the set below
bool bApplyAdditionalState = true;
SetGraphicsPipelineState(RHICmdList, GraphicsPSOInit, MeshDrawCommand.StencilRef, EApplyRendertargetOption::CheckApply, bApplyAdditionalState, PSOPrecacheResult);
StateCache.SetPipelineState(MeshDrawCommand.CachedPipelineId.GetId());
// Recording the stencil ref here makes the comparison below evaluate to false for this command.
StateCache.StencilRef = MeshDrawCommand.StencilRef;
}
// Set and cache the stencil reference value when it changed without a PSO change.
if (MeshDrawCommand.StencilRef != StateCache.StencilRef)
{
RHICmdList.SetStencilRef(MeshDrawCommand.StencilRef);
StateCache.StencilRef = MeshDrawCommand.StencilRef;
}
// Platforms that use global UB binding don't need to set PrimitiveIdStream
// (-1 disables the primitive-id stream special case in the loop below).
const int8 PrimitiveIdStreamIndex = (IsUniformBufferStaticSlotValid(SceneArgs.BatchedPrimitiveSlot) ? -1 : MeshDrawCommand.PrimitiveIdStreamIndex);
// Bind the vertex streams.
for (int32 VertexBindingIndex = 0; VertexBindingIndex < MeshDrawCommand.VertexStreams.Num(); VertexBindingIndex++)
{
const FVertexInputStream& Stream = MeshDrawCommand.VertexStreams[VertexBindingIndex];
if (PrimitiveIdStreamIndex != -1 && Stream.StreamIndex == PrimitiveIdStreamIndex)
{
// The primitive-id stream is always rebound, sourced from the scene args' buffer and offset
// instead of the stream's own buffer.
RHICmdList.SetStreamSource(Stream.StreamIndex, SceneArgs.PrimitiveIdsBuffer, SceneArgs.PrimitiveIdOffset);
StateCache.VertexStreams[Stream.StreamIndex] = Stream;
}
else if (StateCache.VertexStreams[Stream.StreamIndex] != Stream)
{
// Any other stream is rebound only when it differs from the cached one.
RHICmdList.SetStreamSource(Stream.StreamIndex, Stream.VertexBuffer, Stream.Offset);
StateCache.VertexStreams[Stream.StreamIndex] = Stream;
}
}
// Apply the resources bound to the shaders.
MeshDrawCommand.ShaderBindings.SetOnCommandList(RHICmdList, MeshPipelineState.BoundShaderState.AsBoundShaderState(), StateCache.ShaderBindings);
return true;
}
// Second half of a draw submission: issues the actual draw call on RHICmdList.
// Four variants are selected by (a) whether the command has an index buffer and (b) whether a
// direct draw is possible (NumPrimitives > 0 and no argument override) or an indirect draw is needed.
void FMeshDrawCommand::SubmitDrawEnd(const FMeshDrawCommand& MeshDrawCommand, const FMeshDrawCommandSceneArgs& SceneArgs, uint32 InstanceFactor, FRHICommandList& RHICmdList)
{
(...)
// Issue the draw call that matches the command's data (bDoOverrideArgs is computed in elided code).
if (MeshDrawCommand.IndexBuffer)
{
if (MeshDrawCommand.NumPrimitives > 0 && !bDoOverrideArgs)
{
// Direct indexed draw; the instance count is scaled by InstanceFactor.
RHICmdList.DrawIndexedPrimitive(
MeshDrawCommand.IndexBuffer,
MeshDrawCommand.VertexParams.BaseVertexIndex,
0,
MeshDrawCommand.VertexParams.NumVertices,
MeshDrawCommand.FirstIndex,
MeshDrawCommand.NumPrimitives,
MeshDrawCommand.NumInstances * InstanceFactor
);
}
else
{
// Indirect indexed draw; the argument buffer and offset come from SceneArgs when overridden,
// otherwise from the command itself.
RHICmdList.DrawIndexedPrimitiveIndirect(
MeshDrawCommand.IndexBuffer,
bDoOverrideArgs ? SceneArgs.IndirectArgsBuffer : MeshDrawCommand.IndirectArgs.Buffer,
bDoOverrideArgs ? SceneArgs.IndirectArgsByteOffset : MeshDrawCommand.IndirectArgs.Offset
);
}
}
else
{
if (MeshDrawCommand.NumPrimitives > 0 && !bDoOverrideArgs)
{
// Direct non-indexed draw; FirstIndex is added to the base vertex index.
RHICmdList.DrawPrimitive(
MeshDrawCommand.VertexParams.BaseVertexIndex + MeshDrawCommand.FirstIndex,
MeshDrawCommand.NumPrimitives,
MeshDrawCommand.NumInstances * InstanceFactor);
}
else
{
// Indirect non-indexed draw, with the same argument selection as the indexed case.
RHICmdList.DrawPrimitiveIndirect(
bDoOverrideArgs ? SceneArgs.IndirectArgsBuffer : MeshDrawCommand.IndirectArgs.Buffer,
bDoOverrideArgs ? SceneArgs.IndirectArgsByteOffset : MeshDrawCommand.IndirectArgs.Offset
);
}
}
}
Reference
Unreal Engine Documentation: Mesh Drawing Pipeline in Unreal Engine
【UE4 C++】渲染流水线笔记:从MeshBatch到DrawCommand