blender/source/gameengine/Rasterizer/RAS_MeshObject.cpp
Benoit Bolsee 42557f90bd BGE performance, 3rd round: culling and rasterizer.
This commit extends the technique of dynamic linked lists to the mesh
slots so as to eliminate dumb scans or map lookups. It provides a massive
performance improvement in the culling and in the rasterizer when
the majority of objects are static.

Other improvements:
- Compute the opengl matrix only for objects that are visible.
- Simplify hash function for GEN_HashedPtr
- Scan light list instead of general object list to render shadows
- Remove redundant opengl calls to set specularity, shininess and diffuse
  between mesh slots.
- Cache GPU material to avoid frequent calls to GPU_material_from_blender
- Only set once the fixed elements of mesh slot
- Use more inline functions

The following table shows the performance increase between 2.48, 1st round
and this round of improvement. The test was done with a scene containing
40000 objects, of which approximately 1000 are in the view frustum. The
objects are simple textured cubes to make sure the GPU is not the bottleneck.
As some of the rasterizer processing time has moved under culling, I present
the sum of scenegraph (includes culling) + rasterizer time.

Scenegraph+rasterizer(ms)       2.48      1st round       3rd round

All objects static,            323.0           86.0             7.2
all visible, 1000 in
the view frustum

All objects static,            219.0           49.7             N/A(*)
all invisible.

All objects moving,            323.0          105.6            34.7
all visible, 1000 in
the view frustum

Scene destruction              40min          40min              4s

(*) : this time is not representative because the frame rate was at 60fps.
      In that case, the GPU holds down the GE by frame sync. By design, the
      overhead of the rasterizer is 0 when the objects are invisible.

This table shows a global speed up between 9x and 45x compared to 2.48a
for scenegraph, culling and rasterizer overhead. The speed up goes much
higher when objects are invisible.

An additional 2-4x speed up is possible in the scenegraph by upgrading
the Moto library to use Eigen2 BLAS library instead of C++ classes but
the scenegraph is already so fast that it is not a priority right now.

Next speed up in logic: many things to do there...
2009-05-07 09:13:01 +00:00

529 lines
12 KiB
C++

/**
* $Id$
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
* All rights reserved.
*
* The Original Code is: all of this file.
*
* Contributor(s): none yet.
*
* ***** END GPL LICENSE BLOCK *****
*/
#include "RAS_MeshObject.h"
#include "RAS_IRasterizer.h"
#include "MT_MinMax.h"
#include "MT_Point3.h"
#include <algorithm>
/* polygon sorting */
struct RAS_MeshObject::polygonSlot
{
	/* sort key: dot product of the sort normal with the summed vertex positions */
	float m_z;
	/* vertex indices of the polygon held by this slot (room for a quad) */
	int m_index[4];

	polygonSlot() {}

	/* Load the nvert indices starting at indexarray[offset] into this slot
	 * and compute the sort key from them.
	 * pnorm is the normal of the plane whose distance to the polygon is
	 * used as the sort key. */
	void get(const RAS_TexVert *vertexarray, const unsigned short *indexarray,
		int offset, int nvert, const MT_Vector3& pnorm)
	{
		const unsigned short *src = indexarray + offset;
		MT_Vector3 sum(0, 0, 0);

		for(int v = 0; v < nvert; ++v) {
			m_index[v] = src[v];
			sum += vertexarray[m_index[v]].getXYZ();
		}

		/* The sum is deliberately not divided by the vertex count: all
		 * polygons compared against each other have the same number of
		 * vertices. The 4-th component of the plane equation is left out
		 * as well since it is constant. */
		m_z = MT_dot(pnorm, sum);
	}

	/* Write the nvert indices held by this slot back to indexarray,
	 * starting at indexarray[offset]. */
	void set(unsigned short *indexarray, int offset, int nvert)
	{
		unsigned short *dst = indexarray + offset;

		for(int v = 0; v < nvert; ++v)
			dst[v] = m_index[v];
	}
};
/* Ordering predicate: slots with a smaller m_z come first. */
struct RAS_MeshObject::backtofront
{
	bool operator()(const polygonSlot &lhs, const polygonSlot &rhs) const
	{
		return rhs.m_z > lhs.m_z;
	}
};
/* Ordering predicate: slots with a larger m_z come first. */
struct RAS_MeshObject::fronttoback
{
	bool operator()(const polygonSlot &lhs, const polygonSlot &rhs) const
	{
		return rhs.m_z < lhs.m_z;
	}
};
/* mesh object */
/* Shared empty string; GetMaterialName() and GetTextureName() return a
 * reference to it when the requested material does not exist. */
STR_String RAS_MeshObject::s_emptyname = "";
/* Construct a mesh object for the given mesh and light layer.
 * Both modification flags start out set, the deformed flag cleared. */
RAS_MeshObject::RAS_MeshObject(Mesh* mesh, int lightlayer)
	: m_lightlayer(lightlayer),
	  m_bModified(true),
	  m_bMeshModified(true),
	  m_mesh(mesh),
	  m_bDeformed(false)
{
	/* all state is set up by the initializer list */
}
/* Destroy the mesh object, deleting every polygon stored in m_Polygons
 * (they are allocated with new in AddPolygon). */
RAS_MeshObject::~RAS_MeshObject()
{
	vector<RAS_Polygon*>::iterator poly = m_Polygons.begin();

	while(poly != m_Polygons.end()) {
		delete *poly;
		++poly;
	}
}
/* Return the current value of the mesh-modified flag. */
bool RAS_MeshObject::MeshModified()
{
	const bool modified = m_bMeshModified;
	return modified;
}
/* Return the light layer value given at construction. */
unsigned int RAS_MeshObject::GetLightLayer()
{
	const unsigned int layer = m_lightlayer;
	return layer;
}
/* Return the number of mesh materials registered on this mesh. */
int RAS_MeshObject::NumMaterials()
{
	return (int)m_materials.size();
}
/* Return the material name of the matid-th mesh material, or a reference
 * to the shared empty string when matid is out of range. */
const STR_String& RAS_MeshObject::GetMaterialName(unsigned int matid)
{
	RAS_MeshMaterial* mmat = GetMeshMaterial(matid);

	if(!mmat)
		return s_emptyname;

	return mmat->m_bucket->GetPolyMaterial()->GetMaterialName();
}
/* Return the matid-th mesh material in list order, or NULL when matid is
 * not a valid position (which includes the empty list). */
RAS_MeshMaterial* RAS_MeshObject::GetMeshMaterial(unsigned int matid)
{
	if(matid >= m_materials.size())
		return NULL;

	/* the materials live in a list, so walk to the requested position */
	list<RAS_MeshMaterial>::iterator it = m_materials.begin();
	for(unsigned int step = 0; step < matid; ++step)
		++it;

	return &*it;
}
/* Return the number of polygons added to this mesh. */
int RAS_MeshObject::NumPolygons()
{
	return (int)m_Polygons.size();
}
/* Return the polygon stored at position num. No range check is done:
 * num must lie in [0, NumPolygons()). */
RAS_Polygon* RAS_MeshObject::GetPolygon(int num) const
{
	RAS_Polygon *poly = m_Polygons[num];
	return poly;
}
/* NOTE(review): these two lines declare free functions at file scope, not
 * the RAS_MeshObject members defined just below. Nothing in the visible part
 * of this file uses them, so they look like leftovers -- confirm before
 * removing. */
list<RAS_MeshMaterial>::iterator GetFirstMaterial();
list<RAS_MeshMaterial>::iterator GetLastMaterial();
/* Return an iterator to the start of the mesh material list. */
list<RAS_MeshMaterial>::iterator RAS_MeshObject::GetFirstMaterial()
{
	list<RAS_MeshMaterial>::iterator first = m_materials.begin();
	return first;
}
/* Return the past-the-end iterator of the mesh material list. Despite the
 * name this is m_materials.end(), not an iterator to the last element. */
list<RAS_MeshMaterial>::iterator RAS_MeshObject::GetLastMaterial()
{
	list<RAS_MeshMaterial>::iterator pastend = m_materials.end();
	return pastend;
}
/* Store a copy of the given name as the name of this mesh. */
void RAS_MeshObject::SetName(STR_String name)
{
	this->m_name = name;
}
/* Return a reference to the name of this mesh. */
const STR_String& RAS_MeshObject::GetName()
{
	return this->m_name;
}
/* Return the texture name of the matid-th mesh material, or a reference
 * to the shared empty string when matid is out of range. */
const STR_String& RAS_MeshObject::GetTextureName(unsigned int matid)
{
	RAS_MeshMaterial* mmat = GetMeshMaterial(matid);

	if(!mmat)
		return s_emptyname;

	return mmat->m_bucket->GetPolyMaterial()->GetTextureName();
}
/* Return the first mesh material whose bucket uses the given polygon
 * material, or NULL when this mesh has none. */
RAS_MeshMaterial *RAS_MeshObject::GetMeshMaterial(RAS_IPolyMaterial *mat)
{
	list<RAS_MeshMaterial>::iterator mit = m_materials.begin();

	while(mit != m_materials.end()) {
		if(mit->m_bucket->GetPolyMaterial() == mat)
			return &*mit;
		++mit;
	}

	return NULL;
}
/* Return the list position of the first mesh material whose bucket uses
 * the given polygon material, or -1 when this mesh has none. */
int RAS_MeshObject::GetMaterialId(RAS_IPolyMaterial *mat)
{
	int position = 0;
	list<RAS_MeshMaterial>::iterator mit;

	for(mit = m_materials.begin(); mit != m_materials.end(); ++mit) {
		if(mit->m_bucket->GetPolyMaterial() == mat)
			return position;
		++position;
	}

	return -1;
}
/* Create a polygon with numverts vertices in the given material bucket,
 * append it to this mesh and return it. The mesh keeps the polygon in
 * m_Polygons and deletes it in its destructor. */
RAS_Polygon* RAS_MeshObject::AddPolygon(RAS_MaterialBucket *bucket, int numverts)
{
	/* look up the mesh material for this bucket, creating it on first use */
	RAS_MeshMaterial *mmat = GetMeshMaterial(bucket->GetPolyMaterial());

	if(mmat == NULL) {
		RAS_MeshMaterial fresh;

		fresh.m_bucket = bucket;
		fresh.m_baseslot = bucket->AddMesh(numverts);
		fresh.m_baseslot->m_mesh = this;

		/* the list stores a copy, so point at the stored element */
		m_materials.push_back(fresh);
		mmat = &m_materials.back();
	}

	/* add it to the bucket, this also adds new display arrays */
	RAS_MeshSlot *slot = mmat->m_baseslot;
	slot->AddPolygon(numverts);

	/* the polygon refers to the slot's current display array */
	RAS_DisplayArray *darray = slot->CurrentDisplayArray();
	RAS_Polygon *poly = new RAS_Polygon(bucket, darray, numverts);

	m_Polygons.push_back(poly);
	return poly;
}
/* Currently a no-op: abgr is ignored. The former implementation is kept
 * below, commented out. */
void RAS_MeshObject::DebugColor(unsigned int abgr)
{
/*int numpolys = NumPolygons();
for (int i=0;i<numpolys;i++) {
RAS_Polygon* poly = m_polygons[i];
for (int v=0;v<poly->VertexCount();v++)
RAS_TexVert* vtx = poly->GetVertex(v)->setDebugRGBA(abgr);
}
*/
/* m_debugcolor = abgr; */
}
/* Set the RGBA color of every vertex in the base slot of the mesh material
 * that uses the given polygon material.
 * Does nothing when this mesh has no such material; GetVertex() guards the
 * same lookup, previously a NULL pointer was dereferenced here. */
void RAS_MeshObject::SetVertexColor(RAS_IPolyMaterial* mat,MT_Vector4 rgba)
{
	RAS_MeshMaterial *mmat = GetMeshMaterial(mat);
	if(!mmat)
		return;

	RAS_MeshSlot *slot = mmat->m_baseslot;
	RAS_MeshSlot::iterator it;
	size_t i;

	/* each iteration step covers the vertex range [startvertex, endvertex) */
	for(slot->begin(it); !slot->end(it); slot->next(it))
		for(i=it.startvertex; i<it.endvertex; i++)
			it.vertex[i].SetRGBA(rgba);
}
/* Assign vertex number i of poly. A vertex already stored for the same
 * origindex is reused when it lives in the current display array and
 * compares close to the new data; otherwise a new vertex is appended to the
 * base slot and recorded for later sharing. */
void RAS_MeshObject::AddVertex(RAS_Polygon *poly, int i,
	const MT_Point3& xyz,
	const MT_Point2& uv,
	const MT_Point2& uv2,
	const MT_Vector4& tangent,
	const unsigned int rgba,
	const MT_Vector3& normal,
	bool flat,
	int origindex)
{
	RAS_TexVert texvert(xyz, uv, uv2, tangent, rgba, normal, flat, origindex);

	RAS_MeshMaterial *mmat = GetMeshMaterial(poly->GetMaterial()->GetPolyMaterial());
	RAS_MeshSlot *slot = mmat->m_baseslot;
	RAS_DisplayArray *darray = slot->CurrentDisplayArray();

	/* find vertices shared between faces, with the restriction
	 * that they exist in the same display array, and have the
	 * same uv coordinate etc */
	vector<SharedVertex>& candidates = m_sharedvertex_map[origindex];

	for(size_t c = 0; c < candidates.size(); ++c) {
		SharedVertex& cand = candidates[c];

		if(cand.m_darray == darray &&
		   cand.m_darray->m_vertex[cand.m_offset].closeTo(&texvert))
		{
			/* found one, add it and we're done */
			if(poly->IsVisible())
				slot->AddPolygonVertex(cand.m_offset);
			poly->SetVertexOffset(i, cand.m_offset);
			return;
		}
	}

	/* no shared vertex found, add a new one */
	int offset = slot->AddVertex(texvert);
	if(poly->IsVisible())
		slot->AddPolygonVertex(offset);
	poly->SetVertexOffset(i, offset);

	/* remember the new vertex so later faces can share it */
	SharedVertex shared;
	shared.m_darray = darray;
	shared.m_offset = offset;
	m_sharedvertex_map[origindex].push_back(shared);
}
/* Return the number of vertices in the base slot of the mesh material that
 * uses the given polygon material, summed over everything the slot iterator
 * visits.
 * Returns 0 when this mesh has no such material; GetVertex() guards the
 * same lookup, previously a NULL pointer was dereferenced here. */
int RAS_MeshObject::NumVertices(RAS_IPolyMaterial* mat)
{
	RAS_MeshMaterial *mmat = GetMeshMaterial(mat);
	if(!mmat)
		return 0;

	RAS_MeshSlot *slot = mmat->m_baseslot;
	RAS_MeshSlot::iterator it;
	size_t len = 0;

	/* each iteration step covers the vertex range [startvertex, endvertex) */
	for(slot->begin(it); !slot->end(it); slot->next(it))
		len += it.endvertex - it.startvertex;

	return len;
}
/* Return the index-th vertex of the base slot of the matid-th mesh
 * material, counting across everything the slot iterator visits.
 * Returns NULL when matid is out of range or index is past the last vertex. */
RAS_TexVert* RAS_MeshObject::GetVertex(unsigned int matid,
	unsigned int index)
{
	RAS_MeshMaterial *mmat = GetMeshMaterial(matid);
	if(!mmat)
		return NULL;

	RAS_MeshSlot *slot = mmat->m_baseslot;
	RAS_MeshSlot::iterator it;
	size_t skipped = 0; /* vertices in the ranges already passed */

	for(slot->begin(it); !slot->end(it); slot->next(it)) {
		const size_t count = it.endvertex - it.startvertex;

		if(index < skipped + count) {
			/* The range of this step is [startvertex, endvertex) within
			 * it.vertex (see SetVertexColor), so the position inside the
			 * range must be offset by startvertex. The former code omitted
			 * the offset, which is only correct when startvertex is 0. */
			return &it.vertex[it.startvertex + (index - skipped)];
		}

		skipped += count;
	}

	return NULL;
}
void RAS_MeshObject::AddMeshUser(void *clientobj, SG_QList *head)
{
list<RAS_MeshMaterial>::iterator it;
for(it = m_materials.begin();it!=m_materials.end();++it) {
/* always copy from the base slot, which is never removed
* since new objects can be created with the same mesh data */
RAS_MeshSlot *ms = it->m_bucket->CopyMesh(it->m_baseslot);
ms->m_clientObj = clientobj;
it->m_slots.insert(clientobj, ms);
head->QAddBack(ms);
}
}
void RAS_MeshObject::UpdateBuckets(void* clientobj,
double* oglmatrix,
bool useObjectColor,
const MT_Vector4& rgbavec,
bool visible,
bool culled)
{
list<RAS_MeshMaterial>::iterator it;
for(it = m_materials.begin();it!=m_materials.end();++it) {
RAS_MeshSlot **msp = it->m_slots[clientobj];
if(!msp)
continue;
RAS_MeshSlot *ms = *msp;
ms->m_mesh = this;
ms->m_OpenGLMatrix = oglmatrix;
ms->m_bObjectColor = useObjectColor;
ms->m_RGBAcolor = rgbavec;
ms->m_bVisible = visible;
ms->m_bCulled = culled || !visible;
if (!ms->m_bCulled)
ms->m_bucket->ActivateMesh(ms);
/* split if necessary */
#ifdef USE_SPLIT
ms->Split();
#endif
}
}
void RAS_MeshObject::RemoveFromBuckets(void *clientobj)
{
list<RAS_MeshMaterial>::iterator it;
for(it = m_materials.begin();it!=m_materials.end();++it) {
RAS_MeshSlot **msp = it->m_slots[clientobj];
if(!msp)
continue;
RAS_MeshSlot *ms = *msp;
it->m_bucket->RemoveMesh(ms);
it->m_slots.remove(clientobj);
}
}
//void RAS_MeshObject::Transform(const MT_Transform& trans)
//{
//m_trans.translate(MT_Vector3(0,0,1));//.operator *=(trans);
// for (int i=0;i<m_Polygons.size();i++)
// {
// m_Polygons[i]->Transform(trans);
// }
//}
/*
void RAS_MeshObject::RelativeTransform(const MT_Vector3& vec)
{
for (int i=0;i<m_Polygons.size();i++)
{
m_Polygons[i]->RelativeTransform(vec);
}
}
*/
/* Reorder the polygon indices of every display array visited by the slot
 * iterator of ms, in ascending order of the polygons' summed vertex
 * positions projected on row 2 of the transform basis (the camera Z plane
 * normal). Arrays holding lines, or at most one polygon, are left alone. */
void RAS_MeshObject::SortPolygons(RAS_MeshSlot& ms, const MT_Transform &transform)
{
	// Limitations: sorting is quite simple, and handles many
	// cases wrong, partially due to polygons being sorted per
	// bucket.
	//
	// a) mixed triangles/quads are sorted wrong
	// b) mixed materials are sorted wrong
	// c) more than 65k faces are sorted wrong
	// d) intersecting objects are sorted wrong
	// e) intersecting polygons are sorted wrong
	//
	// a) can be solved by making all faces either triangles or quads
	// if they need to be z-sorted. c) could be solved by allowing
	// larger buckets, b) and d) cannot be solved easily if we want
	// to avoid excessive state changes while drawing. e) would
	// require splitting polygons.

	// Extract camera Z plane; it is the same for every array, so do it once.
	// unneeded: const MT_Scalar pval = transform.getOrigin()[2];
	const MT_Vector3 pnorm(transform.getBasis()[2]);

	RAS_MeshSlot::iterator it;
	for(ms.begin(it); !ms.end(it); ms.next(it)) {
		/* the array type value is used as the vertex count per polygon */
		const unsigned int nvert = (int)it.array->m_type;
		const unsigned int totpoly = it.totindex/nvert;

		if(totpoly <= 1 || it.array->m_type == RAS_DisplayArray::LINE)
			continue;

		/* get indices and z into temporary array */
		vector<polygonSlot> slots(totpoly);
		size_t p;
		for(p = 0; p < totpoly; ++p)
			slots[p].get(it.vertex, it.index, p*nvert, nvert, pnorm);

		/* sort (stable_sort might be better, if flickering happens?) */
		std::sort(slots.begin(), slots.end(), backtofront());

		/* get indices from temporary array again */
		for(p = 0; p < totpoly; ++p)
			slots[p].set(it.index, p*nvert, nvert);
	}
}
/* When the modified flag is set, clear it and raise the mesh-modified flag.
 * drawingmode is not used. */
void RAS_MeshObject::SchedulePolygons(int drawingmode)
{
	if(!m_bModified)
		return;

	m_bModified = false;
	m_bMeshModified = true;
}