blender/source/gameengine/Rasterizer/RAS_MaterialBucket.h
Benoit Bolsee 42557f90bd BGE performance, 3rd round: culling and rasterizer.
This commit extends the technique of dynamic linked lists to the mesh
slots so as to eliminate dumb scans or map lookups. It provides a massive
performance improvement in the culling and in the rasterizer when
the majority of objects are static.

Other improvements:
- Compute the opengl matrix only for objects that are visible.
- Simplify hash function for GEN_HashedPtr
- Scan light list instead of general object list to render shadows
- Remove redundant opengl calls to set specularity, shininess and diffuse
  between mesh slots.
- Cache GPU material to avoid frequent call to GPU_material_from_blender
- Only set once the fixed elements of mesh slot
- Use more inline function

The following table shows the performance increase between 2.48, 1st round
and this round of improvement. The test was done with a scene containing
40000 objects, of which approximately 1000 are in the view frustum. The
objects are simple textured cubes to make sure the GPU is not the bottleneck.
As some of the rasterizer processing time has moved under culling, I present
the sum of scenegraph (includes culling) + rasterizer time.

Scenegraph+rasterizer(ms)       2.48      1st round       3rd round

All objects static,            323.0           86.0             7.2
all visible, 1000 in 
the view frustum

All objects static,            219.0           49.7             N/A(*)
all invisible.

All objects moving,            323.0          105.6            34.7
all visible, 1000 in 
the view frustum

Scene destruction              40min          40min              4s

(*) : this time is not representative because the frame rate was at 60fps.
      In that case, the GPU holds down the GE by frame sync. By design, the
      overhead of the rasterizer is 0 when the objects are invisible.

This table shows a global speed up between 9x and 45x compared to 2.48a
for scenegraph, culling and rasterizer overhead. The speed up goes much
higher when objects are invisible.

An additional 2-4x speed up is possible in the scenegraph by upgrading
the Moto library to use Eigen2 BLAS library instead of C++ classes but
the scenegraph is already so fast that it is not a priority right now.

Next speed up in logic: many things to do there...
2009-05-07 09:13:01 +00:00

239 lines
5.8 KiB
C++

/**
* $Id$
*
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
* All rights reserved.
*
* The Original Code is: all of this file.
*
* Contributor(s): none yet.
*
* ***** END GPL LICENSE BLOCK *****
*/
#ifndef __RAS_MATERIALBUCKET
#define __RAS_MATERIALBUCKET
#include "RAS_TexVert.h"
#include "GEN_Map.h"
#include "STR_HashedString.h"
#include "SG_QList.h"
#include "MT_Transform.h"
#include "RAS_IPolygonMaterial.h"
#include "RAS_IRasterizer.h"
#include "RAS_Deformer.h"
#include <vector>
#include <set>
#include <list>
using namespace std;
/* Display List Slot */
class KX_ListSlot
{
protected:
	// Number of outstanding references to this slot.
	int m_refcount;

public:
	// A freshly constructed slot starts with exactly one reference,
	// held by whoever created it.
	KX_ListSlot() : m_refcount(1) {}

	virtual ~KX_ListSlot() {}

	// Drops one reference and returns how many remain.
	// When no reference is left the object deletes itself and 0 is
	// returned; the pointer must not be used after that.
	virtual int Release()
	{
		const int remaining = --m_refcount;
		if (remaining <= 0) {
			delete this;
			return 0;
		}
		return remaining;
	}

	// Takes one more reference and hands back this object, so the call
	// can be used directly where a pointer to the slot is needed.
	virtual KX_ListSlot* AddRef()
	{
		++m_refcount;
		return this;
	}

	// Must be implemented by subclasses; receives the new "modified" flag.
	virtual void SetModified(bool mod) = 0;
};
class RAS_DisplayArray;
class RAS_MeshSlot;
class RAS_MeshMaterial;
class RAS_MaterialBucket;
struct DerivedMesh;
/* An array with data used for OpenGL drawing */
class RAS_DisplayArray
{
// Plain data holder: every member is public and no constructor is declared
// here, so m_type and m_users are not given a value by this class itself.
public:
// Vertex storage of this array.
vector<RAS_TexVert> m_vertex;
// Index storage, kept as unsigned short values.
// NOTE(review): presumably these index into m_vertex -- confirm in the .cpp.
vector<unsigned short> m_index;
// Primitive kind of the array; each enumerator's value matches the vertex
// count its name suggests (2 for LINE, 3 for TRIANGLE, 4 for QUAD).
enum { LINE = 2, TRIANGLE = 3, QUAD = 4 } m_type;
//RAS_MeshSlot *m_origSlot;
// User counter, maintained outside this header.
// NOTE(review): presumably the number of mesh slots sharing this array -- TODO confirm.
int m_users;
// Size limits for one array. Both are 65535, which is the largest value a
// 16-bit unsigned short (the element type of m_index) can hold.
enum { BUCKET_MAX_INDEX = 65535 };
enum { BUCKET_MAX_VERTEX = 65535 };
};
/* Entry of a RAS_MeshObject into RAS_MaterialBucket */
typedef std::vector<RAS_DisplayArray*> RAS_DisplayArrayList;
// The QList is used to link the mesh slots to the object
// The DList is used to link the visible mesh slots to the material bucket
class RAS_MeshSlot : public SG_QList
{
// Mesh slot: ties display arrays to a material bucket and carries the
// per-object state read at render time. All non-inline members are
// implemented outside this header.
// RAS_ListRasterizer is granted access to the private bookkeeping below.
friend class RAS_ListRasterizer;
private:
// indices into display arrays
// NOTE(review): presumably the start/end bounds of the arrays, indices and
// vertices this slot uses inside m_displayArrays -- confirm in the .cpp.
int m_startarray;
int m_endarray;
int m_startindex;
int m_endindex;
int m_startvertex;
int m_endvertex;
// Display arrays referenced by this slot (vector of RAS_DisplayArray pointers).
RAS_DisplayArrayList m_displayArrays;
// for construction only
RAS_DisplayArray* m_currentArray;
public:
// for rendering
// Bucket this slot is associated with (init() takes a bucket argument).
RAS_MaterialBucket* m_bucket;
RAS_MeshObject* m_mesh;
// Opaque pointer; its concrete type is not visible in this header.
void* m_clientObj;
RAS_Deformer* m_pDeformer;
DerivedMesh* m_pDerivedMesh;
// NOTE(review): presumably points at a 4x4 (16 doubles) OpenGL matrix owned
// elsewhere -- ownership and size are not established here, TODO confirm.
double* m_OpenGLMatrix;
// visibility
bool m_bVisible;
// Culling flag, read and written through IsCulled() / SetCulled().
bool m_bCulled;
// object color
bool m_bObjectColor;
MT_Vector4 m_RGBAcolor;
// display lists
KX_ListSlot* m_DisplayList;
bool m_bDisplayList;
// joined mesh slots
// NOTE(review): presumably m_joinSlot is the slot this one was joined into and
// m_joinedSlots the slots joined into this one -- see Join()/Split() in the .cpp.
RAS_MeshSlot* m_joinSlot;
MT_Matrix4x4 m_joinInvTransform;
list<RAS_MeshSlot*> m_joinedSlots;
RAS_MeshSlot();
RAS_MeshSlot(const RAS_MeshSlot& slot);
virtual ~RAS_MeshSlot();
void init(RAS_MaterialBucket *bucket, int numverts);
// Cursor passed by reference to begin()/next()/end().
// NOTE(review): field meanings below are inferred from their names only;
// the traversal logic lives in the .cpp.
struct iterator {
RAS_DisplayArray *array;
RAS_TexVert *vertex;
unsigned short *index;
size_t startvertex;
size_t endvertex;
size_t totindex;
size_t arraynum;
};
// Traversal protocol over the slot's data using the cursor above:
// presumably begin() positions it, next() advances it and end() reports
// completion -- confirm against the implementation.
void begin(iterator& it);
void next(iterator& it);
bool end(iterator& it);
/* used during construction */
void SetDisplayArray(int numverts);
RAS_DisplayArray *CurrentDisplayArray();
void SetDeformer(RAS_Deformer* deformer);
void AddPolygon(int numverts);
int AddVertex(const RAS_TexVert& tv);
void AddPolygonVertex(int offset);
/* optimization */
bool Split(bool force=false);
bool Join(RAS_MeshSlot *target, MT_Scalar distance);
bool Equals(RAS_MeshSlot *target);
// When USE_SPLIT is defined IsCulled() is only declared here and defined
// elsewhere; otherwise it is an inline accessor returning m_bCulled.
#ifdef USE_SPLIT
bool IsCulled();
#else
bool IsCulled() { return m_bCulled; }
#endif
// Stores the given culling state in m_bCulled.
void SetCulled(bool culled) { m_bCulled = culled; }
};
/* Used by RAS_MeshObject, to point to its slots in a bucket */
class RAS_MeshMaterial
{
// Plain record with public members only; no constructor is declared here,
// so the two pointers are not initialised by this class itself.
public:
// NOTE(review): presumably the template slot from which per-object slots
// are derived -- TODO confirm against RAS_MeshObject.
RAS_MeshSlot *m_baseslot;
// Material bucket associated with this entry.
class RAS_MaterialBucket *m_bucket;
// Mesh slots looked up by a hashed pointer key.
// NOTE(review): the key is presumably the client object -- verify against callers.
GEN_Map<GEN_HashedPtr,RAS_MeshSlot*> m_slots;
};
/* Contains a list of display arrays with the same material,
* and a mesh slot for each mesh that uses display arrays in
* this bucket */
class RAS_MaterialBucket
{
// Owns the list of mesh slots for one material (m_meshSlots stores them by
// value) and keeps a separate linked list of the slots to render.
public:
RAS_MaterialBucket(RAS_IPolyMaterial* mat);
virtual ~RAS_MaterialBucket();
/* Bucket Sorting */
// Comparator type, only declared here and defined elsewhere; it orders the
// bucket pointers stored in Set.
struct less;
typedef set<RAS_MaterialBucket*, less> Set;
/* Material Properties */
RAS_IPolyMaterial* GetPolyMaterial() const;
bool IsAlpha() const;
bool IsZSort() const;
/* Rendering */
bool ActivateMaterial(const MT_Transform& cameratrans, RAS_IRasterizer* rasty,
RAS_IRenderTools *rendertools);
void RenderMeshSlot(const MT_Transform& cameratrans, RAS_IRasterizer* rasty,
RAS_IRenderTools* rendertools, RAS_MeshSlot &ms);
/* Mesh Slot Access */
// Iterators over the list of mesh slots.
// NOTE(review): presumably over m_meshSlots -- the definitions are not in this header.
list<RAS_MeshSlot>::iterator msBegin();
list<RAS_MeshSlot>::iterator msEnd();
class RAS_MeshSlot* AddMesh(int numverts);
class RAS_MeshSlot* CopyMesh(class RAS_MeshSlot *ms);
void RemoveMesh(class RAS_MeshSlot* ms);
void Optimize(MT_Scalar distance);
// Links the given slot into the active list by calling AddBack() on the
// list head.
void ActivateMesh(RAS_MeshSlot* slot)
{
m_activeMeshSlotsHead.AddBack(slot);
}
// Gives direct (mutable) access to the head of the active-slot list.
SG_DList& GetActiveMeshSlots()
{
return m_activeMeshSlotsHead;
}
// Calls Remove() on the list head and returns the result cast to a mesh slot.
// NOTE(review): presumably this unlinks and returns the first active slot, and
// yields NULL once the list is empty -- confirm against SG_DList::Remove().
RAS_MeshSlot* GetNextActiveMeshSlot()
{
return (RAS_MeshSlot*)m_activeMeshSlotsHead.Remove();
}
private:
list<RAS_MeshSlot> m_meshSlots; // all the mesh slots
// Material passed in at construction time is of this type.
// NOTE(review): ownership of the pointer is not visible here.
RAS_IPolyMaterial* m_material;
SG_DList m_activeMeshSlotsHead; // only those which must be rendered
};
#endif //__RAS_MATERIALBUCKET