diff --git a/AMDiS/bin/Makefile.am b/AMDiS/bin/Makefile.am
index 0decc669dcc35bc00007efe79a29f63acf814ef9..91b0407eca9b77f541e7fad010180f3179d4cc25 100644
--- a/AMDiS/bin/Makefile.am
+++ b/AMDiS/bin/Makefile.am
@@ -88,6 +88,10 @@ $(SOURCE_DIR)/Serializable.h $(SOURCE_DIR)/BallProject.h $(SOURCE_DIR)/CylinderP
 $(SOURCE_DIR)/MacroReader.h $(SOURCE_DIR)/MacroReader.cc \
 $(SOURCE_DIR)/ValueReader.h $(SOURCE_DIR)/ValueReader.cc \
 $(SOURCE_DIR)/Projection.h $(SOURCE_DIR)/Projection.cc \
+$(SOURCE_DIR)/SubAssembler.h $(SOURCE_DIR)/SubAssembler.cc \
+$(SOURCE_DIR)/ZeroOrderAssembler.h $(SOURCE_DIR)/ZeroOrderAssembler.cc \
+$(SOURCE_DIR)/FirstOrderAssembler.h $(SOURCE_DIR)/FirstOrderAssembler.cc \
+$(SOURCE_DIR)/SecondOrderAssembler.h $(SOURCE_DIR)/SecondOrderAssembler.cc \
 $(SOURCE_DIR)/Assembler.h $(SOURCE_DIR)/Assembler.cc \
 $(SOURCE_DIR)/AdaptInfo.h $(SOURCE_DIR)/AdaptInfo.cc \
 $(SOURCE_DIR)/Marker.h $(SOURCE_DIR)/Marker.cc \
@@ -221,6 +225,8 @@ $(SOURCE_DIR)/ElementInfo.h \
 $(SOURCE_DIR)/VertexInfo.h \
 $(SOURCE_DIR)/PeriodicInfo.h \
 $(SOURCE_DIR)/OpenMP.h \
+$(SOURCE_DIR)/ScalableQuadrature.h $(SOURCE_DIR)/ScalableQuadrature.cc \
+$(SOURCE_DIR)/SubElInfo.h $(SOURCE_DIR)/SubElInfo.cc \
 $(SOURCE_DIR)/parareal/ProblemBase.h \
 $(SOURCE_DIR)/parareal/AdaptParaReal.h $(SOURCE_DIR)/parareal/AdaptParaReal.cc
 
@@ -245,9 +251,6 @@ $(COMPOSITE_SOURCE_DIR)/ElementLevelSet.cc \
 $(COMPOSITE_SOURCE_DIR)/CompositeFEMOperator.h \
 $(COMPOSITE_SOURCE_DIR)/CompositeFEMOperator.cc \
 $(COMPOSITE_SOURCE_DIR)/SubPolytope.h $(COMPOSITE_SOURCE_DIR)/SubPolytope.cc \
-$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.h \
-$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc \
-$(COMPOSITE_SOURCE_DIR)/SubElInfo.h $(COMPOSITE_SOURCE_DIR)/SubElInfo.cc \
 $(COMPOSITE_SOURCE_DIR)/SubElementAssembler.h \
 $(COMPOSITE_SOURCE_DIR)/SubElementAssembler.cc \
 $(COMPOSITE_SOURCE_DIR)/TranslateLsFct.h
diff --git a/AMDiS/bin/Makefile.in b/AMDiS/bin/Makefile.in
index 91d1f3a2523927393c148c30dd9e94b708b09e32..e1cf7751db3240bdc22470e4630c83e06ae620a3 100644
--- a/AMDiS/bin/Makefile.in
+++ b/AMDiS/bin/Makefile.in
@@ -123,11 +123,19 @@ am__libamdis_la_SOURCES_DIST = $(PARALLEL_DIR)/ConditionalEstimator.h \
 	$(SOURCE_DIR)/CylinderProject.h $(SOURCE_DIR)/MacroReader.h \
 	$(SOURCE_DIR)/MacroReader.cc $(SOURCE_DIR)/ValueReader.h \
 	$(SOURCE_DIR)/ValueReader.cc $(SOURCE_DIR)/Projection.h \
-	$(SOURCE_DIR)/Projection.cc $(SOURCE_DIR)/Assembler.h \
-	$(SOURCE_DIR)/Assembler.cc $(SOURCE_DIR)/AdaptInfo.h \
-	$(SOURCE_DIR)/AdaptInfo.cc $(SOURCE_DIR)/Marker.h \
-	$(SOURCE_DIR)/Marker.cc $(SOURCE_DIR)/SystemVector.h \
-	$(SOURCE_DIR)/MatrixVector.h $(SOURCE_DIR)/MatrixVector.cc \
+	$(SOURCE_DIR)/Projection.cc $(SOURCE_DIR)/SubAssembler.h \
+	$(SOURCE_DIR)/SubAssembler.cc \
+	$(SOURCE_DIR)/ZeroOrderAssembler.h \
+	$(SOURCE_DIR)/ZeroOrderAssembler.cc \
+	$(SOURCE_DIR)/FirstOrderAssembler.h \
+	$(SOURCE_DIR)/FirstOrderAssembler.cc \
+	$(SOURCE_DIR)/SecondOrderAssembler.h \
+	$(SOURCE_DIR)/SecondOrderAssembler.cc \
+	$(SOURCE_DIR)/Assembler.h $(SOURCE_DIR)/Assembler.cc \
+	$(SOURCE_DIR)/AdaptInfo.h $(SOURCE_DIR)/AdaptInfo.cc \
+	$(SOURCE_DIR)/Marker.h $(SOURCE_DIR)/Marker.cc \
+	$(SOURCE_DIR)/SystemVector.h $(SOURCE_DIR)/MatrixVector.h \
+	$(SOURCE_DIR)/MatrixVector.cc \
 	$(SOURCE_DIR)/SurfaceQuadrature.h \
 	$(SOURCE_DIR)/SurfaceQuadrature.cc $(SOURCE_DIR)/LeafData.h \
 	$(SOURCE_DIR)/LeafData.cc $(SOURCE_DIR)/BoundaryManager.h \
@@ -237,7 +245,10 @@ am__libamdis_la_SOURCES_DIST = $(PARALLEL_DIR)/ConditionalEstimator.h \
 	$(SOURCE_DIR)/VtkWriter.cc $(SOURCE_DIR)/DataCollector.h \
 	$(SOURCE_DIR)/DataCollector.cc $(SOURCE_DIR)/ElementInfo.h \
 	$(SOURCE_DIR)/VertexInfo.h $(SOURCE_DIR)/PeriodicInfo.h \
-	$(SOURCE_DIR)/OpenMP.h $(SOURCE_DIR)/parareal/ProblemBase.h \
+	$(SOURCE_DIR)/OpenMP.h $(SOURCE_DIR)/ScalableQuadrature.h \
+	$(SOURCE_DIR)/ScalableQuadrature.cc $(SOURCE_DIR)/SubElInfo.h \
+	$(SOURCE_DIR)/SubElInfo.cc \
+	$(SOURCE_DIR)/parareal/ProblemBase.h \
 	$(SOURCE_DIR)/parareal/AdaptParaReal.h \
 	$(SOURCE_DIR)/parareal/AdaptParaReal.cc
 @USE_PARALLEL_AMDIS_TRUE@am__objects_1 =  \
@@ -262,15 +273,17 @@ am_libamdis_la_OBJECTS = $(am__objects_1) \
 	libamdis_la-CreatorMap.lo libamdis_la-ProblemInterpolScal.lo \
 	libamdis_la-ProblemInterpolVec.lo libamdis_la-MacroReader.lo \
 	libamdis_la-ValueReader.lo libamdis_la-Projection.lo \
-	libamdis_la-Assembler.lo libamdis_la-AdaptInfo.lo \
-	libamdis_la-Marker.lo libamdis_la-MatrixVector.lo \
-	libamdis_la-SurfaceQuadrature.lo libamdis_la-LeafData.lo \
-	libamdis_la-BoundaryManager.lo libamdis_la-DirichletBC.lo \
-	libamdis_la-RobinBC.lo libamdis_la-MatVecMultiplier.lo \
-	libamdis_la-FileWriter.lo libamdis_la-ElementFileWriter.lo \
-	libamdis_la-ElInfo.lo libamdis_la-ElInfoStack.lo \
-	libamdis_la-Operator.lo libamdis_la-Mesh.lo \
-	libamdis_la-AdaptStationary.lo \
+	libamdis_la-SubAssembler.lo libamdis_la-ZeroOrderAssembler.lo \
+	libamdis_la-FirstOrderAssembler.lo \
+	libamdis_la-SecondOrderAssembler.lo libamdis_la-Assembler.lo \
+	libamdis_la-AdaptInfo.lo libamdis_la-Marker.lo \
+	libamdis_la-MatrixVector.lo libamdis_la-SurfaceQuadrature.lo \
+	libamdis_la-LeafData.lo libamdis_la-BoundaryManager.lo \
+	libamdis_la-DirichletBC.lo libamdis_la-RobinBC.lo \
+	libamdis_la-MatVecMultiplier.lo libamdis_la-FileWriter.lo \
+	libamdis_la-ElementFileWriter.lo libamdis_la-ElInfo.lo \
+	libamdis_la-ElInfoStack.lo libamdis_la-Operator.lo \
+	libamdis_la-Mesh.lo libamdis_la-AdaptStationary.lo \
 	libamdis_la-AdaptInstationary.lo \
 	libamdis_la-DiagonalPreconditioner.lo \
 	libamdis_la-ILUPreconditioner.lo \
@@ -303,7 +316,8 @@ am_libamdis_la_OBJECTS = $(am__objects_1) \
 	libamdis_la-Triangle.lo libamdis_la-TecPlotWriter.lo \
 	libamdis_la-ValueWriter.lo libamdis_la-MemoryPool.lo \
 	libamdis_la-MemoryManager.lo libamdis_la-VtkWriter.lo \
-	libamdis_la-DataCollector.lo libamdis_la-AdaptParaReal.lo
+	libamdis_la-DataCollector.lo libamdis_la-ScalableQuadrature.lo \
+	libamdis_la-SubElInfo.lo libamdis_la-AdaptParaReal.lo
 libamdis_la_OBJECTS = $(am_libamdis_la_OBJECTS)
 libcompositeFEM_la_LIBADD =
 am_libcompositeFEM_la_OBJECTS = libcompositeFEM_la-CFE_Integration.lo \
@@ -314,8 +328,6 @@ am_libcompositeFEM_la_OBJECTS = libcompositeFEM_la-CFE_Integration.lo \
 	libcompositeFEM_la-ElementLevelSet.lo \
 	libcompositeFEM_la-CompositeFEMOperator.lo \
 	libcompositeFEM_la-SubPolytope.lo \
-	libcompositeFEM_la-ScalableQuadrature.lo \
-	libcompositeFEM_la-SubElInfo.lo \
 	libcompositeFEM_la-SubElementAssembler.lo
 libcompositeFEM_la_OBJECTS = $(am_libcompositeFEM_la_OBJECTS)
 DEFAULT_INCLUDES = -I. -I$(srcdir)
@@ -519,6 +531,10 @@ $(SOURCE_DIR)/Serializable.h $(SOURCE_DIR)/BallProject.h $(SOURCE_DIR)/CylinderP
 $(SOURCE_DIR)/MacroReader.h $(SOURCE_DIR)/MacroReader.cc \
 $(SOURCE_DIR)/ValueReader.h $(SOURCE_DIR)/ValueReader.cc \
 $(SOURCE_DIR)/Projection.h $(SOURCE_DIR)/Projection.cc \
+$(SOURCE_DIR)/SubAssembler.h $(SOURCE_DIR)/SubAssembler.cc \
+$(SOURCE_DIR)/ZeroOrderAssembler.h $(SOURCE_DIR)/ZeroOrderAssembler.cc \
+$(SOURCE_DIR)/FirstOrderAssembler.h $(SOURCE_DIR)/FirstOrderAssembler.cc \
+$(SOURCE_DIR)/SecondOrderAssembler.h $(SOURCE_DIR)/SecondOrderAssembler.cc \
 $(SOURCE_DIR)/Assembler.h $(SOURCE_DIR)/Assembler.cc \
 $(SOURCE_DIR)/AdaptInfo.h $(SOURCE_DIR)/AdaptInfo.cc \
 $(SOURCE_DIR)/Marker.h $(SOURCE_DIR)/Marker.cc \
@@ -652,6 +668,8 @@ $(SOURCE_DIR)/ElementInfo.h \
 $(SOURCE_DIR)/VertexInfo.h \
 $(SOURCE_DIR)/PeriodicInfo.h \
 $(SOURCE_DIR)/OpenMP.h \
+$(SOURCE_DIR)/ScalableQuadrature.h $(SOURCE_DIR)/ScalableQuadrature.cc \
+$(SOURCE_DIR)/SubElInfo.h $(SOURCE_DIR)/SubElInfo.cc \
 $(SOURCE_DIR)/parareal/ProblemBase.h \
 $(SOURCE_DIR)/parareal/AdaptParaReal.h $(SOURCE_DIR)/parareal/AdaptParaReal.cc
 
@@ -672,9 +690,6 @@ $(COMPOSITE_SOURCE_DIR)/ElementLevelSet.cc \
 $(COMPOSITE_SOURCE_DIR)/CompositeFEMOperator.h \
 $(COMPOSITE_SOURCE_DIR)/CompositeFEMOperator.cc \
 $(COMPOSITE_SOURCE_DIR)/SubPolytope.h $(COMPOSITE_SOURCE_DIR)/SubPolytope.cc \
-$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.h \
-$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc \
-$(COMPOSITE_SOURCE_DIR)/SubElInfo.h $(COMPOSITE_SOURCE_DIR)/SubElInfo.cc \
 $(COMPOSITE_SOURCE_DIR)/SubElementAssembler.h \
 $(COMPOSITE_SOURCE_DIR)/SubElementAssembler.cc \
 $(COMPOSITE_SOURCE_DIR)/TranslateLsFct.h
@@ -787,6 +802,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-Estimator.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-FileWriter.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-FiniteElemSpace.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-FirstOrderAssembler.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-FixVec.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-Flag.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-GNUPlotWriter.Plo@am__quote@
@@ -839,9 +855,13 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ResidualEstimator.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ResidualParallelEstimator.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-RobinBC.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ScalableQuadrature.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-SecondOrderAssembler.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-SparseVector.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-StandardProblemIteration.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-StlVector.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-SubAssembler.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-SubElInfo.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-SurfaceQuadrature.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-TecPlotWriter.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-Tetrahedron.Plo@am__quote@
@@ -853,6 +873,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ValueWriter.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-VertexVector.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-VtkWriter.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ZeroOrderAssembler.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-demangle.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-CFE_Integration.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-CFE_NormAndErrorFcts.Plo@am__quote@
@@ -861,8 +882,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-ElementLevelSet.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-LevelSetAdaptMesh.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-PenaltyOperator.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-ScalableQuadrature.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-SubElInfo.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-SubElementAssembler.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcompositeFEM_la-SubPolytope.Plo@am__quote@
 
@@ -1111,6 +1130,34 @@ libamdis_la-Projection.lo: $(SOURCE_DIR)/Projection.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-Projection.lo `test -f '$(SOURCE_DIR)/Projection.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/Projection.cc
 
+libamdis_la-SubAssembler.lo: $(SOURCE_DIR)/SubAssembler.cc
+@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-SubAssembler.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-SubAssembler.Tpo" -c -o libamdis_la-SubAssembler.lo `test -f '$(SOURCE_DIR)/SubAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/SubAssembler.cc; \
+@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-SubAssembler.Tpo" "$(DEPDIR)/libamdis_la-SubAssembler.Plo"; else rm -f "$(DEPDIR)/libamdis_la-SubAssembler.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(SOURCE_DIR)/SubAssembler.cc' object='libamdis_la-SubAssembler.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-SubAssembler.lo `test -f '$(SOURCE_DIR)/SubAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/SubAssembler.cc
+
+libamdis_la-ZeroOrderAssembler.lo: $(SOURCE_DIR)/ZeroOrderAssembler.cc
+@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-ZeroOrderAssembler.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-ZeroOrderAssembler.Tpo" -c -o libamdis_la-ZeroOrderAssembler.lo `test -f '$(SOURCE_DIR)/ZeroOrderAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/ZeroOrderAssembler.cc; \
+@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-ZeroOrderAssembler.Tpo" "$(DEPDIR)/libamdis_la-ZeroOrderAssembler.Plo"; else rm -f "$(DEPDIR)/libamdis_la-ZeroOrderAssembler.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(SOURCE_DIR)/ZeroOrderAssembler.cc' object='libamdis_la-ZeroOrderAssembler.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-ZeroOrderAssembler.lo `test -f '$(SOURCE_DIR)/ZeroOrderAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/ZeroOrderAssembler.cc
+
+libamdis_la-FirstOrderAssembler.lo: $(SOURCE_DIR)/FirstOrderAssembler.cc
+@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-FirstOrderAssembler.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-FirstOrderAssembler.Tpo" -c -o libamdis_la-FirstOrderAssembler.lo `test -f '$(SOURCE_DIR)/FirstOrderAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/FirstOrderAssembler.cc; \
+@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-FirstOrderAssembler.Tpo" "$(DEPDIR)/libamdis_la-FirstOrderAssembler.Plo"; else rm -f "$(DEPDIR)/libamdis_la-FirstOrderAssembler.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(SOURCE_DIR)/FirstOrderAssembler.cc' object='libamdis_la-FirstOrderAssembler.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-FirstOrderAssembler.lo `test -f '$(SOURCE_DIR)/FirstOrderAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/FirstOrderAssembler.cc
+
+libamdis_la-SecondOrderAssembler.lo: $(SOURCE_DIR)/SecondOrderAssembler.cc
+@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-SecondOrderAssembler.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-SecondOrderAssembler.Tpo" -c -o libamdis_la-SecondOrderAssembler.lo `test -f '$(SOURCE_DIR)/SecondOrderAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/SecondOrderAssembler.cc; \
+@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-SecondOrderAssembler.Tpo" "$(DEPDIR)/libamdis_la-SecondOrderAssembler.Plo"; else rm -f "$(DEPDIR)/libamdis_la-SecondOrderAssembler.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(SOURCE_DIR)/SecondOrderAssembler.cc' object='libamdis_la-SecondOrderAssembler.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-SecondOrderAssembler.lo `test -f '$(SOURCE_DIR)/SecondOrderAssembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/SecondOrderAssembler.cc
+
 libamdis_la-Assembler.lo: $(SOURCE_DIR)/Assembler.cc
 @am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-Assembler.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-Assembler.Tpo" -c -o libamdis_la-Assembler.lo `test -f '$(SOURCE_DIR)/Assembler.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/Assembler.cc; \
 @am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-Assembler.Tpo" "$(DEPDIR)/libamdis_la-Assembler.Plo"; else rm -f "$(DEPDIR)/libamdis_la-Assembler.Tpo"; exit 1; fi
@@ -1608,6 +1655,20 @@ libamdis_la-DataCollector.lo: $(SOURCE_DIR)/DataCollector.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-DataCollector.lo `test -f '$(SOURCE_DIR)/DataCollector.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/DataCollector.cc
 
+libamdis_la-ScalableQuadrature.lo: $(SOURCE_DIR)/ScalableQuadrature.cc
+@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-ScalableQuadrature.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-ScalableQuadrature.Tpo" -c -o libamdis_la-ScalableQuadrature.lo `test -f '$(SOURCE_DIR)/ScalableQuadrature.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/ScalableQuadrature.cc; \
+@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-ScalableQuadrature.Tpo" "$(DEPDIR)/libamdis_la-ScalableQuadrature.Plo"; else rm -f "$(DEPDIR)/libamdis_la-ScalableQuadrature.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(SOURCE_DIR)/ScalableQuadrature.cc' object='libamdis_la-ScalableQuadrature.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-ScalableQuadrature.lo `test -f '$(SOURCE_DIR)/ScalableQuadrature.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/ScalableQuadrature.cc
+
+libamdis_la-SubElInfo.lo: $(SOURCE_DIR)/SubElInfo.cc
+@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-SubElInfo.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-SubElInfo.Tpo" -c -o libamdis_la-SubElInfo.lo `test -f '$(SOURCE_DIR)/SubElInfo.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/SubElInfo.cc; \
+@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-SubElInfo.Tpo" "$(DEPDIR)/libamdis_la-SubElInfo.Plo"; else rm -f "$(DEPDIR)/libamdis_la-SubElInfo.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(SOURCE_DIR)/SubElInfo.cc' object='libamdis_la-SubElInfo.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-SubElInfo.lo `test -f '$(SOURCE_DIR)/SubElInfo.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/SubElInfo.cc
+
 libamdis_la-AdaptParaReal.lo: $(SOURCE_DIR)/parareal/AdaptParaReal.cc
 @am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-AdaptParaReal.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-AdaptParaReal.Tpo" -c -o libamdis_la-AdaptParaReal.lo `test -f '$(SOURCE_DIR)/parareal/AdaptParaReal.cc' || echo '$(srcdir)/'`$(SOURCE_DIR)/parareal/AdaptParaReal.cc; \
 @am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libamdis_la-AdaptParaReal.Tpo" "$(DEPDIR)/libamdis_la-AdaptParaReal.Plo"; else rm -f "$(DEPDIR)/libamdis_la-AdaptParaReal.Tpo"; exit 1; fi
@@ -1671,20 +1732,6 @@ libcompositeFEM_la-SubPolytope.lo: $(COMPOSITE_SOURCE_DIR)/SubPolytope.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcompositeFEM_la_CXXFLAGS) $(CXXFLAGS) -c -o libcompositeFEM_la-SubPolytope.lo `test -f '$(COMPOSITE_SOURCE_DIR)/SubPolytope.cc' || echo '$(srcdir)/'`$(COMPOSITE_SOURCE_DIR)/SubPolytope.cc
 
-libcompositeFEM_la-ScalableQuadrature.lo: $(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc
-@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcompositeFEM_la_CXXFLAGS) $(CXXFLAGS) -MT libcompositeFEM_la-ScalableQuadrature.lo -MD -MP -MF "$(DEPDIR)/libcompositeFEM_la-ScalableQuadrature.Tpo" -c -o libcompositeFEM_la-ScalableQuadrature.lo `test -f '$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc' || echo '$(srcdir)/'`$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc; \
-@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libcompositeFEM_la-ScalableQuadrature.Tpo" "$(DEPDIR)/libcompositeFEM_la-ScalableQuadrature.Plo"; else rm -f "$(DEPDIR)/libcompositeFEM_la-ScalableQuadrature.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc' object='libcompositeFEM_la-ScalableQuadrature.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcompositeFEM_la_CXXFLAGS) $(CXXFLAGS) -c -o libcompositeFEM_la-ScalableQuadrature.lo `test -f '$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc' || echo '$(srcdir)/'`$(COMPOSITE_SOURCE_DIR)/ScalableQuadrature.cc
-
-libcompositeFEM_la-SubElInfo.lo: $(COMPOSITE_SOURCE_DIR)/SubElInfo.cc
-@am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcompositeFEM_la_CXXFLAGS) $(CXXFLAGS) -MT libcompositeFEM_la-SubElInfo.lo -MD -MP -MF "$(DEPDIR)/libcompositeFEM_la-SubElInfo.Tpo" -c -o libcompositeFEM_la-SubElInfo.lo `test -f '$(COMPOSITE_SOURCE_DIR)/SubElInfo.cc' || echo '$(srcdir)/'`$(COMPOSITE_SOURCE_DIR)/SubElInfo.cc; \
-@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libcompositeFEM_la-SubElInfo.Tpo" "$(DEPDIR)/libcompositeFEM_la-SubElInfo.Plo"; else rm -f "$(DEPDIR)/libcompositeFEM_la-SubElInfo.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$(COMPOSITE_SOURCE_DIR)/SubElInfo.cc' object='libcompositeFEM_la-SubElInfo.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcompositeFEM_la_CXXFLAGS) $(CXXFLAGS) -c -o libcompositeFEM_la-SubElInfo.lo `test -f '$(COMPOSITE_SOURCE_DIR)/SubElInfo.cc' || echo '$(srcdir)/'`$(COMPOSITE_SOURCE_DIR)/SubElInfo.cc
-
 libcompositeFEM_la-SubElementAssembler.lo: $(COMPOSITE_SOURCE_DIR)/SubElementAssembler.cc
 @am__fastdepCXX_TRUE@	if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcompositeFEM_la_CXXFLAGS) $(CXXFLAGS) -MT libcompositeFEM_la-SubElementAssembler.lo -MD -MP -MF "$(DEPDIR)/libcompositeFEM_la-SubElementAssembler.Tpo" -c -o libcompositeFEM_la-SubElementAssembler.lo `test -f '$(COMPOSITE_SOURCE_DIR)/SubElementAssembler.cc' || echo '$(srcdir)/'`$(COMPOSITE_SOURCE_DIR)/SubElementAssembler.cc; \
 @am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/libcompositeFEM_la-SubElementAssembler.Tpo" "$(DEPDIR)/libcompositeFEM_la-SubElementAssembler.Plo"; else rm -f "$(DEPDIR)/libcompositeFEM_la-SubElementAssembler.Tpo"; exit 1; fi
diff --git a/AMDiS/compositeFEM/src/SubPolytope.cc b/AMDiS/compositeFEM/src/SubPolytope.cc
index 4403dc7f6319880ea0e86c1e37a3d49e0fa9aaaa..c160dbff4a2af347f1a27459f487b063ef70f6d8 100644
--- a/AMDiS/compositeFEM/src/SubPolytope.cc
+++ b/AMDiS/compositeFEM/src/SubPolytope.cc
@@ -16,11 +16,9 @@ namespace AMDiS {
     //                false -  no
     ////////////////////////////////////////////////////////////////////////////
 
-    int zeroCounter;
-
     for (int i = 0; i < numIntPoints; i++) {
-      zeroCounter = 0;
-      for (int j = 0; j < dim+1; j++) {
+      int zeroCounter = 0;
+      for (int j = 0; j < dim + 1; j++) {
 	if (fabs((*intPoints)[i][j]) <= 1.e-15 ) { 
 	  zeroCounter++; 
 	}
@@ -119,7 +117,7 @@ namespace AMDiS {
     TEST_EXIT(dim == 1 && numIntPoints == 1)("invalid call of this routine\n");
 
     VectorOfFixVecs<DimVec<double> > *subElVertices = 
-      NEW VectorOfFixVecs<DimVec<double> >(dim, dim+1, NO_INIT);
+      NEW VectorOfFixVecs<DimVec<double> >(dim, dim + 1, NO_INIT);
     DimVec<double> vertex(dim, DEFAULT_VALUE, 1.0);
 
     /**
@@ -132,8 +130,7 @@ namespace AMDiS {
        *  subelement.
        */
       vertex[1] = 0.0;
-    }
-    else {
+    } else {
       /**
        *  The vertex in element with barycentric coordinates (0,1) is in 
        *  subelement.
@@ -189,7 +186,7 @@ namespace AMDiS {
       ("invalid call of this routine\n");
 
     VectorOfFixVecs<DimVec<double> >*subElVertices = 
-      NEW VectorOfFixVecs<DimVec<double> >(dim, dim+1, NO_INIT);
+      NEW VectorOfFixVecs<DimVec<double> >(dim, dim + 1, NO_INIT);
     DimVec<double> vertex(dim, DEFAULT_VALUE, 1.0);
 
     /**
@@ -197,7 +194,7 @@ namespace AMDiS {
      *  a subelement of element.
      */
     for (int i = 0; i < numIntPoints; i++) {
-      for (int j = 0; j < dim+1; j++) {
+      for (int j = 0; j < dim + 1; j++) {
 	if ( fabs((*intPoints)[i][j]) <= 1.e-15 ) {
 	  vertex[j] = 0.0; 
 	};
@@ -232,7 +229,7 @@ namespace AMDiS {
 
   int SubPolytope::getIndexSecondFaceIntPoint0(int indexFirstFace, int dim)
   {
-    for (int i = 0; i < dim+1; i++) {
+    for (int i = 0; i < dim + 1; i++) {
       if ( fabs((*intPoints)[0][i]) <= 1.e-15  &&  i != indexFirstFace ) {
 	return i;
       }
@@ -276,7 +273,7 @@ namespace AMDiS {
       ("invalid index for vertex of a tetrahedron");
 
     VectorOfFixVecs<DimVec<double> > *subElVertices = 
-      NEW VectorOfFixVecs<DimVec<double> >(dim, dim+1, NO_INIT);
+      NEW VectorOfFixVecs<DimVec<double> >(dim, dim + 1, NO_INIT);
     DimVec<double> vertexA(dim, DEFAULT_VALUE, 0.0);
     DimVec<double> vertexB(dim, DEFAULT_VALUE, 0.0);
 
@@ -319,7 +316,7 @@ namespace AMDiS {
     // Get the edges including the intersection points.
     for (int i = 0; i < numIntPoints; i++) {
       int k = 0;
-      for (int j = 0; j < dim+1; j++) {
+      for (int j = 0; j < dim + 1; j++) {
 	if (fabs((*intPoints)[i][j]) > 1.e-15 ) {
 	  indexEdge[k] = j;
 	  k++;
@@ -332,7 +329,7 @@ namespace AMDiS {
     // Get the vertex of element adjacent with indexElVertInPol1 whose
     // common edge with indexElVertInPol1 doesn't contain an
     // intersection point, and store it in indexElVertInPol2.
-    for (int i = 0; i < dim+1; i++) {
+    for (int i = 0; i < dim + 1; i++) {
       if (intPointOnEdge[indexElVertInPol1][i] == false  &&  
 	  i != indexElVertInPol1 ) {
 	indexElVertInPol2 = i;
diff --git a/AMDiS/src/Assembler.cc b/AMDiS/src/Assembler.cc
index 287cc65b6c819515c2c7a49bc2f988edca82e06a..c64faa917a6ccf567e77138397cb0bc3f23ae043 100644
--- a/AMDiS/src/Assembler.cc
+++ b/AMDiS/src/Assembler.cc
@@ -11,1329 +11,6 @@
 
 namespace AMDiS {
 
-  std::vector<SubAssembler*> ZeroOrderAssembler::optimizedSubAssemblers;
-  std::vector<SubAssembler*> FirstOrderAssembler::optimizedSubAssemblersGrdPhi;
-  std::vector<SubAssembler*> FirstOrderAssembler::optimizedSubAssemblersGrdPsi;
-  std::vector<SubAssembler*> SecondOrderAssembler::optimizedSubAssemblers;
-  
-  std::vector<SubAssembler*> ZeroOrderAssembler::standardSubAssemblers;
-  std::vector<SubAssembler*> FirstOrderAssembler::standardSubAssemblersGrdPhi;
-  std::vector<SubAssembler*> FirstOrderAssembler::standardSubAssemblersGrdPsi;
-  std::vector<SubAssembler*> SecondOrderAssembler::standardSubAssemblers;
-
-  SubAssembler::SubAssembler(Operator *op,
-			     Assembler *assembler,
-			     Quadrature *quadrat,
-			     int order, 
-			     bool optimized,
-			     FirstOrderType type) 
-    : nRow(0),
-      nCol(0),
-      coordsAtQPs(NULL),
-      coordsNumAllocated(0),
-      quadrature(quadrat),
-      psiFast(NULL),
-      phiFast(NULL),
-      owner(assembler),
-      symmetric(true),
-      opt(optimized),
-      firstCall(true)
-  {
-    const BasisFunction *psi = assembler->rowFESpace->getBasisFcts();
-    const BasisFunction *phi = assembler->colFESpace->getBasisFcts();
-
-    nRow = psi->getNumber();
-    nCol = phi->getNumber();
-
-    int maxThreads = omp_get_max_threads();
-    terms.resize(maxThreads);
-
-    switch (order) {
-    case 0:
-      terms = op->zeroOrder;
-      break;
-    case 1:
-      if(type == GRD_PHI)
-	terms = op->firstOrderGrdPhi;
-      else 
-	terms = op->firstOrderGrdPsi;
-      break;
-    case 2:
-      terms = op->secondOrder;
-      break;
-    }
-
-    // check if all terms are symmetric
-    symmetric = true;
-    for (int i = 0; i < static_cast<int>(terms[0].size()); i++) {
-      if (!(terms[0][i])->isSymmetric()) {
-	symmetric = false;
-	break;
-      }
-    }  
-
-    dim = assembler->rowFESpace->getMesh()->getDim();
-  }
-
-  FastQuadrature *SubAssembler::updateFastQuadrature(FastQuadrature *quadFast,
-						     const BasisFunction *psi,
-						     Flag updateFlag)
-  {
-    if (!quadFast) {
-      quadFast = FastQuadrature::provideFastQuadrature(psi,
-						       *quadrature,
-						       updateFlag);
-    } else {
-      if (!quadFast->initialized(updateFlag))
-	quadFast->init(updateFlag);
-    }
-
-    return quadFast;
-  }
-
-  void SubAssembler::initElement(const ElInfo* elInfo,
-				 Quadrature *quad)
-  {
-    // set corrdsAtQPs invalid
-    coordsValid = false;
-
-    // set values at QPs invalid
-    std::map<const DOFVectorBase<double>*, ValuesAtQPs*>::iterator it1;
-    for (it1 = valuesAtQPs.begin(); it1 != valuesAtQPs.end(); ++it1) {
-      ((*it1).second)->valid = false;
-    }
-
-    // set gradients at QPs invalid
-    std::map<const DOFVectorBase<double>*, GradientsAtQPs*>::iterator it2;
-    for (it2 = gradientsAtQPs.begin(); it2 != gradientsAtQPs.end(); ++it2) {
-      ((*it2).second)->valid = false;
-    }
-
-    int myRank = omp_get_thread_num();
-    // calls initElement of each term
-    std::vector<OperatorTerm*>::iterator it;
-    for (it = terms[myRank].begin(); it != terms[myRank].end(); ++it) {
-      (*it)->initElement(elInfo, this, quad);
-    }
-  }
-
-  WorldVector<double>* SubAssembler::getCoordsAtQPs(const ElInfo* elInfo,
-						    Quadrature *quad)
-  {
-    Quadrature *localQuad = quad ? quad : quadrature;
-  
-    const int nPoints = localQuad->getNumPoints();
-
-    // already calculated for this element ?
-    if (coordsValid) {
-      return coordsAtQPs;
-    }
-   
-    if (coordsAtQPs)  {
-      if (coordsNumAllocated != nPoints) {
-	DELETE [] coordsAtQPs;
-        coordsAtQPs = NEW WorldVector<double>[nPoints];
-	coordsNumAllocated = nPoints;
-      }
-    } else {
-      coordsAtQPs = NEW WorldVector<double>[nPoints];
-      coordsNumAllocated = nPoints;
-    }
-
-    // set new values
-    WorldVector<double>* k = &(coordsAtQPs[0]);
-    for (int l = 0; k < &(coordsAtQPs[nPoints]); ++k, ++l) {
-      elInfo->coordToWorld(localQuad->getLambda(l), k);
-    }
-
-    // mark values as valid
-    coordsValid = true;
-
-    return coordsAtQPs;
-  }
-
-  double* SubAssembler::getVectorAtQPs(DOFVectorBase<double>* dv, 
-				       const ElInfo* elInfo,
-				       Quadrature *quad)
-  {
-    FUNCNAME("SubAssembler::getVectorAtQPs()");
-
-    const DOFVectorBase<double>* vec = dv ? dv : owner->operat->getUhOld();
-
-    TEST_EXIT_DBG(vec)("no dof vector!\n");
-
-    if (valuesAtQPs[vec] && valuesAtQPs[vec]->valid) 
-      return valuesAtQPs[vec]->values.getValArray();
-
-    Quadrature *localQuad = quad ? quad : quadrature;
-
-    if (!valuesAtQPs[vec]) {
-      valuesAtQPs[vec] = new ValuesAtQPs;
-    }
-    valuesAtQPs[vec]->values.resize(localQuad->getNumPoints());
-
-    double *values = valuesAtQPs[vec]->values.getValArray();
-
-    bool sameFESpaces = 
-      (vec->getFESpace() == owner->rowFESpace) || 
-      (vec->getFESpace() == owner->colFESpace);
-
-    if (opt && !quad && sameFESpaces) {
-      const BasisFunction *psi = owner->rowFESpace->getBasisFcts();
-      const BasisFunction *phi = owner->colFESpace->getBasisFcts();
-      if (vec->getFESpace()->getBasisFcts() == psi) {
-	psiFast = updateFastQuadrature(psiFast, psi, INIT_PHI);
-      } else if(vec->getFESpace()->getBasisFcts() == phi) {
-	phiFast = updateFastQuadrature(phiFast, phi, INIT_PHI);
-      }
-    }
-
-    // calculate new values
-    const BasisFunction *basFcts = vec->getFESpace()->getBasisFcts();
-
-    if (opt && !quad && sameFESpaces) {
-      if (psiFast->getBasisFunctions() == basFcts) {
-	vec->getVecAtQPs(elInfo, NULL, psiFast, values);
-      } else if(phiFast->getBasisFunctions() == basFcts) {
-	vec->getVecAtQPs(elInfo, NULL, phiFast, values);
-      } else {
-	vec->getVecAtQPs(elInfo, localQuad, NULL, values);
-      }
-    } else {
-      vec->getVecAtQPs(elInfo, localQuad, NULL, values);
-    }
-  
-    valuesAtQPs[vec]->valid = true;
-
-    return values;
-  }
-
-  WorldVector<double>* SubAssembler::getGradientsAtQPs(DOFVectorBase<double>* dv, 
-						       const ElInfo* elInfo,
-						       Quadrature *quad)
-  {
-    FUNCNAME("SubAssembler::getGradientsAtQPs()");
-
-    const DOFVectorBase<double>* vec = dv ? dv : owner->operat->getUhOld();
-
-    TEST_EXIT_DBG(vec)("no dof vector!\n");
-
-    if (gradientsAtQPs[vec] && gradientsAtQPs[vec]->valid) 
-      return gradientsAtQPs[vec]->values.getValArray();
-
-    Quadrature *localQuad = quad ? quad : quadrature;
-
-    if (!gradientsAtQPs[vec]) {
-      gradientsAtQPs[vec] = new GradientsAtQPs;
-    } 
-    gradientsAtQPs[vec]->values.resize(localQuad->getNumPoints());
-
-    WorldVector<double> *values = gradientsAtQPs[vec]->values.getValArray();
-
-    const BasisFunction *psi = owner->rowFESpace->getBasisFcts();
-    const BasisFunction *phi = owner->colFESpace->getBasisFcts();
-
-    bool sameFESpaces = 
-      (vec->getFESpace() == owner->rowFESpace) || 
-      (vec->getFESpace() == owner->colFESpace);
-
-    if (opt && !quad && sameFESpaces) {
-      if (vec->getFESpace()->getBasisFcts() == psi) {
-	psiFast = updateFastQuadrature(psiFast, psi, INIT_GRD_PHI);
-      } else if(vec->getFESpace()->getBasisFcts() == phi) {
-	phiFast = updateFastQuadrature(phiFast, phi, INIT_GRD_PHI);
-      }
-    }
-  
-    // calculate new values
-    const BasisFunction *basFcts = vec->getFESpace()->getBasisFcts();
-
-    if (opt && !quad && sameFESpaces) {
-      if (psiFast->getBasisFunctions() == basFcts) {
-	vec->getGrdAtQPs(elInfo, NULL, psiFast, values);
-      } else {
-	vec->getGrdAtQPs(elInfo, NULL, phiFast, values);
-      }
-    } else {
-      vec->getGrdAtQPs(elInfo, localQuad, NULL, values);
-    }
-   
-    gradientsAtQPs[vec]->valid = true;
-
-    return values;
-  }
-
-  ZeroOrderAssembler::ZeroOrderAssembler(Operator *op,
-					 Assembler *assembler,
-					 Quadrature *quad,
-					 bool optimized)
-    : SubAssembler(op, assembler, quad, 0, optimized)
-  {}
-
-  FirstOrderAssembler::FirstOrderAssembler(Operator *op,
-					   Assembler *assembler,
-					   Quadrature *quad,
-					   bool optimized,
-					   FirstOrderType type)
-    : SubAssembler(op, assembler, quad, 1, optimized, type)
-  {}
-
-  SecondOrderAssembler::SecondOrderAssembler(Operator *op,
-					     Assembler *assembler,
-					     Quadrature *quad,
-					     bool optimized)
-    : SubAssembler(op, assembler, quad, 2, optimized)
-  {}
-
-  ZeroOrderAssembler* 
-  ZeroOrderAssembler::getSubAssembler(Operator* op,
-				      Assembler *assembler,
-				      Quadrature *quad,
-				      bool optimized)
-  {
-    // check if a assembler is needed at all
-    if (op->zeroOrder.size() == 0) {
-      return NULL;
-    }
-
-    ZeroOrderAssembler *newAssembler;
-
-    std::vector<SubAssembler*> *subAssemblers =
-	optimized ?
-	&optimizedSubAssemblers :
-    &standardSubAssemblers;
-
-    int myRank = omp_get_thread_num();
-    std::vector<OperatorTerm*> opTerms  = op->zeroOrder[myRank];
-
-    sort(opTerms.begin(), opTerms.end());
-
-    // check if a new assembler is needed
-    if (quad) {
-      for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
-	std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
-
-	sort(assTerms.begin(), assTerms.end());
-
-	if ((opTerms == assTerms) && 
-	    ((*subAssemblers)[i]->getQuadrature() == quad)) {
-	
-	  return dynamic_cast<ZeroOrderAssembler*>((*subAssemblers)[i]);
-	}
-      }
-    }
- 
-    // check if all terms are pw_const
-    bool pwConst = true;
-    for (int i = 0; i < static_cast<int>( op->zeroOrder[myRank].size()); i++) {
-      if (!op->zeroOrder[myRank][i]->isPWConst()) {
-	pwConst = false;
-	break;
-      }
-    }  
-
-    // create new assembler
-    if (!optimized) {
-      newAssembler = NEW Stand0(op, assembler, quad);
-    } else {
-      if (pwConst) {
-	newAssembler = NEW Pre0(op, assembler, quad);
-      } else {
-	newAssembler = NEW Quad0(op, assembler, quad);
-      }
-    }
-
-    subAssemblers->push_back(newAssembler);
-    return newAssembler;
-  }
-
-  FirstOrderAssembler* 
-  FirstOrderAssembler::getSubAssembler(Operator* op,
-				       Assembler *assembler,
-				       Quadrature *quad,
-				       FirstOrderType type,
-				       bool optimized)
-  {
-    std::vector<SubAssembler*> *subAssemblers =
-	optimized ?
-	(type == GRD_PSI ? 
-	 &optimizedSubAssemblersGrdPsi : 
-	 &optimizedSubAssemblersGrdPhi) :
-    (type == GRD_PSI ? 
-     &standardSubAssemblersGrdPsi :
-     &standardSubAssemblersGrdPhi);
-
-    int myRank = omp_get_thread_num();
-    std::vector<OperatorTerm*> opTerms 
-	= (type == GRD_PSI) ? 
-	    op->firstOrderGrdPsi[myRank] : 
-            op->firstOrderGrdPhi[myRank];
-
-    // check if a assembler is needed at all
-    if (opTerms.size() == 0) {
-      return NULL;
-    }
-
-    sort(opTerms.begin(), opTerms.end());
-
-    FirstOrderAssembler *newAssembler;
-
-    // check if a new assembler is needed
-    for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
-
-      std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
-    
-      sort(assTerms.begin(), assTerms.end());   
-
-      if ((opTerms == assTerms) && 
-	  ((*subAssemblers)[i]->getQuadrature() == quad)) {
-
-	return dynamic_cast<FirstOrderAssembler*>((*subAssemblers)[i]);
-      }
-    }
-
-    // check if all terms are pw_const
-    bool pwConst = true;
-    for (int i = 0; i < static_cast<int>( opTerms.size()); i++) {
-      if (!(opTerms[i])->isPWConst()) {
-	pwConst = false;
-	break;
-      }
-    }  
-
-    // create new assembler
-    if (!optimized) {
-      newAssembler = 
-	(type == GRD_PSI) ?
-	dynamic_cast<FirstOrderAssembler*>(NEW Stand10(op, assembler, quad)) :
-	dynamic_cast<FirstOrderAssembler*>(NEW Stand01(op, assembler, quad));    
-    } else {
-      if (pwConst) {
-	newAssembler = 
-	  (type == GRD_PSI) ?
-	  dynamic_cast<FirstOrderAssembler*>(NEW Pre10(op, assembler, quad)) :
-	  dynamic_cast<FirstOrderAssembler*>(NEW Pre01(op, assembler, quad));
-      } else {
-	newAssembler = 
-	  (type == GRD_PSI) ?
-	  dynamic_cast<FirstOrderAssembler*>( NEW Quad10(op, assembler, quad)) :
-	  dynamic_cast<FirstOrderAssembler*>( NEW Quad01(op, assembler, quad));
-      }
-    }
-
-    subAssemblers->push_back(newAssembler);
-    return newAssembler;
-  };
-
-  SecondOrderAssembler* 
-  SecondOrderAssembler::getSubAssembler(Operator* op,
-					Assembler *assembler,
-					Quadrature *quad,
-					bool optimized)
-  {
-    int myRank = omp_get_thread_num();
-
-    // check if a assembler is needed at all
-    if (op->secondOrder[myRank].size() == 0) {
-      return NULL;
-    }
-
-    SecondOrderAssembler *newAssembler;
-
-    std::vector<SubAssembler*> *subAssemblers =
-	optimized ?
-	&optimizedSubAssemblers :
-    &standardSubAssemblers;
-
-    std::vector<OperatorTerm*> opTerms  = op->zeroOrder[myRank];
-
-    sort(opTerms.begin(), opTerms.end());
-
-    // check if a new assembler is needed
-    for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
-      std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
-    
-      sort(assTerms.begin(), assTerms.end());
-
-      if ((opTerms == assTerms) && 
-	  ((*subAssemblers)[i]->getQuadrature() == quad)) {
-	
-	return dynamic_cast<SecondOrderAssembler*>((*subAssemblers)[i]);
-      }
-    }
-
-    // check if all terms are pw_const
-    bool pwConst = true;
-    for (int i = 0; i < static_cast<int>( op->secondOrder[myRank].size()); i++) {
-      if (!op->secondOrder[myRank][i]->isPWConst()) {
-	pwConst = false;
-	break;
-      }
-    }  
-
-    // create new assembler
-    if (!optimized) {
-      newAssembler = NEW Stand2(op, assembler, quad);
-    } else {
-      if (pwConst) {
-	newAssembler = NEW Pre2(op, assembler, quad);
-      } else {
-	newAssembler = NEW Quad2(op, assembler, quad);
-      }
-    }
-
-    subAssemblers->push_back(newAssembler);
-    return newAssembler;
-  }
-
-  Stand0::Stand0(Operator *op, Assembler *assembler, Quadrature *quad)
-    : ZeroOrderAssembler(op, assembler, quad, false)
-  {
-  }
-
-  void Stand0::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
-    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
-
-    double *phival = GET_MEMORY(double, nCol);
-    int nPoints = quadrature->getNumPoints();
-    double *c = GET_MEMORY(double, nPoints);
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      c[iq] = 0.0;
-    }
-
-    int myRank = omp_get_thread_num();
-    std::vector<OperatorTerm*>::iterator termIt;
-    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
-      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
-    }
-      
-    if (symmetric) {
-      for (int iq = 0; iq < nPoints; iq++) {
-	c[iq] *= elInfo->getDet();
-
-	// calculate phi at QPs only once!
-	for (int i = 0; i < nCol; i++) {
-	  phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
-	}
-
-	for (int i = 0; i < nRow; i++) {
-	  double psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
-	  (*mat)[i][i] += quadrature->getWeight(iq) * c[iq] * psival * phival[i];
-	  for (int j = i + 1; j < nCol; j++) {
-	    double val = quadrature->getWeight(iq) * c[iq] * psival * phival[j];
-	    (*mat)[i][j] += val;
-	    (*mat)[j][i] += val;
-	  }
-	}
-      }
-    } else {      //  non symmetric assembling 
-      for (int iq = 0; iq < nPoints; iq++) {
-	c[iq] *= elInfo->getDet();
-
-	// calculate phi at QPs only once!
-	for (int i = 0; i < nCol; i++) {
-	  phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
-	}
-
-	for (int i = 0; i < nRow; i++) {
-	  double psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
-	  for (int j = 0; j < nCol; j++) {
-	    (*mat)[i][j] += quadrature->getWeight(iq) * c[iq] * psival * phival[j];
-	  }
-	}
-      }
-    }
-
-    FREE_MEMORY(phival, double, nCol);
-    FREE_MEMORY(c, double, nPoints);
-  }
-
-  void Stand0::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
-  {
-    int nPoints = quadrature->getNumPoints();
-
-    double *c = GET_MEMORY(double, nPoints);
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      c[iq] = 0.0;
-    }
-
-    int myRank = omp_get_thread_num();
-    std::vector<OperatorTerm*>::iterator termIt;
-    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
-      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
-    }
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      c[iq] *= elInfo->getDet();
-
-      for (int i = 0; i < nRow; i++) {
-	double psi = (*(owner->getRowFESpace()->getBasisFcts()->getPhi(i)))
-	  (quadrature->getLambda(iq));
-	(*vec)[i] += quadrature->getWeight(iq) * c[iq] * psi;
-      }
-    }
-    
-    FREE_MEMORY(c, double, nPoints);
-  }
-
-  Quad0::Quad0(Operator *op, Assembler *assembler, Quadrature *quad)
-    : ZeroOrderAssembler(op, assembler, quad, true)
-  {
-    cPtrs.resize(omp_get_max_threads());
-  }
-
-  Quad0::~Quad0()
-  {
-    for (int i = 0; i < omp_get_max_threads(); i++) {
-      FREE_MEMORY(cPtrs[i], double, quadrature->getNumPoints());
-    }
-  }
-
-  void Quad0::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    int nPoints = quadrature->getNumPoints();
-    int myRank = omp_get_thread_num();
-
-    if (firstCall) {
-      cPtrs[myRank] = GET_MEMORY(double, nPoints);
-      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
-      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
-      basFcts = owner->getColFESpace()->getBasisFcts();
-      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
-      firstCall = false;
-    }
-
-    double *c = cPtrs[myRank];
-    for (int iq = 0; iq < nPoints; iq++) {
-      c[iq] = 0.0;
-    }
-
-    std::vector<OperatorTerm*>::iterator termIt;
-    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
-      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
-    }
-
-    if (symmetric) {
-      for (int iq = 0; iq < nPoints; iq++) {
-	c[iq] *= elInfo->getDet();
-
-	const double *psi = psiFast->getPhi(iq);
-	const double *phi = phiFast->getPhi(iq);
-	for (int i = 0; i < nRow; i++) {
-	  (*mat)[i][i] += quadrature->getWeight(iq) * c[iq] * psi[i] * phi[i];
-	  for (int j = i + 1; j < nCol; j++) {
-	    double val = quadrature->getWeight(iq) * c[iq] * psi[i] * phi[j];
-	    (*mat)[i][j] += val;
-	    (*mat)[j][i] += val;
-	  }
-	}
-      }
-    } else {      /*  non symmetric assembling   */
-      for (int iq = 0; iq < nPoints; iq++) {
-	c[iq] *= elInfo->getDet();
-
-	const double *psi = psiFast->getPhi(iq);
-	const double *phi = phiFast->getPhi(iq);
-	for (int i = 0; i < nRow; i++) {
-	  for (int j = 0; j < nCol; j++) {
-	    (*mat)[i][j] += quadrature->getWeight(iq) * c[iq] * psi[i] * phi[j];
-	  }
-	}
-      }
-    }
-  }
-
-  void Quad0::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
-  {
-    if (firstCall) {
-      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
-      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
-      basFcts = owner->getColFESpace()->getBasisFcts();
-      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
-      firstCall = false;
-    }
-
-    int nPoints = quadrature->getNumPoints();
-    double *c = GET_MEMORY(double, nPoints);
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      c[iq] = 0.0;
-    }
-
-    int myRank = omp_get_thread_num();
-    std::vector<OperatorTerm*>::iterator termIt;
-    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
-      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
-    }
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      c[iq] *= elInfo->getDet();
-
-      const double *psi = psiFast->getPhi(iq);
-      for (int i = 0; i < nRow; i++) {
-	(*vec)[i] += quadrature->getWeight(iq) * c[iq] * psi[i];
-      }
-    }
-    FREE_MEMORY(c, double, nPoints);
-  }
-
-  Pre0::Pre0(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : ZeroOrderAssembler(op, assembler, quad, true)
-  {
-  }
-
-  void Pre0::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    if (firstCall) {
-      q00 = Q00PsiPhi::provideQ00PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
-					owner->getColFESpace()->getBasisFcts(), 
-					quadrature);
-      q0 = Q0Psi::provideQ0Psi(owner->getRowFESpace()->getBasisFcts(),
-			       quadrature);
-      firstCall = false;
-    }
-
-    //    c[0] = 0.0;
-    double c = 0.0;
-    int myRank = omp_get_thread_num();
-    int size = static_cast<int>(terms[myRank].size());
-
-    for (int i = 0; i < size; i++) {
-      (static_cast<ZeroOrderTerm*>((terms[myRank][i])))->getC(elInfo, 1, &c);
-    }
-
-    c *= elInfo->getDet();
-
-    if (symmetric) {
-      for (int i = 0; i < nRow; i++) {
-	(*mat)[i][i] += c * q00->getValue(i,i);
-	for (int j = i + 1; j < nCol; j++) {
-	  double val = c * q00->getValue(i, j);
-	  (*mat)[i][j] += val;
-	  (*mat)[j][i] += val;
-	}
-      }
-    } else {
-      for (int i = 0; i < nRow; i++)
-	for (int j = 0; j < nCol; j++)
-	  (*mat)[i][j] += c * q00->getValue(i, j);
-    }
-  }
-
-  void Pre0::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
-  {
-    if (firstCall) {
-      q00 = Q00PsiPhi::provideQ00PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
-					owner->getColFESpace()->getBasisFcts(), 
-					quadrature);
-      q0 = Q0Psi::provideQ0Psi(owner->getRowFESpace()->getBasisFcts(),
-			       quadrature);
-      firstCall = false;
-    }
-
-    std::vector<OperatorTerm*>::iterator termIt;
-
-    int myRank = omp_get_thread_num();
-    double c = 0.0;
-    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
-      (static_cast<ZeroOrderTerm*>( *termIt))->getC(elInfo, 1, &c);
-    }
-
-    c *= elInfo->getDet();
-
-    for (int i = 0; i < nRow; i++)
-      (*vec)[i] += c * q0->getValue(i);
-  }
-
-  Stand10::Stand10(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : FirstOrderAssembler(op, assembler, quad, false, GRD_PSI)
-  {}
-
-
-  void Stand10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    DimVec<double> grdPsi(dim, DEFAULT_VALUE, 0.0);
-    double *phival = GET_MEMORY(double, nCol);
-
-    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
-    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
-
-    int nPoints = quadrature->getNumPoints();
-
-    VectorOfFixVecs<DimVec<double> > Lb(dim, nPoints, NO_INIT);
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq].set(0.0);
-    }
-
-    int myRank = omp_get_thread_num();
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
-    }
-  
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq] *= elInfo->getDet();
-
-      for (int i = 0; i < nCol; i++) {
-	phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
-      }
-
-      for (int i = 0; i < nRow; i++) {
-	(*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
-	for (int j = 0; j < nCol; j++) {
-	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * grdPsi) * phival[j];
-	}
-      }
-    }
-    FREE_MEMORY(phival, double, nCol);
-  }
-
-
-  Quad10::Quad10(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : FirstOrderAssembler(op, assembler, quad, true, GRD_PSI)
-  {
-  }
-
-
-  void Quad10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    VectorOfFixVecs<DimVec<double> > *grdPsi;
-    const double *phi;
-
-    if (firstCall) {
-      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
-      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
-      basFcts = owner->getColFESpace()->getBasisFcts();
-      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
-      firstCall = false;
-    }
-
-    int nPoints = quadrature->getNumPoints();
-
-    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq].set(0.0);
-    }
-
-    int myRank = omp_get_thread_num();
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
-    }
-  
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq] *= elInfo->getDet();
-
-      phi    = phiFast->getPhi(iq);
-      grdPsi = psiFast->getGradient(iq);
-
-      for (int i = 0; i < nRow; i++) {
-	for (int j = 0; j < nCol; j++)
-	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPsi)[i]) * phi[j];
-      }
-    }
-  }
-
-
-  Pre10::Pre10(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : FirstOrderAssembler(op, assembler, quad, true, GRD_PSI)
-  {
-  }
-
-
-  void Pre10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    VectorOfFixVecs<DimVec<double> > Lb(dim, 1, NO_INIT);
-    const int *k;
-    const double *values;
-
-    if (firstCall) {
-      q10 = Q10PsiPhi::provideQ10PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
-					owner->getColFESpace()->getBasisFcts(), 
-					quadrature);
-      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
-			       quadrature);
-      firstCall = false;
-    }
-
-    const int **nEntries = q10->getNumberEntries();
-    int myRank = omp_get_thread_num();
-    Lb[0].set(0.0);
-
-    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, 1, Lb);
-    }
-
-    Lb[0] *= elInfo->getDet();
-
-    for (int i = 0; i < nRow; i++) {
-      for (int j = 0; j < nCol; j++) {
-	k = q10->getKVec(i, j);
-	values = q10->getValVec(i, j);
-	double val = 0.0;
-	for (int m = 0; m < nEntries[i][j]; m++) {
-	  val += values[m] * Lb[0][k[m]];
-	}
-	(*mat)[i][j] += val;
-      }
-    }
-  }
-
-
-  Stand01::Stand01(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : FirstOrderAssembler(op, assembler, quad, false, GRD_PHI)
-  {}
-
-
-  void Stand01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    VectorOfFixVecs<DimVec<double> > grdPhi(dim, nCol, NO_INIT);
-
-    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
-    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
-
-    int nPoints = quadrature->getNumPoints();
-    VectorOfFixVecs<DimVec<double> > Lb(dim, nPoints, NO_INIT);
-    int myRank = omp_get_thread_num();
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq].set(0.0);
-    }
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
-    }
-  
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq] *= elInfo->getDet();
-
-      for (int i = 0; i < nCol; i++) {
-	(*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
-      }
-
-      for (int i = 0; i < nRow; i++) {
-	double psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
-	for (int j = 0; j < nCol; j++)
-	  (*mat)[i][j] += quadrature->getWeight(iq) * ((Lb[iq] * psival) * grdPhi[j]);
-      }
-    } 
-  }
-
-  void Stand10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
-  {
-    DimVec<double> grdPsi(dim, DEFAULT_VALUE, 0.0);
-    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
-    int nPoints = quadrature->getNumPoints();
-    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
-    int myRank = omp_get_thread_num();
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq].set(0.0);
-    }
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
-    }
-  
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq] *= elInfo->getDet();
-
-      for (int i = 0; i < nRow; i++) {
-	(*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
-	(*vec)[i] += quadrature->getWeight(iq) * (Lb[iq] * grdPsi);
-      }
-    }
-  }
-
-  Quad01::Quad01(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : FirstOrderAssembler(op, assembler, quad, true, GRD_PHI)
-  {
-  }
-
-  void Quad01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    VectorOfFixVecs<DimVec<double> > *grdPhi;
-
-    if (firstCall) {
-      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
-      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
-      basFcts = owner->getColFESpace()->getBasisFcts();
-      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI);
-      firstCall = false;
-    }
-
-    int nPoints = quadrature->getNumPoints();
-    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
-    int myRank = omp_get_thread_num();
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq].set(0.0);
-    }
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
-    }
-  
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq] *= elInfo->getDet();
-
-      const double *psi = psiFast->getPhi(iq);
-      grdPhi = phiFast->getGradient(iq);
-
-      for (int i = 0; i < nRow; i++) {
-	for (int j = 0; j < nCol; j++)
-	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPhi)[j]) * psi[i];
-      }
-    }
-  }
-
-  void Quad10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
-  {
-    VectorOfFixVecs<DimVec<double> > *grdPsi;
-
-    if (firstCall) {
-      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
-      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
-      basFcts = owner->getColFESpace()->getBasisFcts();
-      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
-      firstCall = false;
-    }
-
-    int nPoints = quadrature->getNumPoints();
-    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
-    int myRank = omp_get_thread_num();
-
-    for (int iq = 0; iq < nPoints; iq++) {
-      Lb[iq].set(0.0);
-    }
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
-    }
-  
-    for (int iq = 0; iq < nPoints; iq++) {
-
-      Lb[iq] *= elInfo->getDet();
-      grdPsi = psiFast->getGradient(iq);
-
-      for (int i = 0; i < nRow; i++) {
-	(*vec)[i] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPsi)[i]);
-      }
-    }
-  }
-
-  Pre01::Pre01(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : FirstOrderAssembler(op, assembler, quad, true, GRD_PHI)
-  {
-  }
-
-  void Pre01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    VectorOfFixVecs<DimVec<double> > Lb(dim,1,NO_INIT);
-
-    const int *l;
-    const double *values;
-
-    if (firstCall) {
-      q01 = Q01PsiPhi::provideQ01PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
-					owner->getColFESpace()->getBasisFcts(), 
-					quadrature);
-      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
-			       quadrature);
-      firstCall = false;
-    }
-
-    const int **nEntries = q01->getNumberEntries();
-    int myRank = omp_get_thread_num();
-    Lb[0].set(0.0);
-
-    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, 1, Lb);
-    }
-
-    Lb[0] *= elInfo->getDet();
-
-    for (int i = 0; i < nRow; i++) {
-      for (int j = 0; j < nCol; j++) {
-	l = q01->getLVec(i, j);
-	values = q01->getValVec(i, j);
-	double val = 0.0;
-	for (int m = 0; m < nEntries[i][j]; m++) {
-	  val += values[m] * Lb[0][l[m]];
-	}
-	(*mat)[i][j] += val;
-      }
-    }
-  }
-
-  void Pre10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
-  {
-    VectorOfFixVecs<DimVec<double> > Lb(dim,1,NO_INIT);
-
-    const int *k;
-    const double *values;
-
-    if (firstCall) {
-      q10 = Q10PsiPhi::provideQ10PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
-					owner->getColFESpace()->getBasisFcts(), 
-					quadrature);
-      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
-			       quadrature);
-      firstCall = false;
-    }
-
-    const int *nEntries = q1->getNumberEntries();
-    int myRank = omp_get_thread_num();
-    Lb[0].set(0.0);
-
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<FirstOrderTerm*>(terms[myRank][i]))->getLb(elInfo, 1, Lb);
-    }
-
-    Lb[0] *= elInfo->getDet();
-
-    for (int i = 0; i < nRow; i++) {
-      k = q1->getKVec(i);
-      values = q1->getValVec(i);
-      double val = 0.0;
-      for (int m = 0; m < nEntries[i]; m++) {
-	val += values[m] * Lb[0][k[m]];
-      }
-      (*vec)[i] += val;
-    }
-  }
-
-  Pre2::Pre2(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : SecondOrderAssembler(op, assembler, quad, true)
-  {
-    q11 = Q11PsiPhi::provideQ11PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
-				      owner->getColFESpace()->getBasisFcts(), 
-				      quadrature);
-    tmpLALt.resize(omp_get_max_threads());
-    for (int i = 0; i < omp_get_max_threads(); i++) {
-      tmpLALt[i] = NEW DimMat<double>*;
-      *(tmpLALt[i]) = NEW DimMat<double>(dim, NO_INIT);
-    }
-  }
-
-  Pre2::~Pre2()
-  {
-    for (int i = 0; i < static_cast<int>(tmpLALt.size()); i++) {
-      DELETE *(tmpLALt[i]);
-      DELETE tmpLALt[i];
-    }
-  }
-
-  void Pre2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    const int **nEntries;
-    const int *k, *l;
-    const double *values;
-
-    int myRank = omp_get_thread_num();
-    DimMat<double> **LALt = tmpLALt[myRank];
-    DimMat<double> &tmpMat = *LALt[0];
-    tmpMat.set(0.0);
-
-    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
-      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, 1, LALt);
-    }
-
-    tmpMat *= elInfo->getDet();
-    nEntries = q11->getNumberEntries();
-
-    if (symmetric) {
-      for (int i = 0; i < nRow; i++) {
-	k = q11->getKVec(i, i);
-	l = q11->getLVec(i, i);
-	values = q11->getValVec(i, i);
-	double val = 0.0;
-	for (int m = 0; m < nEntries[i][i]; m++) {
-	  val += values[m] * tmpMat[k[m]][l[m]];
-	}
-	(*mat)[i][i] += val;
-
-	for (int j = i + 1; j < nCol; j++) {
-	  k = q11->getKVec(i, j);
-	  l = q11->getLVec(i, j);
-	  values = q11->getValVec(i, j);
-	  val = 0.0;
-	  for (int m = 0; m < nEntries[i][j]; m++) {
-	    val += values[m] * tmpMat[k[m]][l[m]];
-	  }
-	  (*mat)[i][j] += val;
-	  (*mat)[j][i] += val;
-	}
-      }
-    } else {  /*  A not symmetric or psi != phi        */
-      for (int i = 0; i < nRow; i++) {
-	for (int j = 0; j < nCol; j++) {
-	  k = q11->getKVec(i, j);
-	  l = q11->getLVec(i, j);
-	  values = q11->getValVec(i, j);
-	  double val = 0.0;
-	  for (int m = 0; m < nEntries[i][j]; m++) {
-	    val += values[m] * tmpMat[k[m]][l[m]];
-	  }
-	  (*mat)[i][j] += val;
-	}
-      }
-    }
-  }
-
-  Quad2::Quad2(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : SecondOrderAssembler(op, assembler, quad, true)
-  {
-    tmpLALt.resize(omp_get_max_threads());
-  }
-
-  Quad2::~Quad2()
-  {
-    if (!firstCall) {
-      int nPoints = quadrature->getNumPoints();
-      for (int i = 0; i < static_cast<int>(tmpLALt.size()); i++) {
-	for (int j = 0; j < nPoints; j++) {
-	  DELETE tmpLALt[i][j];
-	}
-	DELETE [] tmpLALt[i];
-      }
-    }
-  }
-
-  void Quad2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    int nPoints = quadrature->getNumPoints();
-    int myRank = omp_get_thread_num();
-
-    if (firstCall) {
-      tmpLALt[myRank] = NEW DimMat<double>*[nPoints];
-      for (int j = 0; j < nPoints; j++) {
-	tmpLALt[myRank][j] = NEW DimMat<double>(dim, NO_INIT);
-      }
-    
-      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
-      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
-      basFcts = owner->getColFESpace()->getBasisFcts();
-      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI);
-      firstCall = false;
-    }
-
-    DimMat<double> **LALt = tmpLALt[myRank];
-    
-    for (int i = 0; i < nPoints; i++) {
-      LALt[i]->set(0.0);
-    }
-    
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, nPoints, LALt);
-    }
-        
-    VectorOfFixVecs< DimVec<double> > *grdPsi, *grdPhi;
-
-    if (symmetric) {
-      for (int iq = 0; iq < nPoints; iq++) {
-	(*LALt[iq]) *= elInfo->getDet();
-
-	grdPsi = psiFast->getGradient(iq);
-	grdPhi = phiFast->getGradient(iq);
-
-	for (int i = 0; i < nRow; i++) {
-	  (*mat)[i][i] += quadrature->getWeight(iq) * 
-	    xAy((*grdPsi)[i], (*LALt[iq]), (*grdPhi)[i]);
-
-	  for (int j = i + 1; j < nCol; j++) {
-	    double val = quadrature->getWeight(iq) * 
-	      xAy((*grdPsi)[i], (*LALt[iq]), (*grdPhi)[j]);
-	    (*mat)[i][j] += val;
-	    (*mat)[j][i] += val;
-	  }
-	}
-      }
-    } else {      /*  non symmetric assembling   */
-      for (int iq = 0; iq < nPoints; iq++) {
-	(*LALt[iq]) *= elInfo->getDet();
-
-	grdPsi = psiFast->getGradient(iq);
-	grdPhi = phiFast->getGradient(iq);
-
-	for (int i = 0; i < nRow; i++) {
-	  for (int j = 0; j < nCol; j++) {
-	    (*mat)[i][j] += quadrature->getWeight(iq) * 
-	      xAy((*grdPsi)[i], (*LALt[iq]), (*grdPhi)[j]);
-	  }
-	}
-      }
-    }
-  }
-
-  Stand2::Stand2(Operator *op, Assembler *assembler, Quadrature *quad) 
-    : SecondOrderAssembler(op, assembler, quad, false)
-  {}
-
-  void Stand2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
-  {
-    DimVec<double> grdPsi(dim, NO_INIT);
-    VectorOfFixVecs<DimVec<double> > grdPhi(dim, nCol, NO_INIT);
-
-    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
-    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
-
-    int nPoints = quadrature->getNumPoints();
-
-    DimMat<double> **LALt = NEW DimMat<double>*[nPoints];
-    for (int iq = 0; iq < nPoints; iq++) {
-      LALt[iq] = NEW DimMat<double>(dim, NO_INIT);
-      LALt[iq]->set(0.0);
-    }
-
-    int myRank = omp_get_thread_num();
-
-    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
-      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, nPoints, LALt);
-    }
-
-    if (symmetric) {
-      for (int iq = 0; iq < nPoints; iq++) {
-	(*LALt[iq]) *= elInfo->getDet();
-
-	for (int i = 0; i < nCol; i++) {
-	  (*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
-	}
-
-	for (int i = 0; i < nRow; i++) {
-	  (*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
-	  (*mat)[i][i] += quadrature->getWeight(iq) * 
-	    (grdPsi * ((*LALt[iq]) * grdPhi[i]));
-
-	  for (int j = i + 1; j < nCol; j++) {
-	    double val = quadrature->getWeight(iq) * (grdPsi * ((*LALt[iq]) * grdPhi[j]));
-	    (*mat)[i][j] += val;
-	    (*mat)[j][i] += val;
-	  }
-	}
-      }
-    } else {      /*  non symmetric assembling   */
-      for (int iq = 0; iq < nPoints; iq++) {
-	(*LALt[iq]) *= elInfo->getDet();
-
-	for (int i = 0; i < nCol; i++) {
-	  (*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
-	}
-
-	for (int i = 0; i < nRow; i++) {
-	  (*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
-	  for (int j = 0; j < nCol; j++) {
-	    (*mat)[i][j] += quadrature->getWeight(iq) *
-	      (grdPsi * ((*LALt[iq]) * grdPhi[j]));
-	  }
-	}
-      }
-    }
-
-    for (int iq = 0; iq < nPoints; iq++) 
-      DELETE LALt[iq];
-
-    DELETE [] LALt;
-  }
-
   Assembler::Assembler(Operator  *op,
 		       const FiniteElemSpace *rowFESpace_,
 		       const FiniteElemSpace *colFESpace_) 
@@ -1405,6 +82,61 @@ namespace AMDiS {
     }      
   }
 
+  void Assembler::calculateElementMatrix(const ElInfo *rowElInfo,
+					 const ElInfo *colElInfo,
+					 ElementMatrix *userMat,
+					 double factor)
+  {
+    FUNCNAME("Assembler::calculateElementMatrix()");
+
+    if (remember && ((factor != 1.0) || (operat->uhOld))) {
+      rememberElMat = true;
+    }
+  
+    if (rememberElMat && !elementMatrix)
+      elementMatrix = NEW ElementMatrix(nRow, nCol);
+
+    Element *el = rowElInfo->getElement();
+
+    
+    //    checkForNewTraverse();
+    lastVecEl = lastMatEl = NULL;
+
+    checkQuadratures();
+    
+    if ((el != lastMatEl && el != lastVecEl) || !operat->isOptimized()) {
+      initElement(rowElInfo);
+    }
+
+    if (el != lastMatEl || !operat->isOptimized()) {
+      if (rememberElMat) {
+	elementMatrix->set(0.0);
+      }
+      lastMatEl = el;
+    } else {
+      if (rememberElMat) {
+	axpy(factor, *elementMatrix, *userMat);
+	return;
+      }
+    }
+  
+    ElementMatrix *mat = rememberElMat ? elementMatrix : userMat;
+
+    if (secondOrderAssembler)
+      secondOrderAssembler->calculateElementMatrix(rowElInfo, mat);
+    if (firstOrderAssemblerGrdPsi)
+      firstOrderAssemblerGrdPsi->calculateElementMatrix(rowElInfo, mat);
+    if (firstOrderAssemblerGrdPhi)
+      firstOrderAssemblerGrdPhi->calculateElementMatrix(rowElInfo, mat);
+    if (zeroOrderAssembler) {
+      zeroOrderAssembler->calculateElementMatrix(rowElInfo, mat);
+    }
+
+    if (rememberElMat && userMat) {
+      axpy(factor, *elementMatrix, *userMat);
+    }      
+  }
+
   void Assembler::calculateElementVector(const ElInfo *elInfo, 
 					 ElementVector *userVec,
 					 double factor)
@@ -1520,7 +252,7 @@ namespace AMDiS {
       ZeroOrderAssembler::getSubAssembler(op, this, quad0, opt);
   }
 
-  StandardAssembler::StandardAssembler(Operator  *op,
+  StandardAssembler::StandardAssembler(Operator *op,
 				       Quadrature *quad2,
 				       Quadrature *quad1GrdPsi,
 				       Quadrature *quad1GrdPhi,
diff --git a/AMDiS/src/Assembler.h b/AMDiS/src/Assembler.h
index d1b07fae0d95609a6d417c3a451714057fc6f85c..f097a8b83ae0cc1b5d48cb32ec7adb3536dda6ac 100644
--- a/AMDiS/src/Assembler.h
+++ b/AMDiS/src/Assembler.h
@@ -33,876 +33,19 @@
 #include <vector>
 #include "FixVec.h"
 #include "MemoryManager.h"
-#include "Operator.h"
+#include "ZeroOrderAssembler.h"
+#include "FirstOrderAssembler.h"
+#include "SecondOrderAssembler.h"
+#include "ElInfo.h"
 
 namespace AMDiS {
 
-  class ElInfo;
+  //  class ElInfo;
   class Element;
-  class Assembler;
   class Quadrature;
-  class FastQuadrature;
-  class FiniteElemSpace;
   class ElementMatrix;
   class ElementVector;
-  class BasisFunction;
-  class Q00PsiPhi;
-  class Q10PsiPhi;
-  class Q01PsiPhi;
-  class Q11PsiPhi;
-  class Q0Psi;
-  class Q1Psi;
-  class Q2Psi;
-
-  template<typename T> class DOFVectorBase;
-
-  // ============================================================================
-  // ===== class SubAssembler ===================================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * Base class for SecondOrderAssembler, FirstOrderAssembler, 
-   * ZeroOrderAssembler. The task of a SubAssembler is to assemble a list of 
-   * terms of a special order and add their contributions to a DOFMatrix or a 
-   * DOFVector. An Assembler can consist of up to four SubAssemblers: one 
-   * SecondOrderAssembler for second order terms, one ZeroOrderAssembler for 
-   * terms of order zero, and two FirstOrderAssemblers. One for terms with
-   * derivatives of the basis functions connected to to row DOFs and one for 
-   * those connected to the column DOFs.
-   */
-  class SubAssembler
-  {
-  public:
-    MEMORY_MANAGED(SubAssembler);
-
-    /** \brief
-     * Creates a SubAssembler belonging to assembler for the terms of given 
-     * order of Operator op. If order is equal to one, type spezifies what kind 
-     * of FirstOrderType are to assemble. During construction of a SubAssembler
-     * the needs and properties of the terms are considered.
-     */
-    SubAssembler(Operator *op,
-		 Assembler *assembler,
-		 Quadrature *quadrat,
-		 int order, 
-		 bool optimized,
-		 FirstOrderType type = GRD_PHI);
-
-    /** \brief
-     * Destructor
-     */
-    virtual ~SubAssembler() {};
-
-    /** \brief
-     * Calculates the element matrix for elInfo and adds it to mat. Memory
-     * for mat must be provided by the caller.
-     */
-    virtual void calculateElementMatrix(const ElInfo *elInfo,
-					ElementMatrix *mat) = 0;
-
-    /** \brief
-     * Calculates the element vector for elInfo and adds it to vec. Memory
-     * for vec must be provided by the caller.
-     */
-    virtual void calculateElementVector(const ElInfo *elInfo,
-					ElementVector *vec) = 0;
-
-    /** \brief
-     * Returns \ref terms
-     */
-    inline std::vector<OperatorTerm*> *getTerms() { 
-      return &terms[omp_get_thread_num()]; 
-    };
-
-    /** \brief
-     * Returns \ref quadrature.
-     */ 
-    inline Quadrature *getQuadrature() {
-      return quadrature;
-    };
-
-    /** \brief
-     * Sets \ref quadrature to q.
-     */
-    inline void setQuadrature(Quadrature* q) {
-      quadrature = q;
-    };
-  
-    /** \brief
-     * Returns a vector with the world coordinates of the quadrature points
-     * of \ref quadrature on the element of elInfo. 
-     * Used by \ref OperatorTerm::initElement().
-     */
-    WorldVector<double>* getCoordsAtQPs(const ElInfo* elInfo, 
-					Quadrature *quad = NULL);
-
-    /** \brief
-     * DOFVector dv evaluated at quadrature points.
-     * Used by \ref OperatorTerm::initElement().
-     */
-    double* getVectorAtQPs(DOFVectorBase<double>* dv, const ElInfo* elInfo,
-			   Quadrature *quad = NULL);
-
-    /** \brief
-     * Gradients of DOFVector dv evaluated at quadrature points.
-     * Used by \ref OperatorTerm::initElement().
-     */
-    WorldVector<double>* getGradientsAtQPs(DOFVectorBase<double>* dv, 
-					   const ElInfo* elInfo,
-					   Quadrature *quad = NULL);
-
-    /** \brief
-     * Called once for each ElInfo when \ref calculateElementMatrix() or
-     * \ref calculateElementVector() is called for the first time for this
-     * Element.
-     */
-    virtual void initElement(const ElInfo *elInfo,
-			     Quadrature *quad = NULL);
-
-    /** \brief
-     * Returns \ref psiFast.
-     */
-    const FastQuadrature *getPsiFast() const { 
-      return psiFast; 
-    };
-
-    /** \brief
-     * Returns \ref phiFast.
-     */
-    const FastQuadrature *getPhiFast() const { 
-      return phiFast; 
-    };
-
-  protected:
-    /** \brief
-     * Updates \ref psiFast and \ref phiFast.
-     */
-    FastQuadrature *updateFastQuadrature(FastQuadrature *quadFast,
-					 const BasisFunction *psi,
-					 Flag updateFlag);
-  
-  protected:
-    /** \brief
-     * Problem dimension
-     */
-    int dim;
-
-    /** \brief
-     * Number of rows of the element matrix and length of the element
-     * vector. Is equal to the number of row basis functions
-     */
-    int nRow;
-
-    /** \brief
-     * Number of columns of the element matrix. Is equal to the number
-     * of column basis functions
-     */
-    int nCol;
-
-    /** \brief
-     * Used for \ref getVectorAtQPs().
-     */
-    class ValuesAtQPs {
-    public:
-      Vector<double> values;
-      bool valid;
-    };
-
-    /** \brief
-     * Used for \ref getGradientsAtQPs().
-     */
-    class GradientsAtQPs {
-    public:
-      Vector<WorldVector<double> > values;
-      bool valid;
-    };
-
-    /** \brief
-     * Used for \ref getVectorAtQPs().
-     */
-    std::map<const DOFVectorBase<double>*, ValuesAtQPs*> valuesAtQPs;
-
-    /** \brief
-     * Used for \ref getGradientsAtQPs().
-     */
-    std::map<const DOFVectorBase<double>*, GradientsAtQPs*> gradientsAtQPs;
-
-    /** \brief
-     * Set and updated by \ref initElement() for each ElInfo. 
-     * coordsAtQPs[i] points to the coordinates of the i-th quadrature point.
-     */
-    WorldVector<double> *coordsAtQPs;
-
-    /** \brief
-     * Used for \ref getCoordsAtQPs().
-     */
-    bool coordsValid;
-
-    /** \brief
-     * Used for \ref getCoordsAtQP(). Stores the number of allocated WorldVectors.
-     */
-    int coordsNumAllocated;
-
-    /** \brief
-     * Needed Quadrature. Constructed in the constructor of SubAssembler
-     */
-    Quadrature *quadrature;
-
-    /** \brief
-     * FastQuadrature for row basis functions
-     */
-    FastQuadrature *psiFast;
-
-    /** \brief
-     * FastQuadrature for column basis functions
-     */
-    FastQuadrature *phiFast;
-
-    /** \brief
-     * Corresponding Assembler
-     */
-    Assembler* owner;
-
-    /** \brief
-     * Flag that specifies whether the element matrix is symmetric.
-     */
-    bool symmetric;
-
-    /** \brief
-     * List of all terms with a contribution to this SubAssembler
-     */
-    std::vector< std::vector<OperatorTerm*> > terms;
-
-    /** \brief
-     *
-     */
-    bool opt;
-
-    /** \brief
-     *
-     */
-    bool firstCall;
-
-    friend class Assembler;
-  };
-
-  // ============================================================================
-  // ===== class ZeroOrderAssembler =============================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * SubAssembler for zero order terms.
-   */
-  class ZeroOrderAssembler : public SubAssembler
-  {
-  public:
-    MEMORY_MANAGED(ZeroOrderAssembler);
-
-    /** \brief
-     * Creates and returns the ZeroOrderAssembler for Operator op and
-     * the given assembler. If all terms are piecewise constant precalculated 
-     * integrals can be used while assembling and the returned 
-     * ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
-     * be returned.
-     */
-    static ZeroOrderAssembler* getSubAssembler(Operator *op,
-					       Assembler *assembler,
-					       Quadrature *quadrat,
-					       bool optimized);
-
-
-    /** \brief
-     * Destructor.
-     */
-    virtual ~ZeroOrderAssembler() {};
-
-  protected:
-    /** \brief
-     * Constructor.
-     */
-    ZeroOrderAssembler(Operator *op, 
-		       Assembler *assembler, 
-		       Quadrature *quadrat,
-		       bool optimized);
-
-  protected:
-    /** \brief
-     * List of all yet created optimized SubAssembler objects.
-     */
-    static std::vector<SubAssembler*> optimizedSubAssemblers;
-
-    /** \brief
-     * List of all yet created standard SubAssembler objects.
-     */ 
-    static std::vector<SubAssembler*> standardSubAssemblers;
-  };
-
-  // ============================================================================
-  // ===== class Stand0 =========================================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * Standard zero order assembler.
-   */
-  class Stand0 :  public ZeroOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Stand0);
-
-    /** \brief
-     * Constructor.
-     */
-    Stand0(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *elInfo, ElementVector *vec);
-  };
-
-  // ============================================================================
-  // ===== class Quad0 ==========================================================
-  // ============================================================================
-
-  /** 
-   * \ingroup Assembler
-   *
-   * \brief
-   * Zero order assembler using fast quadratures.
-   */
-  class Quad0 :  public ZeroOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Quad0);
-
-    /** \brief
-     * Constructor.
-     */
-    Quad0(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    ~Quad0();
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *elInfo, ElementVector *vec);
-
-  protected:
-    std::vector<double*> cPtrs;
-  };
-
-
-  // ============================================================================
-  // ===== class Pre0 ===========================================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * Zero order assembler using precaculated integrals.
-   */
-  class Pre0 : public ZeroOrderAssembler
-  {
-  public:
-    MEMORY_MANAGED(Pre0);
-
-    /** \brief
-     * Constructor.
-     */
-    Pre0(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *elInfo, ElementVector *vec);
-
-  protected:
-    /** \brief
-     * Integral of the product of psi and phi.
-     */
-    const Q00PsiPhi *q00;
-
-    /** \brief
-     * Integral of psi.
-     */
-    const Q0Psi *q0;
- 
-    friend class ZeroOrderAssembler;
-  };
-
-  // ============================================================================
-  // ===== class FirstOrderAssembler ============================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * SubAssembler for first order terms.
-   */
-  class FirstOrderAssembler : public SubAssembler
-  {
-  public:
-    MEMORY_MANAGED(FirstOrderAssembler);
-
-    /** \brief
-     * Creates and returns the FirstOrderAssembler for Operator op and
-     * the given assembler. If all terms are piecewise constant precalculated 
-     * integrals can be used while assembling and the returned 
-     * ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
-     * be returned.
-     */
-    static FirstOrderAssembler* getSubAssembler(Operator *op,
-						Assembler *assembler,
-						Quadrature *quadrat,
-						FirstOrderType type,
-						bool optimized);
-  
-    /** \brief
-     * Destructor.
-     */
-    virtual ~FirstOrderAssembler() {};
-
-  protected:
-    /** \brief
-     * Constructor.
-     */
-    FirstOrderAssembler(Operator *op,
-			Assembler *assembler,
-			Quadrature *quadrat,
-			bool optimized,
-			FirstOrderType type);
-
-
-  protected:
-    /** \brief
-     * List of all yet created optimized zero order assemblers for grdPsi.
-     */
-    static std::vector<SubAssembler*> optimizedSubAssemblersGrdPsi;
-
-    /** \brief
-     * List of all yet created standard zero order assemblers for grdPsi.
-     */
-    static std::vector<SubAssembler*> standardSubAssemblersGrdPsi;
-
-    /** \brief
-     * List of all yet created optimized zero order assemblers for grdPhi.
-     */
-    static std::vector<SubAssembler*> optimizedSubAssemblersGrdPhi;
-
-    /** \brief
-     * List of all yet created standard zero order assemblers for grdPhi.
-     */
-    static std::vector<SubAssembler*> standardSubAssemblersGrdPhi;
-  };
-
-  // ============================================================================
-  // ===== class Stand10 ========================================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * Standard first order assembler for grdPsi.
-   */
-  class Stand10 : public FirstOrderAssembler
-  {
-  public:
-    MEMORY_MANAGED(Stand10);
-
-    /** \brief
-     * Constructor.
-     */
-    Stand10(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector */*vec*/);
-  };
-
-  // ============================================================================
-  // ===== class Stand10 ========================================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * Standard first order assembler for grdPhi.
-   */
-  class Stand01 : public FirstOrderAssembler
-  {
-  public:
-    MEMORY_MANAGED(Stand01);
-
-    /** \brief
-     * Constructor.
-     */
-    Stand01(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector *) {
-      ERROR_EXIT("should not be called\n");
-    };
-  };
-
-  // ============================================================================
-  // ===== class Quad10 =========================================================
-  // ============================================================================
-
-  /** \brief
-   * First order assembler for grdPsi using fast quadratures.
-   */
-  class Quad10 : public FirstOrderAssembler
-  {
-  public:
-    MEMORY_MANAGED(Quad10);
-
-    /** \brief
-     * Constructor.
-     */
-    Quad10(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector */*vec*/);
-  };
-
-  // ============================================================================
-  // ===== class Quad01 =========================================================
-  // ============================================================================
-
-  /** \brief
-   * First order assembler for grdPhi using fast quadratures.
-   */
-  class Quad01 : public FirstOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Quad01);
-
-    /** \brief
-     * Constructor.
-     */
-    Quad01(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
-      ERROR_EXIT("should not be called\n");
-    };
-  };
-
-  // ============================================================================
-  // ===== class Pre10 ==========================================================
-  // ============================================================================
-
-  /** \brief
-   * First order assembler for grdPsi using precalculated integrals
-   */
-  class Pre10 : public FirstOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Pre10);
-
-    /** \brief
-     * Constructor.
-     */
-    Pre10(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector */*vec*/);
-
-  protected:
-    /** \brief
-     * Integral of the product of the derivative of psi and phi.
-     */
-    const Q10PsiPhi *q10;
-
-    /** \brief
-     * Integral of the derivative of psi.
-     */
-    const Q1Psi *q1;
-
-    friend class FirstOrderAssembler;
-  };
-
-  // ============================================================================
-  // ===== class Pre01 ==========================================================
-  // ============================================================================
-
-  /** \brief
-   *  First order assembler for grdPhi using precalculated integrals
-   */
-  class Pre01 : public FirstOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Pre01);
-
-    /** \brief
-     * Constructor.
-     */
-    Pre01(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
-      ERROR_EXIT("should not be called\n");
-    };
-
-  protected:
-    /** \brief
-     * Integral of the product of psi and the derivative of phi.
-     */
-    const Q01PsiPhi *q01;
-
-    /** \brief
-     * Integral of the derivative of phi.
-     */
-    const Q1Psi *q1;
-
-    friend class FirstOrderAssembler;
-  };
-
-  // ============================================================================
-  // ===== class SecondOrderAssembler ===========================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * SubAssembler for second order terms.
-   */
-  class SecondOrderAssembler : public SubAssembler
-  {
-  public:
-    MEMORY_MANAGED(SecondOrderAssembler);
-
-    /** \brief
-     * Creates and returns the SecondOrderAssembler for Operator op and
-     * the given assembler. If all terms are piecewise constant precalculated 
-     * integrals can be used while assembling and the returned 
-     * ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
-     * be returned.
-     */
-    static SecondOrderAssembler* getSubAssembler(Operator *op,
-						 Assembler *assembler,
-						 Quadrature *quadrat,
-						 bool optimized);
-
-    /** \brief
-     * Destructor.
-     */
-    virtual ~SecondOrderAssembler() {};
-
-  protected:
-    /** \brief
-     * Constructor.
-     */
-    SecondOrderAssembler(Operator *op,
-			 Assembler *assembler,
-			 Quadrature *quadrat,
-			 bool optimized);
-
-  protected:
-    /** \brief
-     * List of all yet created optimized second order assemblers.
-     */
-    static std::vector<SubAssembler*> optimizedSubAssemblers;
-
-    /** \brief
-     * List of all yet created standard second order assemblers.
-     */
-    static std::vector<SubAssembler*> standardSubAssemblers;
-  };
-
-  // ============================================================================
-  // ===== class Stand2 =========================================================
-  // ============================================================================
-
-  /**
-   * \ingroup Assembler
-   * 
-   * \brief
-   * Standard second order assembler
-   */
-  class Stand2 : public SecondOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Stand2);
-
-    /** \brief
-     * Constructor.
-     */
-    Stand2(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
-      ERROR_EXIT("should not be called\n");
-    };
-  };
-
-  // ============================================================================
-  // ===== class Quad2 ==========================================================
-  // ============================================================================
-
-  /** \brief
-   * Second order assembler using fast quadratures.
-   */
-  class Quad2 : public SecondOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Quad2);
-
-    /** \brief
-     * Constructor.
-     */
-    Quad2(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    ~Quad2();
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
-      ERROR_EXIT("should not be called\n");
-    };
-
-  protected:
-    /** \brief
-     * Thread safe temporary vector of DimMats for calculation in calculateElementMatrix().
-     */
-    std::vector< DimMat<double>** > tmpLALt;
-  };
-
-  // ============================================================================
-  // ===== class Pre2 ===========================================================
-  // ============================================================================
-
-  /** \brief
-   * Second order assembler using predefined integrals.
-   */
-  class Pre2 : public SecondOrderAssembler 
-  {
-  public:
-    MEMORY_MANAGED(Pre2);
-
-    /** \brief
-     * Constructor.
-     */
-    Pre2(Operator *op, Assembler *assembler, Quadrature *quad);
-
-    /** \brief
-     * Destructor.
-     */
-    ~Pre2();
-
-    /** \brief
-     * Implements SubAssembler::calculateElementMatrix().
-     */
-    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
-
-    /** \brief
-     * Implements SubAssembler::calculateElementVector().
-     */
-    void calculateElementVector(const ElInfo *, ElementVector *) {
-      ERROR_EXIT("should not be called\n");
-    };
-
-  protected:
-    /** \brief
-     * Integral of the product of the derivative of psi and the derivative 
-     * of phi.
-     */
-    const Q11PsiPhi *q11;
-
-    /** \brief
-     * Thread safe temporary vector of DimMats for calculation in calculateElementMatrix().
-     */
-    std::vector< DimMat<double>** > tmpLALt;
-
-    friend class SecondOrderAssembler;
-  };
-
-  // ============================================================================
-  // ===== class Assembler ======================================================
-  // ============================================================================
+  class Operator;
 
   /**
    * \ingroup Assembler
@@ -920,9 +63,9 @@ namespace AMDiS {
     /** \brief
      * Constructor.
      */
-    Assembler(Operator  *op,
-	      const FiniteElemSpace *rowFESpace_,
-	      const FiniteElemSpace *colFESpace_=NULL);
+    Assembler(Operator *op,
+	      const FiniteElemSpace *rowFESpace,
+	      const FiniteElemSpace *colFESpace = NULL);
 
     virtual ~Assembler() {};
 
@@ -936,16 +79,21 @@ namespace AMDiS {
     /** \brief
      * Assembles the element matrix for the given ElInfo
      */
-    virtual void calculateElementMatrix(const ElInfo *elInfo, 
-					ElementMatrix *userMat, 
-					double factor=1.0);
+    void calculateElementMatrix(const ElInfo *elInfo, 
+				ElementMatrix *userMat, 
+				double factor = 1.0);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *userMat,
+				double factor = 1.0);
 
     /** \brief
      * Assembles the element vector for the given ElInfo
      */
-    virtual void calculateElementVector(const ElInfo *elInfo, 
-					ElementVector *userVec, 
-					double factor=1.0);
+    void calculateElementVector(const ElInfo *elInfo, 
+				ElementVector *userVec, 
+				double factor = 1.0);
 
     /** \brief
      * Returns \ref rowFESpace.
@@ -1000,11 +148,9 @@ namespace AMDiS {
      * Returns \ref firstOrderAssemblerGrdPsi or \ref firstOrderAssemblerGrdPhi
      * depending on type.
      */
-    inline FirstOrderAssembler* getFirstOrderAssembler(FirstOrderType type = 
-						       GRD_PSI) 
+    inline FirstOrderAssembler* getFirstOrderAssembler(FirstOrderType type = GRD_PSI) 
     {
-      return
-	(type == GRD_PSI) ? 
+      return (type == GRD_PSI) ? 
 	firstOrderAssemblerGrdPsi : 
 	firstOrderAssemblerGrdPhi;
     };
diff --git a/AMDiS/src/DOFMatrix.cc b/AMDiS/src/DOFMatrix.cc
index aac17249cf98f9584e8f3825682ef57635a3db08..6f705d1ba0badbe7050e88d91b78e3e10dd5b825 100644
--- a/AMDiS/src/DOFMatrix.cc
+++ b/AMDiS/src/DOFMatrix.cc
@@ -463,6 +463,36 @@ namespace AMDiS {
     addElementMatrix(factor, *elementMatrix, bound);   
   }
 
+  void DOFMatrix::assemble(double factor, ElInfo *rowElInfo, ElInfo *colElInfo,
+			   const BoundaryType *bound, Operator *op)
+  {
+    FUNCNAME("DOFMatrix::assemble()");
+
+    if (!op && operators.size() == 0) {
+      return;
+    }
+
+    Operator *operat = op ? op : operators[0];
+    operat->getAssembler(omp_get_thread_num())->initElementMatrix(elementMatrix, rowElInfo);
+    
+    if (op) {
+      op->getElementMatrix(rowElInfo, colElInfo, elementMatrix);
+    } else {
+      std::vector<Operator*>::iterator it;
+      std::vector<double*>::iterator factorIt;
+      for (it = operators.begin(), factorIt = operatorFactor.begin();	
+	   it != operators.end(); 
+	   ++it, ++factorIt) {
+	(*it)->getElementMatrix(rowElInfo, colElInfo,
+				elementMatrix, 
+				*factorIt ? **factorIt : 1.0);
+      }      
+    }
+   
+    addElementMatrix(factor, *elementMatrix, bound);       
+  }
+
+
   Flag DOFMatrix::getAssembleFlag()
   {
     Flag fillFlag(0);
diff --git a/AMDiS/src/DOFMatrix.h b/AMDiS/src/DOFMatrix.h
index deb7a3acfe83084319618ca305eb2e0050491cea..a83fe2dd9eb5489abb728f25113564b701accf32 100644
--- a/AMDiS/src/DOFMatrix.h
+++ b/AMDiS/src/DOFMatrix.h
@@ -362,6 +362,10 @@ namespace AMDiS {
   
     void assemble(double factor, ElInfo *elInfo, 
 		  const BoundaryType *bound, Operator *op = NULL);
+
+
+    void assemble(double factor, ElInfo *rowElInfo, ElInfo *colElInfo,
+		  const BoundaryType *bound, Operator *op = NULL);
     
 
     /** \brief
diff --git a/AMDiS/src/DOFVector.hh b/AMDiS/src/DOFVector.hh
index 93a2bb432aa511d9f87a8358ef989b083104b5a2..bc7179a9785d05c8f1a78e52eba961185dfc25a6 100644
--- a/AMDiS/src/DOFVector.hh
+++ b/AMDiS/src/DOFVector.hh
@@ -15,6 +15,7 @@
 #include "ElementVector.h"
 #include "Assembler.h"
 #include "OpenMP.h"
+#include "Operator.h"
 
 namespace AMDiS {
 
diff --git a/AMDiS/src/FirstOrderAssembler.cc b/AMDiS/src/FirstOrderAssembler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..05181fc9f7dfdcd287dedcfdb58ade0af00fe8c6
--- /dev/null
+++ b/AMDiS/src/FirstOrderAssembler.cc
@@ -0,0 +1,459 @@
+#include <vector>
+#include "Assembler.h"
+#include "FirstOrderAssembler.h"
+#include "Operator.h"
+#include "QPsiPhi.h"
+#include "FiniteElemSpace.h"
+#include "Quadrature.h"
+#include "DOFVector.h"
+#include "ElementMatrix.h"
+#include "OpenMP.h"
+
+namespace AMDiS {
+
+  std::vector<SubAssembler*> FirstOrderAssembler::optimizedSubAssemblersGrdPhi;
+  std::vector<SubAssembler*> FirstOrderAssembler::optimizedSubAssemblersGrdPsi;
+
+  std::vector<SubAssembler*> FirstOrderAssembler::standardSubAssemblersGrdPhi;
+  std::vector<SubAssembler*> FirstOrderAssembler::standardSubAssemblersGrdPsi;
+
+
+  FirstOrderAssembler::FirstOrderAssembler(Operator *op,
+					   Assembler *assembler,
+					   Quadrature *quad,
+					   bool optimized,
+					   FirstOrderType type)
+    : SubAssembler(op, assembler, quad, 1, optimized, type)
+  {}
+
+  FirstOrderAssembler* 
+  FirstOrderAssembler::getSubAssembler(Operator* op,
+				       Assembler *assembler,
+				       Quadrature *quad,
+				       FirstOrderType type,
+				       bool optimized)
+  {
+    std::vector<SubAssembler*> *subAssemblers =
+	optimized ?
+	(type == GRD_PSI ? 
+	 &optimizedSubAssemblersGrdPsi : 
+	 &optimizedSubAssemblersGrdPhi) :
+    (type == GRD_PSI ? 
+     &standardSubAssemblersGrdPsi :
+     &standardSubAssemblersGrdPhi);
+
+    int myRank = omp_get_thread_num();
+    std::vector<OperatorTerm*> opTerms 
+	= (type == GRD_PSI) ? 
+	    op->firstOrderGrdPsi[myRank] : 
+            op->firstOrderGrdPhi[myRank];
+
+    // check if a assembler is needed at all
+    if (opTerms.size() == 0) {
+      return NULL;
+    }
+
+    sort(opTerms.begin(), opTerms.end());
+
+    FirstOrderAssembler *newAssembler;
+
+    // check if a new assembler is needed
+    for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
+
+      std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
+    
+      sort(assTerms.begin(), assTerms.end());   
+
+      if ((opTerms == assTerms) && 
+	  ((*subAssemblers)[i]->getQuadrature() == quad)) {
+
+	return dynamic_cast<FirstOrderAssembler*>((*subAssemblers)[i]);
+      }
+    }
+
+    // check if all terms are pw_const
+    bool pwConst = true;
+    for (int i = 0; i < static_cast<int>( opTerms.size()); i++) {
+      if (!(opTerms[i])->isPWConst()) {
+	pwConst = false;
+	break;
+      }
+    }  
+
+    // create new assembler
+    if (!optimized) {
+      newAssembler = 
+	(type == GRD_PSI) ?
+	dynamic_cast<FirstOrderAssembler*>(NEW Stand10(op, assembler, quad)) :
+	dynamic_cast<FirstOrderAssembler*>(NEW Stand01(op, assembler, quad));    
+    } else {
+      if (pwConst) {
+	newAssembler = 
+	  (type == GRD_PSI) ?
+	  dynamic_cast<FirstOrderAssembler*>(NEW Pre10(op, assembler, quad)) :
+	  dynamic_cast<FirstOrderAssembler*>(NEW Pre01(op, assembler, quad));
+      } else {
+	newAssembler = 
+	  (type == GRD_PSI) ?
+	  dynamic_cast<FirstOrderAssembler*>( NEW Quad10(op, assembler, quad)) :
+	  dynamic_cast<FirstOrderAssembler*>( NEW Quad01(op, assembler, quad));
+      }
+    }
+
+    subAssemblers->push_back(newAssembler);
+    return newAssembler;
+  };
+
+  Stand10::Stand10(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : FirstOrderAssembler(op, assembler, quad, false, GRD_PSI)
+  {}
+
+
+  void Stand10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    DimVec<double> grdPsi(dim, DEFAULT_VALUE, 0.0);
+    double *phival = GET_MEMORY(double, nCol);
+
+    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
+    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
+
+    int nPoints = quadrature->getNumPoints();
+
+    VectorOfFixVecs<DimVec<double> > Lb(dim, nPoints, NO_INIT);
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq].set(0.0);
+    }
+
+    int myRank = omp_get_thread_num();
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
+    }
+  
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq] *= elInfo->getDet();
+
+      for (int i = 0; i < nCol; i++) {
+	phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
+      }
+
+      for (int i = 0; i < nRow; i++) {
+	(*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
+	for (int j = 0; j < nCol; j++) {
+	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * grdPsi) * phival[j];
+	}
+      }
+    }
+    FREE_MEMORY(phival, double, nCol);
+  }
+
+
+  Quad10::Quad10(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : FirstOrderAssembler(op, assembler, quad, true, GRD_PSI)
+  {
+  }
+
+
+  void Quad10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    VectorOfFixVecs<DimVec<double> > *grdPsi;
+    const double *phi;
+
+    if (firstCall) {
+      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
+      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
+      basFcts = owner->getColFESpace()->getBasisFcts();
+      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
+      firstCall = false;
+    }
+
+    int nPoints = quadrature->getNumPoints();
+
+    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq].set(0.0);
+    }
+
+    int myRank = omp_get_thread_num();
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
+    }
+  
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq] *= elInfo->getDet();
+
+      phi    = phiFast->getPhi(iq);
+      grdPsi = psiFast->getGradient(iq);
+
+      for (int i = 0; i < nRow; i++) {
+	for (int j = 0; j < nCol; j++)
+	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPsi)[i]) * phi[j];
+      }
+    }
+  }
+
+
+  Pre10::Pre10(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : FirstOrderAssembler(op, assembler, quad, true, GRD_PSI)
+  {
+  }
+
+
+  void Pre10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    VectorOfFixVecs<DimVec<double> > Lb(dim, 1, NO_INIT);
+    const int *k;
+    const double *values;
+
+    if (firstCall) {
+      q10 = Q10PsiPhi::provideQ10PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
+					owner->getColFESpace()->getBasisFcts(), 
+					quadrature);
+      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
+			       quadrature);
+      firstCall = false;
+    }
+
+    const int **nEntries = q10->getNumberEntries();
+    int myRank = omp_get_thread_num();
+    Lb[0].set(0.0);
+
+    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, 1, Lb);
+    }
+
+    Lb[0] *= elInfo->getDet();
+
+    for (int i = 0; i < nRow; i++) {
+      for (int j = 0; j < nCol; j++) {
+	k = q10->getKVec(i, j);
+	values = q10->getValVec(i, j);
+	double val = 0.0;
+	for (int m = 0; m < nEntries[i][j]; m++) {
+	  val += values[m] * Lb[0][k[m]];
+	}
+	(*mat)[i][j] += val;
+      }
+    }
+  }
+
+
+  Stand01::Stand01(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : FirstOrderAssembler(op, assembler, quad, false, GRD_PHI)
+  {}
+
+
+  void Stand01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    VectorOfFixVecs<DimVec<double> > grdPhi(dim, nCol, NO_INIT);
+
+    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
+    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
+
+    int nPoints = quadrature->getNumPoints();
+    VectorOfFixVecs<DimVec<double> > Lb(dim, nPoints, NO_INIT);
+    int myRank = omp_get_thread_num();
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq].set(0.0);
+    }
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
+    }
+  
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq] *= elInfo->getDet();
+
+      for (int i = 0; i < nCol; i++) {
+	(*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
+      }
+
+      for (int i = 0; i < nRow; i++) {
+	double psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
+	for (int j = 0; j < nCol; j++)
+	  (*mat)[i][j] += quadrature->getWeight(iq) * ((Lb[iq] * psival) * grdPhi[j]);
+      }
+    } 
+  }
+
+  void Stand10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
+  {
+    DimVec<double> grdPsi(dim, DEFAULT_VALUE, 0.0);
+    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
+    int nPoints = quadrature->getNumPoints();
+    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
+    int myRank = omp_get_thread_num();
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq].set(0.0);
+    }
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
+    }
+  
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq] *= elInfo->getDet();
+
+      for (int i = 0; i < nRow; i++) {
+	(*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
+	(*vec)[i] += quadrature->getWeight(iq) * (Lb[iq] * grdPsi);
+      }
+    }
+  }
+
+  Quad01::Quad01(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : FirstOrderAssembler(op, assembler, quad, true, GRD_PHI)
+  {
+  }
+
+  void Quad01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    VectorOfFixVecs<DimVec<double> > *grdPhi;
+
+    if (firstCall) {
+      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
+      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
+      basFcts = owner->getColFESpace()->getBasisFcts();
+      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI);
+      firstCall = false;
+    }
+
+    int nPoints = quadrature->getNumPoints();
+    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
+    int myRank = omp_get_thread_num();
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq].set(0.0);
+    }
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
+    }
+  
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq] *= elInfo->getDet();
+
+      const double *psi = psiFast->getPhi(iq);
+      grdPhi = phiFast->getGradient(iq);
+
+      for (int i = 0; i < nRow; i++) {
+	for (int j = 0; j < nCol; j++)
+	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPhi)[j]) * psi[i];
+      }
+    }
+  }
+
+  void Quad10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
+  {
+    VectorOfFixVecs<DimVec<double> > *grdPsi;
+
+    if (firstCall) {
+      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
+      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
+      basFcts = owner->getColFESpace()->getBasisFcts();
+      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
+      firstCall = false;
+    }
+
+    int nPoints = quadrature->getNumPoints();
+    VectorOfFixVecs<DimVec<double> > Lb(dim,nPoints,NO_INIT);
+    int myRank = omp_get_thread_num();
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      Lb[iq].set(0.0);
+    }
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, nPoints, Lb);
+    }
+  
+    for (int iq = 0; iq < nPoints; iq++) {
+
+      Lb[iq] *= elInfo->getDet();
+      grdPsi = psiFast->getGradient(iq);
+
+      for (int i = 0; i < nRow; i++) {
+	(*vec)[i] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPsi)[i]);
+      }
+    }
+  }
+
+  Pre01::Pre01(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : FirstOrderAssembler(op, assembler, quad, true, GRD_PHI)
+  {
+  }
+
+  void Pre01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    VectorOfFixVecs<DimVec<double> > Lb(dim,1,NO_INIT);
+
+    const int *l;
+    const double *values;
+
+    if (firstCall) {
+      q01 = Q01PsiPhi::provideQ01PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
+					owner->getColFESpace()->getBasisFcts(), 
+					quadrature);
+      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
+			       quadrature);
+      firstCall = false;
+    }
+
+    const int **nEntries = q01->getNumberEntries();
+    int myRank = omp_get_thread_num();
+    Lb[0].set(0.0);
+
+    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, 1, Lb);
+    }
+
+    Lb[0] *= elInfo->getDet();
+
+    for (int i = 0; i < nRow; i++) {
+      for (int j = 0; j < nCol; j++) {
+	l = q01->getLVec(i, j);
+	values = q01->getValVec(i, j);
+	double val = 0.0;
+	for (int m = 0; m < nEntries[i][j]; m++) {
+	  val += values[m] * Lb[0][l[m]];
+	}
+	(*mat)[i][j] += val;
+      }
+    }
+  }
+
+  void Pre10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
+  {
+    VectorOfFixVecs<DimVec<double> > Lb(dim,1,NO_INIT);
+
+    const int *k;
+    const double *values;
+
+    if (firstCall) {
+      q10 = Q10PsiPhi::provideQ10PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
+					owner->getColFESpace()->getBasisFcts(), 
+					quadrature);
+      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
+			       quadrature);
+      firstCall = false;
+    }
+
+    const int *nEntries = q1->getNumberEntries();
+    int myRank = omp_get_thread_num();
+    Lb[0].set(0.0);
+
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<FirstOrderTerm*>(terms[myRank][i]))->getLb(elInfo, 1, Lb);
+    }
+
+    Lb[0] *= elInfo->getDet();
+
+    for (int i = 0; i < nRow; i++) {
+      k = q1->getKVec(i);
+      values = q1->getValVec(i);
+      double val = 0.0;
+      for (int m = 0; m < nEntries[i]; m++) {
+	val += values[m] * Lb[0][k[m]];
+      }
+      (*vec)[i] += val;
+    }
+  }
+
+}
diff --git a/AMDiS/src/FirstOrderAssembler.h b/AMDiS/src/FirstOrderAssembler.h
new file mode 100644
index 0000000000000000000000000000000000000000..af3b7f2963b09a805c6a18d0cb880385a0a9e919
--- /dev/null
+++ b/AMDiS/src/FirstOrderAssembler.h
@@ -0,0 +1,325 @@
+#ifndef AMDIS_FIRST_ORDER_ASSEMBLER_H
+#define AMDIS_FIRST_ORDER_ASSEMBLER_H
+
+#include "SubAssembler.h"
+
+namespace AMDiS {
+
+  class Q10PsiPhi;
+  class Q01PsiPhi;
+  class Q1Psi;
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * SubAssembler for first order terms.
+   */
+  class FirstOrderAssembler : public SubAssembler
+  {
+  public:
+    MEMORY_MANAGED(FirstOrderAssembler);
+
+    /** \brief
+     * Creates and returns the FirstOrderAssembler for Operator op and
+     * the given assembler. If all terms are piecewise constant precalculated 
+     * integrals can be used while assembling and the returned 
+     * ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
+     * be returned.
+     */
+    static FirstOrderAssembler* getSubAssembler(Operator *op,
+						Assembler *assembler,
+						Quadrature *quadrat,
+						FirstOrderType type,
+						bool optimized);
+  
+    /** \brief
+     * Destructor.
+     */
+    virtual ~FirstOrderAssembler() {};
+
+  protected:
+    /** \brief
+     * Constructor.
+     */
+    FirstOrderAssembler(Operator *op,
+			Assembler *assembler,
+			Quadrature *quadrat,
+			bool optimized,
+			FirstOrderType type);
+
+
+  protected:
+    /** \brief
+     * List of all yet created optimized zero order assemblers for grdPsi.
+     */
+    static std::vector<SubAssembler*> optimizedSubAssemblersGrdPsi;
+
+    /** \brief
+     * List of all yet created standard zero order assemblers for grdPsi.
+     */
+    static std::vector<SubAssembler*> standardSubAssemblersGrdPsi;
+
+    /** \brief
+     * List of all yet created optimized zero order assemblers for grdPhi.
+     */
+    static std::vector<SubAssembler*> optimizedSubAssemblersGrdPhi;
+
+    /** \brief
+     * List of all yet created standard zero order assemblers for grdPhi.
+     */
+    static std::vector<SubAssembler*> standardSubAssemblersGrdPhi;
+  };
+
+  // ============================================================================
+  // ===== class Stand10 ========================================================
+  // ============================================================================
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * Standard first order assembler for grdPsi.
+   */
+  class Stand10 : public FirstOrderAssembler
+  {
+  public:
+    MEMORY_MANAGED(Stand10);
+
+    /** \brief
+     * Constructor.
+     */
+    Stand10(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector */*vec*/);
+  };
+
+  // ============================================================================
+  // ===== class Stand10 ========================================================
+  // ============================================================================
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * Standard first order assembler for grdPhi.
+   */
+  class Stand01 : public FirstOrderAssembler
+  {
+  public:
+    MEMORY_MANAGED(Stand01);
+
+    /** \brief
+     * Constructor.
+     */
+    Stand01(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector *) {
+      ERROR_EXIT("should not be called\n");
+    };
+  };
+
+  // ============================================================================
+  // ===== class Quad10 =========================================================
+  // ============================================================================
+
+  /** \brief
+   * First order assembler for grdPsi using fast quadratures.
+   */
+  class Quad10 : public FirstOrderAssembler
+  {
+  public:
+    MEMORY_MANAGED(Quad10);
+
+    /** \brief
+     * Constructor.
+     */
+    Quad10(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector */*vec*/);
+  };
+
+  // ============================================================================
+  // ===== class Quad01 =========================================================
+  // ============================================================================
+
+  /** \brief
+   * First order assembler for grdPhi using fast quadratures.
+   */
+  class Quad01 : public FirstOrderAssembler 
+  {
+  public:
+    MEMORY_MANAGED(Quad01);
+
+    /** \brief
+     * Constructor.
+     */
+    Quad01(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
+      ERROR_EXIT("should not be called\n");
+    };
+  };
+
+  // ============================================================================
+  // ===== class Pre10 ==========================================================
+  // ============================================================================
+
+  /** \brief
+   * First order assembler for grdPsi using precalculated integrals
+   */
+  class Pre10 : public FirstOrderAssembler 
+  {
+  public:
+    MEMORY_MANAGED(Pre10);
+
+    /** \brief
+     * Constructor.
+     */
+    Pre10(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector */*vec*/);
+
+  protected:
+    /** \brief
+     * Integral of the product of the derivative of psi and phi.
+     */
+    const Q10PsiPhi *q10;
+
+    /** \brief
+     * Integral of the derivative of psi.
+     */
+    const Q1Psi *q1;
+
+    friend class FirstOrderAssembler;
+  };
+
+  // ============================================================================
+  // ===== class Pre01 ==========================================================
+  // ============================================================================
+
+  /** \brief
+   *  First order assembler for grdPhi using precalculated integrals
+   */
+  class Pre01 : public FirstOrderAssembler 
+  {
+  public:
+    MEMORY_MANAGED(Pre01);
+
+    /** \brief
+     * Constructor.
+     */
+    Pre01(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
+      ERROR_EXIT("should not be called\n");
+    };
+
+  protected:
+    /** \brief
+     * Integral of the product of psi and the derivative of phi.
+     */
+    const Q01PsiPhi *q01;
+
+    /** \brief
+     * Integral of the derivative of phi.
+     */
+    const Q1Psi *q1;
+
+    friend class FirstOrderAssembler;
+  };
+
+
+}
+
+#endif // AMDIS_FIRST_ORDER_ASSEMBLER_H
diff --git a/AMDiS/src/Global.h b/AMDiS/src/Global.h
index 5f0f8cc63417e6b41603f50a6c6c08983f333eb1..b9aacee3a4f560e0d6f5f63e7dbacaf16342276d 100644
--- a/AMDiS/src/Global.h
+++ b/AMDiS/src/Global.h
@@ -512,6 +512,16 @@ namespace AMDiS {
   const int RescheduleErrorCode = 23;
 
   std::string memSizeStr(int size);
+
+  /**
+   * \ingroup Assembler
+   * \brief
+   * Specifies the type of a FirstOrderTerm 
+   */
+  enum FirstOrderType {
+    GRD_PSI,
+    GRD_PHI
+  };
 }
 
 #endif // AMDIS_GLOBAL_H
diff --git a/AMDiS/src/Operator.cc b/AMDiS/src/Operator.cc
index 06460c686429ad4996e3fd9dd4f294af22e0d890..d2ff25732e4ee565b0ea22460e538f7515408631 100644
--- a/AMDiS/src/Operator.cc
+++ b/AMDiS/src/Operator.cc
@@ -87,7 +87,7 @@ namespace AMDiS {
 	    val += Lambda[i][k] * matrix[k][l] * Lambda[i][l];
 	val *= factor;
 	LALt[i][i] += val;
-	for (j = i+1; j <= dim; j++) {
+	for (j = i + 1; j <= dim; j++) {
 	  val = 0.0;
 	  for (k = 0; k < dimOfWorld; k++)
 	    for (l = 0; l < dimOfWorld; l++)
@@ -218,6 +218,21 @@ namespace AMDiS {
     assembler[myRank]->calculateElementMatrix(elInfo, userMat, factor);
   }
 
+  void Operator::getElementMatrix(const ElInfo *rowElInfo, 
+				  const ElInfo *colElInfo,
+				  ElementMatrix *userMat, 
+				  double factor)
+  {
+    int myRank = omp_get_thread_num();
+
+    if (!assembler[myRank]) {
+      initAssembler(myRank, NULL, NULL, NULL, NULL);
+    }
+
+    assembler[myRank]->calculateElementMatrix(rowElInfo, colElInfo, 
+    					      userMat, factor);
+  }
+
   void Operator::getElementVector(const ElInfo *elInfo, 
 				  ElementVector *userVec, 
 				  double factor)
diff --git a/AMDiS/src/Operator.h b/AMDiS/src/Operator.h
index 9942094d55623dee02b9242555428185e92cffd8..28cc15c31e02c65d2f72cb0e6c74203ed9d69c58 100644
--- a/AMDiS/src/Operator.h
+++ b/AMDiS/src/Operator.h
@@ -30,6 +30,7 @@
 #include "ElInfo.h"
 #include "AbstractFunction.h"
 #include "OpenMP.h"
+#include "SubAssembler.h"
 
 namespace AMDiS {
 
@@ -43,16 +44,6 @@ namespace AMDiS {
   class Quadrature;
   template<typename T> class DOFVectorBase;
 
-  /**
-   * \ingroup Assembler
-   * \brief
-   * Specifies the type of a FirstOrderTerm 
-   */
-  enum FirstOrderType {
-    GRD_PSI,
-    GRD_PHI
-  };
-
   // ============================================================================
   // ===== class OperatorTerm ===================================================
   // ============================================================================
@@ -3517,6 +3508,11 @@ namespace AMDiS {
 				  ElementMatrix *userMat, 
 				  double factor = 1.0);
 
+    virtual void getElementMatrix(const ElInfo *rowElInfo,
+				  const ElInfo *colElInfo,
+				  ElementMatrix *userMat,
+				  double factor = 1.0);
+
     /** \brief
      * Calculates the element vector for this ElInfo and adds it multiplied by
      * factor to userVec.
diff --git a/AMDiS/src/ProblemVec.cc b/AMDiS/src/ProblemVec.cc
index 3ce3acd3ae2d2747d1e6d6ada2be0f67990f4897..4fc8beb957b6873beac0a4c065e650773b9b2057 100644
--- a/AMDiS/src/ProblemVec.cc
+++ b/AMDiS/src/ProblemVec.cc
@@ -727,15 +727,21 @@ namespace AMDiS {
 	  matrix->getBoundaryManager()->initMatrix(matrix);
 
 	if (componentSpaces[i] == componentSpaces[j]) {
+	  //	  std::cout << "--- assembleOnOneMesh ---\n";
 	  assembleOnOneMesh(componentSpaces[i],
 			    assembleFlag,
 			    assembleMatrix ? matrix : NULL,
 			    (i == j) ? rhs_->getDOFVector(i) : NULL);
+	  //	  std::cout << "--- Finished ---\n";
+	  //	  std::cout << "--- Dim: " << matrix->getUsedSize() << "x" << matrix->getNumCols() << "---\n";
 	} else {
+	  //	  std::cout << "--- assembleOnDifMesh ---\n";
 	  assembleOnDifMeshes(componentSpaces[i], componentSpaces[j],
 			      assembleFlag,
 			      assembleMatrix ? matrix : NULL,
 			      (i == j) ? rhs_->getDOFVector(i) : NULL);	  
+	  //	  std::cout << "--- Finished ---\n";
+	  //	  std::cout << "--- Dim: " << matrix->getUsedSize() << "x" << matrix->getNumCols() << "---\n";
 	}
 
 	if (assembleMatrix && matrix->getBoundaryManager())
@@ -1007,8 +1013,13 @@ namespace AMDiS {
 	else
 	  std::cout << "Col = small\n";
 
+	if (useGetBound_) {
+	  basisFcts->getBound(rowElInfo, bound);
+	}
 
-	ERROR_EXIT("KOMMT GLEICH!\n");
+	if (matrix) {
+	  matrix->assemble(1.0, colElInfo, rowElInfo, bound);
+	}
       }
 
       if (useGetBound_) {
diff --git a/AMDiS/compositeFEM/src/ScalableQuadrature.cc b/AMDiS/src/ScalableQuadrature.cc
similarity index 100%
rename from AMDiS/compositeFEM/src/ScalableQuadrature.cc
rename to AMDiS/src/ScalableQuadrature.cc
diff --git a/AMDiS/compositeFEM/src/ScalableQuadrature.h b/AMDiS/src/ScalableQuadrature.h
similarity index 100%
rename from AMDiS/compositeFEM/src/ScalableQuadrature.h
rename to AMDiS/src/ScalableQuadrature.h
diff --git a/AMDiS/src/SecondOrderAssembler.cc b/AMDiS/src/SecondOrderAssembler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..538ef7ce962b11aca25931dd8bb44b5292f2ce15
--- /dev/null
+++ b/AMDiS/src/SecondOrderAssembler.cc
@@ -0,0 +1,318 @@
+#include <vector>
+#include "Assembler.h"
+#include "SecondOrderAssembler.h"
+#include "Operator.h"
+#include "QPsiPhi.h"
+#include "FiniteElemSpace.h"
+#include "Quadrature.h"
+#include "DOFVector.h"
+#include "ElementMatrix.h"
+#include "OpenMP.h"
+
+namespace AMDiS {
+
+  std::vector<SubAssembler*> SecondOrderAssembler::optimizedSubAssemblers; 
+  std::vector<SubAssembler*> SecondOrderAssembler::standardSubAssemblers;
+
+  SecondOrderAssembler::SecondOrderAssembler(Operator *op,
+					     Assembler *assembler,
+					     Quadrature *quad,
+					     bool optimized)
+    : SubAssembler(op, assembler, quad, 2, optimized)
+  {}
+
+  SecondOrderAssembler* 
+  SecondOrderAssembler::getSubAssembler(Operator* op,
+					Assembler *assembler,
+					Quadrature *quad,
+					bool optimized)
+  {
+    int myRank = omp_get_thread_num();
+
+    // check if a assembler is needed at all
+    if (op->secondOrder[myRank].size() == 0) {
+      return NULL;
+    }
+
+    SecondOrderAssembler *newAssembler;
+
+    std::vector<SubAssembler*> *subAssemblers =
+	optimized ?
+	&optimizedSubAssemblers :
+    &standardSubAssemblers;
+
+    std::vector<OperatorTerm*> opTerms  = op->zeroOrder[myRank];
+
+    sort(opTerms.begin(), opTerms.end());
+
+    // check if a new assembler is needed
+    for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
+      std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
+    
+      sort(assTerms.begin(), assTerms.end());
+
+      if ((opTerms == assTerms) && 
+	  ((*subAssemblers)[i]->getQuadrature() == quad)) {
+	
+	return dynamic_cast<SecondOrderAssembler*>((*subAssemblers)[i]);
+      }
+    }
+
+    // check if all terms are pw_const
+    bool pwConst = true;
+    for (int i = 0; i < static_cast<int>( op->secondOrder[myRank].size()); i++) {
+      if (!op->secondOrder[myRank][i]->isPWConst()) {
+	pwConst = false;
+	break;
+      }
+    }  
+
+    // create new assembler
+    if (!optimized) {
+      newAssembler = NEW Stand2(op, assembler, quad);
+    } else {
+      if (pwConst) {
+	newAssembler = NEW Pre2(op, assembler, quad);
+      } else {
+	newAssembler = NEW Quad2(op, assembler, quad);
+      }
+    }
+
+    subAssemblers->push_back(newAssembler);
+    return newAssembler;
+  }
+
+  Pre2::Pre2(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : SecondOrderAssembler(op, assembler, quad, true)
+  {
+    q11 = Q11PsiPhi::provideQ11PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
+				      owner->getColFESpace()->getBasisFcts(), 
+				      quadrature);
+    tmpLALt.resize(omp_get_max_threads());
+    for (int i = 0; i < omp_get_max_threads(); i++) {
+      tmpLALt[i] = NEW DimMat<double>*;
+      *(tmpLALt[i]) = NEW DimMat<double>(dim, NO_INIT);
+    }
+  }
+
+  Pre2::~Pre2()
+  {
+    for (int i = 0; i < static_cast<int>(tmpLALt.size()); i++) {
+      DELETE *(tmpLALt[i]);
+      DELETE tmpLALt[i];
+    }
+  }
+
+  void Pre2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    const int **nEntries;
+    const int *k, *l;
+    const double *values;
+
+    int myRank = omp_get_thread_num();
+    DimMat<double> **LALt = tmpLALt[myRank];
+    DimMat<double> &tmpMat = *LALt[0];
+    tmpMat.set(0.0);
+
+    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
+      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, 1, LALt);
+    }
+
+    tmpMat *= elInfo->getDet();
+    nEntries = q11->getNumberEntries();
+
+    if (symmetric) {
+      for (int i = 0; i < nRow; i++) {
+	k = q11->getKVec(i, i);
+	l = q11->getLVec(i, i);
+	values = q11->getValVec(i, i);
+	double val = 0.0;
+	for (int m = 0; m < nEntries[i][i]; m++) {
+	  val += values[m] * tmpMat[k[m]][l[m]];
+	}
+	(*mat)[i][i] += val;
+
+	for (int j = i + 1; j < nCol; j++) {
+	  k = q11->getKVec(i, j);
+	  l = q11->getLVec(i, j);
+	  values = q11->getValVec(i, j);
+	  val = 0.0;
+	  for (int m = 0; m < nEntries[i][j]; m++) {
+	    val += values[m] * tmpMat[k[m]][l[m]];
+	  }
+	  (*mat)[i][j] += val;
+	  (*mat)[j][i] += val;
+	}
+      }
+    } else {  /*  A not symmetric or psi != phi        */
+      for (int i = 0; i < nRow; i++) {
+	for (int j = 0; j < nCol; j++) {
+	  k = q11->getKVec(i, j);
+	  l = q11->getLVec(i, j);
+	  values = q11->getValVec(i, j);
+	  double val = 0.0;
+	  for (int m = 0; m < nEntries[i][j]; m++) {
+	    val += values[m] * tmpMat[k[m]][l[m]];
+	  }
+	  (*mat)[i][j] += val;
+	}
+      }
+    }
+  }
+
+  Quad2::Quad2(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : SecondOrderAssembler(op, assembler, quad, true)
+  {
+    tmpLALt.resize(omp_get_max_threads());
+  }
+
+  Quad2::~Quad2()
+  {
+    if (!firstCall) {
+      int nPoints = quadrature->getNumPoints();
+      for (int i = 0; i < static_cast<int>(tmpLALt.size()); i++) {
+	for (int j = 0; j < nPoints; j++) {
+	  DELETE tmpLALt[i][j];
+	}
+	DELETE [] tmpLALt[i];
+      }
+    }
+  }
+
+  void Quad2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    int nPoints = quadrature->getNumPoints();
+    int myRank = omp_get_thread_num();
+
+    if (firstCall) {
+      tmpLALt[myRank] = NEW DimMat<double>*[nPoints];
+      for (int j = 0; j < nPoints; j++) {
+	tmpLALt[myRank][j] = NEW DimMat<double>(dim, NO_INIT);
+      }
+    
+      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
+      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
+      basFcts = owner->getColFESpace()->getBasisFcts();
+      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI);
+      firstCall = false;
+    }
+
+    DimMat<double> **LALt = tmpLALt[myRank];
+    
+    for (int i = 0; i < nPoints; i++) {
+      LALt[i]->set(0.0);
+    }
+    
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, nPoints, LALt);
+    }
+        
+    VectorOfFixVecs< DimVec<double> > *grdPsi, *grdPhi;
+
+    if (symmetric) {
+      for (int iq = 0; iq < nPoints; iq++) {
+	(*LALt[iq]) *= elInfo->getDet();
+
+	grdPsi = psiFast->getGradient(iq);
+	grdPhi = phiFast->getGradient(iq);
+
+	for (int i = 0; i < nRow; i++) {
+	  (*mat)[i][i] += quadrature->getWeight(iq) * 
+	    xAy((*grdPsi)[i], (*LALt[iq]), (*grdPhi)[i]);
+
+	  for (int j = i + 1; j < nCol; j++) {
+	    double val = quadrature->getWeight(iq) * 
+	      xAy((*grdPsi)[i], (*LALt[iq]), (*grdPhi)[j]);
+	    (*mat)[i][j] += val;
+	    (*mat)[j][i] += val;
+	  }
+	}
+      }
+    } else {      /*  non symmetric assembling   */
+      for (int iq = 0; iq < nPoints; iq++) {
+	(*LALt[iq]) *= elInfo->getDet();
+
+	grdPsi = psiFast->getGradient(iq);
+	grdPhi = phiFast->getGradient(iq);
+
+	for (int i = 0; i < nRow; i++) {
+	  for (int j = 0; j < nCol; j++) {
+	    (*mat)[i][j] += quadrature->getWeight(iq) * 
+	      xAy((*grdPsi)[i], (*LALt[iq]), (*grdPhi)[j]);
+	  }
+	}
+      }
+    }
+  }
+
+  Stand2::Stand2(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : SecondOrderAssembler(op, assembler, quad, false)
+  {}
+
+  void Stand2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    DimVec<double> grdPsi(dim, NO_INIT);
+    VectorOfFixVecs<DimVec<double> > grdPhi(dim, nCol, NO_INIT);
+
+    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
+    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
+
+    int nPoints = quadrature->getNumPoints();
+
+    DimMat<double> **LALt = NEW DimMat<double>*[nPoints];
+    for (int iq = 0; iq < nPoints; iq++) {
+      LALt[iq] = NEW DimMat<double>(dim, NO_INIT);
+      LALt[iq]->set(0.0);
+    }
+
+    int myRank = omp_get_thread_num();
+
+    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
+      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, nPoints, LALt);
+    }
+
+    if (symmetric) {
+      for (int iq = 0; iq < nPoints; iq++) {
+	(*LALt[iq]) *= elInfo->getDet();
+
+	for (int i = 0; i < nCol; i++) {
+	  (*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
+	}
+
+	for (int i = 0; i < nRow; i++) {
+	  (*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
+	  (*mat)[i][i] += quadrature->getWeight(iq) * 
+	    (grdPsi * ((*LALt[iq]) * grdPhi[i]));
+
+	  for (int j = i + 1; j < nCol; j++) {
+	    double val = quadrature->getWeight(iq) * (grdPsi * ((*LALt[iq]) * grdPhi[j]));
+	    (*mat)[i][j] += val;
+	    (*mat)[j][i] += val;
+	  }
+	}
+      }
+    } else {      /*  non symmetric assembling   */
+      for (int iq = 0; iq < nPoints; iq++) {
+	(*LALt[iq]) *= elInfo->getDet();
+
+	for (int i = 0; i < nCol; i++) {
+	  (*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
+	}
+
+	for (int i = 0; i < nRow; i++) {
+	  (*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
+	  for (int j = 0; j < nCol; j++) {
+	    (*mat)[i][j] += quadrature->getWeight(iq) *
+	      (grdPsi * ((*LALt[iq]) * grdPhi[j]));
+	  }
+	}
+      }
+    }
+
+    for (int iq = 0; iq < nPoints; iq++) 
+      DELETE LALt[iq];
+
+    DELETE [] LALt;
+  }
+
+}
diff --git a/AMDiS/src/SecondOrderAssembler.h b/AMDiS/src/SecondOrderAssembler.h
new file mode 100644
index 0000000000000000000000000000000000000000..732ec3cecf66e7f73dcf1379ad7ca008d9dfa11a
--- /dev/null
+++ b/AMDiS/src/SecondOrderAssembler.h
@@ -0,0 +1,202 @@
+#ifndef AMDIS_SECOND_ORDER_ASSEMBLER_H
+#define AMDIS_SECOND_ORDER_ASSEMBLER_H
+
+#include "SubAssembler.h"
+
+namespace AMDiS {
+
+  class Q11PsiPhi;
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * SubAssembler for second order terms.
+   */
+  class SecondOrderAssembler : public SubAssembler
+  {
+  public:
+    MEMORY_MANAGED(SecondOrderAssembler);
+
+    /** \brief
+     * Creates and returns the SecondOrderAssembler for Operator op and
+     * the given assembler. If all terms are piecewise constant precalculated 
+     * integrals can be used while assembling and the returned 
+     * ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
+     * be returned.
+     */
+    static SecondOrderAssembler* getSubAssembler(Operator *op,
+						 Assembler *assembler,
+						 Quadrature *quadrat,
+						 bool optimized);
+
+    /** \brief
+     * Destructor.
+     */
+    virtual ~SecondOrderAssembler() {};
+
+  protected:
+    /** \brief
+     * Constructor.
+     */
+    SecondOrderAssembler(Operator *op,
+			 Assembler *assembler,
+			 Quadrature *quadrat,
+			 bool optimized);
+
+  protected:
+    /** \brief
+     * List of all yet created optimized second order assemblers.
+     */
+    static std::vector<SubAssembler*> optimizedSubAssemblers;
+
+    /** \brief
+     * List of all yet created standard second order assemblers.
+     */
+    static std::vector<SubAssembler*> standardSubAssemblers;
+  };
+
+  // ============================================================================
+  // ===== class Stand2 =========================================================
+  // ============================================================================
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * Standard second order assembler
+   */
+  class Stand2 : public SecondOrderAssembler 
+  {
+  public:
+    MEMORY_MANAGED(Stand2);
+
+    /** \brief
+     * Constructor.
+     */
+    Stand2(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
+      ERROR_EXIT("should not be called\n");
+    };
+  };
+
+  // ============================================================================
+  // ===== class Quad2 ==========================================================
+  // ============================================================================
+
+  /** \brief
+   * Second order assembler using fast quadratures.
+   */
+  class Quad2 : public SecondOrderAssembler 
+  {
+  public:
+    MEMORY_MANAGED(Quad2);
+
+    /** \brief
+     * Constructor.
+     */
+    Quad2(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    ~Quad2();
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector */*vec*/) {
+      ERROR_EXIT("should not be called\n");
+    };
+
+  protected:
+    /** \brief
+     * Thread safe temporary vector of DimMats for calculation in calculateElementMatrix().
+     */
+    std::vector< DimMat<double>** > tmpLALt;
+  };
+
+  // ============================================================================
+  // ===== class Pre2 ===========================================================
+  // ============================================================================
+
+  /** \brief
+   * Second order assembler using predefined integrals.
+   */
+  class Pre2 : public SecondOrderAssembler 
+  {
+  public:
+    MEMORY_MANAGED(Pre2);
+
+    /** \brief
+     * Constructor.
+     */
+    Pre2(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Destructor.
+     */
+    ~Pre2();
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *, ElementVector *) {
+      ERROR_EXIT("should not be called\n");
+    };
+
+  protected:
+    /** \brief
+     * Integral of the product of the derivative of psi and the derivative 
+     * of phi.
+     */
+    const Q11PsiPhi *q11;
+
+    /** \brief
+     * Thread safe temporary vector of DimMats for calculation in calculateElementMatrix().
+     */
+    std::vector< DimMat<double>** > tmpLALt;
+
+    friend class SecondOrderAssembler;
+  };
+
+}
+
+#endif // AMDIS_SECOND_ORDER_ASSEMBLER_H
diff --git a/AMDiS/src/SubAssembler.cc b/AMDiS/src/SubAssembler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0414f82c29952ff8cd5ad90565381a2bd02d8bb8
--- /dev/null
+++ b/AMDiS/src/SubAssembler.cc
@@ -0,0 +1,255 @@
+#include "SubAssembler.h"
+#include "Assembler.h"
+#include "FiniteElemSpace.h"
+#include "Operator.h"
+#include "BasisFunction.h"
+#include "OpenMP.h"
+#include "Mesh.h"
+#include "Quadrature.h"
+#include "DOFVector.h"
+
+namespace AMDiS {
+
+  SubAssembler::SubAssembler(Operator *op,
+			     Assembler *assembler,
+			     Quadrature *quadrat,
+			     int order, 
+			     bool optimized,
+			     FirstOrderType type) 
+    : nRow(0),
+      nCol(0),
+      coordsAtQPs(NULL),
+      coordsNumAllocated(0),
+      quadrature(quadrat),
+      psiFast(NULL),
+      phiFast(NULL),
+      owner(assembler),
+      symmetric(true),
+      opt(optimized),
+      firstCall(true)
+  {
+    const BasisFunction *psi = assembler->rowFESpace->getBasisFcts();
+    const BasisFunction *phi = assembler->colFESpace->getBasisFcts();
+
+    nRow = psi->getNumber();
+    nCol = phi->getNumber();
+
+    int maxThreads = omp_get_max_threads();
+    terms.resize(maxThreads);
+
+    switch (order) {
+    case 0:
+      terms = op->zeroOrder;
+      break;
+    case 1:
+      if (type == GRD_PHI)
+	terms = op->firstOrderGrdPhi;
+      else 
+	terms = op->firstOrderGrdPsi;
+      break;
+    case 2:
+      terms = op->secondOrder;
+      break;
+    }
+
+    // check if all terms are symmetric
+    symmetric = true;
+    for (int i = 0; i < static_cast<int>(terms[0].size()); i++) {
+      if (!(terms[0][i])->isSymmetric()) {
+	symmetric = false;
+	break;
+      }
+    }  
+
+    dim = assembler->rowFESpace->getMesh()->getDim();
+  }
+
+  FastQuadrature *SubAssembler::updateFastQuadrature(FastQuadrature *quadFast,
+						     const BasisFunction *psi,
+						     Flag updateFlag)
+  {
+    if (!quadFast) {
+      quadFast = FastQuadrature::provideFastQuadrature(psi,
+						       *quadrature,
+						       updateFlag);
+    } else {
+      if (!quadFast->initialized(updateFlag))
+	quadFast->init(updateFlag);
+    }
+
+    return quadFast;
+  }
+
+  void SubAssembler::initElement(const ElInfo* elInfo,
+				 Quadrature *quad)
+  {
+    // set corrdsAtQPs invalid
+    coordsValid = false;
+
+    // set values at QPs invalid
+    std::map<const DOFVectorBase<double>*, ValuesAtQPs*>::iterator it1;
+    for (it1 = valuesAtQPs.begin(); it1 != valuesAtQPs.end(); ++it1) {
+      ((*it1).second)->valid = false;
+    }
+
+    // set gradients at QPs invalid
+    std::map<const DOFVectorBase<double>*, GradientsAtQPs*>::iterator it2;
+    for (it2 = gradientsAtQPs.begin(); it2 != gradientsAtQPs.end(); ++it2) {
+      ((*it2).second)->valid = false;
+    }
+
+    int myRank = omp_get_thread_num();
+    // calls initElement of each term
+    std::vector<OperatorTerm*>::iterator it;
+    for (it = terms[myRank].begin(); it != terms[myRank].end(); ++it) {
+      (*it)->initElement(elInfo, this, quad);
+    }
+  }
+
+  WorldVector<double>* SubAssembler::getCoordsAtQPs(const ElInfo* elInfo,
+						    Quadrature *quad)
+  {
+    Quadrature *localQuad = quad ? quad : quadrature;
+  
+    const int nPoints = localQuad->getNumPoints();
+
+    // already calculated for this element ?
+    if (coordsValid) {
+      return coordsAtQPs;
+    }
+   
+    if (coordsAtQPs)  {
+      if (coordsNumAllocated != nPoints) {
+	DELETE [] coordsAtQPs;
+        coordsAtQPs = NEW WorldVector<double>[nPoints];
+	coordsNumAllocated = nPoints;
+      }
+    } else {
+      coordsAtQPs = NEW WorldVector<double>[nPoints];
+      coordsNumAllocated = nPoints;
+    }
+
+    // set new values
+    WorldVector<double>* k = &(coordsAtQPs[0]);
+    for (int l = 0; k < &(coordsAtQPs[nPoints]); ++k, ++l) {
+      elInfo->coordToWorld(localQuad->getLambda(l), k);
+    }
+
+    // mark values as valid
+    coordsValid = true;
+
+    return coordsAtQPs;
+  }
+
+  double* SubAssembler::getVectorAtQPs(DOFVectorBase<double>* dv, 
+				       const ElInfo* elInfo,
+				       Quadrature *quad)
+  {
+    FUNCNAME("SubAssembler::getVectorAtQPs()");
+
+    const DOFVectorBase<double>* vec = dv ? dv : owner->operat->getUhOld();
+
+    TEST_EXIT_DBG(vec)("no dof vector!\n");
+
+    if (valuesAtQPs[vec] && valuesAtQPs[vec]->valid) 
+      return valuesAtQPs[vec]->values.getValArray();
+
+    Quadrature *localQuad = quad ? quad : quadrature;
+
+    if (!valuesAtQPs[vec]) {
+      valuesAtQPs[vec] = new ValuesAtQPs;
+    }
+    valuesAtQPs[vec]->values.resize(localQuad->getNumPoints());
+
+    double *values = valuesAtQPs[vec]->values.getValArray();
+
+    bool sameFESpaces = 
+      (vec->getFESpace() == owner->rowFESpace) || 
+      (vec->getFESpace() == owner->colFESpace);
+
+    if (opt && !quad && sameFESpaces) {
+      const BasisFunction *psi = owner->rowFESpace->getBasisFcts();
+      const BasisFunction *phi = owner->colFESpace->getBasisFcts();
+      if (vec->getFESpace()->getBasisFcts() == psi) {
+	psiFast = updateFastQuadrature(psiFast, psi, INIT_PHI);
+      } else if(vec->getFESpace()->getBasisFcts() == phi) {
+	phiFast = updateFastQuadrature(phiFast, phi, INIT_PHI);
+      }
+    }
+
+    // calculate new values
+    const BasisFunction *basFcts = vec->getFESpace()->getBasisFcts();
+
+    if (opt && !quad && sameFESpaces) {
+      if (psiFast->getBasisFunctions() == basFcts) {
+	vec->getVecAtQPs(elInfo, NULL, psiFast, values);
+      } else if(phiFast->getBasisFunctions() == basFcts) {
+	vec->getVecAtQPs(elInfo, NULL, phiFast, values);
+      } else {
+	vec->getVecAtQPs(elInfo, localQuad, NULL, values);
+      }
+    } else {
+      vec->getVecAtQPs(elInfo, localQuad, NULL, values);
+    }
+  
+    valuesAtQPs[vec]->valid = true;
+
+    return values;
+  }
+
+  WorldVector<double>* SubAssembler::getGradientsAtQPs(DOFVectorBase<double>* dv, 
+						       const ElInfo* elInfo,
+						       Quadrature *quad)
+  {
+    FUNCNAME("SubAssembler::getGradientsAtQPs()");
+
+    const DOFVectorBase<double>* vec = dv ? dv : owner->operat->getUhOld();
+
+    TEST_EXIT_DBG(vec)("no dof vector!\n");
+
+    if (gradientsAtQPs[vec] && gradientsAtQPs[vec]->valid) 
+      return gradientsAtQPs[vec]->values.getValArray();
+
+    Quadrature *localQuad = quad ? quad : quadrature;
+
+    if (!gradientsAtQPs[vec]) {
+      gradientsAtQPs[vec] = new GradientsAtQPs;
+    } 
+    gradientsAtQPs[vec]->values.resize(localQuad->getNumPoints());
+
+    WorldVector<double> *values = gradientsAtQPs[vec]->values.getValArray();
+
+    const BasisFunction *psi = owner->rowFESpace->getBasisFcts();
+    const BasisFunction *phi = owner->colFESpace->getBasisFcts();
+
+    bool sameFESpaces = 
+      (vec->getFESpace() == owner->rowFESpace) || 
+      (vec->getFESpace() == owner->colFESpace);
+
+    if (opt && !quad && sameFESpaces) {
+      if (vec->getFESpace()->getBasisFcts() == psi) {
+	psiFast = updateFastQuadrature(psiFast, psi, INIT_GRD_PHI);
+      } else if(vec->getFESpace()->getBasisFcts() == phi) {
+	phiFast = updateFastQuadrature(phiFast, phi, INIT_GRD_PHI);
+      }
+    }
+  
+    // calculate new values
+    const BasisFunction *basFcts = vec->getFESpace()->getBasisFcts();
+
+    if (opt && !quad && sameFESpaces) {
+      if (psiFast->getBasisFunctions() == basFcts) {
+	vec->getGrdAtQPs(elInfo, NULL, psiFast, values);
+      } else {
+	vec->getGrdAtQPs(elInfo, NULL, phiFast, values);
+      }
+    } else {
+      vec->getGrdAtQPs(elInfo, localQuad, NULL, values);
+    }
+   
+    gradientsAtQPs[vec]->valid = true;
+
+    return values;
+  }
+
+}
diff --git a/AMDiS/src/SubAssembler.h b/AMDiS/src/SubAssembler.h
new file mode 100644
index 0000000000000000000000000000000000000000..a74420dbb7467aaa8b4e7b8ed0b0c32fadb2ccf9
--- /dev/null
+++ b/AMDiS/src/SubAssembler.h
@@ -0,0 +1,261 @@
+#ifndef AMDIS_SUB_ASSEMBLER_H
+#define AMDIS_SUB_ASSEMBLER_H
+
+#include <map>
+#include <vector>
+#include "FixVec.h"
+#include "Flag.h"
+#include "MemoryManager.h"
+#include "OpenMP.h"
+
+namespace AMDiS {
+
+  class Assembler;
+  class Quadrature;
+  class FastQuadrature;
+  class Operator;
+  class OperatorTerm;
+  class BasisFunction;
+  class FiniteElemSpace;
+  class ElInfo;
+  class ElementMatrix;
+  class ElementVector;
+
+  template<typename T> class DOFVectorBase;
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * Base class for SecondOrderAssembler, FirstOrderAssembler, 
+   * ZeroOrderAssembler. The task of a SubAssembler is to assemble a list of 
+   * terms of a special order and add their contributions to a DOFMatrix or a 
+   * DOFVector. An Assembler can consist of up to four SubAssemblers: one 
+   * SecondOrderAssembler for second order terms, one ZeroOrderAssembler for 
+   * terms of order zero, and two FirstOrderAssemblers. One for terms with
+   * derivatives of the basis functions connected to to row DOFs and one for 
+   * those connected to the column DOFs.
+   */
+  class SubAssembler
+  {
+  public:
+    MEMORY_MANAGED(SubAssembler);
+
+    /** \brief
+     * Creates a SubAssembler belonging to assembler for the terms of given 
+     * order of Operator op. If order is equal to one, type spezifies what kind 
+     * of FirstOrderType are to assemble. During construction of a SubAssembler
+     * the needs and properties of the terms are considered.
+     */
+    SubAssembler(Operator *op,
+		 Assembler *assembler,
+		 Quadrature *quadrat,
+		 int order, 
+		 bool optimized,
+		 FirstOrderType type = GRD_PHI);
+
+    /** \brief
+     * Destructor
+     */
+    virtual ~SubAssembler() {};
+
+    /** \brief
+     * Calculates the element matrix for elInfo and adds it to mat. Memory
+     * for mat must be provided by the caller.
+     */
+    virtual void calculateElementMatrix(const ElInfo *elInfo,
+					ElementMatrix *mat) = 0;
+
+    virtual void calculateElementMatrix(const ElInfo *rowElInfo,
+					const ElInfo *colElInfo,
+					ElementMatrix *mat) = 0;
+
+    /** \brief
+     * Calculates the element vector for elInfo and adds it to vec. Memory
+     * for vec must be provided by the caller.
+     */
+    virtual void calculateElementVector(const ElInfo *elInfo,
+					ElementVector *vec) = 0;
+
+    /** \brief
+     * Returns \ref terms
+     */
+    inline std::vector<OperatorTerm*> *getTerms() { 
+      return &terms[omp_get_thread_num()]; 
+    };
+
+    /** \brief
+     * Returns \ref quadrature.
+     */ 
+    inline Quadrature *getQuadrature() {
+      return quadrature;
+    };
+
+    /** \brief
+     * Sets \ref quadrature to q.
+     */
+    inline void setQuadrature(Quadrature* q) {
+      quadrature = q;
+    };
+  
+    /** \brief
+     * Returns a vector with the world coordinates of the quadrature points
+     * of \ref quadrature on the element of elInfo. 
+     * Used by \ref OperatorTerm::initElement().
+     */
+    WorldVector<double>* getCoordsAtQPs(const ElInfo* elInfo, 
+					Quadrature *quad = NULL);
+
+    /** \brief
+     * DOFVector dv evaluated at quadrature points.
+     * Used by \ref OperatorTerm::initElement().
+     */
+    double* getVectorAtQPs(DOFVectorBase<double>* dv, const ElInfo* elInfo,
+			   Quadrature *quad = NULL);
+
+    /** \brief
+     * Gradients of DOFVector dv evaluated at quadrature points.
+     * Used by \ref OperatorTerm::initElement().
+     */
+    WorldVector<double>* getGradientsAtQPs(DOFVectorBase<double>* dv, 
+					   const ElInfo* elInfo,
+					   Quadrature *quad = NULL);
+
+    /** \brief
+     * Called once for each ElInfo when \ref calculateElementMatrix() or
+     * \ref calculateElementVector() is called for the first time for this
+     * Element.
+     */
+    virtual void initElement(const ElInfo *elInfo,
+			     Quadrature *quad = NULL);
+
+    /** \brief
+     * Returns \ref psiFast.
+     */
+    const FastQuadrature *getPsiFast() const { 
+      return psiFast; 
+    };
+
+    /** \brief
+     * Returns \ref phiFast.
+     */
+    const FastQuadrature *getPhiFast() const { 
+      return phiFast; 
+    };
+
+  protected:
+    /** \brief
+     * Updates \ref psiFast and \ref phiFast.
+     */
+    FastQuadrature *updateFastQuadrature(FastQuadrature *quadFast,
+					 const BasisFunction *psi,
+					 Flag updateFlag);
+  
+  protected:
+    /** \brief
+     * Problem dimension
+     */
+    int dim;
+
+    /** \brief
+     * Number of rows of the element matrix and length of the element
+     * vector. Is equal to the number of row basis functions
+     */
+    int nRow;
+
+    /** \brief
+     * Number of columns of the element matrix. Is equal to the number
+     * of column basis functions
+     */
+    int nCol;
+
+    /** \brief
+     * Used for \ref getVectorAtQPs().
+     */
+    class ValuesAtQPs {
+    public:
+      Vector<double> values;
+      bool valid;
+    };
+
+    /** \brief
+     * Used for \ref getGradientsAtQPs().
+     */
+    class GradientsAtQPs {
+    public:
+      Vector<WorldVector<double> > values;
+      bool valid;
+    };
+
+    /** \brief
+     * Used for \ref getVectorAtQPs().
+     */
+    std::map<const DOFVectorBase<double>*, ValuesAtQPs*> valuesAtQPs;
+
+    /** \brief
+     * Used for \ref getGradientsAtQPs().
+     */
+    std::map<const DOFVectorBase<double>*, GradientsAtQPs*> gradientsAtQPs;
+
+    /** \brief
+     * Set and updated by \ref initElement() for each ElInfo. 
+     * coordsAtQPs[i] points to the coordinates of the i-th quadrature point.
+     */
+    WorldVector<double> *coordsAtQPs;
+
+    /** \brief
+     * Used for \ref getCoordsAtQPs().
+     */
+    bool coordsValid;
+
+    /** \brief
+     * Used for \ref getCoordsAtQP(). Stores the number of allocated WorldVectors.
+     */
+    int coordsNumAllocated;
+
+    /** \brief
+     * Needed Quadrature. Constructed in the constructor of SubAssembler
+     */
+    Quadrature *quadrature;
+
+    /** \brief
+     * FastQuadrature for row basis functions
+     */
+    FastQuadrature *psiFast;
+
+    /** \brief
+     * FastQuadrature for column basis functions
+     */
+    FastQuadrature *phiFast;
+
+    /** \brief
+     * Corresponding Assembler
+     */
+    Assembler* owner;
+
+    /** \brief
+     * Flag that specifies whether the element matrix is symmetric.
+     */
+    bool symmetric;
+
+    /** \brief
+     * List of all terms with a contribution to this SubAssembler
+     */
+    std::vector< std::vector<OperatorTerm*> > terms;
+
+    /** \brief
+     *
+     */
+    bool opt;
+
+    /** \brief
+     *
+     */
+    bool firstCall;
+
+    friend class Assembler;
+  };
+
+}
+
+#endif // AMDSIS_SUB_ASSEMBLER_H
diff --git a/AMDiS/compositeFEM/src/SubElInfo.cc b/AMDiS/src/SubElInfo.cc
similarity index 92%
rename from AMDiS/compositeFEM/src/SubElInfo.cc
rename to AMDiS/src/SubElInfo.cc
index d0056c3fe3df8607cdd97d93702d7e83cc1200a9..a3fead77baf13a61ad662458dc6e8be1e0ee9ad9 100644
--- a/AMDiS/compositeFEM/src/SubElInfo.cc
+++ b/AMDiS/src/SubElInfo.cc
@@ -18,7 +18,7 @@ namespace AMDiS {
 
     FixVec<WorldVector<double>, VERTEX> worldCoords(dim, NO_INIT);
 
-    lambda = NEW VectorOfFixVecs<DimVec<double> >(dim, dim+1, NO_INIT);
+    lambda = NEW VectorOfFixVecs<DimVec<double> >(dim, dim + 1, NO_INIT);
     for (int i = 0; i <= dim; i++) {
       (*lambda)[i] = (*lambda_)[i];
     }
diff --git a/AMDiS/compositeFEM/src/SubElInfo.h b/AMDiS/src/SubElInfo.h
similarity index 94%
rename from AMDiS/compositeFEM/src/SubElInfo.h
rename to AMDiS/src/SubElInfo.h
index dd9706e49dc897e095b84256b6fe481b59be015d..f50723f929e3e3d636d4dfd6cc1cee36ca3fb07e 100644
--- a/AMDiS/compositeFEM/src/SubElInfo.h
+++ b/AMDiS/src/SubElInfo.h
@@ -33,8 +33,8 @@ namespace AMDiS {
     /** 
      * Constructor
      */
-    SubElInfo(VectorOfFixVecs<DimVec<double> > *lambda_, 
-	      const ElInfo *elInfo_); 
+    SubElInfo(VectorOfFixVecs<DimVec<double> > *lambda, 
+	      const ElInfo *elInfo); 
 
     /**
      * Destructor.
@@ -47,7 +47,7 @@ namespace AMDiS {
      * Get b-th coordinate of the a-th vertex of subelement (barycentric
      * coordinates). 
      */
-    inline const double getLambda(int a,int b) const {
+    inline const double getLambda(int a, int b) const {
       if (lambda) 
 	return (*lambda)[a][b]; 
       else 
diff --git a/AMDiS/src/ZeroOrderAssembler.cc b/AMDiS/src/ZeroOrderAssembler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e1b98f30c9f683a12a4d179fb217876174b21232
--- /dev/null
+++ b/AMDiS/src/ZeroOrderAssembler.cc
@@ -0,0 +1,385 @@
+#include <vector>
+#include "Assembler.h"
+#include "ZeroOrderAssembler.h"
+#include "Operator.h"
+#include "QPsiPhi.h"
+#include "FiniteElemSpace.h"
+#include "Quadrature.h"
+#include "DOFVector.h"
+#include "ElementMatrix.h"
+#include "OpenMP.h"
+#include "SubElInfo.h"
+#include "ScalableQuadrature.h"
+
+namespace AMDiS {
+
+  std::vector<SubAssembler*> ZeroOrderAssembler::optimizedSubAssemblers;
+  std::vector<SubAssembler*> ZeroOrderAssembler::standardSubAssemblers;
+
+  ZeroOrderAssembler::ZeroOrderAssembler(Operator *op,
+					 Assembler *assembler,
+					 Quadrature *quad,
+					 bool optimized)
+    : SubAssembler(op, assembler, quad, 0, optimized)
+  {}
+
+  ZeroOrderAssembler* ZeroOrderAssembler::getSubAssembler(Operator* op,
+							  Assembler *assembler,
+							  Quadrature *quad,
+							  bool optimized)
+  {
+    // check if a assembler is needed at all
+    if (op->zeroOrder.size() == 0) {
+      return NULL;
+    }
+
+    ZeroOrderAssembler *newAssembler;
+
+    std::vector<SubAssembler*> *subAssemblers =
+	optimized ?
+	&optimizedSubAssemblers :
+    &standardSubAssemblers;
+
+    int myRank = omp_get_thread_num();
+    std::vector<OperatorTerm*> opTerms  = op->zeroOrder[myRank];
+
+    sort(opTerms.begin(), opTerms.end());
+
+    // check if a new assembler is needed
+    if (quad) {
+      for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
+	std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
+
+	sort(assTerms.begin(), assTerms.end());
+
+	if ((opTerms == assTerms) && 
+	    ((*subAssemblers)[i]->getQuadrature() == quad)) {
+	
+	  return dynamic_cast<ZeroOrderAssembler*>((*subAssemblers)[i]);
+	}
+      }
+    }
+ 
+    // check if all terms are pw_const
+    bool pwConst = true;
+    for (int i = 0; i < static_cast<int>( op->zeroOrder[myRank].size()); i++) {
+      if (!op->zeroOrder[myRank][i]->isPWConst()) {
+	pwConst = false;
+	break;
+      }
+    }  
+
+    // create new assembler
+    if (!optimized) {
+      newAssembler = NEW StandardZOA(op, assembler, quad);
+    } else {
+      if (pwConst) {
+	newAssembler = NEW PrecalcZOA(op, assembler, quad);
+      } else {
+	newAssembler = NEW FastQuadZOA(op, assembler, quad);
+      }
+    }
+
+    subAssemblers->push_back(newAssembler);
+    return newAssembler;
+  }
+
+  StandardZOA::StandardZOA(Operator *op, Assembler *assembler, Quadrature *quad)
+    : ZeroOrderAssembler(op, assembler, quad, false)
+  {
+  }
+
+  void StandardZOA::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
+    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
+
+    double *phival = GET_MEMORY(double, nCol);
+    int nPoints = quadrature->getNumPoints();
+    double *c = GET_MEMORY(double, nPoints);
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      c[iq] = 0.0;
+    }
+
+    int myRank = omp_get_thread_num();
+    std::vector<OperatorTerm*>::iterator termIt;
+    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
+      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
+    }
+      
+    if (symmetric) {
+      for (int iq = 0; iq < nPoints; iq++) {
+	c[iq] *= elInfo->getDet();
+
+	// calculate phi at QPs only once!
+	for (int i = 0; i < nCol; i++) {
+	  phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
+	}
+
+	for (int i = 0; i < nRow; i++) {
+	  double psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
+	  (*mat)[i][i] += quadrature->getWeight(iq) * c[iq] * psival * phival[i];
+	  for (int j = i + 1; j < nCol; j++) {
+	    double val = quadrature->getWeight(iq) * c[iq] * psival * phival[j];
+	    (*mat)[i][j] += val;
+	    (*mat)[j][i] += val;
+	  }
+	}
+      }
+    } else {      //  non symmetric assembling 
+      for (int iq = 0; iq < nPoints; iq++) {
+	c[iq] *= elInfo->getDet();
+
+	// calculate phi at QPs only once!
+	for (int i = 0; i < nCol; i++) {
+	  phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
+	}
+
+	for (int i = 0; i < nRow; i++) {
+	  double psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
+	  for (int j = 0; j < nCol; j++) {
+	    (*mat)[i][j] += quadrature->getWeight(iq) * c[iq] * psival * phival[j];
+	  }
+	}
+      }
+    }
+
+    FREE_MEMORY(phival, double, nCol);
+    FREE_MEMORY(c, double, nPoints);
+  }
+
+  void StandardZOA::calculateElementMatrix(const ElInfo *rowElInfo,
+					   const ElInfo *colElInfo,
+					   ElementMatrix *mat) 
+  {
+    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
+    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();
+
+    int nPoints = quadrature->getNumPoints();
+    double *c = GET_MEMORY(double, nPoints);
+
+    int myRank = omp_get_thread_num();
+    std::vector<OperatorTerm*>::iterator termIt;
+    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
+      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(rowElInfo, nPoints, c);
+    }
+
+    ScalableQuadrature *scalQuadrature = NEW ScalableQuadrature(quadrature);
+    //    SubElInfo *subElInfo = NEW ScalElInfo(rowElInfo);
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      c[iq] *= rowElInfo->getDet();
+
+	// calculate phi at QPs only once!
+	for (int i = 0; i < nCol; i++) {
+	  //	  phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
+	}
+
+    }
+
+    DELETE scalQuadrature;
+    //    DELETE subElInfo;
+
+    ERROR_EXIT("SO, HIER GEHTS WEITER\n");
+  }
+
+
+  void StandardZOA::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
+  {
+    int nPoints = quadrature->getNumPoints();
+
+    double *c = GET_MEMORY(double, nPoints);
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      c[iq] = 0.0;
+    }
+
+    int myRank = omp_get_thread_num();
+    std::vector<OperatorTerm*>::iterator termIt;
+    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
+      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
+    }
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      c[iq] *= elInfo->getDet();
+
+      for (int i = 0; i < nRow; i++) {
+	double psi = (*(owner->getRowFESpace()->getBasisFcts()->getPhi(i)))
+	  (quadrature->getLambda(iq));
+	(*vec)[i] += quadrature->getWeight(iq) * c[iq] * psi;
+      }
+    }
+    
+    FREE_MEMORY(c, double, nPoints);
+  }
+
+  FastQuadZOA::FastQuadZOA(Operator *op, Assembler *assembler, Quadrature *quad)
+    : ZeroOrderAssembler(op, assembler, quad, true)
+  {
+    cPtrs.resize(omp_get_max_threads());
+  }
+
+  FastQuadZOA::~FastQuadZOA()
+  {
+    for (int i = 0; i < omp_get_max_threads(); i++) {
+      FREE_MEMORY(cPtrs[i], double, quadrature->getNumPoints());
+    }
+  }
+
+  void FastQuadZOA::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    int nPoints = quadrature->getNumPoints();
+    int myRank = omp_get_thread_num();
+
+    if (firstCall) {
+      cPtrs[myRank] = GET_MEMORY(double, nPoints);
+      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
+      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
+      basFcts = owner->getColFESpace()->getBasisFcts();
+      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
+      firstCall = false;
+    }
+
+    double *c = cPtrs[myRank];
+    for (int iq = 0; iq < nPoints; iq++) {
+      c[iq] = 0.0;
+    }
+
+    std::vector<OperatorTerm*>::iterator termIt;
+    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
+      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
+    }
+
+    if (symmetric) {
+      for (int iq = 0; iq < nPoints; iq++) {
+	c[iq] *= elInfo->getDet();
+
+	const double *psi = psiFast->getPhi(iq);
+	const double *phi = phiFast->getPhi(iq);
+	for (int i = 0; i < nRow; i++) {
+	  (*mat)[i][i] += quadrature->getWeight(iq) * c[iq] * psi[i] * phi[i];
+	  for (int j = i + 1; j < nCol; j++) {
+	    double val = quadrature->getWeight(iq) * c[iq] * psi[i] * phi[j];
+	    (*mat)[i][j] += val;
+	    (*mat)[j][i] += val;
+	  }
+	}
+      }
+    } else {      /*  non symmetric assembling   */
+      for (int iq = 0; iq < nPoints; iq++) {
+	c[iq] *= elInfo->getDet();
+
+	const double *psi = psiFast->getPhi(iq);
+	const double *phi = phiFast->getPhi(iq);
+	for (int i = 0; i < nRow; i++) {
+	  for (int j = 0; j < nCol; j++) {
+	    (*mat)[i][j] += quadrature->getWeight(iq) * c[iq] * psi[i] * phi[j];
+	  }
+	}
+      }
+    }
+  }
+
+  void FastQuadZOA::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
+  {
+    if (firstCall) {
+      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
+      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
+      basFcts = owner->getColFESpace()->getBasisFcts();
+      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
+      firstCall = false;
+    }
+
+    int nPoints = quadrature->getNumPoints();
+    double *c = GET_MEMORY(double, nPoints);
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      c[iq] = 0.0;
+    }
+
+    int myRank = omp_get_thread_num();
+    std::vector<OperatorTerm*>::iterator termIt;
+    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
+      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, nPoints, c);
+    }
+
+    for (int iq = 0; iq < nPoints; iq++) {
+      c[iq] *= elInfo->getDet();
+
+      const double *psi = psiFast->getPhi(iq);
+      for (int i = 0; i < nRow; i++) {
+	(*vec)[i] += quadrature->getWeight(iq) * c[iq] * psi[i];
+      }
+    }
+    FREE_MEMORY(c, double, nPoints);
+  }
+
+  PrecalcZOA::PrecalcZOA(Operator *op, Assembler *assembler, Quadrature *quad) 
+    : ZeroOrderAssembler(op, assembler, quad, true)
+  {
+  }
+
+  void PrecalcZOA::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
+  {
+    if (firstCall) {
+      q00 = Q00PsiPhi::provideQ00PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
+					owner->getColFESpace()->getBasisFcts(), 
+					quadrature);
+      q0 = Q0Psi::provideQ0Psi(owner->getRowFESpace()->getBasisFcts(),
+			       quadrature);
+      firstCall = false;
+    }
+
+    double c = 0.0;
+    int myRank = omp_get_thread_num();
+    int size = static_cast<int>(terms[myRank].size());
+
+    for (int i = 0; i < size; i++) {
+      (static_cast<ZeroOrderTerm*>((terms[myRank][i])))->getC(elInfo, 1, &c);
+    }
+
+    c *= elInfo->getDet();
+
+    if (symmetric) {
+      for (int i = 0; i < nRow; i++) {
+	(*mat)[i][i] += c * q00->getValue(i,i);
+	for (int j = i + 1; j < nCol; j++) {
+	  double val = c * q00->getValue(i, j);
+	  (*mat)[i][j] += val;
+	  (*mat)[j][i] += val;
+	}
+      }
+    } else {
+      for (int i = 0; i < nRow; i++)
+	for (int j = 0; j < nCol; j++)
+	  (*mat)[i][j] += c * q00->getValue(i, j);
+    }
+  }
+
+  void PrecalcZOA::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
+  {
+    if (firstCall) {
+      q00 = Q00PsiPhi::provideQ00PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
+					owner->getColFESpace()->getBasisFcts(), 
+					quadrature);
+      q0 = Q0Psi::provideQ0Psi(owner->getRowFESpace()->getBasisFcts(),
+			       quadrature);
+      firstCall = false;
+    }
+
+    std::vector<OperatorTerm*>::iterator termIt;
+
+    int myRank = omp_get_thread_num();
+    double c = 0.0;
+    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
+      (static_cast<ZeroOrderTerm*>( *termIt))->getC(elInfo, 1, &c);
+    }
+
+    c *= elInfo->getDet();
+
+    for (int i = 0; i < nRow; i++)
+      (*vec)[i] += c * q0->getValue(i);
+  }
+
+}
diff --git a/AMDiS/src/ZeroOrderAssembler.h b/AMDiS/src/ZeroOrderAssembler.h
new file mode 100644
index 0000000000000000000000000000000000000000..9e9d293064cdecf94c83fea098ed3211a2bd9dc1
--- /dev/null
+++ b/AMDiS/src/ZeroOrderAssembler.h
@@ -0,0 +1,184 @@
+#ifndef AMDIS_ZERO_ORDER_ASSEMBLER_H
+#define AMDIS_ZERO_ORDER_ASSEMBLER_H
+
+#include <vector>
+#include "SubAssembler.h"
+
+namespace AMDiS {
+
+  class Q00PsiPhi;
+  class Q0Psi;
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * SubAssembler for zero order terms.
+   */
+  class ZeroOrderAssembler : public SubAssembler
+  {
+  public:
+    MEMORY_MANAGED(ZeroOrderAssembler);
+
+    /** \brief
+     * Creates and returns the ZeroOrderAssembler for Operator op and
+     * the given assembler. If all terms are piecewise constant precalculated 
+     * integrals can be used while assembling and the returned 
+     * ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
+     * be returned.
+     */
+    static ZeroOrderAssembler* getSubAssembler(Operator *op,
+					       Assembler *assembler,
+					       Quadrature *quadrat,
+					       bool optimized);
+
+
+    /** \brief
+     * Destructor.
+     */
+    virtual ~ZeroOrderAssembler() {};
+
+  protected:
+    /** \brief
+     * Constructor.
+     */
+    ZeroOrderAssembler(Operator *op, 
+		       Assembler *assembler, 
+		       Quadrature *quadrat,
+		       bool optimized);
+
+  protected:
+    /** \brief
+     * List of all yet created optimized SubAssembler objects.
+     */
+    static std::vector<SubAssembler*> optimizedSubAssemblers;
+
+    /** \brief
+     * List of all yet created standard SubAssembler objects.
+     */ 
+    static std::vector<SubAssembler*> standardSubAssemblers;
+  };
+
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * Standard zero order assembler.
+   */
+  class StandardZOA :  public ZeroOrderAssembler 
+  {
+  public:
+    MEMORY_MANAGED(StandardZOA);
+
+    /** \brief
+     * Constructor.
+     */
+    StandardZOA(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *elInfo, ElementVector *vec);
+  };
+
+
+  /** 
+   * \ingroup Assembler
+   *
+   * \brief
+   * Zero order assembler using fast quadratures.
+   */
+  class FastQuadZOA :  public ZeroOrderAssembler
+  {
+  public:
+    MEMORY_MANAGED(FastQuadZOA);
+
+    /** \brief
+     * Constructor.
+     */
+    FastQuadZOA(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    ~FastQuadZOA();
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *elInfo, ElementVector *vec);
+
+  protected:
+    std::vector<double*> cPtrs;
+  };
+
+
+  /**
+   * \ingroup Assembler
+   * 
+   * \brief
+   * Zero order assembler using precaculated integrals.
+   */
+  class PrecalcZOA : public ZeroOrderAssembler
+  {
+  public:
+    MEMORY_MANAGED(PrecalcZOA);
+
+    /** \brief
+     * Constructor.
+     */
+    PrecalcZOA(Operator *op, Assembler *assembler, Quadrature *quad);
+
+    /** \brief
+     * Implements SubAssembler::calculateElementMatrix().
+     */
+    void calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat);
+
+    void calculateElementMatrix(const ElInfo *rowElInfo,
+				const ElInfo *colElInfo,
+				ElementMatrix *mat) 
+    {
+      ERROR_EXIT("CEM not yet\n");
+    }
+
+    /** \brief
+     * Implements SubAssembler::calculateElementVector().
+     */
+    void calculateElementVector(const ElInfo *elInfo, ElementVector *vec);
+
+  protected:
+    /** \brief
+     * Integral of the product of psi and phi.
+     */
+    const Q00PsiPhi *q00;
+
+    /** \brief
+     * Integral of psi.
+     */
+    const Q0Psi *q0;
+ 
+    friend class ZeroOrderAssembler;
+  };
+
+}
+
+#endif // AMDIS_ZERO_ORDER_ASSEMBLER_H