diff --git a/dune/gfe/localgeodesicfeadolcstiffness.hh b/dune/gfe/localgeodesicfeadolcstiffness.hh
index ea2132c05712327a0923d05dee86e8bc9a32dd08..148f2c4ca79ae435782d3c4a690832a497a184cd 100644
--- a/dune/gfe/localgeodesicfeadolcstiffness.hh
+++ b/dune/gfe/localgeodesicfeadolcstiffness.hh
@@ -103,7 +103,12 @@ energy(const typename Basis::LocalView& localView,
       localASolution[i] = aRaw[i];  // may contain a projection onto M -- needs to be done in adouble
     }
 
-    energy = localEnergy_->energy(localView,localASolution);
+    try {
+        energy = localEnergy_->energy(localView,localASolution);
+    } catch (Dune::Exception &e) {
+        trace_off(rank);
+        throw e;
+    }
 
     energy >>= pureEnergy;
 
diff --git a/dune/gfe/riemanniantrsolver.cc b/dune/gfe/riemanniantrsolver.cc
index cd6febcc4a33c3a14b7fae089f0e496e0c5ab198..e70e6649f60d03a8e3e69ce99285c9812fbaad00 100644
--- a/dune/gfe/riemanniantrsolver.cc
+++ b/dune/gfe/riemanniantrsolver.cc
@@ -374,6 +374,8 @@ void RiemannianTrustRegionSolver<Basis,TargetSpace>::solve()
                                                0);
 #endif
     auto& i = statistics_.finalIteration;
+    double totalAssemblyTime = 0.0;
+    double totalSolverTime = 0.0;
     for (i=0; i<maxTrustRegionSteps_; i++) {
 
 /*        std::cout << "current iterate:\n";
@@ -421,6 +423,7 @@ void RiemannianTrustRegionSolver<Basis,TargetSpace>::solve()
 
             if (this->verbosity_ == Solver::FULL)
               std::cout << "Assembly took " << gradientTimer.elapsed() << " sec." << std::endl;
+            totalAssemblyTime += gradientTimer.elapsed();
 
             // Transfer matrix data
 #if HAVE_MPI
@@ -434,6 +437,7 @@ void RiemannianTrustRegionSolver<Basis,TargetSpace>::solve()
 
         CorrectionType corr_global(rhs_global.size());
         corr_global = 0;
+        bool solved = true;
 
         if (rank==0)
         {
@@ -451,10 +455,17 @@ void RiemannianTrustRegionSolver<Basis,TargetSpace>::solve()
             std::cout << "Solve quadratic problem..." << std::endl;
 
             Dune::Timer solutionTimer;
-            innerSolver_->solve();
+            try {
+                innerSolver_->solve();
+            } catch (Dune::Exception &e) {
+                std::cerr << "Error while solving: " << e << std::endl;
+                solved = false;
+                corr_global = 0;
+            }
             std::cout << "Solving the quadratic problem took " << solutionTimer.elapsed() << " seconds." << std::endl;
+            totalSolverTime += solutionTimer.elapsed();
 
-            if (mgStep)
+            if (mgStep && solved)
                 corr_global = mgStep->getSol();
 
             //std::cout << "Correction: " << std::endl << corr_global << std::endl;
@@ -541,71 +552,86 @@ void RiemannianTrustRegionSolver<Basis,TargetSpace>::solve()
 
 
         }
-
-        if (this->verbosity_ == NumProc::FULL)
-            std::cout << "Infinity norm of the correction: " << corr.infinity_norm() << std::endl;
-
-        if (corrGlobalInfinityNorm < this->tolerance_) {
-            if (this->verbosity_ == NumProc::FULL and rank==0)
-                std::cout << "CORRECTION IS SMALL ENOUGH" << std::endl;
-
-            if (this->verbosity_ != NumProc::QUIET and rank==0)
-                std::cout << i+1 << " trust-region steps were taken." << std::endl;
-            break;
-        }
-
-        // ////////////////////////////////////////////////////
-        //   Check whether trust-region step can be accepted
-        // ////////////////////////////////////////////////////
-
+        double energy = 0;
+        double modelDecrease = 0;
         SolutionType newIterate = x_;
-        for (size_t j=0; j<newIterate.size(); j++)
-            newIterate[j] = TargetSpace::exp(newIterate[j], corr[j]);
-
-        double energy    = assembler_->computeEnergy(newIterate);
-        energy = grid_->comm().sum(energy);
-
-        // compute the model decrease
-        // It is $ m(x) - m(x+s) = -<g,s> - 0.5 <s, Hs>
-        // Note that rhs = -g
-        CorrectionType tmp(corr.size());
-        tmp = 0;
-        hessianMatrix_->umv(corr, tmp);
-        double modelDecrease = (rhs*corr) - 0.5 * (corr*tmp);
-        modelDecrease = grid_->comm().sum(modelDecrease);
-
-        double relativeModelDecrease = modelDecrease / std::fabs(energy);
-
-        if (this->verbosity_ == NumProc::FULL and rank==0) {
-            std::cout << "Absolute model decrease: " << modelDecrease
-                      << ",  functional decrease: " << oldEnergy - energy << std::endl;
-            std::cout << "Relative model decrease: " << relativeModelDecrease
-                      << ",  functional decrease: " << (oldEnergy - energy)/energy << std::endl;
-        }
+        if (i == maxTrustRegionSteps_ - 1)
+            std::cout << i+1 << " trust-region steps were taken, the maximum was reached." << std::endl << "Total solver time: " << totalSolverTime << " sec., total assembly time: " << totalAssemblyTime << " sec." << std::endl;
 
-        assert(modelDecrease >= 0);
-
-        if (energy >= oldEnergy and rank==0) {
+        if (solved) {
             if (this->verbosity_ == NumProc::FULL)
-                printf("Richtung ist keine Abstiegsrichtung!\n");
-        }
+                std::cout << "Infinity norm of the correction: " << corr.infinity_norm() << std::endl;
 
-        if (energy >= oldEnergy &&
-            (std::abs((oldEnergy-energy)/energy) < 1e-9 || relativeModelDecrease < 1e-9)) {
-            if (this->verbosity_ == NumProc::FULL and rank==0)
-                std::cout << "Suspecting rounding problems" << std::endl;
+            if (corrGlobalInfinityNorm < this->tolerance_) {
+                if (this->verbosity_ == NumProc::FULL and rank==0)
+                    std::cout << "CORRECTION IS SMALL ENOUGH" << std::endl;
 
-            if (this->verbosity_ != NumProc::QUIET and rank==0)
-                std::cout << i+1 << " trust-region steps were taken." << std::endl;
+                if (this->verbosity_ != NumProc::QUIET and rank==0)
+                    std::cout << i+1 << " trust-region steps were taken" << std::endl << "Total solver time: " << totalSolverTime << " sec., total assembly time: " << totalAssemblyTime << " sec." << std::endl;
+                break;
+            }
 
-            x_ = newIterate;
-            break;
+            // ////////////////////////////////////////////////////
+            //   Check whether trust-region step can be accepted
+            // ////////////////////////////////////////////////////
+
+            for (size_t j=0; j<newIterate.size(); j++)
+                newIterate[j] = TargetSpace::exp(newIterate[j], corr[j]);
+            try {
+                energy  = assembler_->computeEnergy(newIterate);
+            } catch (Dune::Exception &e) {
+                std::cerr << "Error while computing the energy of the new Iterate: " << e << std::endl;
+                std::cerr << "Redoing trust region step with smaller radius..." << std::endl;
+                newIterate = x_;
+                solved = false;
+                energy = oldEnergy;
+            }
+            if (solved) {
+                energy = grid_->comm().sum(energy);
+
+                // compute the model decrease
+                // It is $ m(x) - m(x+s) = -<g,s> - 0.5 <s, Hs>
+                // Note that rhs = -g
+                CorrectionType tmp(corr.size());
+                tmp = 0;
+                hessianMatrix_->umv(corr, tmp);
+                modelDecrease = (rhs*corr) - 0.5 * (corr*tmp);
+                modelDecrease = grid_->comm().sum(modelDecrease);
+
+                double relativeModelDecrease = modelDecrease / std::fabs(energy);
+
+                if (this->verbosity_ == NumProc::FULL and rank==0) {
+                    std::cout << "Absolute model decrease: " << modelDecrease
+                              << ",  functional decrease: " << oldEnergy - energy << std::endl;
+                    std::cout << "Relative model decrease: " << relativeModelDecrease
+                              << ",  functional decrease: " << (oldEnergy - energy)/energy << std::endl;
+                }
+                assert(modelDecrease >= 0);
+
+
+                if (energy >= oldEnergy and rank==0) {
+                    if (this->verbosity_ == NumProc::FULL)
+                        printf("Richtung ist keine Abstiegsrichtung!\n");
+                }
+
+                if (energy >= oldEnergy &&
+                    (std::abs((oldEnergy-energy)/energy) < 1e-9 || relativeModelDecrease < 1e-9)) {
+                    if (this->verbosity_ == NumProc::FULL and rank==0)
+                        std::cout << "Suspecting rounding problems" << std::endl;
+
+                    if (this->verbosity_ != NumProc::QUIET and rank==0)
+                        std::cout << i+1 << " trust-region steps were taken." << std::endl;
+
+                    x_ = newIterate;
+                    break;
+                }
+            }
         }
 
         // //////////////////////////////////////////////
         //   Check for acceptance of the step
         // //////////////////////////////////////////////
-        if ( (oldEnergy-energy) / modelDecrease > 0.9) {
+        if (solved && (oldEnergy-energy) / modelDecrease > 0.9) {
             // very successful iteration
 
             x_ = newIterate;
@@ -616,8 +642,8 @@ void RiemannianTrustRegionSolver<Basis,TargetSpace>::solve()
 
             recomputeGradientHessian = true;
 
-        } else if ( (oldEnergy-energy) / modelDecrease > 0.01
-                    || std::abs(oldEnergy-energy) < 1e-12) {
+        } else if (solved && ((oldEnergy-energy) / modelDecrease > 0.01
+                    || std::abs(oldEnergy-energy) < 1e-12)) {
             // successful iteration
             x_ = newIterate;
 
diff --git a/dune/gfe/surfacecosseratenergy.hh b/dune/gfe/surfacecosseratenergy.hh
index 3f31724ca165fdfb387c9119ce4ebee94b94fd95..ce516ace831adcd4cfe47103c98ff78e49a521e2 100644
--- a/dune/gfe/surfacecosseratenergy.hh
+++ b/dune/gfe/surfacecosseratenergy.hh
@@ -238,124 +238,6 @@ public:
     b3_ = parameters.template get<double>("b3");
   }
 
-    /** \brief The energy \f$ W_{mp}(\overline{U}) \f$, as written in
-   * the first equation of (4.4) in Neff's paper
-   */
-  RT quadraticMembraneEnergy(const Dune::GFE::CosseratStrain<field_type,3,dim>& U) const
-  {
-      Dune::FieldMatrix<field_type,3,3> UMinus1 = U.matrix();
-      for (int i=0; i<dim; i++)
-          UMinus1[i][i] -= 1;
-
-      return mu_ * sym(UMinus1).frobenius_norm2()
-              + mu_c_ * skew(UMinus1).frobenius_norm2()
-              + (mu_*lambda_)/(2*mu_ + lambda_) * traceSquared(sym(UMinus1));
-  }
-
-  /** \brief The energy \f$ W_{mp}(\overline{U}) \f$, as written in
-   * the second equation of (4.4) in Neff's paper
-   */
-  RT longQuadraticMembraneEnergy(const Dune::GFE::CosseratStrain<field_type,3,dim>& U) const
-  {
-      RT result = 0;
-
-      // shear-stretch energy
-      Dune::FieldMatrix<field_type,dim-1,dim-1> sym2x2;
-      for (int i=0; i<dim-1; i++)
-          for (int j=0; j<dim-1; j++)
-              sym2x2[i][j] = 0.5 * (U.matrix()[i][j] + U.matrix()[j][i]) - (i==j);
-
-      result += mu_ * sym2x2.frobenius_norm2();
-
-      // first order drill energy
-      Dune::FieldMatrix<field_type,dim-1,dim-1> skew2x2;
-      for (int i=0; i<dim-1; i++)
-          for (int j=0; j<dim-1; j++)
-              skew2x2[i][j] = 0.5 * (U.matrix()[i][j] - U.matrix()[j][i]);
-
-      result += mu_c_ * skew2x2.frobenius_norm2();
-
-
-      // classical transverse shear energy
-      result += kappa_ * (mu_ + mu_c_)/2 * (U.matrix()[2][0]*U.matrix()[2][0] + U.matrix()[2][1]*U.matrix()[2][1]);
-
-      // elongational stretch energy
-      result += mu_*lambda_ / (2*mu_ + lambda_) * traceSquared(sym2x2);
-
-      return result;
-  }
-
-  /** \brief Energy for large-deformation problems (private communication by Patrizio Neff)
-   */
-  RT nonquadraticMembraneEnergy(const Dune::GFE::CosseratStrain<field_type,3,dim>& U) const
-  {
-      Dune::FieldMatrix<field_type,3,3> UMinus1 = U.matrix();
-      for (int i=0; i<dim; i++)
-          UMinus1[i][i] -= 1;
-
-      RT detU = U.determinant();
-
-      return mu_ * sym(UMinus1).frobenius_norm2() + mu_c_ * skew(UMinus1).frobenius_norm2()
-              + (mu_*lambda_)/(2*mu_ + lambda_) * 0.5 * ((detU-1)*(detU-1) + (1.0/detU -1)*(1.0/detU -1));
-  }
-
-  /** \brief The energy \f$ W_{mp}(\overline{U}) \f$, as written in
-   * the second equation of (4.4) in Neff's paper
-   */
-  RT longNonquadraticMembraneEnergy(const Dune::GFE::CosseratStrain<field_type,dim,dim-1>& U) const
-  {
-      RT result = 0;
-
-      // shear-stretch energy
-      Dune::FieldMatrix<field_type,dim-1,dim-1> sym2x2;
-      for (int i=0; i<dim-1; i++)
-          for (int j=0; j<dim-1; j++)
-              sym2x2[i][j] = 0.5 * (U.matrix()[i][j] + U.matrix()[j][i]) - (i==j);
-
-      result += mu_ * sym2x2.frobenius_norm2();
-
-      // first order drill energy
-      Dune::FieldMatrix<field_type,dim-1,dim-1> skew2x2;
-      for (int i=0; i<dim-1; i++)
-          for (int j=0; j<dim-1; j++)
-              skew2x2[i][j] = 0.5 * (U.matrix()[i][j] - U.matrix()[j][i]);
-
-      result += mu_c_ * skew2x2.frobenius_norm2();
-
-
-      // classical transverse shear energy
-      result += kappa_ * (mu_ + mu_c_)/2 * (U.matrix()[2][0]*U.matrix()[2][0] + U.matrix()[2][1]*U.matrix()[2][1]);
-
-      // elongational stretch energy
-      RT detU = U.determinant();
-      result += (mu_*lambda_)/(2*mu_ + lambda_) * 0.5 * ((detU-1)*(detU-1) + (1.0/detU -1)*(1.0/detU -1));
-
-      return result;
-  }
-
-  RT curvatureEnergy(const Tensor3<field_type,3,3,dim-1>& DR) const
-  {
-#ifdef DONT_USE_CURL
-      return mu_ * std::pow(L_c_ * L_c_ * DR.frobenius_norm2(),q_/2.0);
-#else
-      return mu_ * std::pow(L_c_ * L_c_ * curl(DR).frobenius_norm2(),q_/2.0);
-#endif
-  }
-
-  RT bendingEnergy(const Dune::FieldMatrix<field_type,dim,dim>& R, const Tensor3<field_type,3,3,dim-1>& DR) const
-  {
-      // left-multiply the derivative of the third director (in DR[][2][]) with R^T
-      Dune::FieldMatrix<field_type,3,3> RT_DR3(0);
-      for (int i=0; i<3; i++)
-          for (int j=0; j<dim; j++)
-              for (int k=0; k<3; k++)
-                  RT_DR3[i][j] += R[k][i] * DR[k][2][j];
-
-      return mu_ * sym(RT_DR3).frobenius_norm2()
-             + mu_c_ * skew(RT_DR3).frobenius_norm2()
-             + mu_*lambda_/(2*mu_+lambda_) * traceSquared(RT_DR3);
-  }
-
 RT energy(const typename Basis::LocalView& localView,
        const std::vector<RigidBodyMotion<field_type,dim> >& localSolution) const
 {
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 15adcea1d22c242e457e2dfc4094afb4d7ccef85..75a0de76eda797c620c95777ab14db4b5a76739f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,11 +1,13 @@
 set(programs compute-disc-error
              cosserat-continuum
-             film-on-substrate
              gradient-flow
              harmonicmaps
              rod3d)
 #            rodobstacle)
 
+if (dune-parmg_FOUND)
+	set(programs film-on-substrate ${programs})
+endif()
 foreach(_program ${programs})
   add_executable(${_program} ${_program}.cc)
   target_link_dune_default_libraries(${_program})
diff --git a/src/film-on-substrate.cc b/src/film-on-substrate.cc
index c63d0c092e470a0c594de4d8ce58d0c4d644008f..c88e018ced274672058f297fe8efff1e5d2232d6 100644
--- a/src/film-on-substrate.cc
+++ b/src/film-on-substrate.cc
@@ -14,6 +14,7 @@
 #include <dune/common/bitsetvector.hh>
 #include <dune/common/parametertree.hh>
 #include <dune/common/parametertreeparser.hh>
+#include <dune/common/timer.hh>
 #include <dune/common/version.hh>
 
 #include <dune/grid/uggrid.hh>
@@ -72,7 +73,7 @@
 #  define WORLD_DIM 3
 #endif
 const int dim = WORLD_DIM;
-const int order = 1;
+const int order = 2;
 
 #if DUNE_VERSION_LT(DUNE_COMMON, 2, 7)
 template<>
@@ -115,9 +116,13 @@ struct NeumannFunction
 
 int main (int argc, char *argv[]) try
 {
+  Dune::Timer overallTimer;
   // initialize MPI, finalize is done automatically on exit
   Dune::MPIHelper& mpiHelper = MPIHelper::instance(argc, argv);
 
+  if (mpiHelper.rank()==0)
+    std::cout << "ORDER = " << order << std::endl;
+
   // Start Python interpreter
   Python::start();
   Python::Reference main = Python::import("__main__");
@@ -144,7 +149,7 @@ int main (int argc, char *argv[]) try
   ParameterTreeParser::readOptions(argc, argv, parameterSet);
 
   // read solver settings
-  const int numLevels                   = parameterSet.get<int>("numLevels");
+  int numLevels                   = parameterSet.get<int>("numLevels");
   int numHomotopySteps                  = parameterSet.get<int>("numHomotopySteps");
   const double tolerance                = parameterSet.get<double>("tolerance");
   const int maxTrustRegionSteps         = parameterSet.get<int>("maxTrustRegionSteps");
@@ -182,9 +187,36 @@ int main (int argc, char *argv[]) try
     grid = std::shared_ptr<GridType>(GmshReader<GridType>::read(path + "/" + gridFile));
   }
 
-  grid->globalRefine(numLevels-1);
+  grid->setRefinementType(GridType::RefinementType::COPY);
+
+  // Make Python function that computes which vertices are on the Dirichlet boundary,
+  // based on the vertex positions.
+  std::string lambda = std::string("lambda x: (") + parameterSet.get<std::string>("dirichletVerticesPredicate") + std::string(")");
+  PythonFunction<FieldVector<double,dim>, bool> pythonDirichletVertices(Python::evaluate(lambda));
+
+  // Same for the Neumann boundary
+  lambda = std::string("lambda x: (") + parameterSet.get<std::string>("neumannVerticesPredicate", "0") + std::string(")");
+  PythonFunction<FieldVector<double,dim>, bool> pythonNeumannVertices(Python::evaluate(lambda));
+
+  // Same for the boundary that will get wrinkled
+  lambda = std::string("lambda x: (") + parameterSet.get<std::string>("surfaceShellVerticesPredicate", "0") + std::string(")");
+  PythonFunction<FieldVector<double,dim>, bool> pythonSurfaceShellVertices(Python::evaluate(lambda));
+
+  while (numLevels > 0) {
+    for (auto&& e : elements(grid->leafGridView())){
+      bool isSurfaceShell = false;
+      for (int i = 0; i < e.geometry().corners(); i++) {
+          isSurfaceShell = isSurfaceShell || pythonSurfaceShellVertices(e.geometry().corner(i));
+      }
+      grid->mark(isSurfaceShell ? 1 : 0,e);
+    }
 
-  grid->loadBalance();
+    grid->adapt();
+
+    grid->loadBalance();
+
+    numLevels--;
+  }
 
   if (mpiHelper.rank()==0)
     std::cout << "There are " << grid->leafGridView().comm().size() << " processes" << std::endl;
@@ -210,18 +242,7 @@ int main (int argc, char *argv[]) try
 
   const GridView::IndexSet& indexSet = gridView.indexSet();
 
-  // Make Python function that computes which vertices are on the Dirichlet boundary,
-  // based on the vertex positions.
-  std::string lambda = std::string("lambda x: (") + parameterSet.get<std::string>("dirichletVerticesPredicate") + std::string(")");
-  PythonFunction<FieldVector<double,dim>, bool> pythonDirichletVertices(Python::evaluate(lambda));
 
-  // Same for the Neumann boundary
-  lambda = std::string("lambda x: (") + parameterSet.get<std::string>("neumannVerticesPredicate", "0") + std::string(")");
-  PythonFunction<FieldVector<double,dim>, bool> pythonNeumannVertices(Python::evaluate(lambda));
-
-  // Same for the boundary that will get wrinkled
-  lambda = std::string("lambda x: (") + parameterSet.get<std::string>("surfaceShellVerticesPredicate", "0") + std::string(")");
-  PythonFunction<FieldVector<double,dim>, bool> pythonSurfaceShellVertices(Python::evaluate(lambda));
   for (auto&& v : vertices(gridView))
   {
     bool isDirichlet = pythonDirichletVertices(v.geometry().corner(0));
@@ -238,8 +259,10 @@ int main (int argc, char *argv[]) try
   auto neumannBoundary = std::make_shared<BoundaryPatch<GridView>>(gridView, neumannVertices);
   BoundaryPatch<GridView> surfaceShellBoundary(gridView, surfaceShellVertices);
 
-  if (mpiHelper.rank()==0)
+  if (mpiHelper.rank()==0) {
     std::cout << "Neumann boundary has " << neumannBoundary->numFaces() << " faces\n";
+    std::cout << "Shell boundary has " << surfaceShellBoundary.numFaces() << " faces\n";
+  }
 
 
   BitSetVector<1> dirichletNodes(feBasis.size(), false);
@@ -342,8 +365,7 @@ int main (int argc, char *argv[]) try
 
 #if DUNE_VERSION_LT(DUNE_ELASTICITY, 2, 8)
     std::shared_ptr<NeumannFunction> neumannFunction;
-    neumannFunction = std::make_shared<NeumannFunction>(parameterSet.get<FieldVector<double,dim> >("neumannValues"),
-                                                     homotopyParameter);
+    neumannFunction = std::make_shared<NeumannFunction>(neumannValues, homotopyParameter);
 #else
       // A constant vector-valued function, for simple Neumann boundary values
     std::shared_ptr<std::function<Dune::FieldVector<double,dim>(Dune::FieldVector<double,dim>)>> neumannFunctionPtr;
@@ -512,6 +534,8 @@ int main (int argc, char *argv[]) try
 
     x = solver.getSol();
 
+    std::cout << "Overall calculation took " << overallTimer.elapsed() << " sec." << std::endl;
+
     /////////////////////////////////
     //   Output result
     /////////////////////////////////