diff --git a/Makefile.am b/Makefile.am
index 50ab512e7f6b318a872f0c986b90155ec451f684..8fdd1e779419a8179e7b65717153726e5023152b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -4,6 +4,10 @@
 #LDADD = $(UG_LDFLAGS) $(AMIRAMESH_LDFLAGS) $(UG_LIBS) $(AMIRAMESH_LIBS)
 #AM_CPPFLAGS = $(UG_CPPFLAGS) $(AMIRAMESH_CPPFLAGS)
 
+# Lapack++
+LAPACKPP_CPPFLAGS = -I/home/haile/sander/lapack++-inst/include/lapackpp
+LAPACKPP_LDFLAGS  = -L/home/haile/sander/lapack++-inst/lib -llapackpp
+
 IPOPT_DIR = /home/haile/sander/COIN/Ipopt
 
 noinst_PROGRAMS = staticrod staticrod2 rod3d dirneucoupling
@@ -12,10 +16,12 @@ staticrod_SOURCES = staticrod.cc
 staticrod2_SOURCES = staticrod2.cc
 rod3d_SOURCES = rod3d.cc
 
-dirneucoupling_SOURCES  = dirneucoupling.cc
-dirneucoupling_CXXFLAGS = $(UG_CPPFLAGS) $(AMIRAMESH_CPPFLAGS) $(MPI_CPPFLAGS) -I$(IPOPT_DIR)/IPOPT/include
+dirneucoupling_SOURCES  = dirneucoupling.cc linearsolver.cc
+dirneucoupling_CXXFLAGS = $(UG_CPPFLAGS) $(AMIRAMESH_CPPFLAGS) $(MPI_CPPFLAGS) -I$(IPOPT_DIR)/IPOPT/include \
+                          $(LAPACKPP_CPPFLAGS)
 dirneucoupling_LDADD    = $(UG_LDFLAGS) $(AMIRAMESH_LDFLAGS) $(UG_LIBS) $(AMIRAMESH_LIBS) $(MPI_LDFLAGS) \
-                          -L$(IPOPT_DIR)/lib -lipopt -llapack -lblas -lg2c
+                          -L$(IPOPT_DIR)/lib -lipopt -llapack -lblas -lg2c \
+                          $(LAPACKPP_LDFLAGS)
 
 # don't follow the full GNU-standard
 # we need automake 1.5
diff --git a/dirneucoupling.cc b/dirneucoupling.cc
index 1146d64e5ea62ee676ae8f7ac81ca35ea91ff236..8acf3b423dc981b94b9e945c666bed8cfeda3abd 100644
--- a/dirneucoupling.cc
+++ b/dirneucoupling.cc
@@ -317,7 +317,13 @@ int main (int argc, char *argv[]) try
         std::cout << "resultant force: " << resultantForce << std::endl;
         std::cout << "resultant torque: " << resultantTorque << std::endl;
 
-        VectorType neumannValues(grid.size(dim));
+        // For the time being the Neumann data coming from the rod is a dg function (== not continuous)
+        // Maybe that is not necessary
+        DGIndexSet<GridType> dgIndexSet(grid,grid.maxLevel());
+        dgIndexSet.setup(grid,grid.maxLevel());
+
+        VectorType neumannValues(dgIndexSet.size());
+
         // Using that index 0 is always the left boundary for a uniformly refined OneDGrid
         computeAveragePressure<GridType>(resultantForce, resultantTorque, 
                                          interfaceBoundary[grid.maxLevel()], 
@@ -326,6 +332,7 @@ int main (int argc, char *argv[]) try
 
         rhs3d = 0;
         assembleAndAddNeumannTerm<GridType, VectorType>(interfaceBoundary[grid.maxLevel()],
+                                                        dgIndexSet,
                                                         neumannValues,
                                                         rhs3d);
 
diff --git a/linearsolver.cc b/linearsolver.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d66153a4c895c2fe8a3589ada15c9f4c2400daae
--- /dev/null
+++ b/linearsolver.cc
@@ -0,0 +1,35 @@
+// The following line switches of the f2c.h in UG.  Boy, is this disgusting!
+#define F2C_INCLUDE
+
+#include "linearsolver.hh"
+#include "lapackpp.h"
+
+using namespace Dune;
+
+// Solve a small linear system using lapack++
+void linearSolver(const FieldMatrix<double,6,12>& A,
+                  FieldVector<double,12>& x,
+                  const FieldVector<double,6>& b)
+{
+    int N = 6;
+    int M = 12;
+
+    LaGenMatDouble matrix(6,12);
+
+    for (int i=0; i<N; i++)
+        for (int j=0; j<M; j++)
+            matrix(i,j) = A[i][j];
+
+    LaVectorDouble X(M);
+    for (int i=0; i<M; i++)
+        X(i) = x[i];
+
+    LaVectorDouble B(N);
+    for (int i=0; i<N; i++)
+        B(i) = b[i];
+
+    LaLinearSolve(matrix, X, B);
+
+    for (int i=0; i<M; i++)
+        x[i] = X(i);
+}
diff --git a/linearsolver.hh b/linearsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..e42607ac9e22bdd5b64c698decfd5290df7a3a7b
--- /dev/null
+++ b/linearsolver.hh
@@ -0,0 +1,10 @@
+#ifndef LINEAR_SOLVER_HH
+#define LINEAR_SOLVER_HH
+
+#include <dune/common/fmatrix.hh>
+
+void linearSolver(const Dune::FieldMatrix<double,6,12>& A,
+                  Dune::FieldVector<double,12>& x,
+                  const Dune::FieldVector<double,6>& b);
+
+#endif
diff --git a/src/averageinterface.hh b/src/averageinterface.hh
index 781a5e1a8e446e4de46fb46e68fb97c36f444e3c..b5db85718470ad40feafd07a859ef1f452c54c9c 100644
--- a/src/averageinterface.hh
+++ b/src/averageinterface.hh
@@ -2,18 +2,12 @@
 #define AVERAGE_INTERFACE_HH
 
 #include <dune/common/fmatrix.hh>
-#include "svd.hh"
+#include <dune/disc/shapefunctions/lagrangeshapefunctions.hh>
 
-// template parameter dim is only there do make it compile when dim!=3
-template <class T, int dim>
-Dune::FieldVector<T,dim> crossProduct(const Dune::FieldVector<T,dim>& a, const Dune::FieldVector<T,dim>& b)
-{
-    Dune::FieldVector<T,dim> r;
-    r[0] = a[1]*b[2] - a[2]*b[1];
-    r[1] = a[2]*b[0] - a[0]*b[2];
-    r[2] = a[0]*b[1] - a[1]*b[0];
-    return r;
-}
+#include "../../contact/src/dgindexset.hh"
+#include "../../common/crossproduct.hh"
+#include "svd.hh"
+#include "../linearsolver.hh"
 
 // Given a resultant force and torque (from a rod problem), this method computes the corresponding
 // Neumann data for a 3d elasticity problem.
@@ -33,52 +27,124 @@ void computeAveragePressure(const Dune::FieldVector<double,GridType::dimension>&
     typedef typename GridType::template Codim<dim>::LevelIterator VertexIterator;
 
     // set up output array
-    pressure.resize(indexSet.size(dim));
+    DGIndexSet<GridType> dgIndexSet(grid,level);
+    dgIndexSet.setup(grid,level);
+    pressure.resize(dgIndexSet.size());
     pressure = 0;
     
-    ctype area = interface.area();
-    
-    VertexIterator vIt    = indexSet.template begin<dim, Dune::All_Partition>();
-    VertexIterator vEndIt = indexSet.template end<dim, Dune::All_Partition>();
-    
-    for (; vIt!=vEndIt; ++vIt) {
+    typename GridType::template Codim<0>::LevelIterator eIt    = indexSet.template begin<0,Dune::All_Partition>();
+    typename GridType::template Codim<0>::LevelIterator eEndIt = indexSet.template end<0,Dune::All_Partition>();
 
-        int vIdx = indexSet.index(*vIt);
+    for (; eIt!=eEndIt; ++eIt) {
 
-        if (interface.containsVertex(vIdx)) {
+        typename GridType::template Codim<0>::Entity::LevelIntersectionIterator nIt    = eIt->ilevelbegin();
+        typename GridType::template Codim<0>::Entity::LevelIntersectionIterator nEndIt = eIt->ilevelend();
+        
+        for (; nIt!=nEndIt; ++nIt) {
+            
+            if (!interface.contains(*eIt,nIt))
+                continue;
 
-            // force part
-            pressure[vIdx] = resultantForce;
-            pressure[vIdx] /= area;
+            const Dune::LagrangeShapeFunctionSet<double, double, dim-1>& baseSet
+                = Dune::LagrangeShapeFunctions<double, double, dim-1>::general(nIt.intersectionGlobal().type(),1);
 
-            // torque part
-            double x = (vIt->geometry()[0] - crossSection.r) * crossSection.q.director(0);
-            double y = (vIt->geometry()[0] - crossSection.r) * crossSection.q.director(1);
+            if (baseSet.size() != 4)
+                DUNE_THROW(Dune::NotImplemented, "average interface only for quad faces");
+            
+            // four rows because a face may have no more than four vertices
+            Dune::FieldVector<double,4> mu(0);
+            Dune::FieldVector<double,3> mu_tilde[4][3];
             
-            Dune::FieldVector<double,3> localTorque;
-            for (int i=0; i<3; i++)
-                localTorque[i] = resultantTorque * crossSection.q.director(i);
+            for (int i=0; i<4; i++)
+                for (int j=0; j<3; j++)
+                    mu_tilde[i][j] = 0;
 
-            // add it up
-            pressure[vIdx][0] += -2 * M_PI * localTorque[2] * y / (area*area);
-            pressure[vIdx][1] +=  2 * M_PI * localTorque[2] * x / (area*area);
-            pressure[vIdx][2] +=  4 * M_PI * localTorque[0] * y / (area*area);
-            pressure[vIdx][2] += -4 * M_PI * localTorque[1] * x / (area*area);
+            for (int i=0; i<nIt.intersectionGlobal().corners(); i++) {
+                
+                const Dune::QuadratureRule<double, dim-1>& quad 
+                    = Dune::QuadratureRules<double, dim-1>::rule(nIt.intersectionGlobal().type(), dim-1);
+                
+                for (size_t qp=0; qp<quad.size(); qp++) {
+                    
+                    // Local position of the quadrature point
+                    const Dune::FieldVector<double,dim-1>& quadPos = quad[qp].position();
+                    
+                    const double integrationElement         = nIt.intersectionGlobal().integrationElement(quadPos);
+                    
+                    // \mu_i = \int_t \varphi_i \ds
+                    mu[i] += quad[qp].weight() * integrationElement * baseSet[i].evaluateFunction(0,quadPos);
+                    
+                    // \tilde{\mu}_i^j = \int_t \varphi_i \times (x - x_0) \ds
+                    Dune::FieldVector<double,dim> worldPos = nIt.intersectionGlobal().global(quadPos);
+
+                    for (int j=0; j<dim; j++) {
+
+                        // Vector-valued basis function
+                        Dune::FieldVector<double,dim> phi_i(0);
+                        phi_i[j] = baseSet[i].evaluateFunction(0,quadPos);
+                        
+                        mu_tilde[i][j].axpy(quad[qp].weight() * integrationElement,
+                                            crossProduct(worldPos-crossSection.r, phi_i));
+
+                    }
+                    
+                }
+                
+            }
+
+
+//             std::cout << "tilde{mu}\n" << std::endl;
+//             for (int i=0; i<4; i++)
+//                 for (int j=0; j<3; j++)
+//                     std::cout << "i: " << i << ",  j: " << j << ",   " << mu_tilde[i][j] << std::endl;
+
+
+            // Set up matrix
+            Dune::FieldMatrix<double, 6, 12> matrix(0);
+            for (int i=0; i<4; i++)
+                for (int j=0; j<3; j++)
+                    matrix[j][i*3+j] = mu[i];
+
+            for (int i=0; i<4; i++)
+                for (int j=0; j<3; j++)
+                    for (int k=0; k<3; k++)
+                        matrix[3+k][3*i+j] = mu_tilde[i][j][k];
+
+            Dune::FieldVector<double,12> u;
+            Dune::FieldVector<double,6> b;
+
+            for (int i=0; i<3; i++) {
+                b[i]   = resultantForce[i];
+                b[i+3] = resultantTorque[i];
+            }
+
+//             std::cout << b << std::endl;
+//             std::cout << matrix << std::endl;
+
+            //matrix.solve(u,b);
+            linearSolver(matrix, u, b);
+            //std::cout << u << std::endl;
+
+            for (int i=0; i<3; i++) {
+                pressure[dgIndexSet(*eIt, nIt.numberInSelf())][i]   = u[i];
+                pressure[dgIndexSet(*eIt, nIt.numberInSelf())+1][i] = u[i+3];
+                pressure[dgIndexSet(*eIt, nIt.numberInSelf())+2][i] = u[i+6];
+                pressure[dgIndexSet(*eIt, nIt.numberInSelf())+3][i] = u[i+9];
+            }
 
         }
 
     }
 
+
     // /////////////////////////////////////////////////////////////////////////////////////
     //   Compute the overall force and torque to see whether the preceding code is correct
     // /////////////////////////////////////////////////////////////////////////////////////
 
     Dune::FieldVector<double,3> outputForce(0), outputTorque(0);
-    Dune::LeafP1Function<GridType,double,dim> pressureFunction(grid);
-    *pressureFunction = pressure;
 
-    typename GridType::template Codim<0>::LevelIterator eIt    = indexSet.template begin<0,Dune::All_Partition>();
-    typename GridType::template Codim<0>::LevelIterator eEndIt = indexSet.template end<0,Dune::All_Partition>();
+    eIt    = indexSet.template begin<0,Dune::All_Partition>();
+    eEndIt = indexSet.template end<0,Dune::All_Partition>();
 
     for (; eIt!=eEndIt; ++eIt) {
 
@@ -89,6 +155,9 @@ void computeAveragePressure(const Dune::FieldVector<double,GridType::dimension>&
             
             if (!interface.contains(*eIt,nIt))
                 continue;
+
+            const Dune::LagrangeShapeFunctionSet<double, double, dim-1>& baseSet
+                = Dune::LagrangeShapeFunctions<double, double, dim-1>::general(nIt.intersectionGlobal().type(),1);
             
             const Dune::QuadratureRule<double, dim-1>& quad 
                 = Dune::QuadratureRules<double, dim-1>::rule(nIt.intersectionGlobal().type(), dim-1);
@@ -101,9 +170,13 @@ void computeAveragePressure(const Dune::FieldVector<double,GridType::dimension>&
                 const double integrationElement         = nIt.intersectionGlobal().integrationElement(quadPos);
                 
                 // Evaluate function
-                Dune::FieldVector<double,dim> localPressure;
-                pressureFunction.evalalllocal(*eIt, nIt.intersectionSelfLocal().global(quadPos), localPressure);
+                Dune::FieldVector<double,dim> localPressure(0);
                 
+                for (size_t i=0; i<baseSet.size(); i++) 
+                    localPressure.axpy(baseSet[i].evaluateFunction(0,quadPos),
+                                       pressure[dgIndexSet(*eIt,nIt.numberInSelf())+i]);
+
+
                 // Sum up the total force
                 outputForce.axpy(quad[qp].weight()*integrationElement, localPressure);
 
@@ -118,8 +191,12 @@ void computeAveragePressure(const Dune::FieldVector<double,GridType::dimension>&
 
     }
 
-    std::cout << "Output force:  " << outputForce << std::endl;
-    std::cout << "Output torque: " << outputTorque << "      " << resultantTorque[0]/outputTorque[0] << std::endl;
+    outputForce  -= resultantForce;
+    outputTorque -= resultantTorque;
+    assert( outputForce.two_norm() < 1e-6 );
+    assert( outputTorque.two_norm() < 1e-6 );
+//     std::cout << "Output force:  " << outputForce << std::endl;
+//     std::cout << "Output torque: " << outputTorque << "      " << resultantTorque[0]/outputTorque[0] << std::endl;
 
 }