diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 10f3f480644e027896cb9d7aad74bfc2e117c001..5dbf3f1ef27e47509d278975e5d46ab451f3e2b6 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -58,6 +58,7 @@ code-spelling-check:
   image: registry.dune-project.org/docker/ci/debian:11
   script:
   - codespell
+    --ignore-words-list numer
 
 # Verify code-formatting rules
 code-formatting-check:
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index be52cfccb403e159965b4f9c1a4bbe109a0462c1..4828ea3c48b08a153d4fac29554119733dee1565 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory("doxygen")
+add_subdirectory("manual")
diff --git a/doc/manual/.gitignore b/doc/manual/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..6569ed5d0d142b51b1df08c840736e1e209c203b
--- /dev/null
+++ b/doc/manual/.gitignore
@@ -0,0 +1,20 @@
+*.aux
+*.log
+*.gz
+*.toc
+*.out
+*.upa
+*.bcf
+*.run.xml
+*.bbl
+*.blg
+*.sublime-*
+*.tdo
+*.upb
+*.fdb_latexmk
+*.fls
+*.loc
+*.soc
+
+# Final documents
+dune-gfe-manual.pdf
diff --git a/doc/manual/CMakeLists.txt b/doc/manual/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..213d4c9421cec4b57eaf68d167f0e3ff759f720b
--- /dev/null
+++ b/doc/manual/CMakeLists.txt
@@ -0,0 +1,5 @@
+dune_add_latex_document(
+  SOURCE dune-gfe-manual.tex
+  FATHER_TARGET doc
+  BUILD_ON_INSTALL
+  INSTALL ${CMAKE_INSTALL_DOCDIR}/manual)
diff --git a/doc/manual/dune-gfe-manual.bib b/doc/manual/dune-gfe-manual.bib
new file mode 100644
index 0000000000000000000000000000000000000000..b3170467b73116517fc4c8c8394bb1ec74610aa1
--- /dev/null
+++ b/doc/manual/dune-gfe-manual.bib
@@ -0,0 +1,85 @@
+@string{CM = "Comp. Mech."}
+@string{COMMA4 = "Comput. Methods Appl. Mech. Engrg."}
+@string{IJNME = "Int. J. Num. Meth. Eng."}
+@string{JE = "J. Elast."}
+@string{MMS = "Math. Mech. Solids"}
+@string{SIJSSC = "SIAM J. Sci. Stat. Comput."}
+@string{SINUM = "SIAM J. Numer. Anal."}
+@string{SISC = "SIAM J. Sci. Comput."}
+@string{ZAMM = "Z. Angew. Math. Mech."}
+
+
+@Article{sander:2010,
+	author = {Oliver Sander},
+	title = {Geodesic Finite Elements for {C}osserat Rods},
+	journal = IJNME,
+	year = {2010},
+	volume = {82},
+	number = {13},
+	pages = {1645--1670}
+}
+
+@article{sander:2012,
+	author =   {Oliver Sander},
+	title =    {Geodesic Finite Elements on Simplicial Grids},
+	journal =  IJNME,
+	year =     {2012},
+	volume =   {92},
+	number =   {12},
+	pages =    {999--1025}
+}
+
+@InCollection{hardering_sander:2020,
+	author = {Hanne Hardering and Oliver Sander},
+	title = {Geometric Finite Elements},
+	booktitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
+	publisher = {Springer},
+	year = {2020},
+	editor = {Philipp Grohs and Martin Holler and Andreas Weinmann}
+}
+
+@article{sander:2015,
+	author =   {Oliver Sander},
+	title =    {Geodesic Finite Elements of Higher Order},
+	year = {2015},
+	journal = "IMA J. Numer. Anal.",
+	doi = "10.1093/imanum/drv016"
+}
+
+@Book{sander:2020,
+ title = {{DUNE}---The Distributed and Unified Numerics Environment},
+ publisher = {Springer},
+ year = {2020},
+ author = {Oliver Sander}
+}
+
+@InProceedings{absil_mahony_trumpf:2013,
+ author = {Pierre-Antoine Absil and Robert Mahony and Jochen Trumpf},
+ title = {An Extrinsic Look at the {Riemannian} {Hessian}},
+ booktitle = {Geometric Science of Information},
+ pages = {361--368},
+ year = {2013},
+ editor = {Nielsen, F. and Barbaresco, F.},
+ publisher = {Springer},
+ doi = {10.1007/978-3-642-40020-9_39},
+}
+
+@Article{grohs_hardering_sander_sprecher:2019,
+	author = {P. Grohs and H. Hardering and O. Sander and M. Sprecher},
+	title = {Projection-Based Finite Elements for Nonlinear Function Spaces},
+	journal = SINUM,
+	year = {2019},
+	volume = {57},
+	number = {1},
+	pages = {404--428}
+}
+
+@Article{nebel_sander_birsan_neff:2023,
+ author = {Lisa Julia Nebel and Oliver Sander and Mircea Bîrsan and Patrizio Neff},
+ title = {A geometrically nonlinear {Cosserat} shell model for orientable and non-orientable surfaces: {Discretization} with geometric finite elements},
+ journal = COMMA4,
+ year = {2023},
+ volume = {416},
+ pages = {116309},
+ doi = {10.1016/j.cma.2023.116309},
+}
diff --git a/doc/manual/dune-gfe-manual.tex b/doc/manual/dune-gfe-manual.tex
new file mode 100644
index 0000000000000000000000000000000000000000..a5f90eb9901aaac73fbbb972fb766af963be65dd
--- /dev/null
+++ b/doc/manual/dune-gfe-manual.tex
@@ -0,0 +1,849 @@
+\documentclass[a4paper]{article}
+
+\usepackage[utf8]{inputenc}
+
+\usepackage{amssymb, amsmath, amsthm}
+\usepackage{bbold}
+\usepackage{graphicx}
+\usepackage{enumitem}
+\usepackage{amsfonts}
+\usepackage{mathtools}
+\usepackage{orcidlink}
+\usepackage{colonequals}
+\usepackage{lmodern}
+\usepackage{microtype}
+\usepackage{mathrsfs}
+
+\usepackage{algorithm2e}
+
+% Bibliography settings
+%---------------------------
+\usepackage[style=ext-numeric,
+            maxbibnames=99,  % Never write 'et al.' in a bibliography
+            maxcitenames=99,  % Never write 'et al.' in a citation
+            giveninits=true, % Abbreviate first names
+            sortcites=true,  % Sort citations when there are more than one
+            backend=biber]{biblatex}
+\addbibresource{dune-gfe-manual.bib}
+
+% Transform certain titles to "sentence case".  Needs an ext- style.
+% See https://tex.stackexchange.com/questions/22980/sentence-case-for-titles-in-biblatex
+\DeclareFieldFormat
+[article,inbook,incollection,inproceedings,patent,thesis,unpublished]
+{titlecase:title}{\MakeSentenceCase*{#1}}
+
+% Put a thin space between two consecutive initials
+\renewrobustcmd*{\bibinitdelim}{\,}
+
+% Settings for the listings package
+%--------------------------------------
+
+\usepackage{listings}
+\lstset{language=[11]{c++},
+         basicstyle=\small\ttfamily,
+         commentstyle=\textit,
+         columns=flexible,keepspaces=true,
+         showstringspaces=false,      % no special string spaces
+         escapeinside={/*@}{@*/}
+        }
+
+% How to include ranges of an external source code file
+\lstset{rangeprefix=//\ \{\ ,% curly left brace plus space, all in a C++-style comment
+        rangesuffix=\ \},% space plus curly right brace
+        numberstyle=\footnotesize,  % font size for numbers
+        includerangemarker=false}  % Do not show the range markers
+
+\definecolor{interfacecolor}{rgb}{0.95,0.95,1}
+\definecolor{interfaceclasscolor}{rgb}{0.98,0.81,0.81}
+\definecolor{cmakecommandcolor}{rgb}{0.2,0.8,0.8}
+\lstdefinestyle{Example}{}
+\lstdefinestyle{Interface}{backgroundcolor=\color{interfacecolor},frame=single}
+\lstdefinestyle{InterfaceClass}{backgroundcolor=\color{interfaceclasscolor},frame=single}
+\lstdefinestyle{CMakeCommand}{backgroundcolor=\color{cmakecommandcolor},frame=single}
+
+\newcommand{\cpp}[1]{\lstinline[basicstyle=\ttfamily]!#1!}
+
+\lstnewenvironment{cppinterface}[1][]{\lstset{style=Interface}}{}
+\lstnewenvironment{cppinterfaceclass}[1][]{\lstset{style=InterfaceClass}}{}
+
+% For typesetting Dune module names
+\newcommand{\dunemodule}[1]{\texttt{#1}}
+
+%  Custom environments
+%---------------------------------
+
+\newtheorem{definition}{Definition}
+\newtheorem{lemma}{Lemma}
+\newtheorem{remark}{Remark}
+
+
+
+\usepackage{todonotes}
+\newcommand{\todosander}[1]{\todo[inline,color=white,author=OS,caption={}]{#1}}
+
+\usepackage{float}
+
+\usepackage{tikz}
+\usetikzlibrary{arrows}
+
+\usepackage{multirow}
+\usepackage{changes}
+\usepackage{hyperref}
+
+\newcommand{\R}{\mathbb R}
+\newcommand{\N}{\mathbb N}
+
+\DeclarePairedDelimiter{\abs}{\lvert}{\rvert}
+\DeclarePairedDelimiter{\norm}{\lVert}{\rVert}
+
+
+% Identity matrix
+\newcommand{\identity}{\mathbb{1}}
+\newcommand{\Tref}{{T_\textnormal{ref}}}
+
+
+\DeclareMathOperator{\dist}{dist}
+\DeclareMathOperator{\SOdrei}{SO(3)}
+\DeclareMathOperator*{\argmin}{arg\,min}
+
+%%  All graphics files may be in this subdirectory
+\graphicspath{{gfx/}}
+
+
+
+\title{The dune-gfe manual}
+\author{Oliver Sander}
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+ \dunemodule{dune-gfe} is a module in the \textsc{Dune} ecosystem that implements
+ various geometric finite element (GFE) methods.
+ This document collects miscellaneous technical information about the implementation.
+\end{abstract}
+
+\section{Geometric finite elements}
+
+Let $\Omega$ and $M$ be smooth manifolds.  Geometric finite elements provide approximations for maps
+$v : \Omega \to M$ by suitably generalizing piecewise polynomials.  For this,
+$\Omega$ is discretized by a grid $\mathcal{T}$ with elements $T$.
+These elements can be simplices, (hyper-)cubes, or more exotic shapes.
+For details about the construction of geometric finite elements see
+\cite{sander:2010,sander:2012,sander:2015,grohs_hardering_sander_sprecher:2019,hardering_sander:2020}.
+
+\subsection{Representation of target manifolds}
+\label{sec:representation_of_manifolds}
+
+Currently, the module assumes that $M$ is embedded into $\R^n$.  In other words,
+elements of $M$ are stored as points in $\R^n$. In the code, these coordinates
+are typically called \emph{global coordinates}, and they are available via the
+\cpp{globalCoordinates()} method. For tangent vectors there are two representations:
+One can either store tangent vectors as vectors in $\R^n$ as well.  In this case
+they are referred to as \emph{embedded} in the code. The fact that they must be
+tangent to $M \subset \R^n$ is then only implicitly assumed.  Alternatively,
+one can represent a tangent vector at $p \in M$ in the orthonormal basis of $T_p M$
+given by the \cpp{orthonormalFrame()} method.
+
+
+\subsection{Variational problems}
+
+So far, all problems studied using \dunemodule{dune-gfe} have been variational.
+By this we mean that the problem takes the form of an energy functional
+\begin{equation*}
+ J : H(\Omega;M) \to \R,
+\end{equation*}
+where $H(\Omega;M)$ is a suitable space of functions mapping $\Omega$ to $M$.
+Solutions $u$ of the problem are local minimizers of this functional.
+
+Typically, such variational GFE problems are described by an energy density
+\begin{equation*}
+ f : \Omega \times TM \to \R,
+\end{equation*}
+and the energy takes the form
+\begin{equation}
+\label{eq:integral_energy}
+ J(v)
+ \colonequals
+ \int_\Omega f(x, v(x), \nabla v(x))\,dx.
+\end{equation}
+(We disregard the case of densities depending on higher derivatives.)
+
+
+\section{Efficient assembly of the tangent problems}
+
+To minimize the functional $J$ in a GFE space requires first and second derivatives
+of the energy $J$.%
+%
+\footnote{Gradient-descent methods would require first derivatives only, but \dunemodule{dune-gfe}
+is geared towards second-order methods.}
+%
+The second derivative will be a sparse symmetric matrix
+(the \emph{Hesse} or \emph{tangent} matrix).  As in the case of classical
+finite elements, it can be assembled by considering the integral~\eqref{eq:integral_energy}
+element by element, and appropriately summing up the element-wise contributions.
+
+We now consider a single element $T$ of a grid of $\Omega$.
+A GFE discretization approximates $u$ on $T$ by a function $u_h : T \to M$
+that is constructed from interpolating a set of $m$ values $u_1,\dots,u_m \in M$
+to a function on $T$. In the following we write $u_h(u_1,\dots,u_m,\cdot)$ to denote
+the function obtained by interpolating the coefficients $u_1,\dots,u_m$, and
+$u_h(u_1,\dots,u_m,x)$ for the value of that function at $x \in T$. Similarly,
+we will write $\nabla u_h$ for the derivative of the interpolation function
+with respect to $x$. We can therefore consider the energy on $T$ as a
+function on $M^m$:
+\begin{align*}
+ E & \; : M^m \to \R
+ \\
+ E(u_1,\dots,u_m)
+ & \colonequals
+ \int_T f\big(x, u_h(u_1,\dots,u_m,x), \nabla u_h(u_1,\dots,u_m,x) \big)\,dx.
+\end{align*}
+As usual the integral is approximated by a quadrature rule.  We therefore replace it
+by the sum
+\begin{align}
+\label{eq:integral_energy_quadrature}
+ E(u_1,\dots,u_m)
+ \approx
+ \sum_{(\omega,x) \in Q} f\big(x, u_h(u_1,\dots,u_m,x), \nabla u_h(u_1,\dots,u_m,x) \big),
+\end{align}
+where $Q$ is a suitable quadrature rule with weights $\omega_1,\dots,\omega_{\abs{Q}} \in \R$
+and points $x_1,\dots,x_{\abs{Q}} \in T$.
+
+\subsection{Euclidean vs.\ Riemannian derivatives}
+
+For Newton-type methods one needs the gradient and Hessian of the functional $E$.
+However, as the energy $E$ is defined on a Riemannian manifold (the product manifold
+$M^m$), we need the Riemannian derivatives instead of the standard Euclidean ones
+\cite{absil}.
+
+Conceptually, computing the first and second Riemannian derivatives consists of two steps.
+Let first
+\begin{equation*}
+ \bar{E} : (\R^n)^m \to \R
+\end{equation*}
+be any smooth extension of $E$ from the manifold $M^m$ into the surrounding space $(\R^m)^n$,
+and let $\bar{\nabla} \bar{E}$ and $\bar{\nabla}^2 \bar{E}$ be the standard Euclidean
+gradient and Hesse matrix of $E$, respectively. It is well-known (see, e.g., \cite{})
+that the Riemannian gradient can be obtained from the Euclidean one by an orthogonal
+projection of $\bar{\nabla}\bar{E}$ onto the tangent space of $M^m$
+\begin{equation}
+\label{eq:gradient_riemannian_from_euclidean}
+ \nabla E(u_1,\dots,u_m)
+ =
+ \mathcal{P}_{(u_1,\dots,u_m)} \bar{\nabla}\bar{E}(u_1,\dots,u_m),
+\end{equation}
+where for any $(u_1,\dots,u_m) \in M^m$, $\mathcal{P}_{(u_1,\dots,u_m)}$ shall denote
+the orthogonal projection from $T_{(u_1,\dots,u_m)}(\R^m)^n$ onto $T_{(u_1,\dots,u_m)} M^m$.
+Since $M^m$ is a product manifold, the projection operator $\mathcal{P}_{(u_1,\dots,u_m)}$
+is actually a separate projection for each factor space $\mathcal{P}_{u_1}, \dots, \mathcal{P}_{u_m}$.
+
+In~\cite{absil_mahony_trumpf:2013}, \citeauthor{absil_mahony_trumpf:2013} showed a
+corresponding formula for computing the Riemannian Hessian from the Euclidean one
+\begin{align}
+\label{eq:hessian_riemannian_from_euclidean}
+ \operatorname{Hess} E(u_1,\dots,u_m)[z]
+ & =
+ \mathcal{P}_{(u_1,\dots,u_m)} \bar{\nabla}^2 \bar{E}(u_1,\dots,u_m) z
+ \\
+ \nonumber
+ & \quad
+ + \mathfrak{A}_{(u_1,\dots,u_m)}(z, \mathcal{P}^\perp_{(u_1,\dots,u_m)}).
+\end{align}
+Here $\mathfrak{A}_{(u_1,\dots,u_m)}$ is the Weingarten map of $M^m$ at $(u_1,\dots,u_m)$,
+which encodes information about the curvature of embedded submanifolds.
+\begin{definition}[Weingarten map]
+ The Weingarten map of the submanifold $M$ at $x$ is the operator $\mathfrak{A}_x$
+ that takes as arguments a tangent vector $z \in T_x M$ and a normal vector $v \in T^\perp_x M$
+ and returns the tangent vector $\mathfrak{A}_x (z, v) = -\mathcal{P}_xD_z V$,
+ where $V$ is any local extension of $v$ to a normal vector field on $M$.
+\end{definition}
+\todosander{Definition von $D_z$ fehlt.}
+\todosander{Show that this is always a symmetric matrix!}
+\todosander{Note: For this construction we need the \emph{embedded} gradient!}
+
+
+\subsection{Direct assembly of the Euclidean gradient and Hessian}
+\label{sec:direct_assembly}
+
+We first show how to compute the Euclidean gradient and Hesse matrix.
+The \dunemodule{dune-gfe} code always computes the two together, because
+computing them is expensive, and computing the second derivative almost
+always produces the first one as a by-product.
+The easiest way is to use ADOL-C to compute the tangent problems by feeding
+the entire quadrature loop~\eqref{eq:integral_energy_quadrature} as a
+single function, and have it
+compute the first and second derivatives using the \cpp{jacobian} and
+\cpp{hessian2} methods.%
+%
+\footnote{Always use ADOL-C's vector mode for the second derivatives.
+ It is much faster than scalar mode.}
+%
+This is implemented in the class \cpp{LocalGeodesicFEADOLCStiffness}.
+(along with the modifications of Chapter~\ref{sec:euclidean_to_riemannian} that turn
+the Euclidean derivatives into Riemannian ones.)
+It works, and has been used successfully for all my publications up and
+including \cite{nebel_sander_birsan_neff:2023}.
+
+\subsection{Computing Euclidean derivatives by separating the interpolation from the density}
+
+The approach of Chapter~\ref{sec:direct_assembly} is simple but slow.
+A part of the slowness is intrinsic: Due to the nonlinear nature of the
+GFE interpolation, and the fact that in many cases it cannot be given explicitly,
+deriving energies that involve such interpolations is an expensive task.
+Run-time can be saved, however, by differentiating the density $f$ and the interpolation function $u_h$
+separately. This is what the \cpp{LocalIntegralStiffness} class implements.
+Let
+\begin{equation*}
+ \bar{\nabla} \bar{E}(u_1,\dots,u_m) \in \R^{mn}
+\end{equation*}
+be the Euclidean gradient of $\bar{E}$ at $u_1, \dots, u_m \in \R^n$, and
+\begin{equation*}
+ \bar{\nabla}^2 \bar{E}(u_1,\dots,u_m) \in \R^{mn \times mn}
+\end{equation*}
+the corresponding Hesse matrix.  With the chain rule we get
+\begin{equation*}
+ \bar{\nabla} \bar{E}(u_1,\dots,u_m)
+ =
+ \sum_{(x,\omega) \in Q} \mathcal{J}^T_{u_h,x} \nabla f(x, u_h(x), \nabla u_h(x)).
+\end{equation*}
+Here,
+\begin{equation*}
+ \mathcal J_{u_h,x}
+ \colonequals
+ \begin{pmatrix}
+    \frac{\partial u_h(x)}{\partial u_1}, \dots \frac{\partial u_h(x)}{\partial u_m} \\
+    \frac{\partial \nabla u_h(x)}{\partial u_1},
+        \dots, \frac{\partial \nabla u_h(x)}{\partial u_m}
+ \end{pmatrix}
+ \in
+ \R^{(n+dn) \times mn}
+\end{equation*}
+is the Jacobi matrix of the GFE interpolation $(u_1,\dots, u_m) \mapsto (u_h(x), \nabla u_h(x))$
+for fixed $x \in T$, and
+\begin{equation*}
+ \nabla f(x, u_h(x), \nabla u_h(x))
+ \in
+ \R^{n+dn}
+\end{equation*}
+is the derivative of the density with respect to the second and third argument.
+
+To understand the sizes of these objects, note that a pair $(u_h(x),\nabla u_h(x))$
+can be interpreted as a vector in $\R^n \times \R^{dn} \simeq \R^{n+dn}$
+(with $d = \dim \Omega$), and the tuple of coefficients $u_1,\dots,u_m$
+can be interpreted as a vector in $\R^{mn}$.
+
+Likewise, for the second derivative of $E$ we get
+\begin{align*}
+ \bar{\nabla}^2 \bar{E}(u_1,\dots,u_m)
+ & =
+ \sum_{(x,\omega) \in Q} \mathcal J_{u_h,x}^T \nabla^2 f(x,u_h(x),\nabla u_h(x)) \mathcal{J}_{u_h,x}
+ \\
+ %
+ & \qquad \qquad +
+ \sum_{(x,\omega) \in Q} \nabla f(x,u_h(x),\nabla u_h(x)) \mathcal H_{u_h,x}.
+\end{align*}
+where
+\begin{align*}
+ (\mathcal{H}_{u_h,x})_{ijk}
+ \colonequals
+ \frac{\partial^2 (u_h(x))_i}{\partial (u_{j/n})_{j \operatorname{mod} n} \partial (u_{k/n})_{k \operatorname{mod} n}}
+ \qquad
+ i = 1,\dots,n
+ \qquad
+ j,k = 1, \dots, mn
+ \shortintertext{and}
+ (\mathcal{H}_{u_h,x})_{ijk}
+ \colonequals
+ \frac{\partial^2 (\nabla u_h(x))_i}{\partial (u_{j/n})_{j \operatorname{mod} n} \partial (u_{k/n})_{k \operatorname{mod} n}}
+ \qquad
+ i = n+1,\dots,n+dn
+ \quad
+ j,k = 1, \dots, mn
+\end{align*}
+The product of the vector $\nabla f$ and the three-indices-object $\mathcal H_{u_h,x}$
+is to be interpreted as a contraction with respect to the first index of $\mathcal H_{u_h,x}$:
+\begin{equation*}
+ (\nabla f \mathcal H_{u_h,x})_{jk}
+ \colonequals
+ \sum_{i=1}^{n+dn}
+ (\nabla f)_i (\mathcal H_{u_h,x})_{ijk}
+ \in
+ \R^{mn \times mn}.
+\end{equation*}
+
+In this representation, the derivatives of $f$ and of $u_h$ are separated and
+can be computed separately. When done right this saves quite a bit of run-time.
+
+\subsection{From Euclidean to Riemannian derivatives}
+\label{sec:euclidean_to_riemannian}
+
+In the second step the Euclidean gradient and Hessian are modified according to
+\eqref{eq:gradient_riemannian_from_euclidean} and~\eqref{eq:hessian_riemannian_from_euclidean},
+respectively, to obtain the Riemannian counterparts.
+Calling this a second step is a conceptual distinction, because the code
+does not actually compute the Euclidean Hessian before modifying it.
+\todosander{Explain why!}
+For the gradient, this modification is simply the multiplication from the left
+with the projection operator $\mathcal{P}$. In practice, two things happen:
+Applying the projection operator to a vector in $T_x M \simeq \R^n$ produces a
+vector that is tangent to $M$, but still represented as a vector in $\R^n$
+(in \dunemodule{dune-gfe} parlance: an embedded tangent vector,
+see Chapter~\ref{sec:representation_of_manifolds}).
+
+\todosander{Den Rest schreiben!}
+
+\subsection{Computing the derivatives $\mathcal{H}$ and $\mathcal{H}$ of the geometric interpolation}
+
+\begin{remark}
+ $\mathcal H$ is an object with three indices, which is potentially memory- and
+ time-consuming to compute and store.  Note, however, that what is needed is not
+ $\mathcal H$ itself but its contraction with the vector $\nabla f(x,u_h(x),\nabla u_h(x))$.
+ AD systems that implement reverse-mode AD (such as ADOL-C) can compute this
+ contracted second derivative directly (in fact, such contractions is all they can compute).
+ The vector to contract with is called \emph{adjoint} in AD parlance.
+ (Specifically, \dunemodule{dune-gfe} uses the \texttt{hos\_ov\_reverse} method).
+ No full third-order object is ever computed.
+\end{remark}
+
+\begin{remark}
+ If the target manifold $M$ happens to be the Euclidean space, then
+ \begin{equation*}
+  u_h(x) = \sum_i u_i \varphi_i(x)
+ \end{equation*}
+ for a set of shape functions $\varphi_i$.  While Euclidean spaces by themselves
+ are not interesting in a GFE context, they do appear frequently as factors in
+ larger spaces (e.g., Cosserat problems typically use $M = \R^3 \times SO(3)$.
+
+ \todosander{Hier nur den skalaren Fall erklären?  Besser nicht...}
+ Consequently, in that case
+ \begin{align*}
+  \mathcal{J} = \Big( \varphi_1(x), \dots, \varphi_m(x), \nabla \varphi_1(x), \dots \nabla \varphi_m(x)\Big)
+ \end{align*}
+ and
+ \begin{equation*}
+  \mathcal{H} = 0.
+ \end{equation*}
+ The method that computes $\mathcal J$ and $\mathcal H$ is therefore specialized
+ for this case, and hand-codes these expressions.  In my experience, the time gained
+ by not using AD for this case is rather small, though.
+\end{remark}
+
+\begin{remark}
+ If $M$ is a product space, then $\mathcal J$ and $\mathcal H$ are block diagonal.
+ This must be exploited by the implementation.
+\end{remark}
+
+\begin{remark}
+ Besides the trivial case $M = \R^n$, the computation of $\mathcal J$ and $\mathcal H$
+ can also be hand-implemented for other cases.  One example is projection-based
+ interpolation for the spheres $M = S^{n-1}$.
+\end{remark}
+
+
+\section{The derivatives of projection-based interpolation in the sphere}
+
+For the projection-based interpolation mapping into the unit sphere, the derivatives
+required by the geometric finite element method can be computed by hand
+with reasonable effort. The \dunemodule{dune-gfe} module contains these hand-coded
+derivatives because they are faster than computing the derivatives with ADOL-C.
+
+We document here the expressions for the derivatives. They are implemented in the file
+\texttt{interpolationderivatives.hh}, in a specialization of the class
+\cpp{InterpolationDerivatives}.
+
+Let $\varphi_i : T \to \R$ be a set of weight functions that sum up to~$1$
+everywhere, and let $v_1,\dots,v_m \in S^{n-1} \subset \R^n$ be values
+on the unit sphere.  Recall that the projection-based interpolation at
+a point $\xi \in T$ is
+\begin{equation*}
+ I(v_1,\dots,v_m;\xi)
+ \colonequals
+ P(\omega(\xi)),
+\end{equation*}
+where
+\begin{equation*}
+ \omega(\xi) \colonequals \sum_{i=1}^m \varphi_i(\xi)v_i
+\end{equation*}
+is the Euclidean interpolation between the values $v_1,\dots,v_m$, and
+\begin{equation*}
+ P : \R^n \to S^{n-1}
+ \qquad
+ P(x) \colonequals \frac{x}{\abs{x}}
+\end{equation*}
+is the radial projection onto the unit sphere.
+
+\begin{remark}
+ While this is the definition of projection-based interpolation as given in articles
+ such as~\cite{grohs_hardering_sander_sprecher:2019} and \cite{hardering_sander:2020},
+ the code currently implements something subtly different.  Specifically, the
+ copy constructor and assignment operators of \texttt{UnitVector} project their
+ arguments to $S^{n-1}$, which changes the definition of $\omega(\xi)$ given above to
+ \begin{equation*}
+  \omega(\xi)
+  \colonequals
+  \sum_{i=1}^m \varphi_i(\xi) \frac{v_i}{\abs{v_i}}.
+ \end{equation*}
+ While this does not change the value of the projection-based interpolation as long as
+ the input values $v_1,\dots,v_m$ really do have unit length, the chain rule makes
+ additional terms appear in the derivative.
+
+ I am not actually convinced that having the copy constructor and assignment perform
+ a projection is a good idea, and I may change this in the future.
+\end{remark}
+
+
+\subsection{Derivatives of the projection onto the sphere}
+
+By the chain rule, derivatives of $I$ involve derivatives of the projection operator~$P$.
+We therefore start by computing these derivatives.
+
+Before starting, note that for any $N \in \mathbb{Z}$ and $j=1,\dots,n$
+\begin{equation*}
+ \frac{\partial}{\partial x_j} \abs{x}^N
+ =
+ \frac{\partial}{\partial x_j} \Big( \sum_k x_k^2 \Big)^{\frac{N}{2}}
+ =
+ \frac{N}{2} \abs{x}^{N-2} \sum_k \frac{\partial}{\partial x_j} x_k^2
+ =
+ N x_j \abs{x}^{N-2}.
+\end{equation*}
+
+\subsubsection{Partial derivatives}
+
+The partial derivatives do not actually appear in the implementation, but they are
+an instructive intermediate step.
+
+\paragraph{First derivatives}
+
+The Jacobian of $P$ is the matrix
+ \begin{equation*}
+  (\nabla P)_{ij}
+  =
+  \frac{\partial}{\partial x_j} \frac{x_i}{\abs{x}}
+  =
+  \frac{\frac{\partial x_i}{\partial x_j}\abs{x} - x_i \frac{\partial}{\partial x_j} \abs{x}}{\abs{x}^2}
+  =
+  \delta_{ij} \frac{1}{\abs{x}} - \frac{x_i x_j}{\abs{x}^3}.
+ \end{equation*}
+
+\paragraph{Second derivatives}
+The second derivative is the third-order tensor
+\begin{align*}
+ (\nabla^2 P)_{ijk}
+ & =
+ \frac{\partial}{\partial x_j \partial x_k} \frac{x_i}{\abs{x}}
+ \\
+ & =
+ \frac{\partial}{\partial x_k} \Big[ \frac{\delta_{ij}}{\abs{x}} - \frac{x_i x_j}{\abs{x}^3} \Big]
+ \\
+ & =
+ \delta_{ij} \frac{\partial}{\partial x_k} \frac{1}{\abs{x}}
+   - \frac{\partial}{\partial x_k}\Big( x_i x_j \frac{1}{\abs{x}^3} \Big)
+ \\
+ & =
+ -\delta_{ij} \frac{x_k}{\abs{x}^3} - \delta_{ik} x_j \frac{1}{\abs{x}^3}
+   - \delta_{jk} x_i \frac{1}{\abs{x}^3} + 3 x_i x_j \frac{x_k}{\abs{x}^5}
+ \\
+ & =
+ - \Big( \delta_{ij}x_k + \delta_{ik}x_j + \delta_{jk}x_i\Big) \frac{1}{\abs{x}^3}
+ + 3 \frac{x_i x_j x_k}{\abs{x}^5}.
+\end{align*}
+
+
+\paragraph{Third derivatives}
+
+The third derivative is a fourth-order object
+\begin{align*}
+ (\nabla^3 P)_{ijkl}
+ & =
+ \frac{\partial}{\partial x_l}
+ \bigg[ - \Big( \delta_{ij}x_k + \delta_{ik}x_j + \delta_{jk}x_i\Big) \frac{1}{\abs{x}^3}
+ + 3 \frac{x_i x_j x_k}{\abs{x}^5} \bigg]
+ \\
+ & =
+ (-\delta_{ij}\delta_{kl} - \delta_{ik}\delta_{jl} - \delta_{jk} \delta_{il}) \frac{1}{\abs{x}^3}
+ + (-\delta_{ij}x_k - \delta_{ik}x_j - \delta_{jk}x_i)(-3)\frac{x_l}{\abs{x}^5}
+ \\
+ & \qquad
+ + 3\Big(\frac{\delta_{il} x_j x_k}{\abs{x}^5} + \frac{\delta_{jl} x_i x_k}{\abs{x}^5}
+   + \frac{\delta_{kl} x_i x_j}{\abs{x}^5} + x_i x_j x_k (-5) \frac{x_l}{\abs{x}^7} \Big)
+ \\
+ & =
+ (\delta_{ij}\delta_{kl} + \delta_{ik}\delta_{jl} + \delta_{jk} \delta_{il}) \frac{-1}{\abs{x}^3}
+ \\
+ & \qquad
+ + \frac{3}{\abs{x}^5} \Big[ \delta_{ij}x_k x_l + \delta_{ik}x_jx_l + \delta_{jk}x_ix_l
+   + \delta_{il} x_j x_k + \delta_{jl} x_i x_k + \delta_{kl}x_i x_j \Big]
+ \\
+ & \qquad
+ + x_i x_j x_k x_l \frac{-15}{\abs{x}^7}.
+\end{align*}
+
+\subsubsection{Directional derivatives and contractions}
+
+As can be seen below, the final derivative formulas use the derivatives of $P$
+by contracting it with different vectors.  These contractions can be computed
+relatively cheaply, because the derivatives of $P$ consist only of diagonal
+and low-rank terms.
+
+\paragraph{First derivative}
+
+Let $w \in \R^n$ be an adjoint vector and $\delta^1 \in \R^n$ a direction.  Then
+(with Einstein summation)
+\begin{equation*}
+ \nabla P[w,\delta^1]
+ =
+ (\nabla P)_{ij} w_i \delta^1_j
+ =
+ \frac{\langle w,\delta^1\rangle}{\abs{x}} - \frac{\langle w,x\rangle \langle \delta^1,x\rangle}{\abs{x}^3}.
+\end{equation*}
+
+\paragraph{Second derivative}
+
+Let $w \in \R^n$ be an adjoint vector, and $\delta^1, \delta^2 \in \R^n$ directions.
+Then
+\begin{align*}
+ \nabla^2P[w,\delta^1,\delta^2]
+ & =
+ (\nabla^2 P)_{ijk} w_i \delta^1_j \delta^2_k
+ \\
+ & =
+ \big(\langle w, \delta^1 \rangle \langle x,\delta^2 \rangle
+    + \langle w, \delta^2 \rangle \langle x,\delta^1 \rangle
+    + \langle \delta^1, \delta^2\rangle \langle w,x \rangle \big)\frac{-1}{\abs{x}}
+ + 3\langle w,x \rangle \langle \delta^1,x \rangle \langle \delta^2,x \rangle \frac{3}{\abs{x}^5}.
+\end{align*}
+
+
+TODO: WIRD DIE FOLGENDE GEMISCHTE VARIANTE EVENTUELL GEBRAUCHT?
+\begin{align*}
+ (w \nabla^2 P)_{jk}
+ & =
+ \sum_{i=1}^n w_i (\nabla^2 P)_{ijk}
+ \\
+ & =
+ w_i\bigg[- \Big( \delta_{ij}x_k + \delta_{ik}x_j + \delta_{jk}x_i\Big) \frac{1}{\abs{x}^3}
+ + 3 \frac{x_i x_j x_k}{\abs{x}^5} \bigg]
+ \\
+ & =
+ \big( - w_j x_k - w_kx_j - \langle w,x\rangle \delta_{jk}\big)\frac{1}{\abs{x}}
+ + 3\langle w,x \rangle \frac{x_j x_k}{\abs{x}^5}.
+\end{align*}
+
+\paragraph{Third derivative}
+
+Let $w \in \R^n$ be an adjoint vector, and $\delta^1, \delta^2, \delta^3 \in \R^n$ directions.
+Then
+\begin{align*}
+ \nabla^3P[w,\delta^1,\delta^2,\delta^3]
+ & =
+ (\nabla^3 P)_{ijkl} w_i \delta^1_j \delta^2_k \delta^3_l
+ \\
+ & =
+ \big(\langle w, \delta^1 \rangle \langle \delta^2,\delta^3 \rangle
+    + \langle w, \delta^2 \rangle \langle \delta^1,\delta^3 \rangle
+    + \langle w, \delta^3 \rangle \langle \delta^1,\delta^2 \rangle \big)
+    \frac{-1}{\abs{x}^3}
+ \\
+ & \qquad
+ \Big( \langle w,\delta^1 \rangle \langle x, \delta^2 \rangle \langle x, \delta^3 \rangle
+  + \langle w,\delta^2 \rangle \langle w, \delta^2 \rangle \langle w, \delta^3 \rangle
+  + \langle \delta^1,\delta^2 \rangle \langle x, w \rangle \langle x, \delta^3 \rangle
+  \\
+ & \qquad
+  + \langle w,\delta^3 \rangle \langle x, \delta^2 \rangle \langle x, \delta^1 \rangle
+  + \langle \delta^1,\delta^3 \rangle \langle w, x \rangle \langle x, \delta^2 \rangle
+  + \langle \delta^2,\delta^3 \rangle \langle w, x \rangle \langle x, \delta^1 \rangle \Big)
+ \frac{3}{\abs{x}^5}
+ \\
+  & \qquad
+ \langle w,x \rangle \langle \delta^1,x \rangle \langle \delta^2,x \rangle \langle \delta^3,x \rangle
+ \frac{-15}{\abs{x}^7}.
+\end{align*}
+
+Actually we know that at least some of the directions are orthogonal, but this is not
+currently used by the code.
+
+TODO: WIRD DIE FOLGENDE GEMISCHTE VARIANTE EVENTUELL GEBRAUCHT?
+The contraction with a vector $w \in \R^n$ is
+\begin{align*}
+ w_i (\nabla^3 P)_{ijkl}
+ & =
+ \big( w_j \delta_{kl} + w_k \delta_{jl} + w_l \delta_{jk} \big) \frac{-1}{\abs{x}^3}
+ \\
+ & \quad
+ + \frac{3}{\abs{x}^5}
+ \Big[ w_j x_k x_l + w_k x_j x_l + \langle w,x \rangle x_l \delta_{jk} + w_l x_j x_k
+    + \langle w,x \rangle \delta_{jl} x_k + \langle w,x \rangle \delta_{kl} x_j \Big]
+ \\
+ & \quad
+ - \frac{15}{\abs{x}^7} \langle w,x \rangle x_j x_k x_l.
+\end{align*}
+
+
+
+\subsection{Derivatives of the interpolation value}
+
+We now compute the derivatives of
+\begin{equation*}
+ I(v_1,\dots,v_m; \xi)
+ =
+ P(\omega(\xi))
+ =
+ P\Big(\sum_{i=1}^m v_i \varphi_i(\xi)\Big)
+\end{equation*}
+with respect to the coefficients $v_j$, for fixed $\xi$.
+
+\subsubsection{Partial derivatives}
+
+We begin with partial derivatives.  Interpret the $k$-th coefficient $v_k$
+as a vector in $\R^n$.
+
+Then the partial derivative is a third-order object: We compute the first derivative
+of the $\alpha$-th component of the vector $P(\omega) \in S^{n-1} \subset \R^n$
+with respect to the $l$-th component of the $k$-th data point $v_k$
+\begin{align*}
+ \frac{\partial}{\partial v_k^l} \big( P (v) \big)_\alpha
+ & =
+ \frac{\partial P(v)_\alpha}{\partial v^m} \cdot \frac{\partial v^m}{\partial v_k^l}
+ \\
+ & =
+ (\nabla P(v))_{\alpha m} \cdot \frac{\partial v^m}{\partial v_k^l}
+ \\
+ & =
+ (\nabla P(v))_{\alpha m} \cdot \delta_{ml} \varphi_k
+ =
+ (\nabla P(v))_{\alpha l} \varphi_k.
+\end{align*}
+
+The code numbers all coefficient components $v_{1,1},\dots,v_{m,n}$ with a single
+flat index. Therefore, as a data structure the derivative is a matrix
+with $n$ rows and $mn$ columns.
+
+\bigskip
+
+The second derivative is
+\begin{equation*}
+ \frac{\partial^2}{\partial v_j \partial v_k} I(v_1,\dots,v_m;\xi)
+ =
+ \frac{\partial}{\partial v_k} \nabla P(v) \varphi_j (\xi)
+ =
+ \varphi_k \nabla^2 P(v) \varphi_j.
+\end{equation*}
+In index notation, this is an object with five indices
+\begin{equation*}
+ \frac{\partial}{\partial v_\gamma^\delta} \frac{\partial}{\partial v_k^l} \big(P(v)\big)_\alpha
+ =
+ \frac{\partial}{\partial v_\gamma^\delta} \Big[ (\nabla P)_{\alpha l} \varphi_k \Big]
+ =
+ (\nabla^2 P)_{\alpha l \delta} \varphi_k \varphi_\gamma.
+\end{equation*}
+
+
+
+\subsection{Derivatives of the derivative with respect to $\xi$}
+
+\begin{equation*}
+ \nabla I(v;\xi)
+ =
+ \frac{\partial}{\partial \xi} I(v;\xi)
+ =
+ \nabla P(v) \sum_{i=1}^m v_i \nabla \varphi_i
+ =
+ \nabla P(v) \nabla v.
+\end{equation*}
+This is a matrix with entries
+\begin{align*}
+ (\nabla I)_{\alpha \beta}
+ & =
+ \frac{\partial \big(P(v)\big)_\alpha}{\partial \xi_\beta}
+ \\
+ & =
+ \frac{\partial P(v)_\alpha}{\partial v^m} \cdot \frac{\partial v^m}{\partial \xi_\beta}
+ \\
+ & =
+ \frac{\partial P(v)_\alpha}{\partial v^m} \cdot \frac{\partial \sum_i v_i^m \varphi_i(\xi)}{\partial \xi_\beta}
+ \\
+ & =
+ \frac{\partial P(v)_\alpha}{\partial v^m} \cdot \sum_i v_i^m \frac{\partial \varphi_i(\xi)}{\partial \xi_\beta}
+\end{align*}
+
+
+
+The first derivative of this with respect to a coefficients is
+\begin{equation*}
+ \frac{\partial^2}{\partial \xi \partial v_j} I(v_1,\dots,v_m;\xi)
+ =
+ \frac{\partial}{\partial v_j} \Big[ \nabla P(v) \sum_{i=1}^m v_i \nabla \varphi_i \Big]
+ =
+ \varphi_j \nabla^2P(v) \sum_{i=1}^m v_i \nabla \varphi_i + \nabla P(v) \nabla \varphi_j
+\end{equation*}
+
+In coefficients:
+\begin{align*}
+ \frac{\partial^2 I}{\partial \xi_\beta \partial_i^j}
+ & =
+ \frac{\partial}{\partial v_i^j} \Big[ (\nabla P(v))_{\alpha m} \cdot (\nabla v^m)_\beta \Big]
+ \\
+ & =
+ \Big( \frac{\partial}{\partial v_i^j} (\nabla P(v))_{\alpha m} \Big) \cdot (\nabla v^m)_\beta
+ +
+ (\nabla P(v))_{\alpha m} \cdot \Big( \frac{\partial}{\partial v_i^j} (\nabla v^m)_\beta \Big)
+ \\
+ & =
+ \big( \nabla^2 P(v) \big)_{\alpha m j} \cdot \varphi_i \cdot ( \nabla v^m)_\beta
+ +
+ (\nabla P(v))_{\alpha m} \cdot \delta_{jm} (\nabla \varphi_i)\beta
+ \\
+ & =
+ \big( \nabla^2 P(v) \big)_{\alpha m j} \cdot \varphi_i \cdot ( \nabla v^m)_\beta
+ +
+ (\nabla P(v))_{\alpha j} \cdot (\nabla \varphi_i)\beta.
+\end{align*}
+
+
+The second derivative is
+\begin{align*}
+ \frac{\partial^3}{\partial \xi \partial v_j \partial v_k} I(v_1,\dots,v_m;\xi)
+ & =
+ \frac{\partial}{\partial v_k} \Big[ \varphi_j \nabla^2P(v) \sum_{i=1}^m v_i \nabla \varphi_i + \nabla P(v) \nabla \varphi_j \Big] \\
+ & =
+ \varphi_j \nabla^3 P(v) \varphi_k \nabla v + \varphi_k \nabla^2 P(v) \nabla \varphi_k + \nabla^2P(v) \varphi_k \nabla \varphi_j.
+\end{align*}
+
+In coefficients:
+
+\begin{align*}
+ \frac{\partial^3 I_\alpha}{\partial \xi_\beta \partial v_i^j \partial v_k^l}
+ & =
+ \frac{\partial}{\partial v_k^l} \Big[ \big(\nabla^2 P(v)\big)_{amj} \varphi_i \Big] \cdot (\nabla v^m)_\beta
+ \\
+ & \qquad
+ + \big(\nabla^2 P(v)\big)_{\alpha m j} \varphi_i \frac{\partial}{\partial v_k^l} (\nabla v^m)_\beta
+ \\
+ & \qquad
+ + \frac{\partial}{\partial v_k^l} \Big[ \big(\nabla P(v)\big)_{\alpha m} \Big]
+  \cdot \delta_{jm} \cdot (\nabla \varphi_i)_\beta
+ \\
+ & =
+ \big(\nabla^3 P(v) \big)_{\alpha m j l} \varphi_i \varphi_k \cdot \frac{\partial v^m}{\partial \xi_\beta}
+ \\
+ & \qquad
+ + \big(\nabla^2 P(v) \big)_{\alpha m j} \varphi_i \cdot \delta_{ml} \frac{\partial \varphi_k}{\partial \xi_\beta}
+ + \big(\nabla^2 P(v) \big)_{\alpha m l} \varphi_k \cdot \delta_{mj} \frac{\partial \varphi_i}{\partial \xi_\beta}
+ \\
+ & =
+ \big(\nabla^3 P(v) \big)_{\alpha m j l} \varphi_i \varphi_k \cdot \frac{\partial v^m}{\partial \xi_\beta}
+ + \big(\nabla^2 P(v) \big)_{\alpha j l}
+ \Big [\varphi_i \cdot \frac{\partial \varphi_k}{\partial \xi_\beta}
+ + \varphi_k \cdot \frac{\partial \varphi_i}{\partial \xi_\beta} \Big]
+\end{align*}
+
+
+
+
+\printbibliography
+
+\end{document}