/*******************************************************************************
 *
 * ALBERTA:  an Adaptive multi Level finite element toolbox using
 *           Bisectioning refinement and Error control by Residual
 *           Techniques for scientific Applications
 *
 * www.alberta-fem.de
 *
 * file:     assemble_dowb.c
 *
 * description:  fe-space independent assembly routines for
 *               REAL_D x REAL_D matrices
 *
 *******************************************************************************
 *
 * This file's authors: Claus-Justus Heine
 *                      Abteilung fuer Angewandte Mathematik
 *                      Universitaet Freiburg
 *                      Hermann-Herder-Strasse 10
 *                      79104 Freiburg, Germany
 *
 * (c) by C.-J. Heine (2004-2009)
 *
 ******************************************************************************/

#include "alberta_intern.h"
#include "alberta.h"
#include "assemble.h"

#line 31 "../../../../alberta/src/Common/assemble_fcts.c.in"

#include "M_M_SCM_SCM_assemble_fcts.h"

/* Throughout this file:
 *
 * row_phi == row fe-space, meaning the test space
 * col_phi == column fe-space, meaning the ansatz space
 *
 * First order terms: e.g.:
 *
 * quad_01 == Lb0 assembly, derivative is applied to col_phi, i.e. to the
 * space of ansatz functions.
 *
 * quad_10 == Lb1 assembly, derivative is applied to row_phi, i.e. to the
 * space of test functions.
 */

/* V -- vector valued basis functions C -- Cartesian product space
 * (trivially vector valued) S -- Scalar space
 *
 * VEC_PFX is SS for C-C and S-S combinations, otherwise VV, VC, CV,
 * VS or SV.
 *
 * The boolean values have the following meaning:
 *
 * row_V true: vector valued row basis functions (test space)
 * col_V true: vector valued column basis functions (ansatz space)
 * row_C true: Cartesian product row basis functions (pseudo vector valued)
 * col_C true: Cartesian product column basis functions
 *
 * The VS_... and VC_... variants differ if the application supplied
 * integral kernel of the operator (e.g. LALt etc.) is a REAL_D
 * block-matrix, for VS_... this means to contract the components of
 * the kernel with the components of the vector valued basis
 * functions, for VC_... the resulting matrix is again a REAL_D block
 * matrix.
 */

/* Short-hands for the basis function type flags; they simply alias
 * the HAVE_{ROW,COL}_FCTS_{V,C}_TYPE macros, which are not defined in
 * this part of the file.
 */
#define row_V HAVE_ROW_FCTS_V_TYPE
#define col_V HAVE_COL_FCTS_V_TYPE
#define row_C HAVE_ROW_FCTS_C_TYPE
#define col_C HAVE_COL_FCTS_C_TYPE

/* Type selection for this instantiation of the template. Matching the
 * code below: the destination and the LALt kernel are handled as
 * REAL_DD blocks ("M"), while Lb0/Lb1 and c are fetched through the
 * ".real" callbacks ("SCM").
 */
#define HAVE_M_DST_TYPE 1
#define HAVE_M_LALT_TYPE 1
#define HAVE_SCM_LB_TYPE 1
#define HAVE_SCM_C_TYPE 1

/* INLINE_NAME(base) is NAME(base) with "_inline_" pasted in front; it
 * names the static inline kernels which are wrapped by the functions
 * emitted through EMIT_BLOCK_VARIANT_PRE().
 */
#undef INLINE_NAME
#define INLINE_NAME(base) _AI_CONCAT(_inline_, NAME(base))

/* <<< ... with pre-computed integrals */

/* <<< condensation routines for vector valued basis functions */

/* Zero all REAL_DD blocks of the matrix MAT. The number of rows and
 * columns which are cleared is taken from fill_info->el_mat.
 */
static inline void
M_clear_tmp_mat(REAL_DD  **mat, const FILL_INFO *fill_info)
{
  int ir = 0;

  while (ir < fill_info->el_mat->n_row) {
    int jc = 0;

    while (jc < fill_info->el_mat->n_col) {
      MSET_DOW(0.0, mat[ir][jc]);
      jc++;
    }
    ir++;
  }
}

#if !HAVE_DM_DST_TYPE
/* Zero all REAL_D (diagonal block) entries of the matrix MAT. The
 * number of rows and columns which are cleared is taken from
 * fill_info->el_mat.
 */
static inline void
DM_clear_tmp_mat(REAL_D **mat, const FILL_INFO *fill_info)
{
  int ir = 0;

  while (ir < fill_info->el_mat->n_row) {
    int jc = 0;

    while (jc < fill_info->el_mat->n_col) {
      DMSET_DOW(0.0, mat[ir][jc]);
      jc++;
    }
    ir++;
  }
}
#endif

/* <<< VV_condense_el_mat */

/* Row- and column fe-space have vector valued basis functions.
 *
 * Contracts each REAL_DD block of the scratch matrix
 * fill_info->scl_el_mat with the directions PHI_D(., ., NULL) of the
 * row- and column basis functions (through MGRAMSCP_DOW()) and adds
 * the resulting number to the REAL element matrix fill_info->el_mat.
 *
 * symmetric: only the blocks (i,j) with j >= i are read; an
 *            off-diagonal value is added to mat[i][j] and to
 *            mat[j][i]. The row basis functions are used for the
 *            columns as well.
 * antisym:   only looked at if symmetric is false. Only the blocks
 *            with j > i are read; the value is added to mat[i][j] and
 *            subtracted from mat[j][i], the diagonal is not touched.
 *            Again the row basis functions are used for the columns.
 * otherwise: all n_row x n_col blocks are condensed, the column basis
 *            functions are taken from fill_info->op_info.col_fe_space.
 */
static inline void
VV_M_condense_el_mat(const FILL_INFO *fill_info,
				 bool symmetric, bool antisym)
{
  REAL_DD **tmp_mat = (REAL_DD **)fill_info->scl_el_mat;
  REAL       **mat     = (REAL **)fill_info->el_mat->data.real;
  int        i, j, n_row, n_col;
  const BAS_FCTS *row_phi, *col_phi;
  REAL val;

  row_phi = fill_info->op_info.row_fe_space->bas_fcts;
  n_row = row_phi->n_bas_fcts;

  if (symmetric) {
    col_phi = row_phi;
    n_col = n_row;
    for (i = 0; i < n_row; i++) {
      const REAL *row_d = PHI_D(row_phi, i, NULL);

      mat[i][i] +=
        MGRAMSCP_DOW((const REAL_D *) tmp_mat[i][i], row_d, row_d);
      for (j = i+1; j < n_col; j++) {
        const REAL *col_d = PHI_D(col_phi, j, NULL);

        val =
          MGRAMSCP_DOW((const REAL_D *) tmp_mat[i][j], row_d, col_d);
        mat[i][j] += val;
        mat[j][i] += val;
      }
    }
  } else if (antisym) {
    col_phi = row_phi;
    n_col = n_row;
    for (i = 0; i < n_row; i++) {
      const REAL *row_d = PHI_D(row_phi, i, NULL);

      for (j = i+1; j < n_col; j++) {
        const REAL *col_d = PHI_D(col_phi, j, NULL);

        val =
          MGRAMSCP_DOW((const REAL_D *) tmp_mat[i][j], row_d, col_d);
        mat[i][j] += val;
        mat[j][i] -= val;
      }
    }
  } else {
    col_phi = fill_info->op_info.col_fe_space->bas_fcts;
    n_col = col_phi->n_bas_fcts;
    for (i = 0; i < n_row; i++) {
      /* The row direction only depends on i: fetch it once per row,
       * like the symmetric and anti-symmetric branches do.
       */
      const REAL *row_d = PHI_D(row_phi, i, NULL);

      for (j = 0; j < n_col; j++) {
        const REAL *col_d = PHI_D(col_phi, j, NULL);

        mat[i][j] +=
          MGRAMSCP_DOW((const REAL_D *) tmp_mat[i][j], row_d, col_d);
      }
    }
  }
}

/* >>> */

/* <<< VC_condense_el_mat */

/* Vector-valued row fe-space, column fe-space is a Cartesian product
 * space.
 *
 * Each REAL_DD block of the scratch matrix fill_info->scl_el_mat is
 * combined with the direction PHI_D(row_phi, i, NULL) of the row basis
 * function by means of MTV_DOW(); the destination is the REAL_D entry
 * mat[i][j] of fill_info->el_mat.
 */
static inline void
VC_M_condense_el_mat(const FILL_INFO *fill_info)
{
  REAL_DD **tmp_mat = (REAL_DD **)fill_info->scl_el_mat;
  REAL_D     **mat     = (REAL_D **)fill_info->el_mat->data.real;
  int        i, j, n_row, n_col;
  const BAS_FCTS *row_phi, *col_phi;

  row_phi = fill_info->op_info.row_fe_space->bas_fcts;
  n_row = row_phi->n_bas_fcts;
  col_phi = fill_info->op_info.col_fe_space->bas_fcts;
  n_col = col_phi->n_bas_fcts;

  for (i = 0; i < n_row; i++) {
    /* The row direction does not depend on j: fetch it once per row
     * instead of once per matrix entry.
     */
    const REAL *row_d = PHI_D(row_phi, i, NULL);

    for (j = 0; j < n_col; j++) {
      MTV_DOW((const REAL_D *) tmp_mat[i][j], row_d, mat[i][j]);
    }
  }
}

/* >>> */

/* <<< CV_condense_el_mat */

/* Vector-valued column fe-space, row fe-space is a Cartesian product
 * space.
 *
 * Each REAL_DD block of the scratch matrix fill_info->scl_el_mat is
 * combined with the direction of the column basis function by means
 * of MV_DOW(); the destination is the REAL_D entry of
 * fill_info->el_mat with the same indices.
 */
static inline void
CV_M_condense_el_mat(const FILL_INFO *fill_info)
{
  const BAS_FCTS *ansatz = fill_info->op_info.col_fe_space->bas_fcts;
  REAL_DD **blocks = (REAL_DD **)fill_info->scl_el_mat;
  REAL_D  **dst    = (REAL_D **)fill_info->el_mat->data.real;
  int n_test   = fill_info->op_info.row_fe_space->bas_fcts->n_bas_fcts;
  int n_ansatz = ansatz->n_bas_fcts;
  int ir, jc;

  for (ir = 0; ir < n_test; ir++) {
    for (jc = 0; jc < n_ansatz; jc++) {
      const REAL *ansatz_dir = PHI_D(ansatz, jc, NULL);

      MV_DOW((const REAL_D *) blocks[ir][jc], ansatz_dir, dst[ir][jc]);
    }
  }
}

/* >>> */

/* <<< SV_condense_el_mat */

/* Vector-valued column fe-space, row fe-space is a scalar space,
 * e.g. in the case of a divergence constraint.
 *
 * This only makes sense for operators with a REAL_D coefficient
 * matrix. Probably this will really only happen for the divergence
 * constraint of a Stokes-problem.
 */
static inline void
SV_DM_condense_el_mat(const FILL_INFO *fill_info)
{
  REAL_D **tmp_mat = (REAL_D **)fill_info->scl_el_mat;
  REAL   **mat     = (REAL **)fill_info->el_mat->data.real;
  int    i, j, n_row, n_col;
  const BAS_FCTS *row_phi, *col_phi;

  row_phi = fill_info->op_info.row_fe_space->bas_fcts;
  n_row = row_phi->n_bas_fcts;
  col_phi = fill_info->op_info.col_fe_space->bas_fcts;
  n_col = col_phi->n_bas_fcts;

  for (i = 0; i < n_row; i++) {
    for (j = 0; j < n_col; j++) {
      const REAL *col_d = PHI_D(col_phi, j, NULL);

      mat[i][j] += SCP_DOW(tmp_mat[i][j], col_d);
    }
  }
}

# if HAVE_SCM_DST_TYPE
/* Scalar row fe-space, vector-valued column fe-space, scalar scratch
 * matrix: each REAL entry of fill_info->scl_el_mat is multiplied by
 * SUM_DOW() of the direction of the column basis function and added
 * to the REAL element matrix fill_info->el_mat.
 */
static inline void
SV_SCM_condense_el_mat(const FILL_INFO *fill_info)
{
  const BAS_FCTS *ansatz = fill_info->op_info.col_fe_space->bas_fcts;
  REAL **scl = (REAL **)fill_info->scl_el_mat;
  REAL **dst = (REAL **)fill_info->el_mat->data.real;
  int n_test   = fill_info->op_info.row_fe_space->bas_fcts->n_bas_fcts;
  int n_ansatz = ansatz->n_bas_fcts;
  int ir, jc;

  for (ir = 0; ir < n_test; ir++) {
    for (jc = 0; jc < n_ansatz; jc++) {
      const REAL *ansatz_dir = PHI_D(ansatz, jc, NULL);

      dst[ir][jc] += scl[ir][jc] * SUM_DOW(ansatz_dir);
    }
  }
}
# endif

/* >>> */

/* <<< VS_condense_el_mat */

/* Vector-valued row fe-space, column fe-space is a scalar space,
 * e.g. in the case of a divergence constraint.
 *
 * This only makes sense for operators with a REAL_D coefficient
 * matrix. Probably this will really only happen for the divergence
 * constraint of a Stokes-problem.
 *
 * The REAL_D entries of the scratch matrix fill_info->scl_el_mat are
 * contracted (SCP_DOW()) with the direction of the row basis function
 * and added to the REAL element matrix fill_info->el_mat.
 */
static inline void
VS_DM_condense_el_mat(const FILL_INFO *fill_info)
{
  REAL_D **tmp_mat = (REAL_D **)fill_info->scl_el_mat;
  REAL   **mat     = (REAL **)fill_info->el_mat->data.real;
  int    i, j, n_row, n_col;
  const BAS_FCTS *row_phi, *col_phi;

  row_phi = fill_info->op_info.row_fe_space->bas_fcts;
  n_row = row_phi->n_bas_fcts;
  col_phi = fill_info->op_info.col_fe_space->bas_fcts;
  n_col = col_phi->n_bas_fcts;

  for (i = 0; i < n_row; i++) {
    /* The row direction does not depend on j: fetch it once per row
     * instead of once per matrix entry.
     */
    const REAL *row_d = PHI_D(row_phi, i, NULL);

    for (j = 0; j < n_col; j++) {
      mat[i][j] += SCP_DOW(tmp_mat[i][j], row_d);
    }
  }
}

#if HAVE_SCM_DST_TYPE
/* Vector-valued row fe-space, scalar column fe-space, scalar scratch
 * matrix: each REAL entry of fill_info->scl_el_mat is multiplied by
 * SUM_DOW() of the direction of the row basis function and added to
 * the REAL element matrix fill_info->el_mat.
 */
static inline void
VS_SCM_condense_el_mat(const FILL_INFO *fill_info)
{
  REAL **tmp_mat = (REAL **)fill_info->scl_el_mat;
  REAL **mat     = (REAL **)fill_info->el_mat->data.real;
  int  i, j, n_row, n_col;
  const BAS_FCTS *row_phi, *col_phi;

  row_phi = fill_info->op_info.row_fe_space->bas_fcts;
  n_row = row_phi->n_bas_fcts;
  col_phi = fill_info->op_info.col_fe_space->bas_fcts;
  n_col = col_phi->n_bas_fcts;

  for (i = 0; i < n_row; i++) {
    /* Neither the row direction nor the sum of its components depend
     * on j: compute both once per row instead of once per entry.
     */
    const REAL *row_d = PHI_D(row_phi, i, NULL);
    const REAL row_sum = SUM_DOW(row_d);

    for (j = 0; j < n_col; j++) {
      mat[i][j] += tmp_mat[i][j] * row_sum;
    }
  }
}
# endif

/* >>> */

#if EMIT_SS_VERSIONS || EMIT_CC_VERSIONS
/* The "standard" case. */
# define EMIT_BLOCK_VARIANT_PRE(name, sym, antisym)			\
  FLATTEN_ATTR								\
  void									\
  NAME(name)(const EL_INFO *el_info, const FILL_INFO *fill_info)	\
  {									\
    REAL_DD **mat = (REAL_DD **)fill_info->el_mat->data.real_dd;	\
									\
    INLINE_NAME(name)(el_info, fill_info, mat);				\
  }									\
  struct _AI_semicolon_dummy
#elif EMIT_VV_VERSIONS
# define EMIT_BLOCK_VARIANT_PRE(name, sym, antisym)			\
  FLATTEN_ATTR								\
  void									\
  NAME(name)(const EL_INFO *el_info, const FILL_INFO *fill_info)	\
  {									\
    REAL_DD **tmp_mat = (REAL_DD **)fill_info->scl_el_mat;	\
  									\
    M_clear_tmp_mat(tmp_mat, fill_info);			\
    									\
    INLINE_NAME(name)(el_info, fill_info, tmp_mat);			\
    VV_M_condense_el_mat(fill_info, sym, antisym);		\
  }									\
  struct _AI_semicolon_dummy
#else
# define EMIT_BLOCK_VARIANT_PRE(name, sym, antisym)			\
  FLATTEN_ATTR								\
  void									\
  NAME(name)(const EL_INFO *el_info, const FILL_INFO *fill_info)	\
  {									\
    REAL_DD **tmp_mat = (REAL_DD **)fill_info->scl_el_mat;	\
  									\
    M_clear_tmp_mat(tmp_mat, fill_info);			\
    									\
    INLINE_NAME(name)(el_info, fill_info, tmp_mat);			\
    _AI_CONCAT(VEC_PFX, _M_condense_el_mat)(fill_info);	\
  }									\
  struct _AI_semicolon_dummy
#endif

/* >>> */

/* <<< pre_2() */

/* Second order term, using the pre-computed integrals of the
 * q11-cache.
 *
 * The kernel is fetched once through op_info.LALt.real_dd() (quad[2],
 * quadrature point index 0). For each pair (i,j) the cache provides a
 * list of weights together with two index lists (k, l) which select
 * the kernel block to be scaled by the weight and added to mat[i][j].
 *
 * If the row- and column basis functions are of the same kind
 * (row_V == col_V) and op_info.LALt_symmetric is set, then only the
 * entries with j >= i are computed; the off-diagonal block is added
 * to mat[i][j] and, transposed, to mat[j][i].
 */
static inline void
INLINE_NAME(pre_2)(const EL_INFO *el_info, const FILL_INFO *fill_info,
		   REAL_DD **mat)
{
  const REAL_BDD *kernel;
  const int  *const*n_terms;
  const int  *lam_k, *lam_l;
  const REAL *weights;
  int        i, j, t, n_psi, n_phi;
  REAL_DD    block;

  kernel = fill_info->op_info.LALt.real_dd(
    el_info,
    fill_info->op_info.quad[2], 0,
    fill_info->op_info.user_data);
  n_terms = fill_info->q11_cache->cache->n_entries;
  n_psi   = fill_info->q11_cache->cache->n_psi;
  n_phi   = fill_info->q11_cache->cache->n_phi;

  if (!(row_V == col_V && fill_info->op_info.LALt_symmetric)) {
    /* kernel not symmetric or row_phi != col_phi: visit all entries */
    for (i = 0; i < n_psi; i++) {
      for (j = 0; j < n_phi; j++) {
        lam_k   = fill_info->q11_cache->cache->k[i][j];
        lam_l   = fill_info->q11_cache->cache->l[i][j];
        weights = fill_info->q11_cache->cache->values[i][j];
        for (t = 0; t < n_terms[i][j]; t++) {
          MMAXPY_DOW(
            weights[t],
            (const REAL_D *) kernel[lam_k[t]][lam_l[t]],
            mat[i][j]);
        }
      }
    }
    return;
  }

  /* symmetric case: diagonal plus upper triangle, mirrored */
  for (i = 0; i < n_psi; i++) {
    lam_k   = fill_info->q11_cache->cache->k[i][i];
    lam_l   = fill_info->q11_cache->cache->l[i][i];
    weights = fill_info->q11_cache->cache->values[i][i];
    for (t = 0; t < n_terms[i][i]; t++) {
      MMAXPY_DOW(
        weights[t],
        (const REAL_D *) kernel[lam_k[t]][lam_l[t]],
        mat[i][i]);
    }

    for (j = i+1; j < n_phi; j++) {
      lam_k   = fill_info->q11_cache->cache->k[i][j];
      lam_l   = fill_info->q11_cache->cache->l[i][j];
      weights = fill_info->q11_cache->cache->values[i][j];

      /* accumulate the block once, then add it twice */
      MSET_DOW(0.0, block);
      for (t = 0; t < n_terms[i][j]; t++) {
        MAXPY_DOW(
          weights[t],
          (const REAL_D *) kernel[lam_k[t]][lam_l[t]],
          block);
      }
      MMAXPY_DOW(
        1.0, (const REAL_D *) block, mat[i][j]);
      MMAXTPY_DOW(
        1.0, (const REAL_D *) block, mat[j][i]);
    }
  }
}

EMIT_BLOCK_VARIANT_PRE(pre_2, fill_info->op_info.LALt_symmetric, false);

/* >>> */

/* <<< pre_01() */

/* First order term Lb0 (derivative applied to the ansatz functions),
 * using the pre-computed integrals of the q01-cache.
 *
 * The coefficient vector is fetched once through op_info.Lb0.real()
 * (quad[1], quadrature point index 0). For each pair (i,j) the cache
 * provides weights and an index list l; weight * Lb0[l] is added to
 * mat[i][j] by means of MSCMAXPY_DOW().
 */
static inline void
INLINE_NAME(pre_01)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  const REAL *coeff = fill_info->op_info.Lb0.real(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);
  const int  *const*n_terms = fill_info->q01_cache->cache->n_entries;
  const int  n_psi = fill_info->q01_cache->cache->n_psi;
  const int  n_phi = fill_info->q01_cache->cache->n_phi;
  int        i, j, t;

  for (i = 0; i < n_psi; i++) {
    for (j = 0; j < n_phi; j++) {
      const int  *lam     = fill_info->q01_cache->cache->l[i][j];
      const REAL *weights = fill_info->q01_cache->cache->values[i][j];

      for (t = 0; t < n_terms[i][j]; t++) {
        MSCMAXPY_DOW(
          weights[t], /**/ coeff[lam[t]], mat[i][j]);
      }
    }
  }
}

EMIT_BLOCK_VARIANT_PRE(pre_01, false, false);

/* >>> */

/* <<< pre_10() */

/* First order term Lb1 (derivative applied to the test functions),
 * using the pre-computed integrals of the q10-cache.
 *
 * The coefficient vector is fetched once through op_info.Lb1.real()
 * (quad[1], quadrature point index 0). For each pair (i,j) the cache
 * provides weights and an index list k; weight * Lb1[k] is added to
 * mat[i][j] by means of MSCMAXPY_DOW().
 */
static inline void
INLINE_NAME(pre_10)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  const REAL *coeff = fill_info->op_info.Lb1.real(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);
  const int  *const*n_terms = fill_info->q10_cache->cache->n_entries;
  const int  n_psi = fill_info->q10_cache->cache->n_psi;
  const int  n_phi = fill_info->q10_cache->cache->n_phi;
  int        i, j, t;

  for (i = 0; i < n_psi; i++) {
    for (j = 0; j < n_phi; j++) {
      const int  *lam     = fill_info->q10_cache->cache->k[i][j];
      const REAL *weights = fill_info->q10_cache->cache->values[i][j];

      for (t = 0; t < n_terms[i][j]; t++) {
        MSCMAXPY_DOW(
          weights[t], /**/ coeff[lam[t]], mat[i][j]);
      }
    }
  }
}

EMIT_BLOCK_VARIANT_PRE(pre_10, false, false);

/* >>> */

/* <<< pre_11() */

/* Both first order terms (Lb0 and Lb1) in a single sweep, using the
 * pre-computed integrals of the q01- and the q10-cache.
 *
 * Both coefficient vectors are fetched once (quad[1], quadrature
 * point index 0). The loop bounds are taken from the q01-cache. For
 * each pair (i,j) first the Lb0 contribution (index list l of the
 * q01-cache) and then the Lb1 contribution (index list k of the
 * q10-cache) is added to mat[i][j].
 */
static inline void
INLINE_NAME(pre_11)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  const REAL *coeff0 = fill_info->op_info.Lb0.real(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);
  const REAL *coeff1 = fill_info->op_info.Lb1.real(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);
  const int  *const*n_terms01 = fill_info->q01_cache->cache->n_entries;
  const int  *const*n_terms10 = fill_info->q10_cache->cache->n_entries;
  const int  n_psi = fill_info->q01_cache->cache->n_psi;
  const int  n_phi = fill_info->q01_cache->cache->n_phi;
  const int  *lam;
  const REAL *weights;
  int        i, j, t;

  for (i = 0; i < n_psi; i++) {
    for (j = 0; j < n_phi; j++) {
      /* Lb0 part */
      lam     = fill_info->q01_cache->cache->l[i][j];
      weights = fill_info->q01_cache->cache->values[i][j];
      for (t = 0; t < n_terms01[i][j]; t++) {
        MSCMAXPY_DOW(
          weights[t], /**/ coeff0[lam[t]], mat[i][j]);
      }

      /* Lb1 part */
      lam     = fill_info->q10_cache->cache->k[i][j];
      weights = fill_info->q10_cache->cache->values[i][j];
      for (t = 0; t < n_terms10[i][j]; t++) {
        MSCMAXPY_DOW(
          weights[t], /**/ coeff1[lam[t]], mat[i][j]);
      }
    }
  }
}

EMIT_BLOCK_VARIANT_PRE(pre_11, false, fill_info->op_info.Lb0_Lb1_anti_symmetric);

/* >>> */

/* <<<  advective first order terms */

/* <<< adv_pre_01() */

/* Advective first order term with the Lb0 kernel, using the
 * pre-computed q010 integrals of the advection cache.
 *
 * The kernel is fetched once through op_info.Lb0.real_d() (quad[1],
 * quadrature point index 0). The routine then walks the chain of
 * advection caches; the chain of coefficient vectors is advanced in
 * lock-step. For each link
 *
 *   adv_krn[i][lambda] = sum_k coeff(i, k) * Lb0_adv[lambda][k]
 *
 * is computed, where coeff(i, k) is adv_vec[i] * adv_d[k] for a
 * coefficient vector with stride 1 and adv_vec_d[i][k] otherwise.
 * Afterwards values[m] * adv_krn[k][l[m]] is added to mat[i][j] for
 * all cache entries.
 */
FORCE_INLINE_ATTR
static inline void
INLINE_NAME(adv_pre_01)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  const REAL_D *Lb0_adv;
  const ADV_CACHE *adv_cache = fill_info->adv_cache;
  const EL_REAL_VEC_D *adv_coeffs;

  Lb0_adv = fill_info->op_info.Lb0.real_d(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);

  /* Fetch the coefficients of the advection field only if they are
   * not yet stored in fill_info; the const qualifier is cast away in
   * order to store them there.
   */
  if (fill_info->adv_coeffs == NULL) {
    ((FILL_INFO *)fill_info)->adv_coeffs =
      fill_info->op_info.advection_field(el_info, fill_info->op_info.user_data);
  }
  adv_coeffs = fill_info->adv_coeffs;

  CHAIN_DO(adv_cache, const ADV_CACHE) {
    const int  *const*const*n_entries, *l;
    const REAL *values;
    int n_row = adv_cache->q010->cache->n_eta;
    int n_col = adv_cache->q010->cache->n_psi;
    int depth = adv_cache->q010->cache->n_phi;
    int i, j, k, m;
    /* variable length array: one row per basis function of the
     * advection field
     */
    REAL adv_krn[depth][N_LAMBDA_MAX];

    n_entries = adv_cache->q010->cache->n_entries;

    if (adv_coeffs->stride == 1)  { /* DIM_OF_WORLD-valued basis functions */
      const REAL *adv_vec = adv_coeffs->vec;

      for (i = 0; i < depth; i++) {
	const REAL *adv_d = PHI_D(adv_cache->q010->phi, i, NULL);
	for (j = 0; j < N_LAMBDA_MAX; j++) {
	  SCMSET_DOW(0.0, adv_krn[i][j]);
	  for (k = 0; k < DIM_OF_WORLD; k++) {
	    SCMAXPY_DOW(
	      adv_vec[i] * adv_d[k], Lb0_adv[j][k], adv_krn[i][j]);
	  }
	}
      }
    } else { /* scalar basis functions, DIM_OF_WORLD-valued coefficients */
      const REAL_D *adv_vec_d = (const REAL_D *)adv_coeffs->vec;

      for (i = 0; i < depth; i++) {
	for (j = 0; j < N_LAMBDA_MAX; j++) {
	  SCMSET_DOW(0.0, adv_krn[i][j]);
	  for (k = 0; k < DIM_OF_WORLD; k++) {
	    SCMAXPY_DOW(
	      adv_vec_d[i][k], Lb0_adv[j][k], adv_krn[i][j]);
	  }
	}
      }
    }

    /* contract the cached integrals with the kernel computed above */
    for (i = 0; i < n_row; i++) {
      for (j = 0; j < n_col; j++) {
	for (k = 0; k < depth; k++) {
	  l      = adv_cache->q010->cache->l[i][j][k];
	  values = adv_cache->q010->cache->values[i][j][k];
	  for (m = 0; m < n_entries[i][j][k]; m++) {
	    MSCMAXPY_DOW(
	      values[m], /**/ adv_krn[k][l[m]], mat[i][j]);
	  }
	}
      }
    }

    /* step to the coefficient vector belonging to the next cache */
    adv_coeffs = CHAIN_NEXT(adv_coeffs, EL_REAL_VEC_D);

  } CHAIN_WHILE(adv_cache, const ADV_CACHE);  
}

EMIT_BLOCK_VARIANT_PRE(adv_pre_01, false, false);

/* >>> */

/* <<< adv_pre_10() */

/* Advective first order term with the Lb1 kernel, using the
 * pre-computed q100 integrals of the advection cache.
 *
 * The kernel is fetched once through op_info.Lb1.real_d() (quad[1],
 * quadrature point index 0). The routine then walks the chain of
 * advection caches; the chain of coefficient vectors is advanced in
 * lock-step. For each link
 *
 *   adv_krn[i][lambda] = sum_k coeff(i, k) * Lb1_adv[lambda][k]
 *
 * is computed, where coeff(i, k) is adv_vec[i] * adv_d[k] for a
 * coefficient vector with stride 1 and adv_vec_d[i][k] otherwise.
 * Afterwards values[m] * adv_krn[k][l[m]] is added to mat[i][j] for
 * all cache entries.
 */
FORCE_INLINE_ATTR
static inline void
INLINE_NAME(adv_pre_10)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  const REAL_D *Lb1_adv;
  const ADV_CACHE *adv_cache = fill_info->adv_cache;
  const EL_REAL_VEC_D *adv_coeffs;

  Lb1_adv = fill_info->op_info.Lb1.real_d(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);

  /* Fetch the coefficients of the advection field only if they are
   * not yet stored in fill_info; the const qualifier is cast away in
   * order to store them there.
   */
  if (fill_info->adv_coeffs == NULL) {
    ((FILL_INFO *)fill_info)->adv_coeffs =
      fill_info->op_info.advection_field(el_info, fill_info->op_info.user_data);
  }
  adv_coeffs = fill_info->adv_coeffs;

  CHAIN_DO(adv_cache, const ADV_CACHE) {
    const int  *const*const*n_entries, *l;
    const REAL *values;
    int n_row = adv_cache->q100->cache->n_eta;
    int n_col = adv_cache->q100->cache->n_psi;
    int depth = adv_cache->q100->cache->n_phi;
    int i, j, k, m;
    /* variable length array: one row per basis function of the
     * advection field
     */
    REAL adv_krn[depth][N_LAMBDA_MAX];

    n_entries = adv_cache->q100->cache->n_entries;

    if (adv_coeffs->stride == 1)  { /* DIM_OF_WORLD-valued basis functions */
      const REAL *adv_vec = adv_coeffs->vec;

      for (i = 0; i < depth; i++) {
	const REAL *adv_d = PHI_D(adv_cache->q100->phi, i, NULL);
	for (j = 0; j < N_LAMBDA_MAX; j++) {
	  SCMSET_DOW(0.0, adv_krn[i][j]);
	  for (k = 0; k < DIM_OF_WORLD; k++) {
	    SCMAXPY_DOW(
	      adv_vec[i] * adv_d[k], Lb1_adv[j][k], adv_krn[i][j]);
	  }
	}
      }
    } else { /* scalar basis functions, DIM_OF_WORLD-valued coefficients */
      const REAL_D *adv_vec_d = (const REAL_D *)adv_coeffs->vec;

      for (i = 0; i < depth; i++) {
	for (j = 0; j < N_LAMBDA_MAX; j++) {
	  SCMSET_DOW(0.0, adv_krn[i][j]);
	  for (k = 0; k < DIM_OF_WORLD; k++) {
	    SCMAXPY_DOW(
	      adv_vec_d[i][k], Lb1_adv[j][k], adv_krn[i][j]);
	  }
	}
      }
    }

    /* contract the cached integrals with the kernel computed above */
    for (i = 0; i < n_row; i++) {
      for (j = 0; j < n_col; j++) {
	for (k = 0; k < depth; k++) {
	  l      = adv_cache->q100->cache->l[i][j][k];
	  values = adv_cache->q100->cache->values[i][j][k];
	  for (m = 0; m < n_entries[i][j][k]; m++) {
	    MSCMAXPY_DOW(
	      values[m], /**/ adv_krn[k][l[m]], mat[i][j]);
	  }
	}
      }
    }

    /* step to the coefficient vector belonging to the next cache */
    adv_coeffs = CHAIN_NEXT(adv_coeffs, EL_REAL_VEC_D);

  } CHAIN_WHILE(adv_cache, const ADV_CACHE);  
}

EMIT_BLOCK_VARIANT_PRE(adv_pre_10, false, false);

/* >>> */

/* <<< adv_pre_11() */

/* Advective first order terms with both kernels, Lb0 and Lb1, using
 * the pre-computed q010 and q100 integrals of the advection cache.
 *
 * Both kernels are fetched once (quad[1], quadrature point index 0).
 * The routine then walks the chain of advection caches; the chain of
 * coefficient vectors is advanced in lock-step. The loop bounds and
 * the basis functions of the advection field are taken from the q010
 * cache.
 *
 * NOTE(review): a single adv_krn accumulates the contributions of
 * Lb0_adv and of Lb1_adv, and this summed kernel is then contracted
 * with the q010 as well as with the q100 integrals. Please confirm
 * that this is intended, rather than one kernel per cache.
 */
FORCE_INLINE_ATTR
static inline void
INLINE_NAME(adv_pre_11)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  const REAL_D *Lb0_adv;
  const REAL_D *Lb1_adv;
  const ADV_CACHE *adv_cache = fill_info->adv_cache;
  const EL_REAL_VEC_D *adv_coeffs;

  Lb0_adv = fill_info->op_info.Lb0.real_d(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);
  Lb1_adv = fill_info->op_info.Lb1.real_d(
    el_info,
    fill_info->op_info.quad[1], 0,
    fill_info->op_info.user_data);

  /* Fetch the coefficients of the advection field only if they are
   * not yet stored in fill_info; the const qualifier is cast away in
   * order to store them there.
   */
  if (fill_info->adv_coeffs == NULL) {
    ((FILL_INFO *)fill_info)->adv_coeffs =
      fill_info->op_info.advection_field(el_info, fill_info->op_info.user_data);
  }
  adv_coeffs = fill_info->adv_coeffs;

  CHAIN_DO(adv_cache, const ADV_CACHE) {
    const int  *const*const*n_entries01, *const*const*n_entries10, *l;
    const REAL *values;
    int n_row = adv_cache->q010->cache->n_eta;
    int n_col = adv_cache->q010->cache->n_psi;
    int depth = adv_cache->q010->cache->n_phi;
    int i, j, k, m;
    /* variable length array: one row per basis function of the
     * advection field
     */
    REAL adv_krn[depth][N_LAMBDA_MAX];

    n_entries01 = adv_cache->q010->cache->n_entries;
    n_entries10 = adv_cache->q100->cache->n_entries;

    if (adv_coeffs->stride == 1)  { /* DIM_OF_WORLD-valued basis functions */
      const REAL *adv_vec = adv_coeffs->vec;

      for (i = 0; i < depth; i++) {
	const REAL *adv_d = PHI_D(adv_cache->q010->phi, i, NULL);
	for (j = 0; j < N_LAMBDA_MAX; j++) {
	  SCMSET_DOW(0.0, adv_krn[i][j]);
	  for (k = 0; k < DIM_OF_WORLD; k++) {
	    SCMAXPY_DOW(
	      adv_vec[i] * adv_d[k], Lb0_adv[j][k], adv_krn[i][j]);
	    SCMAXPY_DOW(
	      adv_vec[i] * adv_d[k], Lb1_adv[j][k], adv_krn[i][j]);
	  }
	}
      }
    } else { /* scalar basis functions, DIM_OF_WORLD-valued coefficients */
      const REAL_D *adv_vec_d = (const REAL_D *)adv_coeffs->vec;

      for (i = 0; i < depth; i++) {
	for (j = 0; j < N_LAMBDA_MAX; j++) {
	  SCMSET_DOW(0.0, adv_krn[i][j]);
	  for (k = 0; k < DIM_OF_WORLD; k++) {
	    SCMAXPY_DOW(
	      adv_vec_d[i][k], Lb0_adv[j][k], adv_krn[i][j]);
	    SCMAXPY_DOW(
	      adv_vec_d[i][k], Lb1_adv[j][k], adv_krn[i][j]);
	  }
	}
      }
    }

    /* contract first the q010, then the q100 integrals with the
     * kernel computed above
     */
    for (i = 0; i < n_row; i++) {
      for (j = 0; j < n_col; j++) {
	for (k = 0; k < depth; k++) {
	  l      = adv_cache->q010->cache->l[i][j][k];
	  values = adv_cache->q010->cache->values[i][j][k];
	  for (m = 0; m < n_entries01[i][j][k]; m++) {
	    MSCMAXPY_DOW(
	      values[m], /**/ adv_krn[k][l[m]], mat[i][j]);
	  }
	  l      = adv_cache->q100->cache->l[i][j][k];
	  values = adv_cache->q100->cache->values[i][j][k];
	  for (m = 0; m < n_entries10[i][j][k]; m++) {
	    MSCMAXPY_DOW(
	      values[m], /**/ adv_krn[k][l[m]], mat[i][j]);
	  }
	}
      }
    }

    /* step to the coefficient vector belonging to the next cache */
    adv_coeffs = CHAIN_NEXT(adv_coeffs, EL_REAL_VEC_D);

  } CHAIN_WHILE(adv_cache, const ADV_CACHE);
}

EMIT_BLOCK_VARIANT_PRE(adv_pre_11, false,
		       fill_info->op_info.Lb0_Lb1_anti_symmetric);

/* >>> */

/* >>> */

/* <<< pre_0() */

/* Zero order term, using the pre-computed integrals of the q00-cache.
 *
 * The coefficient is fetched once through op_info.c.real() (quad[0],
 * quadrature point index 0); values[i][j] * c is added to mat[i][j]
 * by means of MSCMAXPY_DOW().
 *
 * If the row- and column basis functions are of the same kind
 * (row_V == col_V) and fill_info->c_symmetric is set, then only the
 * entries with j >= i are computed and the off-diagonal value is
 * added to mat[i][j] as well as to mat[j][i].
 */
static inline void
INLINE_NAME(pre_0)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  const REAL coeff = fill_info->op_info.c.real(
    el_info,
    fill_info->op_info.quad[0], 0,
    fill_info->op_info.user_data);
  const REAL *const*weights = fill_info->q00_cache->cache->values;
  const int  n_psi = fill_info->q00_cache->cache->n_psi;
  const int  n_phi = fill_info->q00_cache->cache->n_phi;
  int        i, j;
  REAL       entry;

  if (!(row_V == col_V && fill_info->c_symmetric)) {
    /* general case: visit all entries */
    for (i = 0; i < n_psi; i++) {
      for (j = 0; j < n_phi; j++) {
        MSCMAXPY_DOW(
          weights[i][j], /**/ coeff, mat[i][j]);
      }
    }
    return;
  }

  /* symmetric case: diagonal plus upper triangle, mirrored */
  for (i = 0; i < n_psi; i++) {
    MSCMAXPY_DOW(
      weights[i][i], /**/ coeff, mat[i][i]);
    for (j = i+1; j < n_phi; j++) {
      SCMAXEY_DOW(weights[i][j],
                  /**/ coeff,
                  entry);
      MSCMAXPY_DOW(
        1.0, /**/ entry, mat[i][j]);
      MSCMAXPY_DOW(
        1.0, /**/ entry, mat[j][i]);
    }
  }
}

EMIT_BLOCK_VARIANT_PRE(pre_0, fill_info->c_symmetric, false);

/* >>> */

/* <<< pre_2_01 */

/* Second order term plus the first order term Lb0: runs the pre_2
 * kernel and then the pre_01 kernel on the same matrix. The emitted
 * variant passes neither the symmetry nor the anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_2_01)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  INLINE_NAME(pre_2)(el_info, fill_info, mat);
  INLINE_NAME(pre_01)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_2_01, false, false);

/* >>> */

/* <<< pre_2_10 */

/* Second order term plus the first order term Lb1: runs the pre_2
 * kernel and then the pre_10 kernel on the same matrix. The emitted
 * variant passes neither the symmetry nor the anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_2_10)(const EL_INFO *el_info, const FILL_INFO *fill_info,
		      REAL_DD **mat)
{
  INLINE_NAME(pre_2)(el_info, fill_info, mat);
  INLINE_NAME(pre_10)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_2_10, false, false);

/* >>> */

/* <<< pre_2_11 */

/* Second order term plus both first order terms: runs the pre_2
 * kernel and then the pre_11 kernel on the same matrix. The emitted
 * variant passes neither the symmetry nor the anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_2_11)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  INLINE_NAME(pre_2)(el_info, fill_info, mat);
  INLINE_NAME(pre_11)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_2_11, false, false);

/* >>> */

/* <<< pre_2_0 */

/* Second order term plus the zero order term: runs the pre_2 kernel
 * and then the pre_0 kernel on the same matrix. The emitted variant
 * passes the symmetry flag only if both op_info.LALt_symmetric and
 * fill_info->c_symmetric are set.
 */
static inline void
INLINE_NAME(pre_2_0)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  INLINE_NAME(pre_2)(el_info, fill_info, mat);
  INLINE_NAME(pre_0)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(
  pre_2_0,
  fill_info->op_info.LALt_symmetric && fill_info->c_symmetric,
  false);

/* >>> */

/* <<< pre_01_0 */

/* First order term Lb0 plus the zero order term: runs the pre_01
 * kernel and then the pre_0 kernel on the same matrix. The emitted
 * variant passes neither the symmetry nor the anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_01_0)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  INLINE_NAME(pre_01)(el_info, fill_info, mat);
  INLINE_NAME(pre_0)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_01_0, false, false);

/* >>> */

/* <<< pre_10_0 */

/* First order term Lb1 plus the zero order term: runs the pre_10
 * kernel and then the pre_0 kernel on the same matrix. The emitted
 * variant passes neither the symmetry nor the anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_10_0)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  INLINE_NAME(pre_10)(el_info, fill_info, mat);
  INLINE_NAME(pre_0)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_10_0, false, false);

/* >>> */

/* <<< pre_11_0 */

/* Both first order terms plus the zero order term: runs the pre_11
 * kernel and then the pre_0 kernel on the same matrix. The emitted
 * variant passes neither the symmetry nor the anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_11_0)(
  const EL_INFO *el_info, const FILL_INFO *fill_info,
  REAL_DD **mat)
{
  INLINE_NAME(pre_11)(el_info, fill_info, mat);
  INLINE_NAME(pre_0)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_11_0, false, false);

/* >>> */

/* <<< pre_2_01_0 */

/* Second order, first order (Lb0) and zero order term: runs the
 * pre_2, pre_01 and pre_0 kernels, in this order, on the same matrix.
 * The emitted variant passes neither the symmetry nor the
 * anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_2_01_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
			REAL_DD **mat)
{
  INLINE_NAME(pre_2)(el_info, fill_info, mat);
  INLINE_NAME(pre_01)(el_info, fill_info, mat);
  INLINE_NAME(pre_0)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_2_01_0, false, false);

/* >>> */

/* <<< pre_2_10_0 */

/* Second order, first order (Lb1) and zero order term: runs the
 * pre_2, pre_10 and pre_0 kernels, in this order, on the same matrix.
 * The emitted variant passes neither the symmetry nor the
 * anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_2_10_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
			REAL_DD **mat)
{
  INLINE_NAME(pre_2)(el_info, fill_info, mat);
  INLINE_NAME(pre_10)(el_info, fill_info, mat);
  INLINE_NAME(pre_0)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_2_10_0, false, false);

/* >>> */

/* <<< pre_2_11_0 */

/* Second order, both first order and the zero order term: runs the
 * pre_2, pre_11 and pre_0 kernels, in this order, on the same matrix.
 * The emitted variant passes neither the symmetry nor the
 * anti-symmetry flag.
 */
static inline void
INLINE_NAME(pre_2_11_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
			REAL_DD **mat)
{
  INLINE_NAME(pre_2)(el_info, fill_info, mat);
  INLINE_NAME(pre_11)(el_info, fill_info, mat);
  INLINE_NAME(pre_0)(el_info, fill_info, mat);
}

EMIT_BLOCK_VARIANT_PRE(pre_2_11_0, false, false);

/* >>> */

/* >>> */

/* <<< ... without pre-computed integrals */

/* Request the helper functions defined below. Each helper is guarded
 * by "#if NEED_xxx" and undefines its flag again, such that it is
 * emitted only once even if two kernel types coincide.
 * NOTE(review): NEED_M_UTAV is not tested in the part of the file
 * following these definitions (MutAv() is guarded by
 * HAVE_M_LALT_TYPE || HAVE_DM_LALT_TYPE instead) -- confirm whether it
 * is used elsewhere.
 */
#define NEED_SCM_BTV 1
#define NEED_M_BTV 1
#define NEED_M_UTAV 1

/* <<< Helper functions btv and utAv */

/* <<< special scalar case (avoiding pointers to scalars) */

#if NEED_SCM_BTV
# undef NEED_SCM_BTV

/* Scalar product of the first n_lambda components of b and v. The
 * value passed in for r is ignored; it is overwritten by the first
 * product b[0] * v[0], which is formed regardless of n_lambda.
 */
static inline REAL
__SCMbtv(int n_lambda, const REAL_B b, const REAL_B v, REAL r)
{
  int lam = 0;

  r = b[lam] * v[lam];
  while (++lam < n_lambda) {
    r += b[lam] * v[lam];
  }
  return r;
}
/* Statement-like front end: stores the result in r */
# define SCMbtv(n_lambda, b, v, r) ((r) = __SCMbtv(n_lambda, b, v, 0.0))
#endif

#if NEED_SCM_UTAV
# undef NEED_SCM_UTAV

/* Bilinear form u^t A v over the first n_lambda components, A being
 * an array of REAL_B rows. The value passed in for r is ignored; it
 * is overwritten by the contribution of row 0, which is formed
 * regardless of n_lambda.
 */
static inline REAL
__SCMutAv(int n_lambda, const REAL_B u, const REAL_B *A, const REAL_B v, REAL r)
{
  int lam = 0;

  r = u[lam] * __SCMbtv(n_lambda, A[lam], v, 0.0);
  while (++lam < n_lambda) {
    r += u[lam] * __SCMbtv(n_lambda, A[lam], v, 0.0);
  }
  return r;
}
/* Statement-like front end: stores the result in r */
#define SCMutAv(n_lambda, u, A, v, r) ((r) = __SCMutAv(n_lambda, u, A, v, 0.0))
#endif

/* >>> */

/* <<< REAL_D and REAL_DD case */

/* Function variant of SCMbtv. NEED_SCM_BTV has already been undefined
 * by the scalar special case above, so within this file this branch
 * is only compiled if the flag is defined again in between.
 * NOT_NEEDED is not defined in the visible part of the file; it looks
 * like a placeholder left behind by the template for the unused
 * result type -- confirm.
 */
#if NEED_SCM_BTV
# undef NEED_SCM_BTV

/* component-wise n_lambda scp: r is set to v[0]*b[0] and
 * v[i]*b[i] is added for i = 1, ..., n_lambda-1
 */
static inline NOT_NEEDED
SCMbtv(int n_lambda, const REAL b[],
		const REAL_B v, NOT_NEEDED r)
{
  int  i;

  SCMAXEY_DOW(v[0], /**/ b[0], /*@LB_CAST@*/ r);
  for (i = 1; i < n_lambda; i++) {
    SCMAXPY_DOW(v[i], /**/ b[i], /*@LB_CAST@*/ r);
  }
  return (NOT_NEEDED)r;
}

#endif

/* LALT and LB may have the same type */
#if NEED_M_BTV
# undef NEED_M_BTV

/* Component-wise n_lambda scalar product with a matrix valued b: r is
 * set to Dv[0] * b[0] and Dv[i] * b[i] is added for the remaining
 * i < n_lambda. r is returned. The first term is formed regardless
 * of n_lambda.
 */
static inline REAL_D *
Mbtv(int n_lambda, const REAL_DD b[],
		  const REAL_B Dv, REAL_D * r)
{
  int lam = 0;

  MAXEY_DOW(Dv[lam], (const REAL_D *) b[lam], r);
  while (++lam < n_lambda) {
    MAXPY_DOW(Dv[lam], (const REAL_D *) b[lam], r);
  }
  return (REAL_D *)r;
}
#endif

#if HAVE_M_LALT_TYPE || HAVE_DM_LALT_TYPE

/* A is a (N_LAMBDA_MAX)x(N_LAMBDA_MAX) block-matrix with DOWxDOW entries.
 *
 * Computes r = sum_i u[i] * (sum_j v[j] * A[i][j]) over the first
 * n_lambda indices and returns r. The inner sums are formed by Mbtv()
 * in the scratch block tmp.
 *
 * The scratch block is used for the first term as well, so that the
 * source and the destination of MAXEY_DOW() never refer to the same
 * storage.
 */
static inline REAL_D *
MutAv(int n_lambda,
		   const REAL_B u,
		   const REAL_DD (*A)[N_LAMBDA_MAX],
		   const REAL_B v,
		   REAL_D * r)
{
  int i;
  REAL_DD tmp;

  MAXEY_DOW(
    u[0], (const REAL_D *)Mbtv(n_lambda, A[0], v, tmp), r);
  for (i = 1; i < n_lambda; i++) {
    MAXPY_DOW(
      u[i], (const REAL_D *)Mbtv(n_lambda, A[i], v, tmp), r);
  }
  return (REAL_D *)r;
}

#endif /* HAVE_M||DM_LALT_TYPE */

/* >>> */

/* <<< utAv for vector/scalar - scalar/vector combinations */

/* Du and Dv are the Jacobians of some "really" vector valued
 * functions, _NOT_ factored into scalar part and direction. This
 * function is used in the case when the directions of the
 * vector-valued basis functions are not constant on an element.
 *
 * Returns the sum over alpha, beta < n_lambda of the kernel block
 * A[alpha][beta] contracted with column alpha of Du and column beta
 * of Dv; how the contraction is carried out depends on the type of
 * the kernel (HAVE_xx_LALT_TYPE).
 */
static inline REAL
VV_MutAv(int n_lambda,
		      const REAL_DB Du,
		      const REAL_DD (*A)[N_LAMBDA_MAX],
		      const REAL_DB Dv)
{
  REAL sum = 0.0;
  int  alpha, beta, i;

  for (alpha = 0; alpha < n_lambda; alpha++) {
    for (beta = 0; beta < n_lambda; beta++) {
# if HAVE_M_LALT_TYPE
      /* full DOWxDOW block */
      int j;

      for (i = 0; i < DIM_OF_WORLD; i++) {
        for (j = 0; j < DIM_OF_WORLD; j++) {
          sum += A[alpha][beta][i][j]*Du[i][alpha]*Dv[j][beta];
        }
      }
# elif HAVE_DM_LALT_TYPE
      /* diagonal block */
      for (i = 0; i < DIM_OF_WORLD; i++) {
        sum += A[alpha][beta][i]*Du[i][alpha]*Dv[i][beta];
      }
# elif HAVE_SCM_LALT_TYPE
      /* scalar kernel: scale the Euclidean product of the columns */
      {
        REAL tmp;

        tmp = 0.0;
        for (i = 0; i < DIM_OF_WORLD; i++) {
          tmp += Du[i][alpha]*Dv[i][beta];
        }
        sum += A[alpha][beta]*tmp;
      }
# endif
    }
  }
  return sum;
}

/* Du is the Jacobian of a vector-valued basis function, Dv is the
 * gradient of a scalar basis function.  u is _NOT_ factored into
 * scalar part and directional part. Intended for the case when the
 * directional part of a basis function is not constant on an
 * element.
 *
 * The result is accumulated component-wise in r, which is zeroed
 * first and returned at the end.
 */
static inline REAL *
VC_MutAv(int n_lambda,
		      const REAL_DB Du,
		      const REAL_DD (*A)[N_LAMBDA_MAX],
		      const REAL_B Dv,
		      REAL *r)
{
  int alpha, beta, i;

  SET_DOW(0.0, r);
  for (alpha = 0; alpha < n_lambda; alpha++) {
    for (beta = 0; beta < n_lambda; beta++) {
# if HAVE_M_LALT_TYPE
      /* full DOWxDOW block: Du is contracted with the first index */
      int j;

      for (i = 0; i < DIM_OF_WORLD; i++) {
        for (j = 0; j < DIM_OF_WORLD; j++) {
          r[j] += A[alpha][beta][i][j]*Du[i][alpha]*Dv[beta];
        }
      }
# elif HAVE_DM_LALT_TYPE
      /* diagonal block */
      for (i = 0; i < DIM_OF_WORLD; i++) {
        r[i] += A[alpha][beta][i]*Du[i][alpha]*Dv[beta];
      }
# elif HAVE_SCM_LALT_TYPE
      /* scalar kernel */
      for (i = 0; i < DIM_OF_WORLD; i++) {
        r[i] += Du[i][alpha]*A[alpha][beta]*Dv[beta];
      }
# endif
    }
  }
  return r;
}

/* Dv is the Jacobian of a vector-valued basis function which is _NOT_
 * factored into a scalar and a directional part. For use when the
 * direction of v is not p.w. constant. Du is the gradient of a scalar
 * function.
 *
 * The result is accumulated component-wise in r, which is zeroed
 * first and returned at the end.
 */
static inline REAL *
CV_MutAv(int n_lambda,
		      const REAL_B Du,
		      const REAL_DD (*A)[N_LAMBDA_MAX],
		      const REAL_DB Dv,
		      REAL *r)
{
  int alpha, beta, i;

  SET_DOW(0.0, r);
  for (alpha = 0; alpha < n_lambda; alpha++) {
    for (beta = 0; beta < n_lambda; beta++) {
# if HAVE_M_LALT_TYPE
      /* full DOWxDOW block: Dv is contracted with the second index */
      int j;

      for (i = 0; i < DIM_OF_WORLD; i++) {
        for (j = 0; j < DIM_OF_WORLD; j++) {
          r[i] += A[alpha][beta][i][j]*Du[alpha]*Dv[j][beta];
        }
      }
# elif HAVE_DM_LALT_TYPE
      /* diagonal block */
      for (i = 0; i < DIM_OF_WORLD; i++) {
        r[i] += A[alpha][beta][i]*Du[alpha]*Dv[i][beta];
      }
# elif HAVE_SCM_LALT_TYPE
      /* scalar kernel */
      for (i = 0; i < DIM_OF_WORLD; i++) {
        r[i] += Du[alpha]*A[alpha][beta]*Dv[i][beta];
      }
# endif
    }
  }
  return r;
}

# if HAVE_DM_LALT_TYPE || HAVE_SCM_LALT_TYPE
/* There is an ambiguity if the entries of the operator kernel are
 * REAL_D blocks and one of the finite element spaces is scalar and the
 * other vector valued: the scalar space may either be a real scalar
 * space, in which case we have to form the scalar product of the
 * kernel with the components of the vector valued basis functions, or
 * the scalar space may in fact be the scalar components of a
 * Cartesian product space, in which case the resulting matrix is a
 * block matrix with REAL_D entries. The VS_... and SV_... versions
 * are for the scalar-vector case, the VC_... and CV_... versions are
 * for the Cartesian product case.
 */

/* Vector-valued row function against a genuinely scalar column
 * function: Du is the Jacobian of the (un-factored) vector-valued
 * basis function, Dv the gradient of a scalar basis function.  Meant
 * for basis functions whose directional part is not constant on an
 * element.  The result is a single REAL.
 */
static inline REAL
VS_MutAv(int n_lambda,
         const REAL_DB Du,
         const REAL_DD (*A)[N_LAMBDA_MAX],
         const REAL_B Dv)
{
  REAL sum = 0.0;
  int  k, l, m;

  for (k = 0; k < n_lambda; k++) {
    for (l = 0; l < n_lambda; l++) {
# if HAVE_DM_LALT_TYPE
      {
        /* scalar product of the kernel block with column k of Du */
        REAL part = 0.0;

        for (m = 0; m < DIM_OF_WORLD; m++) {
          part += A[k][l][m]*Du[m][k];
        }
        sum += part * Dv[l];
      }
# elif HAVE_SCM_LALT_TYPE
      {
        /* sum of the components of column k of Du */
        REAL part = 0.0;

        for (m = 0; m < DIM_OF_WORLD; m++) {
          part += Du[m][k];
        }
        sum += part * A[k][l]*Dv[l];
      }
# endif
    }
  }

  return sum;
}

/* Genuinely scalar row function against a vector-valued column
 * function: Du is the gradient of a scalar basis function, Dv the
 * Jacobian of a vector-valued basis function which is _NOT_ factored
 * into a scalar and a directional part (for use when v is not
 * p.w. constant).  The result is a single REAL.
 */
static inline REAL
SV_MutAv(int n_lambda,
         const REAL_B Du,
         const REAL_DD (*A)[N_LAMBDA_MAX],
         const REAL_DB Dv)
{
  REAL sum = 0.0;
  int  k, l, m;

  for (k = 0; k < n_lambda; k++) {
    for (l = 0; l < n_lambda; l++) {
# if HAVE_DM_LALT_TYPE
      for (m = 0; m < DIM_OF_WORLD; m++) {
        sum += A[k][l][m]*Du[k]*Dv[m][l];
      }
# elif HAVE_SCM_LALT_TYPE
      {
        /* sum of the components of column l of Dv */
        REAL part = 0.0;

        for (m = 0; m < DIM_OF_WORLD; m++) {
          part += Dv[m][l];
        }
        sum += A[k][l]*Du[k]*part;
      }
# endif
    }
  }

  return sum;
}

# endif /* HAVE_DM_LALT_TYPE || HAVE_SCM_LALT_TYPE */

/* >>> */

/* <<< ubtDv() and Dutbv() for vector/scalar - scalar/vector combinations */

/* <<< vector-vector */

/* Lb0 helper (derivative on the ansatz function), vector-vector case:
 * contracts the value u of a vector-valued function with b[k] and
 * column k of the Jacobian Dv, summed over k < n_lambda.
 */
static inline REAL
VV_SCMubtDv(int n_lambda,
            const REAL_D u, const REAL b[], const REAL_DB Dv)
{
  REAL acc = 0.0;
  int  k, d;

  for (k = 0; k < n_lambda; k++) {
#if HAVE_M_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      int e;

      for (e = 0; e < DIM_OF_WORLD; e++) {
        acc += u[d] * b[k][d][e] * Dv[e][k];
      }
    }
#elif HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += u[d] * b[k][d] * Dv[d][k];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += u[d] * b[k] * Dv[d][k];
    }
#endif
  }

  return acc;
}

/* Lb1 helper (derivative on the test function), vector-vector case:
 * contracts column k of the Jacobian Du with b[k] and the value v of
 * a vector-valued function, summed over k < n_lambda.
 */
static inline REAL
VV_SCMDutbv(int n_lambda,
            const REAL_DB Du, const REAL b[], const REAL_D v)
{
  REAL acc = 0.0;
  int  k, d;

  for (k = 0; k < n_lambda; k++) {
#if HAVE_M_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      int e;

      for (e = 0; e < DIM_OF_WORLD; e++) {
        acc += Du[d][k] * b[k][d][e] * v[e];
      }
    }
#elif HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += Du[d][k] * b[k][d] * v[d];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += Du[d][k] * b[k] * v[d];
    }
#endif
  }

  return acc;
}

/* >>> */

/* <<< vector-Cartesian */

/* Lb0 helper (derivative on the ansatz function), vector-Cartesian
 * case: u is the value of a vector-valued function, Dv the gradient
 * of a scalar function.  The result is accumulated component-wise in
 * r (reset first via SET_DOW(0.0, r)) and r is returned.
 */
static inline REAL *
VC_SCMubtDv(int n_lambda,
            const REAL_D u, const REAL b[], const REAL_B Dv, REAL_D r)
{
  int k, d;

  SET_DOW(0.0, r);

  for (k = 0; k < n_lambda; k++) {
#if HAVE_M_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      int e;

      for (e = 0; e < DIM_OF_WORLD; e++) {
        r[e] += u[d] * b[k][d][e] * Dv[k];
      }
    }
#elif HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += u[d] * b[k][d] * Dv[k];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += u[d] * b[k] * Dv[k];
    }
#endif
  }

  return r;
}

/* Lb1 helper (derivative on the test function), vector-Cartesian
 * case: Du is the Jacobian of a vector-valued function, v the value
 * of a scalar function.  The result is accumulated component-wise in
 * r (reset first via SET_DOW(0.0, r)) and r is returned.
 */
static inline REAL *
VC_SCMDutbv(int n_lambda,
            const REAL_DB Du, const REAL b[], REAL v, REAL_D r)
{
  int k, d;

  SET_DOW(0.0, r);

  for (k = 0; k < n_lambda; k++) {
#if HAVE_M_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      int e;

      for (e = 0; e < DIM_OF_WORLD; e++) {
        r[e] += Du[d][k] * b[k][d][e] * v;
      }
    }
#elif HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += Du[d][k] * b[k][d] * v;
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += Du[d][k] * b[k] * v;
    }
#endif
  }

  return r;
}

/* >>> */

/* <<< Cartesian-vector */

/* Lb0 helper (derivative on the ansatz function), Cartesian-vector
 * case: u is the value of a scalar function, Dv the Jacobian of a
 * vector-valued function.  The result is accumulated component-wise
 * in r (reset first via SET_DOW(0.0, r)) and r is returned.
 */
static inline REAL *
CV_SCMubtDv(int n_lambda,
            REAL u, const REAL b[], const REAL_DB Dv, REAL_D r)
{
  int k, d;

  SET_DOW(0.0, r);

  for (k = 0; k < n_lambda; k++) {
#if HAVE_M_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      int e;

      for (e = 0; e < DIM_OF_WORLD; e++) {
        r[d] += u * b[k][d][e] * Dv[e][k];
      }
    }
#elif HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += u * b[k][d] * Dv[d][k];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += u * b[k] * Dv[d][k];
    }
#endif
  }

  return r;
}

/* Lb1 helper (derivative on the test function), Cartesian-vector
 * case: Du is the gradient of a scalar function, v the value of a
 * vector-valued function.  The result is accumulated component-wise
 * in r (reset first via SET_DOW(0.0, r)) and r is returned.
 */
static inline REAL *
CV_SCMDutbv(int n_lambda,
            const REAL_B Du, const REAL b[], const REAL_D v, REAL_D r)
{
  int k, d;

  SET_DOW(0.0, r);

  for (k = 0; k < n_lambda; k++) {
#if HAVE_M_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      int e;

      for (e = 0; e < DIM_OF_WORLD; e++) {
        r[d] += Du[k] * b[k][d][e] * v[e];
      }
    }
#elif HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += Du[k] * b[k][d] * v[d];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      r[d] += Du[k] * b[k] * v[d];
    }
#endif
  }

  return r;
}

/* >>> */

#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE

/* <<< vector-Scalar */

/* Lb0 helper (derivative on the ansatz function), vector-scalar case:
 * u is the value of a vector-valued function, Dv the gradient of a
 * scalar function.  The result is a single REAL.
 */
static inline REAL
VS_SCMubtDv(int n_lambda,
            const REAL_D u, const REAL b[], const REAL_B Dv)
{
  REAL acc = 0.0;
  int  k, d;

  for (k = 0; k < n_lambda; k++) {
#if HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += u[d] * b[k][d] * Dv[k];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += u[d] * b[k] * Dv[k];
    }
#endif
  }

  return acc;
}

/* Lb1 helper (derivative on the test function), vector-scalar case:
 * Du is the Jacobian of a vector-valued function, v the value of a
 * scalar function.  The result is a single REAL.
 */
static inline REAL
VS_SCMDutbv(int n_lambda,
            const REAL_DB Du, const REAL b[], REAL v)
{
  REAL acc = 0.0;
  int  k, d;

  for (k = 0; k < n_lambda; k++) {
#if HAVE_M_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      int e;

      for (e = 0; e < DIM_OF_WORLD; e++) {
        acc += Du[d][k] * b[k][d][e] * v;
      }
    }
#elif HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += Du[d][k] * b[k][d] * v;
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += Du[d][k] * b[k] * v;
    }
#endif
  }

  return acc;
}

/* >>> */

/* <<< Scalar-vector */

/* Lb0 helper (derivative on the ansatz function), scalar-vector case:
 * u is the value of a scalar function, Dv the Jacobian of a
 * vector-valued function.  The result is a single REAL.
 */
static inline REAL
SV_SCMubtDv(int n_lambda,
            REAL u, const REAL b[], const REAL_DB Dv)
{
  REAL acc = 0.0;
  int  k, d;

  for (k = 0; k < n_lambda; k++) {
#if HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += u * b[k][d] * Dv[d][k];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += u * b[k] * Dv[d][k];
    }
#endif
  }

  return acc;
}

/* Lb1 helper (derivative on the test function), scalar-vector case:
 * Du is the gradient of a scalar function, v the value of a
 * vector-valued function.  The result is a single REAL.
 */
static inline REAL
SV_SCMDutbv(int n_lambda,
            const REAL_B Du, const REAL b[], const REAL_D v)
{
  REAL acc = 0.0;
  int  k, d;

  for (k = 0; k < n_lambda; k++) {
#if HAVE_DM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += Du[k] * b[k][d] * v[d];
    }
#elif HAVE_SCM_LB_TYPE
    for (d = 0; d < DIM_OF_WORLD; d++) {
      acc += Du[k] * b[k] * v[d];
    }
#endif
  }

  return acc;
}

/* >>> */

#endif

/* >>> */

/* >>> */

/* dim multiplexing
 *
 * Argument lists handed to EMIT_DIM_VERSIONS() further below:
 * ARG_DECL is the parameter declaration list, ARG_CALL(n_lambda) the
 * matching call argument list with n_lambda appended as third
 * argument.  NOTE(review): presumably EMIT_DIM_VERSIONS() generates
 * one wrapper per mesh dimension which forwards to the generic
 * routine with a fixed n_lambda -- confirm against its definition.
 */
#define ARG_CALL(n_lambda) (el_info, info, n_lambda)
#define ARG_DECL           (const EL_INFO *el_info, const FILL_INFO *info)

/* <<< element matrix multiplexing */

/* Select the matrix storage the assembly loops accumulate into.
 *
 * - Neither row_V nor col_V set: the REAL_DD block matrix of the
 *   element matrix is returned, nothing else is touched.
 * - Otherwise *real_mat is pointed at the REAL data of the element
 *   matrix, and
 *   + if both directions are p.w. constant, the (cleared) temporary
 *     matrix scl_el_mat is returned as REAL_DD matrix;
 *   + if exactly one of them is p.w. constant, *real_d_mat is set
 *     either to the (cleared) temporary matrix -- when the constant
 *     side is itself vector valued -- or to the REAL_D data of the
 *     element matrix, and NULL is returned;
 *   + if neither is p.w. constant, NULL is returned and *real_d_mat
 *     is left untouched.
 *
 * row_V and col_V are defined globally.
 */
static inline REAL_DD **
M_assign_matrices(REAL ***real_mat,
                  REAL_D ***real_d_mat,
                  const FILL_INFO *fill_info,
                  /* bool row_V, bool col_V, */
                  bool row_V_const, bool col_V_const)
{
  bool use_tmp_mat;

  if (!row_V && !col_V) {
    return (REAL_DD **)fill_info->el_mat->data.real_dd;
  }

  *real_mat = (REAL **)fill_info->el_mat->data.real;

  if (row_V_const && col_V_const) {
    REAL_DD **tmp_mat = (REAL_DD **)fill_info->scl_el_mat;

    M_clear_tmp_mat(tmp_mat, fill_info);
    return tmp_mat;
  }

  if (row_V_const) {
    use_tmp_mat = row_V;   /* column side is vector valued, not constant */
  } else if (col_V_const) {
    use_tmp_mat = col_V;   /* row side is vector valued, not constant */
  } else {
    return NULL;           /* results go straight into *real_mat */
  }

  if (use_tmp_mat) {
    *real_d_mat = (REAL_D **)fill_info->scl_el_mat;
    DM_clear_tmp_mat(*real_d_mat, fill_info);
  } else {
    *real_d_mat = (REAL_D **)fill_info->el_mat->data.real_d;
  }

  return NULL;
}

/* Dispatch to the condensation routine matching the combination of
 * vector-valued / Cartesian / scalar spaces, if at least one of the
 * vector-valued directions was p.w. constant.  Does nothing when no
 * case applies.  row_V, col_V, row_C and col_C are defined globally.
 */
static inline void
M_condense_matrices(const FILL_INFO *fill_info,
                    /* bool row_V, bool col_V,
                       bool row_C, bool col_C, */
                    bool row_V_const, bool col_V_const)
{
  /* both spaces vector valued */
  if (row_V && col_V) {
    if (row_V_const && col_V_const) {
      VV_M_condense_el_mat(fill_info, false, false);
    } else if (row_V_const) {
      VS_DM_condense_el_mat(fill_info);
    } else if (col_V_const) {
      SV_DM_condense_el_mat(fill_info);
    }
    return;
  }

  /* only the row space is vector valued, with p.w. constant direction */
  if (row_V && row_V_const) {
    if (col_C) {
      VC_M_condense_el_mat(fill_info);
    }
#if HAVE_SCM_DST_TYPE || HAVE_DM_DST_TYPE
    else {
      VS_M_condense_el_mat(fill_info);
    }
#endif
    return;
  }

  /* only the column space is vector valued, with p.w. constant direction */
  if (col_V && col_V_const) {
    if (row_C) {
      CV_M_condense_el_mat(fill_info);
    }
#if HAVE_SCM_DST_TYPE || HAVE_DM_DST_TYPE
    else {
      SV_M_condense_el_mat(fill_info);
    }
#endif
  }
}

/* >>> */

/* <<< quad_2() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Second order term: for every quadrature point of op_info.quad[2]
 * the kernel LALt is fetched through op_info.LALt.real_dd() and
 * contracted with the gradients of the row and column basis
 * functions; the result is added, weighted with quad->w[iq], to the
 * element matrix (or to a temporary matrix which is condensed
 * afterwards).
 *
 * If row_V == col_V and op_info.LALt_symmetric is set, only the
 * diagonal and the entries with j > i are computed and the row
 * quad-fast structure is used for the columns as well.  Otherwise all
 * entries are computed, dispatching per entry on which of the two
 * sides has p.w. constant directions.
 */
static inline void
NAME(quad_2)(const EL_INFO *el_info, const FILL_INFO *fill_info,
	     int n_lambda)
{
  const REAL_BDD *LALt;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD val;
  REAL_DD  **mat;
  REAL_D      **real_d_mat = NULL;
  REAL        **real_mat = NULL;
  bool        row_V_const, col_V_const;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" also covers the case of a space which is not vector valued */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V == col_V && fill_info->op_info.LALt_symmetric) {
    const REAL_DB *const*grd_row_d = NULL;
    const REAL_DB *const*grd_col_d = NULL;

    /* symmetric case: the column data is taken from the row space */
    col_qfast   = row_qfast;
    col_V_const = row_V_const;
    /* col_V       = row_V; */

    if (row_V) {
      if (row_V_const) {
	/* assemble into the temporary matrix, condensed further below */
	mat = (REAL_DD **)fill_info->scl_el_mat;
	M_clear_tmp_mat(mat, fill_info);
      } else {
	/* mat stays unset here; this path only writes to real_mat */
	grd_row_d = grd_col_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
    } else {
      mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
    }

    if (row_V_const) { /* non-vector valued, or direction p.w. constant */
      for (iq = 0; iq < quad->n_points; iq++) {
	LALt = fill_info->op_info.LALt.real_dd(
	  el_info, quad, iq, fill_info->op_info.user_data);

	grd_row_phi = grd_col_phi = row_qfast->grd_phi[iq];

	for (i = 0; i < fill_info->el_mat->n_row; i++) {
	  /* diagonal entry */
	  MutAv(n_lambda, grd_row_phi[i], LALt, grd_col_phi[i], val);
	  MMAXPY_DOW(
	    quad->w[iq], (const REAL_D *) val, mat[i][i]);
	  /* entries right of the diagonal */
	  for (j = i+1; j < fill_info->el_mat->n_col; j++) {
	    MutAv(n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val);
	    MAX_DOW(quad->w[iq],  val);
	    MMAXPY_DOW(
	      1.0, (const REAL_D *) val, mat[i][j]);
	    /* mat[j][i] is only filled here for non-vector-valued spaces;
	     * NOTE(review): for row_V the "true" flag passed to
	     * VV_M_condense_el_mat() below presumably takes care of the
	     * symmetric part -- confirm.
	     */
	    if (!row_V) {
	      MMAXTPY_DOW(
		1.0, (const REAL_D *) val, mat[j][i]);
	    }
	  }
	}
      }

      if (row_V) { /* condense with the directions of the basis functions */
	VV_M_condense_el_mat(fill_info, true, false);
      }
    } else { /* vector-valued, and direction not p.w. constant */
      real_mat = (REAL **)fill_info->el_mat->data.real;

      for (iq = 0; iq < quad->n_points; iq++) {
	LALt = fill_info->op_info.LALt.real_dd(
	  el_info, quad, iq, fill_info->op_info.user_data);

	for (i = 0; i < fill_info->el_mat->n_row; i++) {
	  real_mat[i][i] +=
	    quad->w[iq]
	    *
	    VV_MutAv(
	      n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][i]);
	  for (j = i+1; j < fill_info->el_mat->n_col; j++) {
	    /* this scalar val shadows the REAL_DD val of the outer scope */
	    REAL val = quad->w[iq]
	      *
	      VV_MutAv(
		n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]);
	    real_mat[i][j] += val;
	    real_mat[j][i] += val;
	  }
	}
      }
    }
  } else { /*  non symmetric assembling   */
    const REAL_DB *const*grd_row_d = NULL;
    const REAL_DB *const*grd_col_d = NULL;
    REAL_D val_d;

    /* Jacobians are only fetched for vector-valued spaces whose
     * directions are not p.w. constant; otherwise the pointers stay NULL.
     */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
	grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
	grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
      }
    }

    mat = M_assign_matrices(&real_mat, &real_d_mat,
					fill_info,
					/* row_V, col_V, */
					row_V_const, col_V_const);

    for (iq = 0; iq < quad->n_points; iq++) {
      LALt = fill_info->op_info.LALt.real_dd(
	el_info, quad, iq, fill_info->op_info.user_data);

      grd_row_phi = row_qfast->grd_phi[iq];
      grd_col_phi = col_qfast->grd_phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
	for (j = 0; j < fill_info->el_mat->n_col; j++) {
	  if (row_V_const && col_V_const) {
	    /* both sides use plain scalar gradients */
	    MutAv(n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val);
	    MMAXPY_DOW(
	      quad->w[iq], (const REAL_D *) val, mat[i][j]);
	  } else if (row_V_const) {
	    /* only the column side needs its full Jacobian */
	    if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	      real_mat[i][j] +=
		quad->w[iq] * SV_MutAv(
		  n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j]);
#endif
	    } else {
	      CV_MutAv(
		n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	    }
	  } else if (col_V_const) {
	    /* only the row side needs its full Jacobian */
	    if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	      real_mat[i][j] +=
		quad->w[iq] * VS_MutAv(
		  n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j]);
#endif
	    } else {
	      VC_MutAv(
		n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	    }
	  } else {
	    /* both sides vector valued with non-constant directions */
	    real_mat[i][j] +=
	      quad->w[iq] * VV_MutAv(
		n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]);
	  }
	} /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
				    /* row_V, col_V, row_C, col_C, */
				    row_V_const, col_V_const);
  } /* non-symmetric assembly */
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_2), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_01() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

static inline void
NAME(quad_01)(const EL_INFO *el_info, const FILL_INFO *fill_info, int n_lambda)
{
  const REAL *Lb0;
  const REAL      *row_val;
  const REAL_B    *grd_col_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL       tmp;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL;

  quad      = fill_info->op_info.quad[1];
  row_qfast = fill_info->row_quad_fast[1];
  col_qfast = fill_info->col_quad_fast[1];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      row_d = get_quad_fast_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
				      /* row_V, col_V, */
				      row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++) {
    Lb0 = fill_info->op_info.Lb0.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    grd_col_phi = col_qfast->grd_phi[iq];
    row_val = row_qfast->phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++) {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
	if (row_V_const && col_V_const) {
	  MSCMAXPY_DOW(
	    quad->w[iq]*row_val[i],
	    /**/ SCMbtv(n_lambda, Lb0, grd_col_phi[j], tmp),
	    mat[i][j]);
	} else if (row_V_const) {
	  if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	    real_mat[i][j] +=
	      quad->w[iq] * SV_SCMubtDv(
		n_lambda, row_val[i], Lb0, grd_col_d[iq][j]);
#endif
	  } else {
	    CV_SCMubtDv(
	      n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	  }
	} else if (row_V_const) {
	  if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	    real_mat[i][j] +=
	      quad->w[iq] * VS_SCMubtDv(
		n_lambda, row_d[iq][i], Lb0, grd_col_phi[j]);
#endif
	  } else {
	    VC_SCMubtDv(
	      n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	  }
	} else {
	  real_mat[i][j] +=
	    quad->w[iq] * VV_SCMubtDv(
	      n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j]);
	}
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
				  /* row_V, col_V, row_C, col_C, */
				  row_V_const, col_V_const);
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_01), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_10() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

static inline void
NAME(quad_10)(const EL_INFO *el_info, const FILL_INFO *fill_info, int n_lambda)
{
  const REAL *Lb1;
  const REAL_B    *grd_row_phi;
  const REAL      *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL       tmp;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL;
  const REAL_D    *const*col_d = NULL;

  quad      = fill_info->op_info.quad[1];
  row_qfast = fill_info->row_quad_fast[1];
  col_qfast = fill_info->col_quad_fast[1];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      col_d = get_quad_fast_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
				      /* row_V, col_V, */
				      row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++)  {
    Lb1 = fill_info->op_info.Lb1.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    col_val = col_qfast->phi[iq];
    grd_row_phi = row_qfast->grd_phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++) {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
	if (row_V_const && col_V_const) {
	  MSCMAXPY_DOW(
	    quad->w[iq]*col_val[j],
	    /**/ SCMbtv(n_lambda, Lb1, grd_row_phi[i], tmp),
	    mat[i][j]);
	} else if (row_V_const) {
	  if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	    real_mat[i][j] +=
	      quad->w[iq] * SV_SCMDutbv(
		n_lambda, grd_row_phi[i], Lb1, col_d[iq][j]);
#endif
	  } else {
	    CV_SCMDutbv(
	      n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	  }
	} else if (row_V_const) {
	  if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	    real_mat[i][j] +=
	      quad->w[iq] * VS_SCMDutbv(
		n_lambda, grd_row_d[iq][i], Lb1, col_val[j]);
#endif
	  } else {
	    VC_SCMDutbv(
	      n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	  }
	} else {
	  real_mat[i][j] +=
	    quad->w[iq] * VV_SCMDutbv(
	      n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j]);
	}
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
				  /* row_V, col_V, row_C, col_C, */
				  row_V_const, col_V_const);
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_10), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_11() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Combined first order terms Lb0 (derivative on the column/ansatz
 * functions) and Lb1 (derivative on the row/test functions): for
 * every quadrature point of op_info.quad[1] both kernels are fetched
 * and their contributions added to the element matrix, or to a
 * temporary matrix which is condensed afterwards.
 *
 * If row_V == col_V and op_info.Lb0_Lb1_anti_symmetric is set, only
 * the entries with j > i are computed; each value is added to (i, j)
 * and subtracted from (j, i), the diagonal is left untouched and the
 * row quad-fast data is used for the columns as well.  Otherwise all
 * entries are computed, dispatching per entry on which side has
 * p.w. constant directions (a space which is not vector valued counts
 * as constant).
 */
static inline void
NAME(quad_11)(const EL_INFO *el_info, const FILL_INFO *fill_info,
              int n_lambda)
{
  const REAL *Lb0;
  const REAL *Lb1;
  const REAL      *row_val, *col_val;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL tmp1;
  REAL tmp2;
  REAL val;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad     = fill_info->op_info.quad[1];
  row_qfast = fill_info->row_quad_fast[1];
  col_qfast = fill_info->col_quad_fast[1];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V == col_V && fill_info->op_info.Lb0_Lb1_anti_symmetric) {

    if (row_V) {
      if (row_V_const) {
        /* assemble into the temporary matrix, condensed further below */
        mat = (REAL_DD **)fill_info->scl_el_mat;
        M_clear_tmp_mat(mat, fill_info);
      } else {
        /* mat stays unset here; this path only writes to real_mat */
        grd_row_d = grd_col_d = get_quad_fast_grd_phi_dow(row_qfast);
        row_d = col_d = get_quad_fast_phi_dow(row_qfast);
      }
    } else {
      mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
    }

    if (row_V_const) { /* non-vector valued, or direction p.w. constant */
      for (iq = 0; iq < quad->n_points; iq++) {
        Lb0 = fill_info->op_info.Lb0.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb1 = fill_info->op_info.Lb1.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        grd_row_phi = grd_col_phi = row_qfast->grd_phi[iq];
        row_val = col_val = row_qfast->phi[iq];

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            SCMAXPBY_DOW(
              quad->w[iq]*row_val[i],
              /**/ SCMbtv(
                n_lambda, Lb0, grd_col_phi[j], tmp1),
              quad->w[iq]*col_val[j],
              /**/ SCMbtv(
                n_lambda, Lb1, grd_row_phi[i], tmp2),
              val);
            MSCMAXPY_DOW(
              1.0, /**/ val, mat[i][j]);
            MSCMAXTPY_DOW(
              -1.0, /**/ val, mat[j][i]);
          }
        }
      }

      if (row_V) { /* condense with the directions of the basis functions */
        VV_M_condense_el_mat(fill_info, false, true);
      }

    } else { /* vector-valued, and direction not p.w. constant */
      real_mat = (REAL **)fill_info->el_mat->data.real;

      for (iq = 0; iq < quad->n_points; iq++) {
        Lb0 = fill_info->op_info.Lb0.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb1 = fill_info->op_info.Lb1.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            REAL val;

            val = quad->w[iq] *
              (VV_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
               +
               VV_SCMubtDv(
                 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j]));

            real_mat[i][j] += val;
            real_mat[j][i] -= val;
          }
        }
      }
    }
  } else { /*  non symmetric assembling   */
    /* Vector-valued data is only fetched for sides whose directions
     * are not p.w. constant; the pointers stay NULL otherwise and
     * must not be dereferenced in the corresponding branches below.
     */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
        row_d     = get_quad_fast_phi_dow(row_qfast);
        grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
        grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
        col_d     = get_quad_fast_phi_dow(col_qfast);
      }
    }

    mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                            /* row_V, col_V, */
                            row_V_const, col_V_const);

    for (iq = 0; iq < quad->n_points; iq++)  {
      Lb0 = fill_info->op_info.Lb0.real(
        el_info, quad, iq, fill_info->op_info.user_data);
      Lb1 = fill_info->op_info.Lb1.real(
        el_info, quad, iq, fill_info->op_info.user_data);

      grd_col_phi = col_qfast->grd_phi[iq];
      col_val = col_qfast->phi[iq];
      grd_row_phi = row_qfast->grd_phi[iq];
      row_val = row_qfast->phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
        for (j = 0; j < fill_info->el_mat->n_col; j++) {
          if (row_V_const && col_V_const) {
            SCMAXPBY_DOW(
              quad->w[iq]*row_val[i],
              /**/ SCMbtv(n_lambda, Lb0, grd_col_phi[j], tmp1),
              quad->w[iq]*col_val[j],
              /**/ SCMbtv(n_lambda, Lb1, grd_row_phi[i], tmp2),
              val);
            MSCMAXPY_DOW(
              1.0, /**/ val, mat[i][j]);
          } else if (row_V_const) {
            /* only the column side needs its vector-valued data */
            if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] *
                (SV_SCMDutbv(
                  n_lambda, grd_row_phi[i], Lb1, col_d[iq][j])
                 +
                 SV_SCMubtDv(
                   n_lambda, row_val[i], Lb0, grd_col_d[iq][j]));
#endif
            } else {
              CV_SCMDutbv(
                n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              CV_SCMubtDv(
                n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            }
          } else if (col_V_const) {
            /* Only the row side needs its vector-valued data.  This
             * branch must test the column flags: testing row_V_const
             * a second time would make it unreachable and send this
             * case into the VV branch, where col_d and grd_col_d are
             * NULL.
             */
            if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] *
                (VS_SCMDutbv(
                  n_lambda, grd_row_d[iq][i], Lb1, col_val[j])
                 +
                 VS_SCMubtDv(
                   n_lambda, row_d[iq][i], Lb0, grd_col_phi[j]));
#endif
            } else {
              VC_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              VC_SCMubtDv(
                n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            }
          } else {
            /* both sides vector valued with non-constant directions */
            real_mat[i][j] +=
              quad->w[iq] *
              (VV_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
               +
               VV_SCMubtDv(
                 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j]));
          }
        } /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
                        /* row_V, col_V, row_C, col_C, */
                        row_V_const, col_V_const);
  }
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_11), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< advective first order terms */

/* <<< adv_quad_01() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Advective first order term Lb0 (derivative applied to the
 * column/ansatz functions).  For every element of the adv_cache chain
 * the advection field is evaluated at the quadrature points via
 * __uh_dow_at_qp(), the REAL_D valued kernel returned by
 * op_info.Lb0.real_d() is contracted with it, and the resulting
 * kernel Lb0[] is assembled exactly like in the non-advective case.
 *
 * The coefficients of the advection field are fetched once through
 * op_info.advection_field() and stored in fill_info->adv_coeffs if
 * that pointer is still NULL.
 */
static inline void
NAME(adv_quad_01)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                  int n_lambda)
{
  const REAL_D *Lb0_adv;
  REAL       Lb0[N_LAMBDA_MAX];
  const REAL      *row_val;
  const REAL_B    *grd_col_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast, *adv_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL       tmp;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL;
  const ADV_CACHE *adv_cache = fill_info->adv_cache;
  const EL_REAL_VEC_D *adv_coeffs;

  row_phi       = fill_info->op_info.row_fe_space->bas_fcts;
  col_phi       = fill_info->op_info.col_fe_space->bas_fcts;

  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* the const qualifier is cast away on purpose to cache the result */
  if (fill_info->adv_coeffs == NULL) {
    ((FILL_INFO *)fill_info)->adv_coeffs =
      fill_info->op_info.advection_field(el_info, fill_info->op_info.user_data);
  }
  adv_coeffs = fill_info->adv_coeffs;

  CHAIN_DO(adv_cache, const ADV_CACHE) {
    REAL_D *adv_field = adv_cache->adv_field;

    /* The next call must be INSIDE the CHAIN_DO_WHILE loop */
    mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                            /* row_V, col_V, */
                            row_V_const, col_V_const);

    adv_qfast = adv_cache->adv_quad_fast;
    row_qfast = adv_cache->row_quad_fast;
    col_qfast = adv_cache->col_quad_fast;
    quad      = adv_qfast->quad;

    /* Vector-valued data is only fetched for sides whose directions
     * are not p.w. constant; the pointers stay NULL otherwise and
     * must not be dereferenced in the corresponding branches below.
     */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
        row_d = get_quad_fast_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
        grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
      }
    }

    /* call the single-component evaluation function for all quad-points */
    __uh_dow_at_qp(adv_field, adv_qfast, adv_coeffs, false /* overwrite */);

    for (iq = 0; iq < quad->n_points; iq++) {
      Lb0_adv = fill_info->op_info.Lb0.real_d(
        el_info, quad, iq, fill_info->op_info.user_data);

      /* Now contract Lb0 with the advection field. For the sake of
       * simplicity the contraction is always performed on the first
       * index.
       */
      for (i = 0; i < n_lambda; i++) {
        SCMSET_DOW(0.0, Lb0[i]);
        for (j = 0; j < DIM_OF_WORLD; j++) {
          SCMAXPY_DOW(adv_field[iq][j], Lb0_adv[i][j], Lb0[i]);
        }
      }

      grd_col_phi = col_qfast->grd_phi[iq];
      row_val = row_qfast->phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
        for (j = 0; j < fill_info->el_mat->n_col; j++) {
          if (row_V_const && col_V_const) {
            MSCMAXPY_DOW(
              quad->w[iq]*row_val[i],
              /**/ SCMbtv(
                n_lambda, (const REAL *)Lb0, grd_col_phi[j], tmp),
              mat[i][j]);
          } else if (row_V_const) {
            /* only the column side needs its full Jacobian */
            if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] * SV_SCMubtDv(
                  n_lambda, row_val[i], (const REAL *)Lb0, grd_col_d[iq][j]);
#endif
            } else {
              CV_SCMubtDv(
                n_lambda,
                row_val[i], (const REAL *)Lb0, grd_col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            }
          } else if (col_V_const) {
            /* Only the row side needs its vector values.  This branch
             * must test the column flags: testing row_V_const a
             * second time would make it unreachable and send this
             * case into the VV branch, where grd_col_d is NULL.
             */
            if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] * VS_SCMubtDv(
                  n_lambda, row_d[iq][i], (const REAL *)Lb0, grd_col_phi[j]);
#endif
            } else {
              VC_SCMubtDv(
                n_lambda, row_d[iq][i],
                (const REAL *)Lb0, grd_col_phi[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            }
          } else {
            /* both sides vector valued with non-constant directions */
            real_mat[i][j] +=
              quad->w[iq] * VV_SCMubtDv(
                n_lambda, row_d[iq][i], (const REAL *)Lb0, grd_col_d[iq][j]);
          }
        } /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
                        /* row_V, col_V, row_C, col_C, */
                        row_V_const, col_V_const);

    adv_coeffs = CHAIN_NEXT(adv_coeffs, EL_REAL_VEC_D);

  } CHAIN_WHILE(adv_cache, const ADV_CACHE);
}

EMIT_DIM_VERSIONS(/**/, NAME(adv_quad_01), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< adv_quad_10() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of an advective first-order term where the
 * derivative is applied to row_phi (the test space), i.e. "Lb1"
 * assembly.
 *
 * For every link of the ADV_CACHE chain the advection field is
 * evaluated at the quadrature points, the operator's Lb1 coefficient
 * is contracted with it over its DIM_OF_WORLD index, and the result is
 * paired with the row gradients and the column values.  The kernel
 * used for an entry (plain / SV_ / CV_ / VS_ / VC_ / VV_) depends on
 * whether the row resp. column basis functions are vector valued with
 * a direction which is not piece-wise constant.
 *
 * el_info   - current element; only forwarded to the operator callbacks.
 * fill_info - operator data, quadrature caches and element matrix.  If
 *             its adv_coeffs member is still NULL it is fetched through
 *             op_info.advection_field() and stored (const is cast away).
 * n_lambda  - number of leading components used in the contractions.
 */
static inline void
NAME(adv_quad_10)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                  int n_lambda)
{
  const REAL_D *Lb1_adv;
  REAL       Lb1[N_LAMBDA_MAX];
  const REAL_B    *grd_row_phi;
  const REAL      *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast, *adv_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL       tmp;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL;
  const REAL_D    *const*col_d = NULL;
  const ADV_CACHE *adv_cache = fill_info->adv_cache;
  const EL_REAL_VEC_D *adv_coeffs;

  row_phi       = fill_info->op_info.row_fe_space->bas_fcts;
  col_phi       = fill_info->op_info.col_fe_space->bas_fcts;

  /* "const" means: either not vector valued at all, or vector valued
   * with a piece-wise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* Lazily fetch the coefficient vector(s) of the advection field. */
  if (fill_info->adv_coeffs == NULL) {
    ((FILL_INFO *)fill_info)->adv_coeffs =
      fill_info->op_info.advection_field(el_info, fill_info->op_info.user_data);
  }
  adv_coeffs = fill_info->adv_coeffs;

  CHAIN_DO(adv_cache, const ADV_CACHE) {
    REAL_D *adv_field = adv_cache->adv_field;

    /* The next call must be INSIDE the CHAIN_DO_WHILE loop */
    mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                                        /* row_V, col_V, */
                                        row_V_const, col_V_const);

    adv_qfast = adv_cache->adv_quad_fast;
    row_qfast = adv_cache->row_quad_fast;
    col_qfast = adv_cache->col_quad_fast;
    quad      = adv_qfast->quad;

    /* Vector valued data is only needed (and only fetched) for a
     * direction which is not piece-wise constant.
     */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
        grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
        col_d = get_quad_fast_phi_dow(col_qfast);
      }
    }

    /* call the single-component evaluation function for all
     * quad-points; same call as in adv_quad_01(). Without it adv_field
     * is read below without ever having been computed from adv_coeffs.
     */
    __uh_dow_at_qp(adv_field, adv_qfast, adv_coeffs, false /* overwrite */);

    for (iq = 0; iq < quad->n_points; iq++)  {
      Lb1_adv = fill_info->op_info.Lb1.real_d(
        el_info, quad, iq, fill_info->op_info.user_data);

      /* Now contract Lb1 with the advection field. For the sake of
       * simplicity the contraction is always performed on the first
       * index.
       */
      for (i = 0; i < n_lambda; i++) {
        SCMSET_DOW(0.0, Lb1[i]);
        for (j = 0; j < DIM_OF_WORLD; j++) {
          SCMAXPY_DOW(adv_field[iq][j], Lb1_adv[i][j], Lb1[i]);
        }
      }

      col_val = col_qfast->phi[iq];
      grd_row_phi = row_qfast->grd_phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
        for (j = 0; j < fill_info->el_mat->n_col; j++) {
          if (row_V_const && col_V_const) {
            MSCMAXPY_DOW(
              quad->w[iq]*col_val[j],
              /**/ SCMbtv(
                n_lambda, (const REAL *)Lb1, grd_row_phi[i], tmp),
              mat[i][j]);
          } else if (row_V_const) {
            /* column vector valued with non-constant direction */
            if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] * SV_SCMDutbv(
                  n_lambda, grd_row_phi[i], (const REAL *)Lb1, col_d[iq][j]);
#endif
            } else {
              CV_SCMDutbv(
                n_lambda, grd_row_phi[i],
                (const REAL *)Lb1, col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            }
          } else if (col_V_const) {
            /* row vector valued with non-constant direction; the test
             * has to look at the column flags, otherwise this branch is
             * unreachable and the VV_ case below would be entered with
             * col_d == NULL.
             */
            if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] * VS_SCMDutbv(
                  n_lambda, grd_row_d[iq][i], (const REAL *)Lb1, col_val[j]);
#endif
            } else {
              VC_SCMDutbv(
                n_lambda, grd_row_d[iq][i],
                (const REAL *)Lb1, col_val[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            }
          } else {
            /* both sides vector valued with non-constant direction */
            real_mat[i][j] +=
              quad->w[iq] * VV_SCMDutbv(
                n_lambda, grd_row_d[iq][i], (const REAL *)Lb1, col_d[iq][j]);
          }
        } /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
                                    /* row_V, col_V, row_C, col_C, */
                                    row_V_const, col_V_const);

    /* advance the coefficient chain in lock-step with the cache chain */
    adv_coeffs = CHAIN_NEXT(adv_coeffs, EL_REAL_VEC_D);

  } CHAIN_WHILE(adv_cache, const ADV_CACHE);
}

EMIT_DIM_VERSIONS(/**/, NAME(adv_quad_10), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< adv_quad_11() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of an advective first-order operator with both
 * an Lb0 part (derivative on col_phi, the ansatz space) and an Lb1
 * part (derivative on row_phi, the test space).
 *
 * For every link of the ADV_CACHE chain the advection field is
 * evaluated at the quadrature points and Lb0/Lb1 are contracted with
 * it over their DIM_OF_WORLD index.  If row and column spaces are of
 * the same kind (row_V == col_V) and the operator flags
 * Lb0_Lb1_anti_symmetric, only the strict upper triangle is computed
 * (from row_qfast for both sides) and mirrored with opposite sign;
 * otherwise every entry is computed with the kernel matching the
 * row/column kinds.
 *
 * el_info   - current element; only forwarded to the operator callbacks.
 * fill_info - operator data, quadrature caches and element matrix.  If
 *             its adv_coeffs member is still NULL it is fetched through
 *             op_info.advection_field() and stored (const is cast away).
 * n_lambda  - number of leading components used in the contractions.
 */
static inline void
NAME(adv_quad_11)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                  int n_lambda)
{
  const REAL_D *Lb0_adv;
  const REAL_D *Lb1_adv;
  REAL       Lb0[N_LAMBDA_MAX];
  REAL       Lb1[N_LAMBDA_MAX];
  const REAL      *row_val, *col_val;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast, *adv_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL tmp1;
  REAL tmp2;
  REAL val;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;
  const ADV_CACHE *adv_cache = fill_info->adv_cache;
  const EL_REAL_VEC_D *adv_coeffs;

  row_phi       = fill_info->op_info.row_fe_space->bas_fcts;
  col_phi       = fill_info->op_info.col_fe_space->bas_fcts;

  /* "const" means: either not vector valued at all, or vector valued
   * with a piece-wise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* Lazily fetch the coefficient vector(s) of the advection field. */
  if (fill_info->adv_coeffs == NULL) {
    ((FILL_INFO *)fill_info)->adv_coeffs =
      fill_info->op_info.advection_field(el_info, fill_info->op_info.user_data);
  }
  adv_coeffs = fill_info->adv_coeffs;

  CHAIN_DO(adv_cache, const ADV_CACHE) {
    REAL_D *adv_field = adv_cache->adv_field;

    adv_qfast = adv_cache->adv_quad_fast;
    row_qfast = adv_cache->row_quad_fast;
    col_qfast = adv_cache->col_quad_fast;
    quad      = adv_qfast->quad;

    /* call the single-component evaluation function for all
     * quad-points; same call as in adv_quad_01(). Without it adv_field
     * is read below without ever having been computed from adv_coeffs.
     */
    __uh_dow_at_qp(adv_field, adv_qfast, adv_coeffs, false /* overwrite */);

    if (row_V == col_V && fill_info->op_info.Lb0_Lb1_anti_symmetric) {

      if (row_V) {
        if (row_V_const) {
          /* assemble into the scalar scratch matrix, condense later */
          mat = (REAL_DD **)fill_info->scl_el_mat;
          M_clear_tmp_mat(mat, fill_info);
        } else {
          grd_row_d = grd_col_d = get_quad_fast_grd_phi_dow(row_qfast);
          row_d = col_d = get_quad_fast_phi_dow(row_qfast);
        }
      } else {
        mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
      }

      if (row_V_const) { /* non-vector valued, or direction p.w. constant */
        for (iq = 0; iq < quad->n_points; iq++) {
          Lb0_adv = fill_info->op_info.Lb0.real_d(
            el_info, quad, iq, fill_info->op_info.user_data);
          Lb1_adv = fill_info->op_info.Lb1.real_d(
            el_info, quad, iq, fill_info->op_info.user_data);

          /* Now contract Lb0 and Lb1 with the advection field. For
           * the sake of simplicity the contraction is always
           * performed on the first index.
           */
          for (i = 0; i < n_lambda; i++) {
            SCMSET_DOW(0.0, Lb0[i]);
            SCMSET_DOW(0.0, Lb1[i]);
            for (j = 0; j < DIM_OF_WORLD; j++) {
              SCMAXPY_DOW(adv_field[iq][j], Lb0_adv[i][j], Lb0[i]);
              SCMAXPY_DOW(adv_field[iq][j], Lb1_adv[i][j], Lb1[i]);
            }
          }

          grd_row_phi = grd_col_phi = row_qfast->grd_phi[iq];
          row_val = col_val = row_qfast->phi[iq];

          /* strict upper triangle only; the diagonal receives no
           * contribution in this branch.
           */
          for (i = 0; i < fill_info->el_mat->n_row; i++) {
            for (j = i+1; j < fill_info->el_mat->n_col; j++) {
              SCMAXPBY_DOW(
                quad->w[iq]*row_val[i],
                /**/ SCMbtv(
                  n_lambda, (const REAL *)Lb0, grd_col_phi[j], tmp1),
                quad->w[iq]*col_val[j],
                /**/ SCMbtv(
                  n_lambda, (const REAL *)Lb1, grd_row_phi[i], tmp2),
                val);
              MSCMAXPY_DOW(
                1.0, /**/ val, mat[i][j]);
              MSCMAXTPY_DOW(
                -1.0, /**/ val, mat[j][i]);
            }
          }
        }

        if (row_V) { /* condense with the directions of the basis functions */
          VV_M_condense_el_mat(fill_info, false, true);
        }

      } else { /* vector-valued, and direction not p.w. constant */
        real_mat = (REAL **)fill_info->el_mat->data.real;

        for (iq = 0; iq < quad->n_points; iq++) {
          Lb0_adv = fill_info->op_info.Lb0.real_d(
            el_info, quad, iq, fill_info->op_info.user_data);
          Lb1_adv = fill_info->op_info.Lb1.real_d(
            el_info, quad, iq, fill_info->op_info.user_data);

          /* Now contract Lb0 and Lb1 with the advection field. For
           * the sake of simplicity the contraction is always
           * performed on the first index.
           */
          for (i = 0; i < n_lambda; i++) {
            SCMSET_DOW(0.0, Lb0[i]);
            SCMSET_DOW(0.0, Lb1[i]);
            for (j = 0; j < DIM_OF_WORLD; j++) {
              SCMAXPY_DOW(adv_field[iq][j], Lb0_adv[i][j], Lb0[i]);
              SCMAXPY_DOW(adv_field[iq][j], Lb1_adv[i][j], Lb1[i]);
            }
          }

          for (i = 0; i < fill_info->el_mat->n_row; i++) {
            for (j = i+1; j < fill_info->el_mat->n_col; j++) {
              REAL asym_val;

              asym_val = quad->w[iq] *
                (VV_SCMDutbv(
                  n_lambda, grd_row_d[iq][i], (const REAL *)Lb1, col_d[iq][j])
                 +
                 VV_SCMubtDv(
                   n_lambda, row_d[iq][i],
                   (const REAL *)Lb0, grd_col_d[iq][j]));

              real_mat[i][j] += asym_val;
              real_mat[j][i] -= asym_val;
            }
          }
        }
      }
    } else { /*  non symmetric assembling   */
      if (row_V || col_V) {
        if (row_V && !row_V_const) {
          row_d     = get_quad_fast_phi_dow(row_qfast);
          grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
        }
        if (col_V && !col_V_const) {
          grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
          col_d     = get_quad_fast_phi_dow(col_qfast);
        }
      }

      mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                                          /* row_V, col_V, */
                                          row_V_const, col_V_const);

      for (iq = 0; iq < quad->n_points; iq++)  {
        Lb0_adv = fill_info->op_info.Lb0.real_d(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb1_adv = fill_info->op_info.Lb1.real_d(
          el_info, quad, iq, fill_info->op_info.user_data);

        /* Now contract Lb0 and Lb1 with the advection field. For
         * the sake of simplicity the contraction is always
         * performed on the first index.
         */
        for (i = 0; i < n_lambda; i++) {
          SCMSET_DOW(0.0, Lb0[i]);
          SCMSET_DOW(0.0, Lb1[i]);
          for (j = 0; j < DIM_OF_WORLD; j++) {
            SCMAXPY_DOW(adv_field[iq][j], Lb0_adv[i][j], Lb0[i]);
            SCMAXPY_DOW(adv_field[iq][j], Lb1_adv[i][j], Lb1[i]);
          }
        }

        grd_col_phi = col_qfast->grd_phi[iq];
        col_val = col_qfast->phi[iq];
        grd_row_phi = row_qfast->grd_phi[iq];
        row_val = row_qfast->phi[iq];

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          for (j = 0; j < fill_info->el_mat->n_col; j++) {
            if (row_V_const && col_V_const) {
              SCMAXPBY_DOW(
                quad->w[iq]*row_val[i],
                /**/ SCMbtv(
                  n_lambda, (const REAL *)Lb0, grd_col_phi[j], tmp1),
                quad->w[iq]*col_val[j],
                /**/ SCMbtv(
                  n_lambda, (const REAL *)Lb1, grd_row_phi[i], tmp2),
                val);
              MSCMAXPY_DOW(
                1.0, /**/ val, mat[i][j]);
            } else if (row_V_const) {
              /* column vector valued with non-constant direction */
              if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
                real_mat[i][j] +=
                  quad->w[iq] *
                  (SV_SCMDutbv(
                    n_lambda, grd_row_phi[i], (const REAL *)Lb1, col_d[iq][j])
                   +
                   SV_SCMubtDv(
                     n_lambda, row_val[i],
                     (const REAL *)Lb0, grd_col_d[iq][j]));
#endif
              } else {
                CV_SCMDutbv(
                  n_lambda, grd_row_phi[i],
                  (const REAL *)Lb1, col_d[iq][j], val_d);
                DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
                CV_SCMubtDv(
                  n_lambda, row_val[i],
                  (const REAL *)Lb0, grd_col_d[iq][j], val_d);
                DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              }
            } else if (col_V_const) {
              /* row vector valued with non-constant direction; the
               * test has to look at the column flags, otherwise this
               * branch is unreachable and the VV_ case below would be
               * entered with col_d == grd_col_d == NULL.
               */
              if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
                real_mat[i][j] +=
                  quad->w[iq] *
                  (VS_SCMDutbv(
                    n_lambda, grd_row_d[iq][i], (const REAL *)Lb1, col_val[j])
                   +
                   VS_SCMubtDv(
                     n_lambda, row_d[iq][i],
                     (const REAL *)Lb0, grd_col_phi[j]));
#endif
              } else {
                VC_SCMDutbv(
                  n_lambda, grd_row_d[iq][i],
                  (const REAL *)Lb1, col_val[j], val_d);
                DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
                VC_SCMubtDv(
                  n_lambda, row_d[iq][i],
                  (const REAL *)Lb0, grd_col_phi[j], val_d);
                DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              }
            } else {
              /* both sides vector valued with non-constant direction */
              real_mat[i][j] +=
                quad->w[iq] *
                (VV_SCMDutbv(
                  n_lambda, grd_row_d[iq][i], (const REAL *)Lb1, col_d[iq][j])
                 +
                 VV_SCMubtDv(
                   n_lambda, row_d[iq][i],
                   (const REAL *)Lb0, grd_col_d[iq][j]));
            }
          } /* column loop */
        } /* row loop */
      } /* quad-point loop */

      /* Now possibly condense the temporary matrix if either of the
       * directions was p.w. constant
       */
      M_condense_matrices(fill_info,
                                      /* row_V, col_V, row_C, col_C, */
                                      row_V_const, col_V_const);
    }

    /* advance the coefficient chain in lock-step with the cache chain */
    adv_coeffs = CHAIN_NEXT(adv_coeffs, EL_REAL_VEC_D);

  } CHAIN_WHILE(adv_cache, const ADV_CACHE);
}

EMIT_DIM_VERSIONS(/**/, NAME(adv_quad_11), ARG_DECL, ARG_CALL);

/* >>> */

/* >>> */

/* <<< quad_0() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of the zero-order term: the coefficient returned
 * by op_info.c.real() is paired with the values of the row and column
 * basis functions at the points of op_info.quad[0].
 *
 * If row and column spaces are of the same kind (row_V == col_V) and
 * fill_info->c_symmetric is set, only the diagonal and the strict
 * upper triangle are computed (from row_qfast for both sides) and the
 * upper triangle is mirrored.  Otherwise every entry is computed with
 * the kernel matching the row/column kinds, followed by
 * M_condense_matrices().
 *
 * el_info   - current element; only forwarded to the operator callback.
 * fill_info - operator data, quadrature caches and element matrix.
 */
void
NAME(quad_0)(const EL_INFO *el_info, const FILL_INFO *fill_info)
{
  REAL c;
  const REAL      *row_val, *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL        val;
  REAL_DD      **mat;
  REAL_D      **real_d_mat = NULL;
  REAL        **real_mat = NULL;
  bool        row_V_const, col_V_const;

  quad      = fill_info->op_info.quad[0];
  row_qfast = fill_info->row_quad_fast[0];
  col_qfast = fill_info->col_quad_fast[0];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: either not vector valued at all, or vector valued
   * with a piece-wise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V == col_V && fill_info->c_symmetric) {

    col_V_const = row_V_const;
    /* col_V       = row_V; */

    if (row_V) {
      if (row_V_const) {
        /* assemble into the scalar scratch matrix, condense later */
        mat = (REAL_DD **)fill_info->scl_el_mat;
        M_clear_tmp_mat(mat, fill_info);
      }
    } else {
      mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
    }

    if (row_V_const) { /* non-vector valued, or direction p.w. constant */
      for (iq = 0; iq < quad->n_points; iq++) {
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        row_val = col_val = row_qfast->phi[iq];

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          MSCMAXPY_DOW(
            quad->w[iq] * row_val[i] * col_val[i], /**/ c, mat[i][i]);

          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            SCMAXEY_DOW(
              quad->w[iq] * row_val[i] * col_val[j], /**/ c, val);
            MSCMAXPY_DOW(
              1.0, /**/ val, mat[i][j]);
            MSCMAXTPY_DOW(
              1.0, /**/ val, mat[j][i]);
          }
        }
      }

      if (row_V) { /* condense with the directions of the basis functions */
        VV_M_condense_el_mat(fill_info, true, false);
      }
    } else { /* vector-valued, and direction not p.w. constant */
      const REAL_D *const*row_d;
      const REAL_D *const*col_d;

      row_d = col_d = get_quad_fast_phi_dow(row_qfast);

      real_mat = (REAL **)fill_info->el_mat->data.real;

      for (iq = 0; iq < quad->n_points; iq++) {
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          real_mat[i][i] +=
            quad->w[iq]
            *
            SCMGRAMSCP_DOW(/**/ c, row_d[iq][i], col_d[iq][i]);
          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            REAL sym_val;

            sym_val =
              quad->w[iq]
              *
              SCMGRAMSCP_DOW(/**/ c,
                                     row_d[iq][i], col_d[iq][j]);

            real_mat[i][j] += sym_val;
            real_mat[j][i] += sym_val;
          }
        }
      }
    }
  } else {      /*  non symmetric assembling   */
    const REAL_D *const*row_d = NULL;
    const REAL_D *const*col_d = NULL;

    /* Vector valued data is only needed (and only fetched) for a
     * direction which is not piece-wise constant.
     */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
        row_d = get_quad_fast_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
        col_d = get_quad_fast_phi_dow(col_qfast);
      }
    }

    mat = M_assign_matrices(&real_mat, &real_d_mat,
                                        fill_info,
                                        /* row_V, col_V, */
                                        row_V_const, col_V_const);

    for (iq = 0; iq < quad->n_points; iq++) {
      c = fill_info->op_info.c.real(
        el_info, quad, iq, fill_info->op_info.user_data);

      row_val = row_qfast->phi[iq];
      col_val = col_qfast->phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
        for (j = 0; j < fill_info->el_mat->n_col; j++) {
          if (row_V_const && col_V_const) {
            MSCMAXPY_DOW(
              quad->w[iq] * row_val[i] * col_val[j], /**/ c,
              mat[i][j]);
          } else if (row_V_const) {
            /* column vector valued with non-constant direction */
            if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              REAL_D val_d = { 0.0, };
              real_mat[i][j] +=
                quad->w[iq] * row_val[i] *
                SUM_DOW(SCMGEMV_DOW(1.0, /**/ c, col_d[iq][j],
                                            0.0, val_d));
#endif
            } else {
              SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
                                  col_d[iq][j], 1.0, real_d_mat[i][j]);
            }
          } else if (col_V_const) {
            /* row vector valued with non-constant direction */
            if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              REAL_D val_d = { 0.0, };
              real_mat[i][j] +=
                quad->w[iq] * col_val[j] *
                SUM_DOW(SCMGEMTV_DOW(1.0, /**/ c, row_d[iq][i],
                                             0.0, val_d));
#endif
            } else {
              SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
                                   row_d[iq][i], 1.0, real_d_mat[i][j]);
            }
          } else {
            /* both sides vector valued with non-constant direction:
             * entry (i,j) pairs row function i with COLUMN function j
             * (indexing col_d by i would ignore j and may run past the
             * column data when n_row > n_col).
             */
            real_mat[i][j] +=
              quad->w[iq]
              *
              SCMGRAMSCP_DOW(/**/ c,
                                     row_d[iq][i], col_d[iq][j]);
          }
        } /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
                                    /* row_V, col_V, row_C, col_C, */
                                    row_V_const, col_V_const);
  } /* non-symmetric assembly */
}

/* >>> */

/* <<< quad_2_01() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of a second-order term (LALt) together with a
 * first-order term whose derivative is applied to col_phi, the ansatz
 * space ("Lb0" assembly).  Coefficients are obtained per quadrature
 * point from op_info.LALt.real_dd() and op_info.Lb0.real(); the
 * quadrature and basis-function caches with index 2 are used.
 *
 * Every entry is computed with the kernel matching the row/column
 * kinds (plain / SV_ / CV_ / VS_ / VC_ / VV_), followed by
 * M_condense_matrices().
 *
 * el_info   - current element; only forwarded to the operator callbacks.
 * fill_info - operator data, quadrature caches and element matrix.
 * n_lambda  - number of leading components used in the contractions.
 */
static inline void
NAME(quad_2_01)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                int n_lambda)
{
  const REAL_BDD *LALt;
  const REAL *Lb0;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  const REAL      *row_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL_DD tmpA;
  REAL tmpb;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_col_d = NULL, *const*grd_row_d = NULL;
  const REAL_D    *const*row_d = NULL;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: either not vector valued at all, or vector valued
   * with a piece-wise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* Vector valued data is only needed (and only fetched) for a
   * direction which is not piece-wise constant.
   */
  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      row_d     = get_quad_fast_phi_dow(row_qfast);
      grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                                      /* row_V, col_V, */
                                      row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++) {
    LALt = fill_info->op_info.LALt.real_dd(
      el_info, quad, iq, fill_info->op_info.user_data);
    Lb0 = fill_info->op_info.Lb0.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    grd_row_phi = row_qfast->grd_phi[iq];
    grd_col_phi = col_qfast->grd_phi[iq];

    row_val = row_qfast->phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++) {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
        if (row_V_const && col_V_const) {
          MSCMAXPY_DOW(
            quad->w[iq]*row_val[i],
            /**/ SCMbtv(
              n_lambda, Lb0, grd_col_phi[j], tmpb),
            mat[i][j]);
          MMAXPY_DOW(
            quad->w[iq],
            (const REAL_D *) MutAv(
              n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], tmpA),
            mat[i][j]);
        } else if (row_V_const) {
          /* column vector valued with non-constant direction */
          if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (SV_MutAv(
                n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j])
               +
               SV_SCMubtDv(
                 n_lambda, row_val[i], Lb0, grd_col_d[iq][j]));
#endif
          } else {
            CV_MutAv(
              n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            CV_SCMubtDv(
              n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
          }
        } else if (col_V_const) {
          /* row vector valued with non-constant direction; the test
           * has to look at the column flags, otherwise this branch is
           * unreachable and the VV_ case below would be entered with
           * grd_col_d == NULL.
           */
          if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (VS_MutAv(
                n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j])
               +
               VS_SCMubtDv(
                 n_lambda, row_d[iq][i], Lb0, grd_col_phi[j]));
#endif
          } else {
            VC_MutAv(
              n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            VC_SCMubtDv(
              n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
          }
        } else {
          /* both sides vector valued with non-constant direction */
          real_mat[i][j] +=
            quad->w[iq]
            *
            (VV_SCMubtDv(
              n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j])
             +
             VV_MutAv(
               n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]));
        }
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
                                  /* row_V, col_V, row_C, col_C, */
                                  row_V_const, col_V_const);
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_2_01), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_2_10() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of a second-order term (LALt) together with a
 * first-order term whose derivative is applied to row_phi, the test
 * space ("Lb1" assembly).  Coefficients are obtained per quadrature
 * point from op_info.LALt.real_dd() and op_info.Lb1.real(); the
 * quadrature and basis-function caches with index 2 are used.
 *
 * Every entry is computed with the kernel matching the row/column
 * kinds (plain / SV_ / CV_ / VS_ / VC_ / VV_), followed by
 * M_condense_matrices().
 *
 * el_info   - current element; only forwarded to the operator callbacks.
 * fill_info - operator data, quadrature caches and element matrix.
 * n_lambda  - number of leading components used in the contractions.
 */
static inline void
NAME(quad_2_10)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                int n_lambda)
{
  const REAL_BDD *LALt;
  const REAL *Lb1;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  const REAL      *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL_DD tmpA;
  REAL tmpb;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_col_d = NULL, *const*grd_row_d = NULL;
  const REAL_D    *const*col_d = NULL;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: either not vector valued at all, or vector valued
   * with a piece-wise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* Vector valued data is only needed (and only fetched) for a
   * direction which is not piece-wise constant.  The column values
   * col_d belong to the column space and hence must come from
   * col_qfast under the column condition.
   */
  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
      col_d     = get_quad_fast_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                                      /* row_V, col_V, */
                                      row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++) {
    LALt = fill_info->op_info.LALt.real_dd(
      el_info, quad, iq, fill_info->op_info.user_data);
    Lb1 = fill_info->op_info.Lb1.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    grd_row_phi = row_qfast->grd_phi[iq];
    grd_col_phi = col_qfast->grd_phi[iq];

    col_val = col_qfast->phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++) {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
        if (row_V_const && col_V_const) {
          MSCMAXPY_DOW(
            quad->w[iq]*col_val[j],
            /**/ SCMbtv(
              n_lambda, Lb1, grd_row_phi[i], tmpb),
            mat[i][j]);
          MMAXPY_DOW(
            quad->w[iq],
            (const REAL_D *) MutAv(
              n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], tmpA),
            mat[i][j]);
        } else if (row_V_const) {
          /* column vector valued with non-constant direction */
          if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (SV_MutAv(
                n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j])
               +
               SV_SCMDutbv(
                 n_lambda, grd_row_phi[i], Lb1, col_d[iq][j]));
#endif
          } else {
            CV_MutAv(
              n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            CV_SCMDutbv(
              n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
          }
        } else if (col_V_const) {
          /* row vector valued with non-constant direction; the test
           * has to look at the column flags, otherwise this branch is
           * unreachable and the VV_ case below would be entered with
           * col_d == grd_col_d == NULL.
           */
          if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (VS_MutAv(
                n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j])
               +
               VS_SCMDutbv(
                 n_lambda, grd_row_d[iq][i], Lb1, col_val[j]));
#endif
          } else {
            VC_MutAv(
              n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            VC_SCMDutbv(
              n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
          }
        } else {
          /* both sides vector valued with non-constant direction */
          real_mat[i][j] +=
            quad->w[iq]
            *
            (VV_SCMDutbv(
              n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
             +
             VV_MutAv(
               n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]));
        }
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
                                  /* row_V, col_V, row_C, col_C, */
                                  row_V_const, col_V_const);
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_2_10), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_2_11() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

static inline void
NAME(quad_2_11)(const EL_INFO *el_info, const FILL_INFO *fill_info,
		int n_lambda)
{
  const REAL_BDD *LALt;
  const REAL *Lb0;
  const REAL *Lb1;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  const REAL      *row_val, *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL val1;
  REAL tmp1;
  REAL_DD val2;
  REAL_DD tmp2;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V == col_V &&
      fill_info->op_info.LALt_symmetric &&
      fill_info->op_info.Lb0_Lb1_anti_symmetric) {

    col_qfast   = row_qfast;
    col_V_const = row_V_const;
    /* col_V       = row_V; */

    if (row_V) {
      if (row_V_const) {
	mat = (REAL_DD **)fill_info->scl_el_mat;
	M_clear_tmp_mat(mat, fill_info);
      } else {
	grd_row_d = grd_col_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
    } else {
      mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
    }

    if (row_V_const) { /* non-vector valued, or direction p.w. constant */
      for (iq = 0; iq < quad->n_points; iq++) {
	LALt = fill_info->op_info.LALt.real_dd(
	  el_info, quad, iq, fill_info->op_info.user_data);
	Lb0 = fill_info->op_info.Lb0.real(
	  el_info, quad, iq, fill_info->op_info.user_data);
	Lb1 = fill_info->op_info.Lb1.real(
	  el_info, quad, iq, fill_info->op_info.user_data);

	grd_row_phi = grd_col_phi = col_qfast->grd_phi[iq];
	row_val = col_val = col_qfast->phi[iq];

	for (i = 0; i < fill_info->el_mat->n_row; i++) {
	  MMAXPY_DOW(
	    quad->w[iq],
	    (const REAL_D *) MutAv(
	      n_lambda, grd_row_phi[i], LALt, grd_col_phi[i], tmp2),
	    mat[i][i]);

	  for (j = i+1; j < fill_info->el_mat->n_col; j++) {
	    MutAv(
	      n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
	    MAX_DOW(quad->w[iq],  val2);
	    MMAXPY_DOW(
	      1.0, (const REAL_D *) val2, mat[i][j]);
	    MMAXTPY_DOW(
	      1.0, (const REAL_D *) val2, mat[j][i]);

	    SCMAXEY_DOW(
	      quad->w[iq]*row_val[i],
	      /**/ SCMbtv(
		n_lambda, Lb0, grd_col_phi[j], tmp1),
	      val1);
	    SCMAXPY_DOW(
	      quad->w[iq]*col_val[j],
	      /**/ SCMbtv(
		n_lambda, Lb1, grd_row_phi[i], tmp1),
	      val1);

	    MSCMAXPY_DOW(
	      1.0, /**/ val1, mat[i][j]);
	    MSCMAXTPY_DOW(
	      -1.0, /**/ val1, mat[j][i]);
	  }
	}
      }
      if (row_V) { /* condense with the directions of the basis functions */
	VV_M_condense_el_mat(fill_info, true, false);
      }
    } else { /* vector-valued and direction not p.w. constant. */
      real_mat = (REAL **)fill_info->el_mat->data.real;

      for (iq = 0; iq < quad->n_points; iq++) {
	LALt = fill_info->op_info.LALt.real_dd(
	  el_info, quad, iq, fill_info->op_info.user_data);
	Lb0 = fill_info->op_info.Lb0.real(
	  el_info, quad, iq, fill_info->op_info.user_data);
	Lb1 = fill_info->op_info.Lb1.real(
	  el_info, quad, iq, fill_info->op_info.user_data);

	for (i = 0; i < fill_info->el_mat->n_row; i++) {
	  real_mat[i][i] +=
	    quad->w[iq]
	    *
	    VV_MutAv(
	      n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][i]);
	  for (j = i+1; j < fill_info->el_mat->n_col; j++) {
	    REAL val;

	    val = quad->w[iq]
	      *
	      VV_MutAv(
		n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]);
	    real_mat[i][j] += val;
	    real_mat[j][i] += val;

	    val = quad->w[iq] *
	      (VV_SCMDutbv(
		n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
	       +
	       VV_SCMubtDv(
		 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j]));

	    real_mat[i][j] += val;
	    real_mat[j][i] -= val;
	  }
	}
      }
    }
  } else { /* non-(anti-)symmetric assembly */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
	row_d     = get_quad_fast_phi_dow(row_qfast);
	grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
	grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
	col_d     = get_quad_fast_phi_dow(col_qfast);
      }
    }

    mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
					/* row_V, col_V, */
					row_V_const, col_V_const);

    for (iq = 0; iq < quad->n_points; iq++) {
      LALt = fill_info->op_info.LALt.real_dd(
	el_info, quad, iq, fill_info->op_info.user_data);
      Lb0 = fill_info->op_info.Lb0.real(
	el_info, quad, iq, fill_info->op_info.user_data);
      Lb1 = fill_info->op_info.Lb1.real(
	el_info, quad, iq, fill_info->op_info.user_data);

      grd_col_phi = col_qfast->grd_phi[iq];
      col_val = col_qfast->phi[iq];

      grd_row_phi = row_qfast->grd_phi[iq];
      row_val = row_qfast->phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
	for (j = 0; j < fill_info->el_mat->n_col; j++) {
	  if (row_V_const && col_V_const) {
	    MutAv(
	      n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
	    MMAXPY_DOW(
	      quad->w[iq], (const REAL_D *) val2,
	      mat[i][j]);
	    SCMAXEY_DOW(
	      row_val[i],
	      /**/ SCMbtv(
		n_lambda, Lb0, grd_col_phi[j], tmp1),
	      val1);
	    SCMAXPY_DOW(
	      col_val[j],
	      /**/ SCMbtv(
		n_lambda, Lb1, grd_row_phi[i], tmp1),
	      val1);
	    MSCMAXPY_DOW(
	      quad->w[iq],
	      /**/ val1,
	      mat[i][j]);
	  } else if (row_V_const) {
	    if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	      real_mat[i][j] +=
		quad->w[iq] *
		(SV_SCMDutbv(
		  n_lambda, grd_row_phi[i], Lb1, col_d[iq][j])
		 +
		 SV_SCMubtDv(
		   n_lambda, row_val[i], Lb0, grd_col_d[iq][j])
		 +
		 SV_MutAv(
		   n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j]));
#endif
	    } else {
	      CV_SCMDutbv(
		n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	      CV_SCMubtDv(
		n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	      CV_MutAv(
		n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	    }
	  } else if (row_V_const) {
	    if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	      real_mat[i][j] +=
		quad->w[iq] *
		(VS_SCMDutbv(
		  n_lambda, grd_row_d[iq][i], Lb1, col_val[j])
		 +
		 VS_SCMubtDv(
		   n_lambda, row_d[iq][i], Lb0, grd_col_phi[j])
		 +
		 VS_MutAv(
		   n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j]));
#endif
	    } else {
	      VC_SCMDutbv(
		n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	      VC_SCMubtDv(
		n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	      VC_MutAv(
		n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
	      DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	    }
	  } else {
	    real_mat[i][j] +=
	      quad->w[iq] *
	      (VV_SCMDutbv(
		n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
	       +
	       VV_SCMubtDv(
		 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j])
	       +
	       VV_MutAv(
		 n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]));
	  }
	} /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
				    /* row_V, col_V, row_C, col_C, */
				    row_V_const, col_V_const);
  }
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_2_11), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_2_0() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of an operator with a second order term (LALt)
 * and a zero order term (c).
 *
 * el_info   -- handed through to the LALt and c callbacks stored in
 *              fill_info->op_info.
 * fill_info -- provides the quadrature rule (op_info.quad[2]), the row
 *              and column QUAD_FAST structures (index 2), the coefficient
 *              callbacks and the element matrix to accumulate into.
 * n_lambda  -- forwarded to the MutAv() family of kernels.
 *
 * If the row and column spaces are of the same kind (row_V == col_V) and
 * op_info.LALt_symmetric is set, only the upper triangle is computed and
 * mirrored.  Otherwise every (i, j) entry is computed, using one of four
 * kernels selected by row_V_const / col_V_const.
 */
static inline void
NAME(quad_2_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
               int n_lambda)
{
  const REAL_BDD *LALt;
  REAL c;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  const REAL      *row_val, *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL_DD val2;
  REAL_DD valdst;
  REAL_D          val_d;
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: not vector valued, or vector valued with a
   * piecewise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V == col_V && fill_info->op_info.LALt_symmetric) {
    /* => row_phi == col_phi => c_symmetric  */

    col_qfast   = row_qfast;
    col_V_const = row_V_const;
    /* col_V       = row_V; */

    if (row_V) {
      if (row_V_const) {
        mat = (REAL_DD **)fill_info->scl_el_mat;
        M_clear_tmp_mat(mat, fill_info);
      } else {
        grd_row_d = grd_col_d = get_quad_fast_grd_phi_dow(row_qfast);
        /* The zero order term below reads the basis function values
         * themselves, so they have to be fetched in addition to the
         * gradients; otherwise row_d/col_d would still be NULL.
         */
        row_d = col_d = get_quad_fast_phi_dow(row_qfast);
      }
    } else {
      mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
    }

    if (row_V_const) { /* non-vector valued, or direction p.w. constant */
      for (iq = 0; iq < quad->n_points; iq++) {
        LALt = fill_info->op_info.LALt.real_dd(
          el_info, quad, iq, fill_info->op_info.user_data);
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        grd_row_phi = grd_col_phi = row_qfast->grd_phi[iq];
        row_val = col_val = row_qfast->phi[iq];

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          /* diagonal entry */
          MSCMAXEY_DOW(
            row_val[i]*col_val[i], /**/ c, valdst);
          MutAv(
            n_lambda, grd_row_phi[i], LALt, grd_col_phi[i], val2);
          MMAXPY_DOW(
            1.0, (const REAL_D *) val2, valdst);
          MMAXPY_DOW(
            quad->w[iq], (const REAL_D *) valdst, mat[i][i]);

          /* strict upper triangle, mirrored (transposed) to the lower one */
          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            MSCMAXEY_DOW(
              row_val[i]*col_val[j], /**/ c, valdst);
            MutAv(
              n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
            MMAXPY_DOW(
              1.0, (const REAL_D *) val2, valdst);
            MAX_DOW(quad->w[iq], valdst);
            MMAXPY_DOW(
              1.0, (const REAL_D *) valdst, mat[i][j]);
            MMAXTPY_DOW(
              1.0, (const REAL_D *) valdst, mat[j][i]);
          }
        }
      }
      if (row_V) { /* condense with the directions of the basis functions */
        VV_M_condense_el_mat(fill_info, true, false);
      }
    } else { /* vector-valued and direction not p.w. constant. */
      real_mat = (REAL **)fill_info->el_mat->data.real;

      for (iq = 0; iq < quad->n_points; iq++) {
        LALt = fill_info->op_info.LALt.real_dd(
          el_info, quad, iq, fill_info->op_info.user_data);
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          real_mat[i][i] +=
            quad->w[iq]
            *
            (VV_MutAv(
              n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][i])
             +
             SCMGRAMSCP_DOW(
               /**/ c, row_d[iq][i], col_d[iq][i]));
          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            REAL val;

            val =
              quad->w[iq]
              *
              (VV_MutAv(
                n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j])
               +
               SCMGRAMSCP_DOW(
                 /**/ c, row_d[iq][i], col_d[iq][j]));

            real_mat[i][j] += val;
            real_mat[j][i] += val;
          }
        }
      }
    }
  } else {      /*  non symmetric assembling   */

    /* Only fetch the vector valued tables that are actually needed;
     * the pointers stay NULL for constant directions.
     */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
        row_d     = get_quad_fast_phi_dow(row_qfast);
        grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
        col_d     = get_quad_fast_phi_dow(col_qfast);
        grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
      }
    }

    mat = M_assign_matrices(&real_mat, &real_d_mat,
                            fill_info,
                            /* row_V, col_V, */
                            row_V_const, col_V_const);

    for (iq = 0; iq < quad->n_points; iq++) {
      LALt = fill_info->op_info.LALt.real_dd(
        el_info, quad, iq, fill_info->op_info.user_data);
      c = fill_info->op_info.c.real(
        el_info, quad, iq, fill_info->op_info.user_data);

      grd_row_phi = row_qfast->grd_phi[iq];
      grd_col_phi = col_qfast->grd_phi[iq];

      row_val = row_qfast->phi[iq];
      col_val = col_qfast->phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
        for (j = 0; j < fill_info->el_mat->n_col; j++) {
          if (row_V_const && col_V_const) {
            MSCMAXEY_DOW(
              row_val[i]*col_val[j], /**/ c, valdst);
            MutAv(
              n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
            MMAXPY_DOW(
              1.0, (const REAL_D *) val2, valdst);
            /* The quadrature weight is applied exactly once, here. */
            MMAXPY_DOW(
              quad->w[iq], (const REAL_D *) valdst, mat[i][j]);
          } else if (row_V_const) {
            /* row direction constant, column genuinely vector valued */
            if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              REAL_D val_d = { 0.0, };
              real_mat[i][j] +=
                quad->w[iq]
                *
                (SV_MutAv(
                  n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j])
                 +
                 row_val[i] *
                 SUM_DOW(SCMGEMV_DOW(1.0, /**/ c, col_d[iq][j],
                                     0.0, val_d)));
#endif
            } else {
              CV_MutAv(
                n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
                          col_d[iq][j], 1.0, real_d_mat[i][j]);
            }
          } else if (col_V_const) {
            /* row genuinely vector valued, column direction constant */
            if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              REAL_D val_d = { 0.0, };
              real_mat[i][j] +=
                quad->w[iq]
                *
                (VS_MutAv(
                  n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j])
                 +
                 col_val[j] * SUM_DOW(
                   SCMGEMTV_DOW(1.0, /**/ c, row_d[iq][i],
                                0.0, val_d)));
#endif
            } else {
              VC_MutAv(
                n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
                           row_d[iq][i], 1.0, real_d_mat[i][j]);
            }
          } else {
            /* both sides genuinely vector valued; the c-term pairs row
             * function i with column function j.
             */
            real_mat[i][j] +=
              quad->w[iq]
              *
              (VV_MutAv(
                n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j])
               +
               SCMGRAMSCP_DOW(
                 /**/ c, row_d[iq][i], col_d[iq][j]));
          }
        } /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
                        /* row_V, col_V, row_C, col_C, */
                        row_V_const, col_V_const);
  } /* non-symmetric assembly */
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_2_0), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_01_0() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of an operator with a first order term Lb0
 * (derivative applied to the column/ansatz functions) and a zero order
 * term (c).
 *
 * el_info   -- handed through to the Lb0 and c callbacks stored in
 *              fill_info->op_info.
 * fill_info -- provides the quadrature rule (op_info.quad[1]), the row
 *              and column QUAD_FAST structures (index 1), the coefficient
 *              callbacks and the element matrix to accumulate into.
 * n_lambda  -- forwarded to the SCMbtv()/..._SCMubtDv() kernels.
 *
 * Every (i, j) entry is computed; the kernel is selected by
 * row_V_const / col_V_const.
 */
static inline void
NAME(quad_01_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                int n_lambda)
{
  const REAL *Lb0;
  REAL c;
  const REAL      *row_val, *col_val;
  const REAL_B    *grd_col_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL val;
  REAL_DD dstval;
  REAL_D          val_d = { 0.0, };
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[1];
  row_qfast = fill_info->row_quad_fast[1];
  col_qfast = fill_info->col_quad_fast[1];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: not vector valued, or vector valued with a
   * piecewise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* Only fetch the vector valued tables that are actually needed; the
   * pointers stay NULL for constant directions.
   */
  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      row_d = get_quad_fast_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
      col_d = get_quad_fast_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                          /* row_V, col_V, */
                          row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++) {
    Lb0 = fill_info->op_info.Lb0.real(
      el_info, quad, iq, fill_info->op_info.user_data);
    c = fill_info->op_info.c.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    grd_col_phi = col_qfast->grd_phi[iq];
    row_val = row_qfast->phi[iq];
    col_val = col_qfast->phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++) {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
        if (row_V_const && col_V_const) {
          SCMbtv(n_lambda, Lb0, grd_col_phi[j], val);
          MSCMAXEY_DOW(1.0, /**/ val, dstval);
          MSCMAXPY_DOW(col_val[j], /**/ c, dstval);
          MMAXPY_DOW(
            quad->w[iq]*row_val[i], (const REAL_D *) dstval, mat[i][j]);
        } else if (row_V_const) {
          /* row direction constant, column genuinely vector valued */
          if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (SV_SCMubtDv(n_lambda, row_val[i], Lb0, grd_col_d[iq][j])
               +
               row_val[i] * SUM_DOW(SCMGEMV_DOW(
                                      1.0, /**/ c, col_d[iq][j],
                                      0.0, val_d)));
#endif
          } else {
            CV_SCMubtDv(
              n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
                        col_d[iq][j], 1.0, real_d_mat[i][j]);
          }
        } else if (col_V_const) {
          /* row genuinely vector valued, column direction constant.
           * This must test the column flags: testing row_V_const a
           * second time would make the branch unreachable and send this
           * case to the VV kernel with col_d/grd_col_d still NULL.
           */
          if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (VS_SCMubtDv(n_lambda, row_d[iq][i], Lb0, grd_col_phi[j])
               +
               col_val[j] * SUM_DOW(SCMGEMTV_DOW(
                                      1.0, /**/ c, row_d[iq][i],
                                      0.0, val_d)));
#endif
          } else {
            VC_SCMubtDv(
              n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
                         row_d[iq][i], 1.0, real_d_mat[i][j]);
          }
        } else {
          /* both sides genuinely vector valued; the c-term pairs row
           * function i with column function j.
           */
          real_mat[i][j] +=
            quad->w[iq]
            *
            (VV_SCMubtDv(n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j])
             +
             SCMGRAMSCP_DOW(/**/ c,
                            row_d[iq][i], col_d[iq][j]));
        }
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
                      /* row_V, col_V, row_C, col_C, */
                      row_V_const, col_V_const);
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_01_0), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_10_0 */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of an operator with a first order term Lb1
 * (derivative applied to the row/test functions) and a zero order
 * term (c).
 *
 * el_info   -- handed through to the Lb1 and c callbacks stored in
 *              fill_info->op_info.
 * fill_info -- provides the quadrature rule (op_info.quad[1]), the row
 *              and column QUAD_FAST structures (index 1), the coefficient
 *              callbacks and the element matrix to accumulate into.
 * n_lambda  -- forwarded to the SCMbtv()/..._SCMDutbv() kernels.
 *
 * Every (i, j) entry is computed; the kernel is selected by
 * row_V_const / col_V_const.
 */
static inline void
NAME(quad_10_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                int n_lambda)
{
  const REAL *Lb1;
  REAL c;
  const REAL      *row_val, *col_val;
  const REAL_B    *grd_row_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL val;
  REAL_DD dstval;
  REAL_D          val_d = { 0.0, };
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[1];
  row_qfast = fill_info->row_quad_fast[1];
  col_qfast = fill_info->col_quad_fast[1];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: not vector valued, or vector valued with a
   * piecewise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* Only fetch the vector valued tables that are actually needed; the
   * pointers stay NULL for constant directions.
   */
  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      row_d = get_quad_fast_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      col_d = get_quad_fast_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                          /* row_V, col_V, */
                          row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++) {
    Lb1 = fill_info->op_info.Lb1.real(
      el_info, quad, iq, fill_info->op_info.user_data);
    c = fill_info->op_info.c.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    grd_row_phi = row_qfast->grd_phi[iq];
    row_val = row_qfast->phi[iq];
    col_val = col_qfast->phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++) {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
        if (row_V_const && col_V_const) {
          SCMbtv(n_lambda, Lb1, grd_row_phi[i], val);
          MSCMAXEY_DOW(
            1.0, /**/ val, dstval);
          MSCMAXPY_DOW(row_val[i], /**/ c, dstval);
          MMAXPY_DOW(
            quad->w[iq]*col_val[j], (const REAL_D *) dstval, mat[i][j]);
        } else if (row_V_const) {
          /* row direction constant, column genuinely vector valued */
          if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (SV_SCMDutbv(n_lambda, grd_row_phi[i], Lb1, col_d[iq][j])
               +
               row_val[i]
               *
               SUM_DOW(SCMGEMV_DOW(1.0, /**/ c, col_d[iq][j],
                                   0.0, val_d)));
#endif
          } else {
            CV_SCMDutbv(
              n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
                        col_d[iq][j], 1.0, real_d_mat[i][j]);
          }
        } else if (col_V_const) {
          /* row genuinely vector valued, column direction constant.
           * This must test the column flags: testing row_V_const a
           * second time would make the branch unreachable and send this
           * case to the VV kernel with col_d still NULL.
           */
          if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (VS_SCMDutbv(n_lambda, grd_row_d[iq][i], Lb1, col_val[j])
               +
               col_val[j]
               *
               SUM_DOW(SCMGEMTV_DOW(1.0, /**/ c, row_d[iq][i],
                                    0.0, val_d)));
#endif
          } else {
            VC_SCMDutbv(
              n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
                         row_d[iq][i], 1.0, real_d_mat[i][j]);
          }
        } else {
          /* both sides genuinely vector valued; the c-term pairs row
           * function i with column function j.
           */
          real_mat[i][j] +=
            quad->w[iq]
            *
            (VV_SCMDutbv(n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
             +
             SCMGRAMSCP_DOW(/**/ c,
                            row_d[iq][i], col_d[iq][j]));
        }
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
                      /* row_V, col_V, row_C, col_C, */
                      row_V_const, col_V_const);
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_10_0), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_11_0() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of an operator with both first order terms (Lb0:
 * derivative on the column functions, Lb1: derivative on the row
 * functions) and a zero order term (c).
 *
 * el_info   -- handed through to the Lb0, Lb1 and c callbacks stored in
 *              fill_info->op_info.
 * fill_info -- provides the quadrature rule (op_info.quad[1]), the row
 *              and column QUAD_FAST structures (index 1), the coefficient
 *              callbacks and the element matrix to accumulate into.
 * n_lambda  -- forwarded to the SCMbtv()/..._SCMDutbv()/..._SCMubtDv()
 *              kernels.
 *
 * If the row and column spaces are of the same kind (row_V == col_V) and
 * op_info.Lb0_Lb1_anti_symmetric is set, only the upper triangle is
 * computed: the first order part is subtracted from, and the c part
 * added to, the mirrored entry.  Otherwise every (i, j) entry is
 * computed, using one of four kernels selected by
 * row_V_const / col_V_const.
 */
static inline void
NAME(quad_11_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                int n_lambda)
{
  const REAL *Lb0;
  const REAL *Lb1;
  REAL c;
  const REAL      *row_val, *col_val;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL tmp1;
  REAL_DD dstval;
  REAL_D          val_d = { 0.0, };
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[1];
  row_qfast = fill_info->row_quad_fast[1];
  col_qfast = fill_info->col_quad_fast[1];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: not vector valued, or vector valued with a
   * piecewise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V == col_V && fill_info->op_info.Lb0_Lb1_anti_symmetric) {
    /* => row_phi == col_phi => c symmetric  */

    if (row_V) {
      if (row_V_const) {
        mat = (REAL_DD **)fill_info->scl_el_mat;
        M_clear_tmp_mat(mat, fill_info);
      } else {
        grd_row_d = grd_col_d = get_quad_fast_grd_phi_dow(row_qfast);
        row_d = col_d = get_quad_fast_phi_dow(row_qfast);
      }
    } else {
      mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
    }

    if (row_V_const) { /* non-vector valued, or direction p.w. constant */
      for (iq = 0; iq < quad->n_points; iq++) {
        Lb0 = fill_info->op_info.Lb0.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb1 = fill_info->op_info.Lb1.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        grd_row_phi = grd_col_phi = row_qfast->grd_phi[iq];
        row_val = col_val = row_qfast->phi[iq];

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          /* Only the c-term is accumulated on the diagonal. */
          MSCMAXPY_DOW(
            quad->w[iq]*row_val[i]*col_val[i], /**/ c, mat[i][i]);

          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            /* first order part: added at (i,j), subtracted
             * (transposed) at (j,i)
             */
            SCMbtv(n_lambda, Lb0, grd_col_phi[j], tmp1);
            MSCMAXEY_DOW(
              quad->w[iq]*row_val[i], /**/ tmp1, dstval);
            SCMbtv(n_lambda, Lb1, grd_row_phi[i], tmp1);
            MSCMAXPY_DOW(
              quad->w[iq]*col_val[j], /**/ tmp1, dstval);
            MAXPY_DOW(1.0, (const REAL_D *) dstval, mat[i][j]);
            MAXTPY_DOW(-1.0, (const REAL_D *) dstval, mat[j][i]);

            /* zero order part: added at both (i,j) and (j,i) */
            MSCMAXEY_DOW(
              quad->w[iq]*row_val[i]*col_val[j], /**/ c, dstval);
            MAXPY_DOW(
              1.0, (const REAL_D *) dstval, mat[i][j]);
            MAXTPY_DOW(
              1.0, (const REAL_D *) dstval, mat[j][i]);
          }
        }
      }

      if (row_V) { /* condense with the directions of the basis functions */
        /* NOTE(review): the matrix built above has a symmetric c part
         * in addition to the anti-symmetric first order part; confirm
         * that the flags (false, true) passed here are still
         * appropriate for VV_M_condense_el_mat() in that situation.
         */
        VV_M_condense_el_mat(fill_info, false, true);
      }

    } else { /* vector-valued, and direction not p.w. constant */
      real_mat = (REAL **)fill_info->el_mat->data.real;

      for (iq = 0; iq < quad->n_points; iq++) {
        Lb0 = fill_info->op_info.Lb0.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb1 = fill_info->op_info.Lb1.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          real_mat[i][i] +=
            quad->w[iq]
            *
            SCMGRAMSCP_DOW(/**/ c, row_d[iq][i], col_d[iq][i]);
          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            REAL val;

            val = quad->w[iq] * SCMGRAMSCP_DOW(
              /**/ c, row_d[iq][i], col_d[iq][j]);

            real_mat[i][j] += val;
            real_mat[j][i] += val;

            val = quad->w[iq] *
              (VV_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
               +
               VV_SCMubtDv(
                 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j]));

            real_mat[i][j] += val;
            real_mat[j][i] -= val;
          }
        }
      }
    }
  } else { /*  non symmetric assembling   */

    /* Only fetch the vector valued tables that are actually needed;
     * the pointers stay NULL for constant directions.
     */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
        row_d     = get_quad_fast_phi_dow(row_qfast);
        grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
        grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
        col_d     = get_quad_fast_phi_dow(col_qfast);
      }
    }

    mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                            /* row_V, col_V, */
                            row_V_const, col_V_const);

    for (iq = 0; iq < quad->n_points; iq++) {
      Lb0 = fill_info->op_info.Lb0.real(
        el_info, quad, iq, fill_info->op_info.user_data);
      Lb1 = fill_info->op_info.Lb1.real(
        el_info, quad, iq, fill_info->op_info.user_data);
      c = fill_info->op_info.c.real(
        el_info, quad, iq, fill_info->op_info.user_data);

      grd_row_phi = row_qfast->grd_phi[iq];
      row_val = row_qfast->phi[iq];
      grd_col_phi = col_qfast->grd_phi[iq];
      col_val = col_qfast->phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
        for (j = 0; j < fill_info->el_mat->n_col; j++) {
          if (row_V_const && col_V_const) {
            SCMbtv(n_lambda, Lb0, grd_col_phi[j], tmp1);
            MSCMAXEY_DOW(
              row_val[i], /**/ tmp1, dstval);
            SCMbtv(n_lambda, Lb1, grd_row_phi[i], tmp1);
            MSCMAXPY_DOW(
              col_val[j], /**/ tmp1, dstval);
            MSCMAXPY_DOW(
              row_val[i]*col_val[j], /**/ c, dstval);
            MAXPY_DOW(
              quad->w[iq], (const REAL_D *) dstval, mat[i][j]);
          } else if (row_V_const) {
            /* row direction constant, column genuinely vector valued */
            if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] *
                (SV_SCMDutbv(
                  n_lambda, grd_row_phi[i], Lb1, col_d[iq][j])
                 +
                 SV_SCMubtDv(
                   n_lambda, row_val[i], Lb0, grd_col_d[iq][j])
                 +
                 row_val[i] *
                 SUM_DOW(SCMGEMV_DOW(1.0, /**/ c, col_d[iq][j],
                                     0.0, val_d)));
#endif
            } else {
              CV_SCMDutbv(
                n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              CV_SCMubtDv(
                n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
                          col_d[iq][j], 1.0, real_d_mat[i][j]);
            }
          } else if (col_V_const) {
            /* row genuinely vector valued, column direction constant.
             * This must test the column flags: testing row_V_const a
             * second time would make the branch unreachable and send
             * this case to the VV kernel with col_d/grd_col_d still
             * NULL.
             */
            if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] *
                (VS_SCMDutbv(
                  n_lambda, grd_row_d[iq][i], Lb1, col_val[j])
                 +
                 VS_SCMubtDv(
                   n_lambda, row_d[iq][i], Lb0, grd_col_phi[j])
                 +
                 col_val[j] *
                 SUM_DOW(SCMGEMTV_DOW(
                           1.0, /**/ c, row_d[iq][i], 0.0, val_d)));
#endif
            } else {
              VC_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              VC_SCMubtDv(
                n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
                           row_d[iq][i], 1.0, real_d_mat[i][j]);
            }
          } else {
            /* both sides genuinely vector valued; the c-term pairs row
             * function i with column function j.
             */
            real_mat[i][j] +=
              quad->w[iq] *
              (VV_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
               +
               VV_SCMubtDv(
                 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j])
               +
               SCMGRAMSCP_DOW(/**/ c,
                              row_d[iq][i], col_d[iq][j]));
          }
        } /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
                        /* row_V, col_V, row_C, col_C, */
                        row_V_const, col_V_const);
  }
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_11_0), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_2_01_0() */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Quadrature assembly of an operator with a second order term (LALt),
 * a first order term Lb0 (derivative applied to the column/ansatz
 * functions) and a zero order term (c).
 *
 * el_info   -- handed through to the LALt, Lb0 and c callbacks stored in
 *              fill_info->op_info.
 * fill_info -- provides the quadrature rule (op_info.quad[2]), the row
 *              and column QUAD_FAST structures (index 2), the coefficient
 *              callbacks and the element matrix to accumulate into.
 * n_lambda  -- forwarded to the MutAv()/SCMbtv()/..._SCMubtDv() kernels.
 *
 * Every (i, j) entry is computed; the kernel is selected by
 * row_V_const / col_V_const.
 */
static inline void
NAME(quad_2_01_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                  int n_lambda)
{
  const REAL_BDD *LALt;
  const REAL *Lb0;
  REAL c;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  const REAL      *row_val, *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD      **mat;
  REAL_DD dstval;
  REAL val1;
  REAL_DD val2;
  REAL_D          val_d = { 0.0, };
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: not vector valued, or vector valued with a
   * piecewise constant direction.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  /* Only fetch the vector valued tables that are actually needed; the
   * pointers stay NULL for constant directions.
   */
  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      row_d = get_quad_fast_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
      col_d = get_quad_fast_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                          /* row_V, col_V, */
                          row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++) {
    LALt = fill_info->op_info.LALt.real_dd(
      el_info, quad, iq, fill_info->op_info.user_data);
    Lb0 = fill_info->op_info.Lb0.real(
      el_info, quad, iq, fill_info->op_info.user_data);
    c = fill_info->op_info.c.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    grd_row_phi = row_qfast->grd_phi[iq];
    grd_col_phi = col_qfast->grd_phi[iq];

    row_val = row_qfast->phi[iq];
    col_val = col_qfast->phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++)  {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
        if (row_V_const && col_V_const) {
          SCMbtv(n_lambda, Lb0, grd_col_phi[j], val1);
          MSCMAXEY_DOW(
            row_val[i], /**/ val1, dstval);
          MSCMAXPY_DOW(
            row_val[i]*col_val[j], /**/ c, dstval);
          MutAv(
            n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
          MMAXPY_DOW(
            1.0, (const REAL_D *) val2, dstval);
          MAXPY_DOW(
            quad->w[iq], (const REAL_D *) dstval, mat[i][j]);
        } else if (row_V_const) {
          /* row direction constant, column genuinely vector valued */
          if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (SV_SCMubtDv(n_lambda, row_val[i], Lb0, grd_col_d[iq][j])
               +
               row_val[i] * SUM_DOW(SCMGEMV_DOW(
                                      1.0, /**/ c, col_d[iq][j],
                                      0.0, val_d))
               +
               SV_MutAv(
                 n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j]));
#endif
          } else {
            CV_MutAv(
              n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            CV_SCMubtDv(
              n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
                        col_d[iq][j], 1.0, real_d_mat[i][j]);
          }
        } else if (col_V_const) {
          /* row genuinely vector valued, column direction constant.
           * This must test the column flags: testing row_V_const a
           * second time would make the branch unreachable and send this
           * case to the VV kernel with col_d/grd_col_d still NULL.
           */
          if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
            real_mat[i][j] +=
              quad->w[iq]
              *
              (VS_SCMubtDv(n_lambda, row_d[iq][i], Lb0, grd_col_phi[j])
               +
               col_val[j] * SUM_DOW(SCMGEMTV_DOW(
                                      1.0, /**/ c, row_d[iq][i],
                                      0.0, val_d))
               +
               VS_MutAv(
                 n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j]));
#endif
          } else {
            VC_SCMubtDv(
              n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
            SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
                         row_d[iq][i], 1.0, real_d_mat[i][j]);
            VC_MutAv(
              n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
            DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
          }
        } else {
          /* both sides genuinely vector valued; the c-term pairs row
           * function i with column function j.
           */
          real_mat[i][j] +=
            quad->w[iq]
            *
            (VV_SCMubtDv(n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j])
             +
             SCMGRAMSCP_DOW(/**/ c,
                            row_d[iq][i], col_d[iq][j])
             +
             VV_MutAv(
               n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]));
        }
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
                      /* row_V, col_V, row_C, col_C, */
                      row_V_const, col_V_const);
}

EMIT_DIM_VERSIONS(/**/, NAME(quad_2_01_0), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_2_10_0() */

static inline void
NAME(quad_2_10_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
		  int n_lambda)
{
  const REAL_BDD *LALt;
  const REAL *Lb1;
  REAL c;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  const REAL      *row_val, *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD **mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
  REAL_DD val2;
  REAL val1;
  REAL_DD dstval;
  REAL_D          val_d = { 0.0, };
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V || col_V) {
    if (row_V && !row_V_const) {
      grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      row_d = get_quad_fast_phi_dow(row_qfast);
    }
    if (col_V && !col_V_const) {
      grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
      col_d = get_quad_fast_phi_dow(col_qfast);
    }
  }

  mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
				      /* row_V, col_V, */
				      row_V_const, col_V_const);

  for (iq = 0; iq < quad->n_points; iq++) {
    LALt = fill_info->op_info.LALt.real_dd(
      el_info, quad, iq, fill_info->op_info.user_data);
    Lb1 = fill_info->op_info.Lb1.real(
      el_info, quad, iq, fill_info->op_info.user_data);
    c = fill_info->op_info.c.real(
      el_info, quad, iq, fill_info->op_info.user_data);

    grd_row_phi = row_qfast->grd_phi[iq];
    grd_col_phi = col_qfast->grd_phi[iq];

    row_val = row_qfast->phi[iq];
    col_val = col_qfast->phi[iq];

    for (i = 0; i < fill_info->el_mat->n_row; i++) {
      for (j = 0; j < fill_info->el_mat->n_col; j++) {
	if (row_V_const && col_V_const) {
	  SCMbtv(n_lambda,
			  Lb1, grd_row_phi[i], val1);
	  MSCMAXEY_DOW(
	    col_val[j], /**/ val1, dstval);
	  MSCMAXPY_DOW(
	    row_val[i]*col_val[j], /**/ c, dstval);
	  MutAv(
	    n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
	  MMAXPY_DOW(
	    1.0, (const REAL_D *) val2, dstval);
	  MAXPY_DOW(
	    quad->w[iq], (const REAL_D *) dstval, mat[i][j]);
	} else if (row_V_const) {
	  if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	    real_mat[i][j] +=
	      quad->w[iq]
	      *
	      (SV_SCMDutbv(n_lambda, grd_row_phi[i], Lb1, col_d[iq][j])
	       +
	       row_val[i]
	       *
	       SUM_DOW(SCMGEMV_DOW(1.0, /**/ c, col_d[iq][j],
					   0.0, val_d))
	       +
	       SV_MutAv(
		 n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j]));
#endif
	  } else {
	    CV_SCMDutbv(
	      n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	    SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
				col_d[iq][j], 1.0, real_d_mat[i][j]);
	    CV_MutAv(
	      n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	  }
	} else if (row_V_const) {
	  if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
	    real_mat[i][j] +=
	      quad->w[iq]
	      *
	      (VS_SCMDutbv(n_lambda, grd_row_d[iq][i], Lb1, col_val[j])
	       +
	       col_val[j]
	       *
	       SUM_DOW(SCMGEMTV_DOW(1.0, /**/ c, row_d[iq][i],
					    0.0, val_d))
	       +
	       VS_MutAv(
		 n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j]));
#endif
	  } else {
	    VC_SCMDutbv(
	      n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	    SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
				 row_d[iq][i], 1.0, real_d_mat[i][j]);
	    VC_MutAv(
	      n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
	    DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
	  }
	} else {
	  real_mat[i][j] +=
	    quad->w[iq]
	    *
	    (VV_SCMDutbv(n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
	     +
	     SCMGRAMSCP_DOW(/**/ c,
				    row_d[iq][i], col_d[iq][i])
	     +
	     VV_MutAv(
	       n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j]));
	}
      } /* column loop */
    } /* row loop */
  } /* quad-point loop */

  /* Now possibly condense the temporary matrix if either of the
   * directions was p.w. constant
   */
  M_condense_matrices(fill_info,
				  /* row_V, col_V, row_C, col_C, */
				  row_V_const, col_V_const);
}

/* NOTE(review): EMIT_DIM_VERSIONS is defined outside this chunk;
 * presumably it emits the dimension-specific variants of
 * NAME(quad_2_10_0) using ARG_DECL/ARG_CALL -- confirm against the
 * macro definition.
 */
EMIT_DIM_VERSIONS(/**/, NAME(quad_2_10_0), ARG_DECL, ARG_CALL);

/* >>> */

/* <<< quad_2_11_0 */

/* bool row_V, bool col_V, bool row_C, bool col_C globally defined */

/* Element-matrix contribution of an operator with second order (LALt),
 * both first order (Lb0 and Lb1) and zero order (c) terms, computed by
 * numerical quadrature with fill_info->op_info.quad[2].
 *
 * el_info   -- current element; only handed through to the coefficient
 *              callbacks LALt.real_dd(), Lb0.real(), Lb1.real() and
 *              c.real().
 * fill_info -- supplies the quadrature rule, the row/column QUAD_FAST
 *              structures (index 2), the coefficient callbacks with
 *              their user_data and the element matrix which is
 *              accumulated into.
 * n_lambda  -- forwarded unchanged to the MutAv()/SCMbtv() style
 *              kernels.
 *
 * Two code paths:
 *
 * - row_V == col_V, LALt flagged symmetric and Lb0/Lb1 flagged
 *   anti-symmetric: the row QUAD_FAST is used for both sides and only
 *   the pairs j > i are computed. The LALt/c part is mirrored with a
 *   "+" into (j, i), the Lb0/Lb1 part with a "-".
 *
 * - otherwise: every (i, j) pair is computed. Which kernel is used
 *   depends on whether the row respectively column directions are
 *   constant (row_V_const/col_V_const); afterwards
 *   M_condense_matrices() is called.
 */
static inline void
NAME(quad_2_11_0)(const EL_INFO *el_info, const FILL_INFO *fill_info,
                  int n_lambda)
{
  const REAL_BDD *LALt;
  const REAL *Lb0;
  const REAL *Lb1;
  REAL c;
  const REAL_B    *grd_row_phi, *grd_col_phi;
  const REAL      *row_val, *col_val;
  int             iq, i, j;
  const QUAD_FAST *row_qfast, *col_qfast;
  const BAS_FCTS  *row_phi, *col_phi;
  const QUAD      *quad;
  REAL_DD **mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
  REAL_DD dstval;
  REAL_DD val2;
  REAL val1, tmp1;
  REAL_D          val_d = { 0.0, };
  REAL_D          **real_d_mat = NULL;
  REAL            **real_mat = NULL;
  bool            row_V_const, col_V_const;
  const REAL_DB   *const*grd_row_d = NULL, *const*grd_col_d = NULL;
  const REAL_D    *const*row_d = NULL, *const*col_d = NULL;

  quad      = fill_info->op_info.quad[2];
  row_qfast = fill_info->row_quad_fast[2];
  col_qfast = fill_info->col_quad_fast[2];

  row_phi       = row_qfast->bas_fcts;
  col_phi       = col_qfast->bas_fcts;

  /* "const" means: not vector valued at all, or vector valued with
   * p.w. constant directions.
   */
  row_V_const = !row_V || row_phi->dir_pw_const;
  col_V_const = !col_V || col_phi->dir_pw_const;

  if (row_V == col_V &&
      fill_info->op_info.LALt_symmetric  &&
      fill_info->op_info.Lb0_Lb1_anti_symmetric) {

    col_qfast   = row_qfast;
    col_V_const = row_V_const;
    /* col_V       = row_V; */

    if (row_V) {
      if (row_V_const) {
        mat = (REAL_DD **)fill_info->scl_el_mat;
        M_clear_tmp_mat(mat, fill_info);
      } else {
        /* The zero order term below needs the values, the other terms
         * the gradients of the vector valued basis functions; both
         * pointers have to be set up or the loop dereferences NULL.
         */
        row_d     = col_d     = get_quad_fast_phi_dow(row_qfast);
        grd_row_d = grd_col_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
    } else {
      mat = (REAL_DD **)fill_info->el_mat->data.real_dd;
    }

    if (row_V_const) { /* non-vector valued, or direction p.w. constant */
      for (iq = 0; iq < quad->n_points; iq++)  {
        LALt = fill_info->op_info.LALt.real_dd(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb0 = fill_info->op_info.Lb0.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb1 = fill_info->op_info.Lb1.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        grd_row_phi = grd_col_phi = row_qfast->grd_phi[iq];
        row_val = col_val = row_qfast->phi[iq];

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          /* Diagonal entry: only LALt and c are added here. */
          MutAv(n_lambda, grd_row_phi[i], LALt, grd_col_phi[i], val2);
          MMAXEY_DOW(
            1.0, (const REAL_D *) val2, dstval);
          MSCMAXPY_DOW(
            row_val[i]*col_val[i], /**/ c, dstval);
          MAXPY_DOW(
            quad->w[iq], (const REAL_D *) dstval, mat[i][i]);

          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            /* Symmetric part (LALt and c): (j, i) receives the
             * transposed block with the same sign.
             */
            MutAv(n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
            MMAXEY_DOW(
              1.0, (const REAL_D *) val2, dstval);
            MSCMAXPY_DOW(
              row_val[i]*col_val[j], /**/ c, dstval);
            MAX_DOW(quad->w[iq], dstval);
            MAXPY_DOW(1.0, (const REAL_D *) dstval, mat[i][j]);
            MAXTPY_DOW(1.0, (const REAL_D *) dstval, mat[j][i]);

            /* Anti-symmetric part (Lb0 and Lb1): (j, i) receives the
             * transposed block with the opposite sign. The subtraction
             * must target mat[j][i]; applying it to mat[i][j] would
             * cancel the contribution just added there.
             */
            MSCMAXEY_DOW(
              quad->w[iq]*row_val[i],
              /**/ SCMbtv(n_lambda, Lb0, grd_col_phi[j], tmp1),
              dstval);
            MSCMAXPY_DOW(
              quad->w[iq]*col_val[j],
              /**/ SCMbtv(n_lambda, Lb1, grd_row_phi[i], tmp1),
              dstval);
            MAXPY_DOW(1.0, (const REAL_D *) dstval, mat[i][j]);
            MAXTPY_DOW(-1.0, (const REAL_D *) dstval, mat[j][i]);
          }
        }
      }
      if (row_V) { /* condense with the directions of the basis functions */
        VV_M_condense_el_mat(fill_info, true, false);
      }
    } else { /* vector-valued and direction not p.w. constant. */
      real_mat = (REAL **)fill_info->el_mat->data.real;

      for (iq = 0; iq < quad->n_points; iq++) {
        LALt = fill_info->op_info.LALt.real_dd(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb0 = fill_info->op_info.Lb0.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        Lb1 = fill_info->op_info.Lb1.real(
          el_info, quad, iq, fill_info->op_info.user_data);
        c = fill_info->op_info.c.real(
          el_info, quad, iq, fill_info->op_info.user_data);

        for (i = 0; i < fill_info->el_mat->n_row; i++) {
          /* Diagonal entry: only LALt and c are added here. */
          real_mat[i][i] +=
            quad->w[iq]
            *
            (VV_MutAv(
              n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][i])
             +
             SCMGRAMSCP_DOW(
               /**/ c, row_d[iq][i], col_d[iq][i]));
          for (j = i+1; j < fill_info->el_mat->n_col; j++) {
            REAL val;

            /* symmetric part: LALt and c */
            val = quad->w[iq]
              *
              (VV_MutAv(
                n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j])
               +
               SCMGRAMSCP_DOW(/**/ c,
                              row_d[iq][i], col_d[iq][j]));
            real_mat[i][j] += val;
            real_mat[j][i] += val;

            /* anti-symmetric part: Lb0 and Lb1 */
            val = quad->w[iq] *
              (VV_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
               +
               VV_SCMubtDv(
                 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j]));

            real_mat[i][j] += val;
            real_mat[j][i] -= val;
          }
        }
      }
    }
  } else { /* non-(anti-)symmetric assembly */
    if (row_V || col_V) {
      if (row_V && !row_V_const) {
        row_d     = get_quad_fast_phi_dow(row_qfast);
        grd_row_d = get_quad_fast_grd_phi_dow(row_qfast);
      }
      if (col_V && !col_V_const) {
        grd_col_d = get_quad_fast_grd_phi_dow(col_qfast);
        col_d     = get_quad_fast_phi_dow(col_qfast);
      }
    }

    mat = M_assign_matrices(&real_mat, &real_d_mat, fill_info,
                            /* row_V, col_V, */
                            row_V_const, col_V_const);

    for (iq = 0; iq < quad->n_points; iq++) {
      LALt = fill_info->op_info.LALt.real_dd(
        el_info, quad, iq, fill_info->op_info.user_data);
      Lb0 = fill_info->op_info.Lb0.real(
        el_info, quad, iq, fill_info->op_info.user_data);
      Lb1 = fill_info->op_info.Lb1.real(
        el_info, quad, iq, fill_info->op_info.user_data);
      c = fill_info->op_info.c.real(
        el_info, quad, iq, fill_info->op_info.user_data);

      grd_row_phi = row_qfast->grd_phi[iq];
      grd_col_phi = col_qfast->grd_phi[iq];

      row_val = row_qfast->phi[iq];
      col_val = col_qfast->phi[iq];

      for (i = 0; i < fill_info->el_mat->n_row; i++) {
        for (j = 0; j < fill_info->el_mat->n_col; j++) {
          if (row_V_const && col_V_const) {
            /* Both sides "const": accumulate a full block in mat. */
            MutAv(n_lambda, grd_row_phi[i], LALt, grd_col_phi[j], val2);
            MMAXEY_DOW(
              1.0, (const REAL_D *) val2, dstval);
            MSCMAXPY_DOW(
              row_val[i]*col_val[j], /**/ c, dstval);
            MSCMAXPY_DOW(
              row_val[i],
              /**/ SCMbtv(n_lambda, Lb0, grd_col_phi[j], val1),
              dstval);
            MSCMAXPY_DOW(
              col_val[j],
              /**/ SCMbtv(n_lambda, Lb1, grd_row_phi[i], val1),
              dstval);
            MAXPY_DOW(quad->w[iq],
                      (const REAL_D *) dstval, mat[i][j]);
          } else if (row_V_const) {
            /* Row side "const", column side vector valued with
             * non-constant directions (col_d/grd_col_d are set).
             */
            if (!row_V && !row_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] *
                (SV_SCMDutbv(
                  n_lambda, grd_row_phi[i], Lb1, col_d[iq][j])
                 +
                 SV_SCMubtDv(
                   n_lambda, row_val[i], Lb0, grd_col_d[iq][j])
                 +
                 SV_MutAv(
                   n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j])
                 +
                 row_val[i] *
                 SUM_DOW(SCMGEMV_DOW(1.0, /**/ c, col_d[iq][j],
                                     0.0, val_d)));
#endif
            } else {
              CV_SCMDutbv(
                n_lambda, grd_row_phi[i], Lb1, col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              CV_SCMubtDv(
                n_lambda, row_val[i], Lb0, grd_col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              CV_MutAv(
                n_lambda, grd_row_phi[i], LALt, grd_col_d[iq][j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              SCMGEMV_DOW(quad->w[iq]*row_val[i], /**/ c,
                          col_d[iq][j], 1.0, real_d_mat[i][j]);
            }
          } else if (col_V_const) {
            /* Column side "const", row side vector valued with
             * non-constant directions (row_d/grd_row_d are set). This
             * has to test the column flags: repeating the row test
             * made the branch unreachable and sent this case into the
             * VV branch below with col_d == NULL.
             */
            if (!col_V && !col_C) {
#if HAVE_DM_DST_TYPE || HAVE_SCM_DST_TYPE
              real_mat[i][j] +=
                quad->w[iq] *
                (VS_SCMDutbv(
                  n_lambda, grd_row_d[iq][i], Lb1, col_val[j])
                 +
                 VS_SCMubtDv(
                   n_lambda, row_d[iq][i], Lb0, grd_col_phi[j])
                 +
                 VS_MutAv(
                   n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j])
                 +
                 col_val[j] *
                 SUM_DOW(SCMGEMTV_DOW(
                           1.0, /**/ c, row_d[iq][i], 0.0, val_d)));
#endif
            } else {
              VC_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_val[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              VC_SCMubtDv(
                n_lambda, row_d[iq][i], Lb0, grd_col_phi[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              VC_MutAv(
                n_lambda, grd_row_d[iq][i], LALt, grd_col_phi[j], val_d);
              DMDMAXPY_DOW(quad->w[iq], val_d, real_d_mat[i][j]);
              SCMGEMTV_DOW(quad->w[iq]*col_val[j], /**/ c,
                           row_d[iq][i], 1.0, real_d_mat[i][j]);
            }
          } else {
            /* Both sides vector valued with non-constant directions.
             * The zero order term pairs row function i with column
             * function j, like all other terms.
             */
            real_mat[i][j] +=
              quad->w[iq] *
              (VV_SCMDutbv(
                n_lambda, grd_row_d[iq][i], Lb1, col_d[iq][j])
               +
               VV_SCMubtDv(
                 n_lambda, row_d[iq][i], Lb0, grd_col_d[iq][j])
               +
               VV_MutAv(
                 n_lambda, grd_row_d[iq][i], LALt, grd_col_d[iq][j])
               +
               SCMGRAMSCP_DOW(
                 /**/ c, row_d[iq][i], col_d[iq][j]));
          }
        } /* column loop */
      } /* row loop */
    } /* quad-point loop */

    /* Now possibly condense the temporary matrix if either of the
     * directions was p.w. constant
     */
    M_condense_matrices(fill_info,
                        /* row_V, col_V, row_C, col_C, */
                        row_V_const, col_V_const);
  }
}

/* NOTE(review): EMIT_DIM_VERSIONS is defined outside this chunk;
 * presumably it emits the dimension-specific variants of
 * NAME(quad_2_11_0) using ARG_DECL/ARG_CALL -- confirm against the
 * macro definition.
 */
EMIT_DIM_VERSIONS(/**/, NAME(quad_2_11_0), ARG_DECL, ARG_CALL);

/* >>> */

/* >>> */

/*
 * Local Variables: ***
 * mode: C ***
 * End: ***
 */
