FORM 4.3
optimize.cc File Reference
#include <vector>
#include <stack>
#include <algorithm>
#include <set>
#include <map>
#include <climits>
#include <cmath>
#include <string>
#include <iostream>
#include <tr1/unordered_map>
#include <tr1/unordered_set>
#include "form3.h"
#include "mytime.h"

Go to the source code of this file.

Data Structures

class  tree_node
struct  CSEHash
struct  CSEEq
struct  node
struct  NodeHash
struct  NodeEq
class  optimization

Typedefs

typedef struct node NODE

Functions

void print_instr (const vector< WORD > &instr, WORD num)
template<class RandomAccessIterator>
void my_random_shuffle (PHEAD RandomAccessIterator fr, RandomAccessIterator to)
LONG get_expression (int exprnr)
vector< vector< WORD > > get_brackets ()
int count_operators (const WORD *expr, bool print=false)
int count_operators (const vector< WORD > &instr, bool print=false)
vector< WORD > occurrence_order (const WORD *expr, bool rev)
WORD getpower (const WORD *term, int var, int pos)
void fixarg (UWORD *t, WORD &n)
void GcdLong_fix_args (PHEAD UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc)
void DivLong_fix_args (UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc, UWORD *d, WORD *nd)
void build_Horner_tree (const WORD **terms, int numterms, int var, int maxvar, int pos, vector< WORD > *res)
bool term_compare (const WORD *a, const WORD *b)
vector< WORD > Horner_tree (const WORD *expr, const vector< WORD > &order)
void print_tree (const vector< WORD > &tree)
template<typename T>
size_t hash_range (T *array, int size)
vector< WORD > generate_instructions (const vector< WORD > &tree, bool do_CSE)
int count_operators_cse (const vector< WORD > &tree)
NODEbuildTree (vector< WORD > &tree)
int count_operators_cse_topdown (vector< WORD > &tree)
vector< WORD > simulated_annealing ()
void find_Horner_MCTS_expand_tree ()
void find_Horner_MCTS ()
vector< WORD > merge_operators (const vector< WORD > &all_instr, bool move_coeff)
vector< optimizationfind_optimizations (const vector< WORD > &instr)
bool do_optimization (const optimization optim, vector< WORD > &instr, int newid)
int partial_factorize (vector< WORD > &instr, int n, int improve)
vector< WORD > optimize_greedy (vector< WORD > instr, LONG time_limit)
vector< WORD > recycle_variables (const vector< WORD > &all_instr)
void optimize_expression_given_Horner ()
VOID generate_output (const vector< WORD > &instr, int exprnr, int extraoffset, const vector< vector< WORD > > &brackets)
WORD generate_expression (WORD exprnr)
VOID optimize_print_code (int print_expr)
int Optimize (WORD exprnr, int do_print)
int ClearOptimize ()

Variables

const WORD OPER_ADD = -1
const WORD OPER_MUL = -2
const WORD OPER_COMMA = -3
WORD * optimize_expr
vector< vector< WORD > > optimize_best_Horner_schemes
int optimize_num_vars
int optimize_best_num_oper
vector< WORD > optimize_best_instr
vector< WORD > optimize_best_vars
bool mcts_factorized
bool mcts_separated
vector< WORD > mcts_vars
tree_node mcts_root
int mcts_expr_score
set< pair< int, vector< WORD > > > mcts_best_schemes

Detailed Description

experimental routines for the optimization of FORTRAN or C output.

Definition in file optimize.cc.

Function Documentation

◆ print_instr()

void print_instr ( const vector< WORD > & instr,
WORD num )

Definition at line 158 of file optimize.cc.

◆ my_random_shuffle()

template<class RandomAccessIterator>
void my_random_shuffle ( PHEAD RandomAccessIterator fr,
RandomAccessIterator to )

Random shuffle

Description

Randomly permutes elements in the range [fr,to). Functionality is the same as C++'s "random_shuffle", but it uses Form's "wranf".

Definition at line 180 of file optimize.cc.

Referenced by find_Horner_MCTS().

◆ get_expression()

LONG get_expression ( int exprnr)

Get expression

Description

Reads an expression from the input file into a buffer (called optimize_expr). This buffer is used during the optimization process. Non-symbols are removed by ConvertToPoly and are put in temporary symbols.

The return value is the length of the expression in WORDs, or a negative number if it failed.

Definition at line 204 of file optimize.cc.

References EndSort(), LowerSortLevel(), NewSort(), and StoreTerm().

Referenced by Optimize().

◆ get_brackets()

vector< vector< WORD > > get_brackets ( )

Get brackets

Description

Checks whether the input expression (stored in optimize_expr) contains brackets. If so, this method replaces terms outside brackets by powers of SEPERATESYMBOL (equal brackets have equal powers) and the brackets are returned. If not, the result is empty.

Brackets are used for simultaneous optimization. The symbol SEPARATESYMBOL is always the first one used in a Horner scheme.

Definition at line 281 of file optimize.cc.

Referenced by Optimize().

◆ count_operators() [1/2]

int count_operators ( const WORD * expr,
bool print = false )

Count operators

Description

Counts the number of operators in a Form-style expression.

Definition at line 401 of file optimize.cc.

Referenced by find_Horner_MCTS(), generate_instructions(), Optimize(), optimize_expression_given_Horner(), and optimize_greedy().

◆ count_operators() [2/2]

int count_operators ( const vector< WORD > & instr,
bool print = false )

Count operators

Description

Counts the number of operators in a vector of instructions

Definition at line 455 of file optimize.cc.

◆ occurrence_order()

vector< WORD > occurrence_order ( const WORD * expr,
bool rev )

Occurrence order

Description

Extracts all variables from an expression and sorts them with most occurring first (or last, with rev=true)

Definition at line 498 of file optimize.cc.

Referenced by Optimize().

◆ getpower()

WORD getpower ( const WORD * term,
int var,
int pos )

Horner tree building

Description

Given a Form-style expression (in a buffer in memory), this builds an expression tree. The tree is determined by a multivariate Horner scheme, i.e., something of the form:

1+y+x*(2+y*(1+y)+x*(3-y*(...)))

The order of the variables is given to the method "Horner_tree", which renumbers ad reorders the terms of the expression. Next, the recursive method "build_Horner_tree" does the actual tree construction.

The tree is represented in postfix notation. Tokens are of the following forms:

  • SNUMBER tokenlength num den coefflength
  • SYMBOL tokenlength variable power
  • OPER_ADD or OPER_MUL

Note

Sets AN.poly_num_vars and allocates AN.poly_vars. The latter should be freed later. Get power of variable (helper function for build_Horner_tree)

Description

Returns the power of the variable "var", which is at position "pos" in this term, if it is present.

Definition at line 579 of file optimize.cc.

Referenced by build_Horner_tree().

◆ fixarg()

void fixarg ( UWORD * t,
WORD & n )

Call GcdLong/DivLong with leading zeroes

Description

These method remove leading zeroes, so that GcdLong and DivLong can safely be called.

Definition at line 593 of file optimize.cc.

◆ GcdLong_fix_args()

void GcdLong_fix_args ( PHEAD UWORD * a,
WORD na,
UWORD * b,
WORD nb,
UWORD * c,
WORD * nc )

Definition at line 599 of file optimize.cc.

◆ DivLong_fix_args()

void DivLong_fix_args ( UWORD * a,
WORD na,
UWORD * b,
WORD nb,
UWORD * c,
WORD * nc,
UWORD * d,
WORD * nd )

Definition at line 605 of file optimize.cc.

◆ build_Horner_tree()

void build_Horner_tree ( const WORD ** terms,
int numterms,
int var,
int maxvar,
int pos,
vector< WORD > * res )

Build the Horner tree

Description

Constructs the Horner tree. The method processes one variable and continues recursively until the Horner scheme is completed.

"terms" is a pointer to the starts of the terms. "numterms" is the number of terms to be processed. "var" is the next variable to be processed (index between 0 and #maxvar) and "maxvar" is the last variable to be processed, so that partial Horner trees can also be constructed. "pos" is the position that the power of "var" should be in (one level further in the recursion, "pos" has increased by 0 or 1 depending on whether the previous power was 0 or not). The result is written at the pointer "res".

This method also factors out gcds of the coefficients. The result should end with "gcd OPER_MUL" at all times, so that one level higher gcds can be extracted again.

Definition at line 631 of file optimize.cc.

References build_Horner_tree(), and getpower().

Referenced by build_Horner_tree(), and Horner_tree().

◆ term_compare()

bool term_compare ( const WORD * a,
const WORD * b )

Term compare (helper function for Horner_tree)

Description

Compares two terms of the form "L SYM 4 x n coeff" or "L coeff". Lower powers of lower-indexed symbols come first. This is used to sort the terms in correct order.

Definition at line 831 of file optimize.cc.

Referenced by Horner_tree().

◆ Horner_tree()

vector< WORD > Horner_tree ( const WORD * expr,
const vector< WORD > & order )

Prepare Horner tree building

Description

This method renumbers the variables to 0...#vars-1 according to the specified order. Next, it stored pointer to individual terms and sorts the terms with higher power first. Then the sorted lists of power is used for the construction of the Horner tree.

Definition at line 852 of file optimize.cc.

References build_Horner_tree(), and term_compare().

Referenced by optimize_expression_given_Horner().

◆ print_tree()

void print_tree ( const vector< WORD > & tree)

Definition at line 955 of file optimize.cc.

◆ hash_range()

template<typename T>
size_t hash_range ( T * array,
int size )

Definition at line 1024 of file optimize.cc.

◆ generate_instructions()

vector< WORD > generate_instructions ( const vector< WORD > & tree,
bool do_CSE )

Generate instructions

Description

Converts the expression tree to a list of instructions that directly translate to code. Instructions are of the form:

expr.nr operator length [operands]+ trailing.zero

The operands are of the form:

length [(EXTRA)SYMBOL length variable power] coeff

This method only generates binary operators. Merging is done later. The method also checks for common subexpressions and eliminates them and the flag "do_CSE" is set.

Implementation details

The map "ID" keeps track of which subexpressions already exist. The key is formatted as one of the following:

SYMBOL x n SNUMBER coeff OPERATOR LHS RHS

with LHS/RHS formatted as one of the following:

SNUMBER idx 0 (EXTRA)SYMBOL x n

ID[symbol] or ID[operator] equals a subexpression number. ID[coeff] equals the position of the number in the input.

The stack s is used the process the postfix expression tree. Three-word tokens of the form:

SNUMBER idx.of.coeff 0 SYMBOL x n EXTRASYMBOL x 1

are pushed onto it. Operators pop two operands and push the resulting expression.

(Extra)symbols are 1-indexed, because -X is also needed to represent -1 times this term.

There is currently a bug. The notation cannot tell if there is a single bracket and then ignores the bracket.

TODO: check if this method performs properly if do_CSE=false

Definition at line 1086 of file optimize.cc.

References count_operators().

Referenced by optimize_expression_given_Horner().

◆ count_operators_cse()

int count_operators_cse ( const vector< WORD > & tree)

Count number of operators in a binary tree, while removing CSEs on the fly. The instruction set is not created, which makes this method slightly faster.

A hash is created on the fly and is passed through the stack. TODO: find better hash functions

Definition at line 1295 of file optimize.cc.

◆ buildTree()

NODE * buildTree ( vector< WORD > & tree)

Definition at line 1517 of file optimize.cc.

◆ count_operators_cse_topdown()

int count_operators_cse_topdown ( vector< WORD > & tree)

Definition at line 1579 of file optimize.cc.

◆ simulated_annealing()

vector< WORD > simulated_annealing ( )

Definition at line 1634 of file optimize.cc.

◆ find_Horner_MCTS_expand_tree()

void find_Horner_MCTS_expand_tree ( )

Definition at line 2007 of file optimize.cc.

◆ find_Horner_MCTS()

void find_Horner_MCTS ( )

Find best Horner schemes using MCTS

Description

The method governs the MCTS for the best Horner schemes. It does some pre-processing, calls "find_Horner_MCTS_expand_tree" a number of times and does some post-processing.

Definition at line 2208 of file optimize.cc.

References count_operators(), my_random_shuffle(), and TimeWallClock().

Referenced by Optimize().

◆ merge_operators()

vector< WORD > merge_operators ( const vector< WORD > & all_instr,
bool move_coeff )

Merge operators

Description

The input instructions form a binary DAG. This method merges expressions like

Z1 = a+b; Z2 = Z1+c;

into

Z2 = a+b+c;

An instruction is merged iff it only has one parent and the operator equals its parent's operator.

This still leaves some freedom: where should the coefficients end up in cases as:

Z1 = Z2 + x <=> Z1 = 2*Z2 + x Z2 = 2*x*y Z2 = x*y

Both are relevant, e.g. for CSE of the form "2*x" and "2*Z2". The flag "move_coeff" moves coefficients from LHS-like expressions to RHS-like expressions.

Furthermore, this method removes empty equation (Z1=0), that are introduced by some "optimize_greedy" substitutions.

Implementation details

Expressions are mostly traversed via a stack, so that parents are evaluated before their children.

With "move_coeff" set coefficients are moved, but this leads to some tricky cases, e.g.

Z1 = Z2 + x Z2 = 2*y

Here Z2 reduces to the trivial equation Z2=y, which should be eliminated. Here the array skip[i] comes in.

Furthermore in the case

Z1 = Z2 + x Z2 = 2*Z3 Z3 = x*Z4 Z4 = y*z

after substituting Z1 = 2*Z3 + x, the parent expression for Z4 becomes Z3 instead of Z2. This is where renum_par[i] comes in.

Finally, once a coefficient has been moved, skip_coeff[i] is set and this coefficient is copied into the new expression anymore.

Definition at line 2356 of file optimize.cc.

Referenced by optimize_expression_given_Horner().

◆ find_optimizations()

vector< optimization > find_optimizations ( const vector< WORD > & instr)

Find optimizations

Description

This method find all optimization of the form described in "class Optimization". It process every equation, looking for possible optimizations and stores them in a fast-access data structure to count the total improvement of an optimization.

Definition at line 2651 of file optimize.cc.

Referenced by optimize_greedy().

◆ do_optimization()

bool do_optimization ( const optimization optim,
vector< WORD > & instr,
int newid )

Do optimization

Description

This method performs an optimization. It scans through the equations of "optim.eqnidxs" and looks in which this optimization can still be performed (due to other performed optimizations this isn't always the case). If possible, it substitutes the common subexpression by a new extra symbol numbered "newid". Finally, the new extrasymbol is defined accordingly.

Substitutions may lead to trivial equations of the form "Zi=Zj", but these are removed in the end of the method. The method returns whether the substitution has been done once or more (or not).

Definition at line 2884 of file optimize.cc.

Referenced by optimize_greedy().

◆ partial_factorize()

int partial_factorize ( vector< WORD > & instr,
int n,
int improve )

Partial factorization of instructions

Description

This method performs partial factorization of instructions. In particular the following instructions

Z1 = x*a*b Z2 = x*c*d*e Z3 = 2*x + Z1 + Z2 + more

are replaced by

Z1 = a*b Z2 = c*d*e Z3 = Zj + more Zi = 2 + Z1 + Z2 Zj = x*Zi

Here it is necessary that no other equations refer to Z1 and Z2. The generation of trivial instructions (Zi=Zj or Zi=x) is prevented.

Definition at line 3433 of file optimize.cc.

Referenced by optimize_greedy().

◆ optimize_greedy()

vector< WORD > optimize_greedy ( vector< WORD > instr,
LONG time_limit )

Optimize instructions greedily

Description

This method optimizes an expression greedily. It calls "find_optimizations" to obtain candidates and performs the best one(s) by calling "do_optimization".

How many different optimization are done, before "find_optimization" is called again, is determined by the settings "greedyminnum" and "greedymaxperc".

During the optimization process, sequences of zeroes are introduced in the instructions, since moving all instructions when one gets optimized, is very costly. Therefore, in the end, the instructions are "compressed" again to remove these extra zeroes.

Definition at line 3727 of file optimize.cc.

References count_operators(), do_optimization(), find_optimizations(), partial_factorize(), and TimeWallClock().

Referenced by optimize_expression_given_Horner().

◆ recycle_variables()

vector< WORD > recycle_variables ( const vector< WORD > & all_instr)

Recycle variables

Description

The current input uses many temporary variables. Many of them become obsolete at some point during the evaluation of the code, so can be recycled. This method renumbers the temporary variables, so that they are recycled. Furthermore, the input is order in depth-first order, so that the instructions can be performed consecutively.

Implementation details

First, for each subDAG, an estimate for the number of variables needed is made. This is done by the following recursive formula:

#vars(x) = max(#vars(ch_i(x)) + i),

with ch_i(x) the i-th child of x, where the childs are ordered w.r.t. #vars(ch_i). This formula is exact if the input forms a tree, and otherwise gives a reasonable estimate.

Then, the instructions are reordered in a depth-first order with childs ordered w.r.t. #vars. Next, the times that variables become obsolete are found. Each LHS of an instruction is renumbered to the lowest-numbered temporary variable that is available at that time.

Definition at line 3867 of file optimize.cc.

Referenced by optimize_expression_given_Horner().

◆ optimize_expression_given_Horner()

void optimize_expression_given_Horner ( )

Optimize expression given a Horner scheme

Description

This method picks one Horner scheme from the list of best Horner schemes, applies this scheme to the expression and then, depending on optimize.settings, does a common subexpression elimination (CSE) or performs greedy optimizations.

CSE is fast, while greedy might be slow. CSE followed by greedy is faster than greedy alone, but typically results in slightly worse code (not proven; just observed).

eventually do greedy optimations

Definition at line 4014 of file optimize.cc.

References count_operators(), generate_instructions(), Horner_tree(), merge_operators(), optimize_greedy(), recycle_variables(), and TimeWallClock().

Referenced by Optimize().

◆ generate_output()

VOID generate_output ( const vector< WORD > & instr,
int exprnr,
int extraoffset,
const vector< vector< WORD > > & brackets )

Generate output

Description

This method prepares the instructions for printing. They are stored in Form format, so that they can be printed by "PrintExtraSymbol". The results are stored in the buffer AO.OptimizeResult.

Definition at line 4245 of file optimize.cc.

Referenced by Optimize().

◆ generate_expression()

WORD generate_expression ( WORD exprnr)

Generate expression

Description

This method modifies the original optimized expression by an expression with extra symbols. This is used for "#Optimize".

Definition at line 4394 of file optimize.cc.

References EndSort(), Generator(), LowerSortLevel(), NewSort(), and PutOut().

Referenced by Optimize().

◆ optimize_print_code()

VOID optimize_print_code ( int print_expr)

Print optimized code

Description

This method prints the optimized code via "PrintExtraSymbol". Depending on the flag, the original expression is printed (for "Print") or not (for "#Optimize / #write "O").

Definition at line 4474 of file optimize.cc.

Referenced by Optimize().

◆ Optimize()

int Optimize ( WORD exprnr,
int do_print )

Optimization of expression

Description

This method takes an input expression and generates optimized code to calculate its value. The following methods are called to do so:

(1) get_expression : to read to expression

(2) get_brackets : find brackets for simultaneous optimization

(3) occurrence_order or find_Horner_MCTS : to determine (the) Horner scheme(s) to use; this depends on AO.optimize.horner

(4) optimize_expression_given_Horner : to do the optimizations for each Horner scheme; this method does either CSE or greedy optimizations dependings on AO.optimize.method

(5) generate_output : to format the output in Form notation and store it in a buffer

(6a) optimize_print_code : to print the expression (for "Print") or (6b) generate_expression : to modify the expression (for "#Optimize")

On ParFORM, all the processes must call this function at the same time. Then

(1) Because only the master can access to the expression to be optimized, the master broadcast the expression to all the slaves after reading the expression (PF_get_expression).

(2) get_brackets reads optimize_expr as the input and it works also on the slaves. We leave it although the bracket information is not needed on the slaves (used in (5) on the master).

(3) and (4) find_Horner_MCTS and optimize_expression_given_Horner are parallelized.

(5), (6a) and (6b) are needed only on the master.

Definition at line 4587 of file optimize.cc.

References ClearOptimize(), count_operators(), find_Horner_MCTS(), generate_expression(), generate_output(), get_brackets(), get_expression(), occurrence_order(), optimize_expression_given_Horner(), optimize_print_code(), PF_Broadcast(), PF_LongMultiBroadcast(), PF_Pack(), PF_PrepareLongMultiPack(), PF_PreparePack(), PF_Unpack(), and PutPreVar().

◆ ClearOptimize()

int ClearOptimize ( )

Optimization of expression

Description

Clears the buffers that were used for optimization output. Clears the expression from the buffers (marks it to be dropped). Note: we need to use the expression by its name, because numbers may change if we drop other expressions between when we do the optimizations and clear the results (in execute.c). Also this is not 100% safe, because we could overwrite the optimized expression. But that can be done only in a Local or Global statement and hence we only have to test there that we might have to call ClearOptimize first. (in file comexpr.c)

Definition at line 4924 of file optimize.cc.

References PutPreVar().

Referenced by Optimize().

Variable Documentation

◆ OPER_ADD

const WORD OPER_ADD = -1

Definition at line 116 of file optimize.cc.

◆ OPER_MUL

const WORD OPER_MUL = -2

Definition at line 117 of file optimize.cc.

◆ OPER_COMMA

const WORD OPER_COMMA = -3

Definition at line 118 of file optimize.cc.

◆ optimize_expr

WORD* optimize_expr

Definition at line 135 of file optimize.cc.

◆ optimize_best_Horner_schemes

vector<vector<WORD> > optimize_best_Horner_schemes

Definition at line 136 of file optimize.cc.

◆ optimize_num_vars

int optimize_num_vars

Definition at line 137 of file optimize.cc.

◆ optimize_best_num_oper

int optimize_best_num_oper

Definition at line 138 of file optimize.cc.

◆ optimize_best_instr

vector<WORD> optimize_best_instr

Definition at line 139 of file optimize.cc.

◆ optimize_best_vars

vector<WORD> optimize_best_vars

Definition at line 140 of file optimize.cc.

◆ mcts_factorized

bool mcts_factorized

Definition at line 143 of file optimize.cc.

◆ mcts_separated

bool mcts_separated

Definition at line 143 of file optimize.cc.

◆ mcts_vars

vector<WORD> mcts_vars

Definition at line 144 of file optimize.cc.

◆ mcts_root

tree_node mcts_root

Definition at line 145 of file optimize.cc.

◆ mcts_expr_score

int mcts_expr_score

Definition at line 146 of file optimize.cc.

◆ mcts_best_schemes

set<pair<int,vector<WORD> > > mcts_best_schemes

Definition at line 147 of file optimize.cc.