#!/usr/bin/python3

import textwrap
import argparse
import sys
import os
import re
from collections import OrderedDict
from itertools import zip_longest
import logging

# Root logger (getLogger() called without a name); main() sets its level and
# format through logging.basicConfig().
log = logging.getLogger()

class TextTable:
    """Plain-text table with fixed-width, word-wrapped columns.

    colsizes gives the width of every column and titles the header cells.
    Rows are written to standard output, columns separated by one space.
    """

    def __init__(self, colsizes, titles):
        self.colsizes = colsizes
        self.titles = titles
        # One wrapper per column, each wrapping at that column's width
        self.wrappers = [textwrap.TextWrapper(width=width) for width in colsizes]

    def print_row(self, vals):
        """Print one logical row, wrapping every cell to its column width.

        A cell that wraps onto several lines makes the row span several
        output lines; shorter cells are continued with blanks.
        """
        wrapped_columns = []
        for wrapper, text in zip(self.wrappers, vals):
            wrapped_columns.append(wrapper.wrap(text))

        for line_cells in zip_longest(*wrapped_columns, fillvalue=""):
            padded = (cell.ljust(width) for width, cell in zip(self.colsizes, line_cells))
            print(" ".join(padded))

    def print_head(self):
        """Print the title row followed by a row of dashes."""
        self.print_row(self.titles)
        rules = ["-" * width for width in self.colsizes]
        self.print_row(rules)


class MdTable:
    """Markdown pipe table written to standard output.

    colsizes gives the minimum width each cell is padded to and titles the
    header cells.  Cells longer than their column are printed unchanged.
    """

    def __init__(self, colsizes, titles):
        self.colsizes = colsizes
        self.titles = titles

    def print_row(self, vals):
        """Print one table row, left-justifying every cell to its column size."""
        cells = [text.ljust(width) for width, text in zip(self.colsizes, vals)]
        print("| {} |".format(" | ".join(cells)))

    def print_head(self):
        """Print the title row followed by the dashed separator row."""
        self.print_row(self.titles)
        separators = ["-" * width for width in self.colsizes]
        self.print_row(separators)

def mkformat(unit, sz, dec):
    """Describe the format of a value given its unit, size and decimals.

    Units containing "CHARACTER" or "CODE TABLE" give "<sz> chars"; with
    dec == 0 the result is "<sz> digits"; a positive dec gives a picture
    with dec '#' marks after the dot; a negative dec gives sz '#' marks
    followed by -dec zeroes.
    """
    is_textual = "CHARACTER" in unit or "CODE TABLE" in unit
    if is_textual:
        return "{} chars".format(sz)
    if dec == 0:
        return "{} digits".format(sz)
    if dec > 0:
        integer_part = "#" * (sz - dec)
        decimal_part = "#" * dec
        return integer_part + "." + decimal_part
    return "#" * sz + "0" * (-dec)

class Keywords:
    """Collect and render the table of keywords used by dballe::Record.

    read() scans standard input for ``key, CONST`` declarations and for the
    matching ``infos[CONST].set_string(...)`` / ``infos[CONST].set_bufr(...)``
    lines; print_dox() and print_md() print the collected keywords as a
    Doxygen or a Markdown table.
    """

    # Column layout shared by the two Markdown tables printed by print_md()
    _MD_COLSIZES = (13, 9, 10, 27, 9, 8, 10, 31)
    _MD_TITLES = ("Name", "Unit", "Format", "Description",
                  "On insert", "On query", "On results", "Comment")

    def __init__(self):
        # Usage notes indexed by keyword name.  The values are:
        #   required/optional/ignored on insert
        #   required/optional/ignored on query
        #   present/absent            on results
        #   comment about the field  [optional value]
        self.comments = OrderedDict((
            ("priority", ["ignored", "optional", "present",
                          "Every type of report has an associated priority that controls "
                          "which data are first returned when there is more than one in the "
                          "same physical space.  It can be changed by editing "
                          "/etc/dballe/repinfo.csv"]),
            ("priomax", ["ignored", "optional", "absent"]),
            ("priomin", ["ignored", "optional", "absent"]),
            ("rep_memo", ["required", "optional", "present"]),
            ("ana_id", ["optional", "optional", "present",
                        "Internal DB-ALLe ID referring to a pseudoana entry, used as "
                        "a shortcut reference instead of specifying the full data"]),
            ("block", ["optional", "optional", "present"]),
            ("station", ["optional", "optional", "present"]),
            ("mobile", ["required", "optional", "present",
                        "Set to 1 if the station is mobile, such as a ship or a flight; else 0"]),
            ("ident", ["required if mobile=1", "optional", "present if mobile=1"]),
            ("name", ["optional", "optional", "present"]),
            ("lat", ["required", "optional", "present", "on insert, it has priority over ana_id"]),
            ("lon", ["required", "optional", "present", "on insert, it has priority over ana_id"]),
            ("latmax", ["ignored", "optional", "absent"]),
            ("latmin", ["ignored", "optional", "absent"]),
            ("lonmax", ["ignored", "optional", "absent"]),
            ("lonmin", ["ignored", "optional", "absent"]),
            ("height", ["optional", "optional", "present"]),
            ("heightbaro", ["optional", "optional", "present"]),
            ("year_ident", ["required if mobile=1", "optional", "present if mobile=1",
                            "Only needed when the station is mobile"]),
            ("month_ident", ["required if mobile=1", "optional", "present if mobile=1",
                             "Only needed when the station is mobile"]),
            ("day_ident", ["required if mobile=1", "optional", "present if mobile=1",
                           "Only needed when the station is mobile"]),
            ("hour_ident", ["required if mobile=1", "optional", "present if mobile=1",
                            "Only needed when the station is mobile"]),
            ("min_ident", ["required if mobile=1", "optional", "present if mobile=1",
                           "Only needed when the station is mobile"]),
            # Disabled entry, kept for reference:
            #   "datetime", [ "ignored", "ignored", "present",
            #                     "Convenience parameter: after a query it contains the datetime informations ",
            #             " all in a single string"],
            ("year", ["required", "optional", "present"]),
            ("month", ["required", "optional", "present"]),
            ("day", ["required", "optional", "present"]),
            ("hour", ["required", "optional", "present"]),
            ("min", ["required", "optional", "present"]),
            ("sec", ["required", "optional", "present"]),
            ("yearmax", ["ignored", "optional", "absent"]),
            ("yearmin", ["ignored", "optional", "absent"]),
            ("monthmax", ["ignored", "optional", "absent"]),
            ("monthmin", ["ignored", "optional", "absent"]),
            ("daymax", ["ignored", "optional", "absent"]),
            ("daymin", ["ignored", "optional", "absent"]),
            ("hourmax", ["ignored", "optional", "absent"]),
            ("hourmin", ["ignored", "optional", "absent"]),
            ("minumax", ["ignored", "optional", "absent"]),
            ("minumin", ["ignored", "optional", "absent"]),
            ("secmax", ["ignored", "optional", "absent"]),
            ("secmin", ["ignored", "optional", "absent"]),
            ("leveltype", ["required", "optional", "present"]),
            ("l1", ["required", "optional", "present"]),
            ("l2", ["required", "optional", "present"]),
            ("pindicator", ["required", "optional", "present"]),
            ("p1", ["required", "optional", "present"]),
            ("p2", ["required", "optional", "present"]),
            ("var", ["ignored", "optional", "present, indicates the name of the last variable returned"]),
            ("varlist", ["ignored", "optional", "absent",
                         "Comma-separated list of variable codes wanted on output"]),
            ("query", ["ignored", "optional", "absent",
                       "Comma-separated list of query modifiers.  Can have one of: "
                       "'best', 'bigana', 'nosort', 'stream'.  "
                       "Examples: 'best', 'nosort,stream'"]),
            ("ana_filter", ["ignored", "optional", "absent",
                            "Restricts the results to only those stations which "
                            "have a pseudoana value that matches the filter. "
                            "Examples: 'height>=1000', 'B02001=1', '1000<=height<=2000'"]),
            ("data_filter", ["ignored", "optional", "absent",
                             "Restricts the results to only the variables of the given type, which "
                             "have a value that matches the filter. "
                             "Examples: 't<260', 'B22021>2', '10<=B22021<=20'"]),
            ("attr_filter", ["ignored", "optional", "absent",
                             "Restricts the results to only those data which "
                             "have an attribute that matches the filter. "
                             "Examples: 'conf>70', 'B33197=0', '25<=conf<=50'"]),
            ("limit", ["ignored", "optional", "absent",
                       "Maximum number of results to return"]),
        ))
        # Parsed keyword information indexed by constant name; filled by read()
        self.data = OrderedDict()

    def _store_info(self, const, desc, unit, scale, length):
        """Attach description, unit, scale and length to a declared keyword.

        Constants that were not registered through read_kw() are ignored.
        """
        entry = self.data.get(const)
        if entry is None:
            return
        entry.update(desc=desc, unit=unit, scale=scale, len=length)

    def read_kw(self, key, const):
        """Register keyword name *key* under its constant name *const*."""
        self.data[const] = {"key": key}

    def read_info_string(self, const, desc, len):
        """Record a string keyword: unit CCITTIA5, scale 0, *len* characters."""
        self._store_info(const, desc, "CCITTIA5", 0, int(len))

    def read_info_maxint(self, const, desc, unit):
        """Record an integer keyword declared with DEF_MAXINT (10 digits)."""
        self._store_info(const, desc, unit, 0, 10)

    def read_info_int(self, const, desc, unit, len):
        """Record an integer keyword declared with DEF_INT(*len*).

        *len* arrives as the text matched by the regular expression and is
        converted to int, like in the other read_info_* methods.
        """
        self._store_info(const, desc, unit, 0, int(len))

    def read_info_bufr(self, const, desc, unit, scale, len):
        """Record a keyword declared with explicit numeric scale and length."""
        self._store_info(const, desc, unit, int(scale), int(len))

    def read(self):
        """Parse keyword declarations and descriptions from standard input.

        Every input line is tried against all patterns; the named groups of
        a matching pattern become the keyword arguments of its handler.

        Raises:
            RuntimeError: if a declared keyword never got a description.
        """
        parser = (
            (self.read_kw, re.compile(r"^(?P<key>[a-z0-9_]+),\s+(?P<const>[A-Z0-9_]+)$")),
            (self.read_info_string, re.compile(r"""^\s+infos\[(?P<const>[A-Z0-9_]+)\s*\].set_string\s*\(0,\s*"(?P<desc>[^"]+)",\s*(?P<len>\d+)""")),
            (self.read_info_maxint, re.compile(r"""^\s+infos\[(?P<const>[A-Z0-9_]+)\s*\].set_bufr\s*\(0,\s*"(?P<desc>[^"]+)",\s*"(?P<unit>[^"]+)",\s*DEF_MAXINT""")),
            (self.read_info_int, re.compile(r"""^\s+infos\[(?P<const>[A-Z0-9_]+)\s*\].set_bufr\s*\(0,\s*"(?P<desc>[^"]+)",\s*"(?P<unit>[^"]+)",\s*DEF_INT\((?P<len>\d+)\)""")),
            (self.read_info_bufr, re.compile(r"""^\s+infos\[(?P<const>[A-Z0-9_]+)\s*\].set_bufr\s*\(0,\s*"(?P<desc>[^"]+)",\s*"(?P<unit>[^"]+)",\s*(?P<scale>\d+),\s*(?P<len>\d+),""")),
        )
        for line in sys.stdin:
            for fun, regex in parser:
                mo = regex.match(line)
                if mo:
                    fun(**mo.groupdict())

        for const, info in self.data.items():
            if "desc" not in info:
                raise RuntimeError("keyword {} has no description".format(const))

    def print_dox(self):
        """Print the keyword table as a Doxygen group with a verbatim block."""
        print("""/**@defgroup dba_record_keywords Keywords used by dballe::Record
@ingroup tables

This table lists the keywords used by dballe::Record.  You can use them to query
and set data using function such as Record::enq() and Record::set().

Every keyword is listed together with its measure unit, length in characters or
digits and description.

\\verbatim
""")

        table = TextTable(
            colsizes=(13, 9, 13, 53),
            titles=("Name", "Unit", "Format", "Description")
        )
        table.print_head()

        for info in self.data.values():
            fmt = mkformat(info["unit"], info["len"], info["scale"])
            table.print_row((info["key"], info["unit"], fmt, info["desc"]))

        print()
        print("\\endverbatim")
        print("*/")

    def print_md(self):
        """Print the keyword table and the attribute parameter table as Markdown."""
        print("# Input/output/query parameters")
        print()

        table = MdTable(colsizes=self._MD_COLSIZES, titles=self._MD_TITLES)
        table.print_head()

        for kwinfo in self.data.values():
            # Keywords without usage notes get four empty cells; shorter
            # notes are padded so that the comment column is optional
            usage = self.comments.get(kwinfo["key"], [])
            ins, que, res, com = usage + [""] * (4 - len(usage))

            fmt = mkformat(kwinfo["unit"], kwinfo["len"], kwinfo["scale"])

            table.print_row((kwinfo["key"], kwinfo["unit"], fmt, kwinfo["desc"], ins, que, res, com))

        print()
        print("## Input parameters for attribute-related action routines")
        print()

        table = MdTable(colsizes=self._MD_COLSIZES, titles=self._MD_TITLES)
        table.print_head()
        table.print_row(("`*Bxxyyy`", "depends", "depends", "Value of the attribute", "required", "ignored", "present", ""))
        table.print_row(("`*var`", "Character", "7 chars", "Attribute queried", "ignored", "optional", "present, indicates the name of the last attribute returned", ""))
        table.print_row(("`*varlist`", "Character", "255 chars", "List of attributes to query", "ignored", "optional", "absent", "Comma-separated list of attribute B codes wanted on output"))
        table.print_row(("`*var_related`", "Character", "6 chars", "Variable related to the attribute to query", "required", "required", "absent", "It is automatically set by `idba_dammelo` and `idba_prendilo` (when `idba_prendilo` inserts a single variable)"))
        table.print_row(("`*context_id`", "Numeric", "10 digits", "Context ID of the variable related to the attribute to query", "required", "required", "absent", "It is automatically set by `idba_dammelo` and `idba_prendilo`"))


class Aliases:
    """Read variable aliases from standard input and print them as a table."""

    def read(self):
        """Parse ``name, WR_VAR(f, x, y)`` entries found between two ``%%`` lines.

        The result is stored in self.data as a sorted list of
        (alias, "Bxxyyy") tuples.

        Raises:
            RuntimeError: if a line of the data section cannot be parsed.
        """
        stream = sys.stdin

        # Everything up to the first line starting with %% is declarations
        for header_line in stream:
            if header_line.startswith("%%"):
                break

        entry_pattern = re.compile(r"^(?P<name>[^,]+),\s*WR_VAR\((?P<f>\d+),\s*(?P<x>\d+),\s*(?P<y>\d+)\)")

        entries = []
        for raw_line in stream:
            text = raw_line.strip()
            # A second %% closes the data section
            if text == "%%":
                break
            match = entry_pattern.match(text)
            if match is None:
                raise RuntimeError("Cannot parse {}".format(repr(text)))
            varcode = "B{x:02d}{y:03d}".format(x=int(match.group("x")), y=int(match.group("y")))
            entries.append((match.group("name"), varcode))

        self.data = sorted(entries)

    def _print_table(self, table):
        """Print the header of *table* followed by one row per alias."""
        table.print_head()
        for alias, var in self.data:
            table.print_row((alias, var))

    def print_dox(self):
        """Print the alias table as a Doxygen group with a verbatim block."""
        print("""/**@defgroup dba_core_aliases Variable aliases
@ingroup tables

This table lists the aliases that can be used to refer to varcodes.

\\verbatim""")

        self._print_table(TextTable(colsizes=(13, 8), titles=("Alias", "Variable")))

        print("""\\endverbatim
*/""")

    def print_md(self):
        """Print the alias table as a Markdown document."""
        print("""# Varcode aliases

This table lists the aliases that can be used to refer to varcodes.
""")

        self._print_table(MdTable(colsizes=(13, 8), titles=("Alias", "Variable")))


class Levels:
    """Read the level type table from standard input and print it."""

    def read(self):
        """Load tab-separated rows from standard input into self.data.

        Every row is stored as a list of exactly three strings; rows with
        fewer fields are padded with empty strings.

        Raises:
            RuntimeError: if a line has more than three fields.
        """
        tab_runs = re.compile(r"\t+")
        self.data = []
        for raw_line in sys.stdin:
            fields = tab_runs.split(raw_line.strip())
            count = len(fields)
            if count > 3:
                raise RuntimeError("line {} has too many fields".format(repr(fields)))
            # Pad short rows up to three columns (no-op for complete rows)
            fields.extend([""] * (3 - count))
            self.data.append(fields)

    def print_dox(self):
        """Print the level table as a Doxygen group with a verbatim block."""
        print("""/**@defgroup level_table Level type values
@ingroup tables

This table lists the possible values for leveltype1 or
leveltype2 and the interpretation of the corresponding numerical
value l1 or l2.  Leveltype values in the range 0-255 can
be used for defining either a single level (leveltype1) or a surface
delimiting a layer (leveltype1 and leveltype2) with any meaningful
combination of leveltypes; values of leveltype >255 have a special use
for encoding cloud values in SYNOP reports and they do not strictly
define physical surfaces.

The idea is borrowed from the GRIB edition 2 fixed surface
concept and the values for leveltype coincide with the GRIB standard
where possible.

\\verbatim
""")
        column_titles = ("Level Type", "Meaning", "Unit/contents of l1/l2")
        table = TextTable(colsizes=(11, 38, 27), titles=column_titles)
        table.print_head()

        for level_type, meaning, contents in self.data:
            table.print_row((level_type, meaning, contents))

        print("""\\endverbatim
*/""")

    def print_md(self):
        """Print the level table as a Markdown document."""
        print("""# Level type values

This table lists the possible values for leveltype1 or
leveltype2 and the interpretation of the corresponding numerical
value l1 or l2.  Leveltype values in the range 0-255 can
be used for defining either a single level (leveltype1) or a surface
delimiting a layer (leveltype1 and leveltype2) with any meaningful
combination of leveltypes; values of leveltype >255 have a special use
for encoding cloud values in SYNOP reports and they do not strictly
define physical surfaces.

The idea is borrowed from the GRIB edition 2 fixed surface
concept and the values for leveltype coincide with the GRIB standard
where possible.
""")
        column_titles = ("Level Type", "Meaning", "Unit/contents of l1/l2")
        table = MdTable(colsizes=(11, 38, 27), titles=column_titles)
        table.print_head()

        for level_type, meaning, contents in self.data:
            table.print_row((level_type, meaning, contents))


class Tranges:
    """Print the built-in documentation of time range (pindicator) values.

    All data is hard-coded in the instance: self.descs holds
    (value, description) pairs and self.notes the introductory remarks.
    """

    # Matches a run of one or more consecutive newlines
    _RE_NEWLINES = re.compile(r"\n+")

    def __init__(self):
        self.descs = (
            (0, "Average"),
            (1, "Accumulation"),
            (2, "Maximum"),
            (3, "Minimum"),
            (4, "Difference (value at the end of the time range minus value at the beginning)"),
            (5, "Root Mean Square"),
            (6, "Standard Deviation"),
            (7, "Covariance (temporal variance)"),
            (8, "Difference (value at the beginning of the time range minus value at the end)"),
            (9, "Ratio"),
            (51, "Climatological Mean Value"),
            ('10-191', "Reserved"),
            ('192-254', "Reserved for Local Use"),
            (200, "Vectorial mean"),
            (201, "Mode"),
            (202, "Standard deviation vectorial mean"),
            (203, "Vectorial maximum"),
            (204, "Vectorial minimum"),
            (205, "Product with a valid time ranging inside the given period"),
            (254, "Instantaneous value"),
        )

        self.notes = (
            "Validity time is defined as the time at which the data are measured or at which forecast is valid; for statistically processed data, the validity time is the end of the time interval.",
            "Reference time is defined as the nominal time of an observation for observed values, or as the time at which a model forecast starts for forecast values.",
            "The date and time in DB-All.e are always the validity date and time of a value, regardless of the value being an observation or a forecast.",
            "P1 is defined as the difference in seconds between validity time and reference time. For forecasts it is the positive forecast time. For observed values, the reference time is usually the same as the validity time, therefore P1 is zero. However P1 < 0 is a valid case for reports containing data in the past with respect to the nominal report time.",
            "P2 is defined as the duration of the period over which statistical processing is performed, and is always nonnegative. Note that, for instantaneous values, P2 is always zero.",
            # "The Eta (NAM) vertical coordinate system involves normalizing the pressure at some point on a specific level by the mean sea level pressure at that point.",
        )

    def read(self):
        """Do nothing: the table is hard-coded, no input is needed."""
        pass

    def _collapse_newlines(self, text):
        """Return *text* with every run of newlines reduced to a single one.

        This keeps a multi-line entry free of blank lines, so it stays a
        single list item in the generated output.
        """
        # Pattern.sub() takes the replacement first and the subject second
        return self._RE_NEWLINES.sub("\n", text)

    def print_dox(self):
        """Print the notes and the value list as a Doxygen group."""
        print("""/**@defgroup trange_table Time range values
@ingroup tables

Definition of the main concepts related to the description of time
range and statistical processing for observed and forecast data:
""")

        for note in self.notes:
            print("\\li {}".format(self._collapse_newlines(note)))

        print("""
The following table lists the possible values for pindicator and the
interpretation of the corresponding values of P1 and P2 specifying a
time range:
""")

        for value, desc in self.descs:
            # "\\b" is the Doxygen command that renders the next word in bold
            print('\\li \\b {} {}'.format(value, self._collapse_newlines(desc)))

        print("*/")

    def print_md(self):
        """Print the notes and the value list as a Markdown document."""
        print("""# Time range values

Definition of the main concepts related to the description of time
range and statistical processing for observed and forecast data:
""")
        for note in self.notes:
            print("* {}".format(self._collapse_newlines(note)))

        print("""
The following table lists the possible values for pindicator and the
interpretation of the corresponding values of P1 and P2 specifying a
time range:
""")

        for value, desc in self.descs:
            print('* **{}** {}'.format(value, self._collapse_newlines(desc)))


class Btable:
    """Read a fixed-width B table from standard input and print it."""

    # Record layout: nine byte-string fields, each preceded by one pad byte
    _RECORD_FORMAT = "x 6s x 64s x 24s x 3s x 12s x 3s x 24s x 3s x 10s"

    def read(self):
        """Parse one fixed-width record per input line into self.data.

        Input is read as bytes from sys.stdin.buffer; only the leading
        bytes covered by the record layout are used and every field is
        decoded as UTF-8.  self.data becomes a list of 9-tuples of strings,
        with the padding of each field left untouched.

        Raises:
            RuntimeError: if a line is too short to hold a full record.
        """
        import struct
        reader = struct.Struct(self._RECORD_FORMAT)

        self.data = []
        for lineno, line in enumerate(sys.stdin.buffer, 1):
            try:
                fields = reader.unpack(line[:reader.size])
            except struct.error as e:
                # Report which input line is broken instead of a bare struct.error
                raise RuntimeError("line {}: cannot parse {}: {}".format(lineno, repr(line), e)) from e
            self.data.append(tuple(x.decode("utf-8") for x in fields))

    def _rows(self):
        """Yield a (code, description, unit, format) tuple for every entry.

        Fields 7 and 8 of a record are converted to int and passed to
        mkformat() as decimals and size; the unit comes from field 6, with
        upper-case CHARACTER/NUMERIC/NUMBER rewritten in mixed case.
        """
        for info in self.data:
            code, desc, unit = info[0], info[1], info[6]
            dec, sz = int(info[7]), int(info[8])
            # mkformat() checks for the upper-case unit names, so it has to
            # run before the unit is rewritten for display
            fmt = mkformat(unit, sz, dec)
            unit = unit.replace("CHARACTER", "Character")
            unit = re.sub("(NUMERIC|NUMBER)", "Numeric", unit)
            yield code, desc, unit, fmt

    def print_dox(self):
        """Print the B table as a Doxygen group with a verbatim block."""
        print("""/**@defgroup local_b_table Local B table
@ingroup tables

This table lists all the entries of the local B table.  You can use them to
provide context information for a measured value.

Every entry is listed together with its measure unit, length in characters or
digits and description.

\\verbatim
""")

        table = TextTable(
            colsizes=(6, 66, 18, 14),
            titles=("Code", "Description", "Unit", "Format")
        )
        table.print_head()

        for row in self._rows():
            table.print_row(row)
        print("""\\endverbatim
*/""")

    def print_md(self):
        """Print the B table as a Markdown document."""
        print("""# Local B table

This table lists all the entries of the local B table.  You can use them to
provide context information for a measured value.

Every entry is listed together with its measure unit, length in characters or
digits and description.
""")

        table = MdTable(
            colsizes=(6, 66, 18, 14),
            titles=("Code", "Description", "Unit", "Format")
        )
        table.print_head()

        for row in self._rows():
            table.print_row(row)


def main():
    """Command line entry point.

    Reads data of the given input type from standard input and prints it
    to standard output in the given output type.  The output type selects
    the ``print_<outtype>`` method of the reader class.

    Raises:
        RuntimeError: if the input type is unknown, or the reader has no
            printer for the requested output type.
    """
    parser = argparse.ArgumentParser(description="Format dballe data snippets for documentation.")
    parser.add_argument("intype", help="input type")
    parser.add_argument("outtype", help="output type")
    parser.add_argument("-v", "--verbose", action="store_true", help="verbose output")
    parser.add_argument("--debug", action="store_true", help="debug output")

    args = parser.parse_args()

    # --debug wins over --verbose; by default only warnings are shown
    if args.debug:
        level = logging.DEBUG
    elif args.verbose:
        level = logging.INFO
    else:
        level = logging.WARNING
    logging.basicConfig(level=level, stream=sys.stderr,
                        format="%(asctime)-15s %(levelname)s %(message)s")

    inputs = {
        "key": Keywords,
        "alias": Aliases,
        "levels": Levels,
        "tranges": Tranges,
        "btable": Btable,
    }

    Proc = inputs.get(args.intype)
    if Proc is None:
        raise RuntimeError("Input type {} not supported".format(args.intype))

    proc = Proc()
    proc.read()

    gen = getattr(proc, "print_" + args.outtype, None)
    if gen is None:
        raise RuntimeError("Output type {} not supported for input {}".format(args.outtype, args.intype))

    gen()


# Run the command line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
