Source code for clldutils.markup

import io
import re
import csv
import sys
import typing
import urllib.parse

import attr
from tabulate import tabulate
from markdown import markdown
from lxml import etree

from clldutils.misc import slug
from clldutils.text import replace_pattern

# Public API of this module, i.e. the names exported by a star-import.
__all__ = [
    'Table',
    'iter_markdown_tables', 'iter_markdown_sections', 'add_markdown_text',
    'MarkdownLink', 'MarkdownImageLink']


class Table(list):
    """
    A list of rows which can render itself as a text table.

    Used as a context manager, rows are collected inside the `with` block and the
    rendered table is printed when the block is left.

    .. code-block:: python

        >>> with Table('col1', 'col2', tablefmt='simple') as t:
        ...     t.append(['v1', 'v2'])
        ...
        col1    col2
        ------  ------
        v1      v2

    When more control over the rendering is needed, skip the `with` statement and
    call :meth:`Table.render` explicitly:

    .. code-block:: python

        >>> t = Table('col1', 'col2')
        >>> t.extend([['z', 1], ['a', 2]])
        >>> print(t.render(sortkey=lambda r: r[0], tablefmt='simple'))
        col1      col2
        ------  ------
        a            2
        z            1
    """
    def __init__(self, *cols: str, **kw):
        """
        :param cols: The column names, used as table header.
        :param kw: `rows` (initial rows, default: none) and `file` (where the table is printed \
        on exit of the `with` block, default: `sys.stdout`) are consumed here; all remaining \
        keyword arguments are kept as default rendering options for :meth:`Table.render`.
        """
        initial_rows = kw.pop('rows', [])
        target = kw.pop('file', sys.stdout)
        super().__init__(initial_rows)
        self.columns = list(cols)
        self._file = target
        # Whatever is left over is handed to the renderer later on.
        self._kw = kw

    def render(self, sortkey=None, condensed=True, verbose=False, reverse=False, **kw):
        """
        Create the text representation of the table.

        :param sortkey: A callable which can be used as key when sorting the rows. Without it, \
        rows are rendered in insertion order (and `reverse` has no effect).
        :param condensed: Flag signalling whether whitespace padding should be collapsed \
        (only applied to the "pipe" format).
        :param verbose: Flag signalling whether to output additional info, i.e. the number of \
        rows (only applied to the "pipe" format).
        :param reverse: Flag signalling whether we should sort in reverse order.
        :param kw: Additional keyword arguments are passed to the `tabulate` function.
        :return: String representation of the table in the chosen format.
        """
        # Precedence of options: built-in defaults < constructor keywords < call keywords.
        options = {'tablefmt': 'pipe', 'headers': self.columns, 'floatfmt': '.2f'}
        options.update(self._kw)
        options.update(kw)
        fmt = options['tablefmt']

        rows = sorted(self, key=sortkey, reverse=reverse) if sortkey else self

        if fmt == 'tsv':
            # Tab-separated output is written with the csv module rather than tabulate.
            buffer = io.StringIO()
            writer = csv.writer(buffer, delimiter='\t')
            writer.writerow(self.columns)
            writer.writerows(rows)
            return buffer.getvalue()

        rendered = tabulate(rows, **options)
        if fmt == 'pipe':
            if condensed:
                # Shrink the runs of blanks next to the column delimiters to a single blank.
                rendered = re.sub(r'\|[ ]+', '| ', rendered)
                rendered = re.sub(r'[ ]+\|', ' |', rendered)
            if verbose:
                rendered += '\n\n(%s rows)\n\n' % len(self)
        return rendered

    def __enter__(self):
        """Enter the `with` block, making the table available for adding rows."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Print the table, rendered with the default options, to the configured file."""
        print(self.render(), file=self._file)
def iter_markdown_tables(text) -> \
        typing.Generator[typing.Tuple[typing.List[str], typing.List[typing.List[str]]], None, None]:
    """
    Parse tables from a markdown formatted text.

    :param str text: markdown formatted text.
    :return: generator of (header, rows) pairs, where "header" is a `list` of column names and \
    rows is a list of lists of row values.
    :raises AssertionError: A table which is delimited with outer pipes contains a line that \
    does not start and end with a pipe.
    """
    def split_row(line, outer_pipes):
        """Split one table line into its whitespace-stripped cell values."""
        line = line.strip()
        if outer_pipes:
            # Raise explicitly rather than using an `assert` statement: asserts are removed
            # when Python runs with -O, in which case malformed lines would silently lose
            # their first and last character below.
            if not (line.startswith('|') and line.endswith('|')):
                raise AssertionError('inconsistent table formatting')
            line = line[1:-1].strip()
        return [c.strip() for c in line.split('|')]

    for header, rows, outer_pipes in _iter_table_blocks(text.splitlines()):
        yield split_row(header, outer_pipes), [split_row(row, outer_pipes) for row in rows]
def _iter_table_blocks(lines):
    """
    Find the raw lines of markdown tables.

    A table is recognized by

    1. a header line, i.e. a line with at least one `|`,
    2. directly followed by a line separating header and body, matching the pattern below.

    All following lines containing a `|` - except for further separator lines - make up the
    body. Tables without any body line are not reported.

    :param lines: iterable of the lines of a markdown text.
    :return: generator of (header, rows, outer_pipes) triples, where "header" is the header \
    line, "rows" the list of body lines and "outer_pipes" tells whether the separator line \
    starts with a `|`.
    """
    separator = re.compile(r'\s*\|?\s*:?--(-)+:?\s*(\|\s*:?--(-)+:?\s*)+\|?\s*')

    all_lines = list(lines)
    total = len(all_lines)
    head, body, has_outer_pipes = None, [], False

    for idx, current in enumerate(all_lines):
        if not head:
            # Not inside a table: check whether this line and the next one open a table.
            if '|' not in current or idx + 1 >= total:
                continue
            following = all_lines[idx + 1]
            if separator.fullmatch(following):
                head = current
                has_outer_pipes = following.strip().startswith('|')
            continue

        if '|' in current:
            # Still inside the table; separator lines are not part of the body.
            if separator.fullmatch(current) is None:
                body.append(current)
            continue

        # A line without pipe terminates the current table.
        if body:
            yield head, body, has_outer_pipes
        head, body, has_outer_pipes = None, [], False

    # The text may end while we are still inside a table.
    if body:
        yield head, body, has_outer_pipes
def iter_markdown_sections(text) -> typing.Generator[
        typing.Tuple[typing.Optional[int], typing.Optional[str], str], None, None]:
    """
    Parse sections from a markdown formatted text.

    .. note::

        We only recognize the "#" syntax for marking section headings. Every line starting
        with "#" is regarded as heading, no matter in which context it appears.

    Sections consisting of a heading only (e.g. a heading directly followed by a sub-heading)
    are reported with empty content. Thus, concatenating header and content of all sections
    reproduces `text`.

    :param str text: markdown formatted text.
    :return: generator of (level, header, content) triples, where "level" is an `int` (or \
    `None` for text before the first heading), "header" is the exact section heading \
    (including "#"s and newline) or `None` and "content" the markdown text of the section.
    """
    section_pattern = re.compile(r'(?P<level>[#]+)')
    lines, header, level = [], None, None
    for line in text.splitlines(keepends=True):
        match = section_pattern.match(line)
        if match:
            # A new heading closes the section collected so far. This section must be
            # reported even if it has no content lines, otherwise its heading would be lost.
            if lines or header is not None:
                yield level, header, ''.join(lines)
            lines, header, level = [], line, len(match.group('level'))
        else:
            lines.append(line)
    if lines or header is not None:
        yield level, header, ''.join(lines)
def add_markdown_text(text: str,
                      new: str,
                      section: typing.Optional[typing.Union[typing.Callable, str]] = None) -> str:
    """
    Append markdown text to a (specific section of a) markdown document.

    :param str text: markdown formatted text.
    :param str new: markdown formatted text to be inserted into `text`.
    :param section: optionally specifies a section to which to append `new`. `section` can either \
    be a `str` and then specifies the first section with a header containing `section` as \
    substring; or a callable and then specifies the first section for which `section` returns \
    a truthy value when passed the section header. \
    If `None`, `new` will be appended at the end.
    :return: markdown formatted text resulting from inserting `new` in `text`.
    :raises ValueError: The specified section was not encountered.
    """
    res = []
    # Track the insertion with a dedicated flag, so that an empty `new` is not mistaken for
    # "section not found".
    inserted = False
    for _, header, content in iter_markdown_sections(text):
        if header:
            res.append(header)
        res.append(content)
        if header and section and not inserted:
            # Only fall back to the substring test for non-callables: `section in header`
            # raises a TypeError when `section` is a function.
            matches = section(header) if callable(section) else section in header
            if matches:
                # Keep `new` separated by a blank line from the preceding content.
                res.append(new + '\n\n' if content.endswith('\n\n') else '\n\n' + new)
                inserted = True
    res = ''.join(res)
    if section is None:
        if res:
            res += '\n\n'
        res += new
    elif not inserted:
        raise ValueError('Specified section not found')
    return res