123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- """
- Format a pretty string of a `SoupSieve` object for easy debugging.
- This won't necessarily support all types and such, and definitely
- not support custom outputs.
- It is mainly geared towards our types as the `SelectorList`
- object is a beast to look at without some indentation and newlines.
- The format and various output types is fairly known (though it
- hasn't been tested extensively to make sure we aren't missing corners).
- Example:
- ```
- >>> import soupsieve as sv
- >>> sv.compile('this > that.class[name=value]').selectors.pretty()
- SelectorList(
- selectors=(
- Selector(
- tag=SelectorTag(
- name='that',
- prefix=None),
- ids=(),
- classes=(
- 'class',
- ),
- attributes=(
- SelectorAttribute(
- attribute='name',
- prefix='',
- pattern=re.compile(
- '^value$'),
- xml_type_pattern=None),
- ),
- nth=(),
- selectors=(),
- relation=SelectorList(
- selectors=(
- Selector(
- tag=SelectorTag(
- name='this',
- prefix=None),
- ids=(),
- classes=(),
- attributes=(),
- nth=(),
- selectors=(),
- relation=SelectorList(
- selectors=(),
- is_not=False,
- is_html=False),
- rel_type='>',
- contains=(),
- lang=(),
- flags=0),
- ),
- is_not=False,
- is_html=False),
- rel_type=None,
- contains=(),
- lang=(),
- flags=0),
- ),
- is_not=False,
- is_html=False)
- ```
- """
- import re
- from typing import Any
- RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
- RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
- RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
- RE_LSTRT = re.compile(r'\[')
- RE_DSTRT = re.compile(r'\{')
- RE_TSTRT = re.compile(r'\(')
- RE_LEND = re.compile(r'\]')
- RE_DEND = re.compile(r'\}')
- RE_TEND = re.compile(r'\)')
- RE_INT = re.compile(r'\d+')
- RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
- RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
- RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
- RE_SEP = re.compile(r'\s*(,)\s*')
- RE_DSEP = re.compile(r'\s*(:)\s*')
- TOKENS = {
- 'class': RE_CLASS,
- 'param': RE_PARAM,
- 'empty': RE_EMPTY,
- 'lstrt': RE_LSTRT,
- 'dstrt': RE_DSTRT,
- 'tstrt': RE_TSTRT,
- 'lend': RE_LEND,
- 'dend': RE_DEND,
- 'tend': RE_TEND,
- 'sqstr': RE_SQSTR,
- 'sep': RE_SEP,
- 'dsep': RE_DSEP,
- 'int': RE_INT,
- 'kword': RE_KWORD,
- 'dqstr': RE_DQSTR
- }
- def pretty(obj: Any) -> str: # pragma: no cover
- """Make the object output string pretty."""
- sel = str(obj)
- index = 0
- end = len(sel) - 1
- indent = 0
- output = []
- while index <= end:
- m = None
- for k, v in TOKENS.items():
- m = v.match(sel, index)
- if m:
- name = k
- index = m.end(0)
- if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
- indent += 4
- output.append('{}\n{}'.format(m.group(0), " " * indent))
- elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
- output.append(m.group(0))
- elif name in ('lend', 'dend', 'tend'):
- indent -= 4
- output.append(m.group(0))
- elif name in ('sep',):
- output.append('{}\n{}'.format(m.group(1), " " * indent))
- elif name in ('dsep',):
- output.append('{} '.format(m.group(1)))
- break
- return ''.join(output)
|