pretty.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
"""
Format a pretty string of a `SoupSieve` object for easy debugging.

This won't necessarily support all types and such, and it definitely
won't support custom outputs.

It is mainly geared towards our own types, as the `SelectorList`
object is a beast to look at without some indentation and newlines.
The format and the various output types are fairly well known (though it
hasn't been tested extensively to make sure we aren't missing corner cases).

Example:

```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
    selectors=(
        Selector(
            tag=SelectorTag(
                name='that',
                prefix=None),
            ids=(),
            classes=(
                'class',
                ),
            attributes=(
                SelectorAttribute(
                    attribute='name',
                    prefix='',
                    pattern=re.compile(
                        '^value$'),
                    xml_type_pattern=None),
                ),
            nth=(),
            selectors=(),
            relation=SelectorList(
                selectors=(
                    Selector(
                        tag=SelectorTag(
                            name='this',
                            prefix=None),
                        ids=(),
                        classes=(),
                        attributes=(),
                        nth=(),
                        selectors=(),
                        relation=SelectorList(
                            selectors=(),
                            is_not=False,
                            is_html=False),
                        rel_type='>',
                        contains=(),
                        lang=(),
                        flags=0),
                    ),
                is_not=False,
                is_html=False),
            rel_type=None,
            contains=(),
            lang=(),
            flags=0),
        ),
    is_not=False,
    is_html=False)
```
"""
  64. import re
  65. from typing import Any
  66. RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
  67. RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
  68. RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
  69. RE_LSTRT = re.compile(r'\[')
  70. RE_DSTRT = re.compile(r'\{')
  71. RE_TSTRT = re.compile(r'\(')
  72. RE_LEND = re.compile(r'\]')
  73. RE_DEND = re.compile(r'\}')
  74. RE_TEND = re.compile(r'\)')
  75. RE_INT = re.compile(r'\d+')
  76. RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
  77. RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
  78. RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
  79. RE_SEP = re.compile(r'\s*(,)\s*')
  80. RE_DSEP = re.compile(r'\s*(:)\s*')
  81. TOKENS = {
  82. 'class': RE_CLASS,
  83. 'param': RE_PARAM,
  84. 'empty': RE_EMPTY,
  85. 'lstrt': RE_LSTRT,
  86. 'dstrt': RE_DSTRT,
  87. 'tstrt': RE_TSTRT,
  88. 'lend': RE_LEND,
  89. 'dend': RE_DEND,
  90. 'tend': RE_TEND,
  91. 'sqstr': RE_SQSTR,
  92. 'sep': RE_SEP,
  93. 'dsep': RE_DSEP,
  94. 'int': RE_INT,
  95. 'kword': RE_KWORD,
  96. 'dqstr': RE_DQSTR
  97. }
  98. def pretty(obj: Any) -> str: # pragma: no cover
  99. """Make the object output string pretty."""
  100. sel = str(obj)
  101. index = 0
  102. end = len(sel) - 1
  103. indent = 0
  104. output = []
  105. while index <= end:
  106. m = None
  107. for k, v in TOKENS.items():
  108. m = v.match(sel, index)
  109. if m:
  110. name = k
  111. index = m.end(0)
  112. if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
  113. indent += 4
  114. output.append('{}\n{}'.format(m.group(0), " " * indent))
  115. elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
  116. output.append(m.group(0))
  117. elif name in ('lend', 'dend', 'tend'):
  118. indent -= 4
  119. output.append(m.group(0))
  120. elif name in ('sep',):
  121. output.append('{}\n{}'.format(m.group(1), " " * indent))
  122. elif name in ('dsep',):
  123. output.append('{} '.format(m.group(1)))
  124. break
  125. return ''.join(output)