test_formatter.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. import pytest
  2. from bs4.element import Tag
  3. from bs4.formatter import (
  4. Formatter,
  5. HTMLFormatter,
  6. XMLFormatter,
  7. )
  8. from . import SoupTest
  9. class TestFormatter(SoupTest):
  10. def test_default_attributes(self):
  11. # Test the default behavior of Formatter.attributes().
  12. formatter = Formatter()
  13. tag = Tag(name="tag")
  14. tag['b'] = 1
  15. tag['a'] = 2
  16. # Attributes come out sorted by name. In Python 3, attributes
  17. # normally come out of a dictionary in the order they were
  18. # added.
  19. assert [('a', 2), ('b', 1)] == formatter.attributes(tag)
  20. # This works even if Tag.attrs is None, though this shouldn't
  21. # normally happen.
  22. tag.attrs = None
  23. assert [] == formatter.attributes(tag)
  24. assert ' ' == formatter.indent
  25. def test_sort_attributes(self):
  26. # Test the ability to override Formatter.attributes() to,
  27. # e.g., disable the normal sorting of attributes.
  28. class UnsortedFormatter(Formatter):
  29. def attributes(self, tag):
  30. self.called_with = tag
  31. for k, v in sorted(tag.attrs.items()):
  32. if k == 'ignore':
  33. continue
  34. yield k,v
  35. soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
  36. formatter = UnsortedFormatter()
  37. decoded = soup.decode(formatter=formatter)
  38. # attributes() was called on the <p> tag. It filtered out one
  39. # attribute and sorted the other two.
  40. assert formatter.called_with == soup.p
  41. assert '<p aval="2" cval="1"></p>' == decoded
  42. def test_empty_attributes_are_booleans(self):
  43. # Test the behavior of empty_attributes_are_booleans as well
  44. # as which Formatters have it enabled.
  45. for name in ('html', 'minimal', None):
  46. formatter = HTMLFormatter.REGISTRY[name]
  47. assert False == formatter.empty_attributes_are_booleans
  48. formatter = XMLFormatter.REGISTRY[None]
  49. assert False == formatter.empty_attributes_are_booleans
  50. formatter = HTMLFormatter.REGISTRY['html5']
  51. assert True == formatter.empty_attributes_are_booleans
  52. # Verify that the constructor sets the value.
  53. formatter = Formatter(empty_attributes_are_booleans=True)
  54. assert True == formatter.empty_attributes_are_booleans
  55. # Now demonstrate what it does to markup.
  56. for markup in (
  57. "<option selected></option>",
  58. '<option selected=""></option>'
  59. ):
  60. soup = self.soup(markup)
  61. for formatter in ('html', 'minimal', 'xml', None):
  62. assert b'<option selected=""></option>' == soup.option.encode(formatter='html')
  63. assert b'<option selected></option>' == soup.option.encode(formatter='html5')
  64. @pytest.mark.parametrize(
  65. "indent,expect",
  66. [
  67. (None, '<a>\n<b>\ntext\n</b>\n</a>'),
  68. (-1, '<a>\n<b>\ntext\n</b>\n</a>'),
  69. (0, '<a>\n<b>\ntext\n</b>\n</a>'),
  70. ("", '<a>\n<b>\ntext\n</b>\n</a>'),
  71. (1, '<a>\n <b>\n text\n </b>\n</a>'),
  72. (2, '<a>\n <b>\n text\n </b>\n</a>'),
  73. ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'),
  74. ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'),
  75. # Some invalid inputs -- the default behavior is used.
  76. (object(), '<a>\n <b>\n text\n </b>\n</a>'),
  77. (b'bytes', '<a>\n <b>\n text\n </b>\n</a>'),
  78. ]
  79. )
  80. def test_indent(self, indent, expect):
  81. # Pretty-print a tree with a Formatter set to
  82. # indent in a certain way and verify the results.
  83. soup = self.soup("<a><b>text</b></a>")
  84. formatter = Formatter(indent=indent)
  85. assert soup.prettify(formatter=formatter) == expect
  86. # Pretty-printing only happens with prettify(), not
  87. # encode().
  88. assert soup.encode(formatter=formatter) != expect
  89. def test_default_indent_value(self):
  90. formatter = Formatter()
  91. assert formatter.indent == ' '