# test_builder_registry.py
  1. """Tests of the builder registry."""
  2. import pytest
  3. import warnings
  4. from bs4 import BeautifulSoup
  5. from bs4.builder import (
  6. builder_registry as registry,
  7. HTMLParserTreeBuilder,
  8. TreeBuilderRegistry,
  9. )
  10. try:
  11. from bs4.builder import HTML5TreeBuilder
  12. HTML5LIB_PRESENT = True
  13. except ImportError:
  14. HTML5LIB_PRESENT = False
  15. try:
  16. from bs4.builder import (
  17. LXMLTreeBuilderForXML,
  18. LXMLTreeBuilder,
  19. )
  20. LXML_PRESENT = True
  21. except ImportError:
  22. LXML_PRESENT = False
  23. class TestBuiltInRegistry(object):
  24. """Test the built-in registry with the default builders registered."""
  25. def test_combination(self):
  26. assert registry.lookup('strict', 'html') == HTMLParserTreeBuilder
  27. if LXML_PRESENT:
  28. assert registry.lookup('fast', 'html') == LXMLTreeBuilder
  29. assert registry.lookup('permissive', 'xml') == LXMLTreeBuilderForXML
  30. if HTML5LIB_PRESENT:
  31. assert registry.lookup('html5lib', 'html') == HTML5TreeBuilder
  32. def test_lookup_by_markup_type(self):
  33. if LXML_PRESENT:
  34. assert registry.lookup('html') == LXMLTreeBuilder
  35. assert registry.lookup('xml') == LXMLTreeBuilderForXML
  36. else:
  37. assert registry.lookup('xml') == None
  38. if HTML5LIB_PRESENT:
  39. assert registry.lookup('html') == HTML5TreeBuilder
  40. else:
  41. assert registry.lookup('html') == HTMLParserTreeBuilder
  42. def test_named_library(self):
  43. if LXML_PRESENT:
  44. assert registry.lookup('lxml', 'xml') == LXMLTreeBuilderForXML
  45. assert registry.lookup('lxml', 'html') == LXMLTreeBuilder
  46. if HTML5LIB_PRESENT:
  47. assert registry.lookup('html5lib') == HTML5TreeBuilder
  48. assert registry.lookup('html.parser') == HTMLParserTreeBuilder
  49. def test_beautifulsoup_constructor_does_lookup(self):
  50. with warnings.catch_warnings(record=True) as w:
  51. # This will create a warning about not explicitly
  52. # specifying a parser, but we'll ignore it.
  53. # You can pass in a string.
  54. BeautifulSoup("", features="html")
  55. # Or a list of strings.
  56. BeautifulSoup("", features=["html", "fast"])
  57. pass
  58. # You'll get an exception if BS can't find an appropriate
  59. # builder.
  60. with pytest.raises(ValueError):
  61. BeautifulSoup("", features="no-such-feature")
  62. class TestRegistry(object):
  63. """Test the TreeBuilderRegistry class in general."""
  64. def setup_method(self):
  65. self.registry = TreeBuilderRegistry()
  66. def builder_for_features(self, *feature_list):
  67. cls = type('Builder_' + '_'.join(feature_list),
  68. (object,), {'features' : feature_list})
  69. self.registry.register(cls)
  70. return cls
  71. def test_register_with_no_features(self):
  72. builder = self.builder_for_features()
  73. # Since the builder advertises no features, you can't find it
  74. # by looking up features.
  75. assert self.registry.lookup('foo') is None
  76. # But you can find it by doing a lookup with no features, if
  77. # this happens to be the only registered builder.
  78. assert self.registry.lookup() == builder
  79. def test_register_with_features_makes_lookup_succeed(self):
  80. builder = self.builder_for_features('foo', 'bar')
  81. assert self.registry.lookup('foo') is builder
  82. assert self.registry.lookup('bar') is builder
  83. def test_lookup_fails_when_no_builder_implements_feature(self):
  84. builder = self.builder_for_features('foo', 'bar')
  85. assert self.registry.lookup('baz') is None
  86. def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
  87. builder1 = self.builder_for_features('foo')
  88. builder2 = self.builder_for_features('bar')
  89. assert self.registry.lookup() == builder2
  90. def test_lookup_fails_when_no_tree_builders_registered(self):
  91. assert self.registry.lookup() is None
  92. def test_lookup_gets_most_recent_builder_supporting_all_features(self):
  93. has_one = self.builder_for_features('foo')
  94. has_the_other = self.builder_for_features('bar')
  95. has_both_early = self.builder_for_features('foo', 'bar', 'baz')
  96. has_both_late = self.builder_for_features('foo', 'bar', 'quux')
  97. lacks_one = self.builder_for_features('bar')
  98. has_the_other = self.builder_for_features('foo')
  99. # There are two builders featuring 'foo' and 'bar', but
  100. # the one that also features 'quux' was registered later.
  101. assert self.registry.lookup('foo', 'bar') == has_both_late
  102. # There is only one builder featuring 'foo', 'bar', and 'baz'.
  103. assert self.registry.lookup('foo', 'bar', 'baz') == has_both_early
  104. def test_lookup_fails_when_cannot_reconcile_requested_features(self):
  105. builder1 = self.builder_for_features('foo', 'bar')
  106. builder2 = self.builder_for_features('foo', 'baz')
  107. assert self.registry.lookup('bar', 'baz') is None