123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
- """Tests of the builder registry."""
- import pytest
- import warnings
- from bs4 import BeautifulSoup
- from bs4.builder import (
- builder_registry as registry,
- HTMLParserTreeBuilder,
- TreeBuilderRegistry,
- )
- try:
- from bs4.builder import HTML5TreeBuilder
- HTML5LIB_PRESENT = True
- except ImportError:
- HTML5LIB_PRESENT = False
- try:
- from bs4.builder import (
- LXMLTreeBuilderForXML,
- LXMLTreeBuilder,
- )
- LXML_PRESENT = True
- except ImportError:
- LXML_PRESENT = False
class TestBuiltInRegistry(object):
    """Test the built-in registry with the default builders registered.

    lxml and html5lib are optional dependencies. Assertions that involve
    their builders only run when the corresponding import succeeded
    (LXML_PRESENT / HTML5LIB_PRESENT).
    """

    def test_combination(self):
        """Looking up several features at once resolves to one builder."""
        assert registry.lookup('strict', 'html') == HTMLParserTreeBuilder
        if LXML_PRESENT:
            assert registry.lookup('fast', 'html') == LXMLTreeBuilder
            assert registry.lookup('permissive', 'xml') == LXMLTreeBuilderForXML
        if HTML5LIB_PRESENT:
            assert registry.lookup('html5lib', 'html') == HTML5TreeBuilder

    def test_lookup_by_markup_type(self):
        """Looking up a bare markup type picks the expected builder."""
        if LXML_PRESENT:
            assert registry.lookup('html') == LXMLTreeBuilder
            assert registry.lookup('xml') == LXMLTreeBuilderForXML
        else:
            # Without lxml nothing is registered for XML.
            assert registry.lookup('xml') is None
            # The expected HTML builder then depends on whether html5lib
            # is available.
            if HTML5LIB_PRESENT:
                assert registry.lookup('html') == HTML5TreeBuilder
            else:
                assert registry.lookup('html') == HTMLParserTreeBuilder

    def test_named_library(self):
        """A builder can be looked up by the name of its parser library."""
        if LXML_PRESENT:
            assert registry.lookup('lxml', 'xml') == LXMLTreeBuilderForXML
            assert registry.lookup('lxml', 'html') == LXMLTreeBuilder
        if HTML5LIB_PRESENT:
            assert registry.lookup('html5lib') == HTML5TreeBuilder

        assert registry.lookup('html.parser') == HTMLParserTreeBuilder

    def test_beautifulsoup_constructor_does_lookup(self):
        """The BeautifulSoup constructor resolves `features` via the registry."""
        # This will create a warning about not explicitly
        # specifying a parser, but we'll ignore it: record=True captures
        # warnings inside the block, and the captured list is discarded.
        with warnings.catch_warnings(record=True):
            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        with pytest.raises(ValueError):
            BeautifulSoup("", features="no-such-feature")
class TestRegistry(object):
    """Test the TreeBuilderRegistry class in general."""

    def setup_method(self):
        """Give every test its own freshly constructed registry."""
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create a stand-in builder class and register it.

        The class is named ``Builder_<features joined by '_'>`` and exposes
        the given features as its ``features`` attribute (a tuple).

        :param feature_list: Feature names the stand-in builder advertises.
        :return: The newly created and registered class.
        """
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features': feature_list})
        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        """A feature-less builder is only reachable by a feature-less lookup."""
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        assert self.registry.lookup('foo') is None

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        assert self.registry.lookup() is builder

    def test_register_with_features_makes_lookup_succeed(self):
        """Each advertised feature leads back to the registered builder."""
        builder = self.builder_for_features('foo', 'bar')
        assert self.registry.lookup('foo') is builder
        assert self.registry.lookup('bar') is builder

    def test_lookup_fails_when_no_builder_implements_feature(self):
        """Looking up a feature nobody advertises returns None."""
        self.builder_for_features('foo', 'bar')
        assert self.registry.lookup('baz') is None

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        """A feature-less lookup returns the most recently registered builder."""
        self.builder_for_features('foo')
        newest = self.builder_for_features('bar')
        assert self.registry.lookup() is newest

    def test_lookup_fails_when_no_tree_builders_registered(self):
        """An empty registry has nothing to return."""
        assert self.registry.lookup() is None

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        """Among builders with all requested features, the latest one wins."""
        # Builders registered before the full matches; each advertises
        # only one of 'foo' / 'bar'.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        # Builders registered after the full matches; again each advertises
        # only one of the two features, so neither may be returned below.
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        assert self.registry.lookup('foo', 'bar') is has_both_late

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        assert self.registry.lookup('foo', 'bar', 'baz') is has_both_early

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        """No single builder offers both 'bar' and 'baz', so lookup fails."""
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        assert self.registry.lookup('bar', 'baz') is None
|