import warnings
from bs4.element import (
Comment,
NavigableString,
)
from . import SoupTest
class TestTag(SoupTest):
    """Test various methods of Tag which aren't so complicated they
    need their own classes.
    """

    def test__should_pretty_print(self):
        """Check the rules about when a tag should be pretty-printed."""
        tag = self.soup("").new_tag("a_tag")

        # No list of whitespace-preserving tags -> pretty-print.
        # This assigns the same attribute the cases below assign, so
        # the "no list" scenario is the one actually being exercised.
        tag.preserve_whitespace_tags = None
        assert True == tag._should_pretty_print(0)

        # List exists but tag is not on the list -> pretty-print
        tag.preserve_whitespace_tags = ["some_other_tag"]
        assert True == tag._should_pretty_print(1)

        # Indent level is None -> don't pretty-print
        assert False == tag._should_pretty_print(None)

        # Tag is on the whitespace-preserving list -> don't pretty-print
        tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"]
        assert False == tag._should_pretty_print(1)

    def test_len(self):
        """The length of a Tag is its number of children."""
        soup = self.soup("123")

        # The BeautifulSoup object itself contains one element: the
        # top-level tag.
        assert len(soup.contents) == 1
        assert len(soup) == 1

        # The tag contains three elements: the text node "1", a
        # tag, and the text node "3".
        assert len(soup.top) == 3
        assert len(soup.top.contents) == 3

    def test_member_access_invokes_find(self):
        """Accessing a Python member .foo invokes find('foo')"""
        soup = self.soup('')
        assert soup.b == soup.find('b')
        assert soup.b.i == soup.find('b').find('i')
        assert soup.a is None

    def test_deprecated_member_access(self):
        """Accessing .bTag returns the same tag as .b and records a
        warning whose message says the spelling is deprecated.
        """
        soup = self.soup('')
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded no matter which
            # filters are active outside this block; the previous
            # filters come back when the context manager exits.
            warnings.simplefilter("always")
            tag = soup.bTag
        assert soup.b == tag
        assert '.bTag is deprecated, use .find("b") instead. If you really were looking for a tag called bTag, use .find("bTag")' == str(w[0].message)

    def test_has_attr(self):
        """has_attr() checks for the presence of an attribute.

        Please note: has_attr() is different from the `in`
        operator. has_attr() checks the tag's attributes and `in`
        checks the tag's children.
        """
        soup = self.soup("")
        assert soup.foo.has_attr('attr')
        assert not soup.foo.has_attr('attr2')

    def test_attributes_come_out_in_alphabetical_order(self):
        """Run the markup through assertSoupEquals against the expected
        output.
        """
        markup = ''
        self.assertSoupEquals(markup, '')

    def test_string(self):
        # A Tag that contains only a text node makes that node
        # available as .string.
        soup = self.soup("foo")
        assert soup.b.string == 'foo'

    def test_empty_tag_has_no_string(self):
        # A Tag with no children has no .string.
        soup = self.soup("")
        assert soup.b.string is None

    def test_tag_with_multiple_children_has_no_string(self):
        # A Tag with more than one child has no .string.
        soup = self.soup("foo")
        assert soup.b.string is None

        soup = self.soup("foobar")
        assert soup.b.string is None

        # Even if all the children are strings, due to trickery,
        # it won't work--but this would be a good optimization.
        soup = self.soup("foo")
        soup.a.insert(1, "bar")
        assert soup.a.string is None

    def test_tag_with_recursive_string_has_string(self):
        # A Tag with a single child which has a .string inherits that
        # .string.
        soup = self.soup("foo")
        assert soup.a.string == "foo"
        assert soup.string == "foo"

    def test_lack_of_string(self):
        """Only a Tag containing a single text node has a .string."""
        soup = self.soup("feo")
        assert soup.b.string is None

        soup = self.soup("")
        assert soup.b.string is None

    def test_all_text(self):
        """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated"""
        soup = self.soup("ar t ")
        assert soup.a.text == "ar t "
        assert soup.a.get_text(strip=True) == "art"
        assert soup.a.get_text(",") == "a,r, , t "
        assert soup.a.get_text(",", strip=True) == "a,r,t"

    def test_get_text_ignores_special_string_containers(self):
        """Compare get_text() under its default `types` with the result
        when `types` is given explicitly or passed as None.
        """
        soup = self.soup("foobar")
        assert soup.get_text() == "foobar"

        # Asking for Comment objects as well, or passing types=None,
        # is expected to bring the extra text into the result.
        assert soup.get_text(types=(NavigableString, Comment)) == "fooIGNOREbar"
        assert soup.get_text(types=None) == "fooIGNOREbar"

        soup = self.soup("foobar")
        assert soup.get_text() == "foobar"

    def test_all_strings_ignores_special_string_containers(self):
        """.strings is expected to yield only 'foo' and 'bar' for both
        documents parsed here.
        """
        soup = self.soup("foobar")
        assert ['foo', 'bar'] == list(soup.strings)

        soup = self.soup("foobar")
        assert ['foo', 'bar'] == list(soup.strings)

    def test_string_methods_inside_special_string_container_tags(self):
        # Checks get_text() and .strings on a <div>, and then on the
        # style / template / script tag nested inside that <div>.
        #
        # NOTE(review): in this copy of the file the opening comment is
        # truncated and no statements binding `style`, `template` or
        # `script` are visible -- presumably three self.soup(...) calls
        # belong here. Restore them from the original file; confirm.
        assert style.div.get_text() == "a"
        assert list(style.div.strings) == ["a"]
        assert style.div.style.get_text() == "Some CSS"
        assert list(style.div.style.strings) == ['Some CSS']

        # The comment is not picked up here. That's because it was
        # parsed into a Comment object, which is not considered
        # interesting by template.strings.
        assert template.div.get_text() == "a"
        assert list(template.div.strings) == ["a"]
        assert template.div.template.get_text() == "Templated text."
        assert list(template.div.template.strings) == ["Templated ", "text", "."]

        # The comment is included here, because it didn't get parsed
        # into a Comment object--it's part of the Script string.
        assert script.div.get_text() == "a"
        assert list(script.div.strings) == ["a"]
        assert script.div.script.get_text() == "Some text"
        assert list(script.div.script.strings) == ['Some text']
class TestMultiValuedAttributes(SoupTest):
"""Test the behavior of multi-valued attributes like 'class'.
The values of such attributes are always presented as lists.
"""
def test_single_value_becomes_list(self):
    """A lone 'class' value is still expected to come back as a
    one-element list.
    """
    anchor = self.soup("").a
    assert ["foo"] == anchor['class']
def test_multiple_values_becomes_list(self):
    """Two 'class' values are expected to come back as a two-element
    list, in order.
    """
    anchor = self.soup("").a
    assert ["foo", "bar"] == anchor['class']
def test_multiple_values_separated_by_weird_whitespace(self):
    """Three 'class' values are expected to come back as a
    three-element list, in order.
    """
    anchor = self.soup("").a
    assert ["foo", "bar", "baz"] == anchor['class']
def test_attributes_joined_into_string_on_output(self):
    """Compare the bytes produced by encode() on the <a> tag with the
    expected serialization.
    """
    anchor = self.soup("").a
    assert b'' == anchor.encode()
def test_get_attribute_list(self):
    """get_attribute_list('id') is expected to return the 'id' value
    wrapped in a list, as one unsplit string.
    """
    anchor = self.soup("").a
    assert ['abc def'] == anchor.get_attribute_list('id')
def test_accept_charset(self):
soup = self.soup('