test_navigablestring.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. import pytest
  2. from bs4.element import (
  3. CData,
  4. Comment,
  5. Declaration,
  6. Doctype,
  7. NavigableString,
  8. RubyParenthesisString,
  9. RubyTextString,
  10. Script,
  11. Stylesheet,
  12. TemplateString,
  13. )
  14. from . import SoupTest
  15. class TestNavigableString(SoupTest):
  16. def test_text_acquisition_methods(self):
  17. # These methods are intended for use against Tag, but they
  18. # work on NavigableString as well,
  19. s = NavigableString("fee ")
  20. cdata = CData("fie ")
  21. comment = Comment("foe ")
  22. assert "fee " == s.get_text()
  23. assert "fee" == s.get_text(strip=True)
  24. assert ["fee "] == list(s.strings)
  25. assert ["fee"] == list(s.stripped_strings)
  26. assert ["fee "] == list(s._all_strings())
  27. assert "fie " == cdata.get_text()
  28. assert "fie" == cdata.get_text(strip=True)
  29. assert ["fie "] == list(cdata.strings)
  30. assert ["fie"] == list(cdata.stripped_strings)
  31. assert ["fie "] == list(cdata._all_strings())
  32. # Since a Comment isn't normally considered 'text',
  33. # these methods generally do nothing.
  34. assert "" == comment.get_text()
  35. assert [] == list(comment.strings)
  36. assert [] == list(comment.stripped_strings)
  37. assert [] == list(comment._all_strings())
  38. # Unless you specifically say that comments are okay.
  39. assert "foe" == comment.get_text(strip=True, types=Comment)
  40. assert "foe " == comment.get_text(types=(Comment, NavigableString))
  41. def test_string_has_immutable_name_property(self):
  42. # string.name is defined as None and can't be modified
  43. string = self.soup("s").string
  44. assert None == string.name
  45. with pytest.raises(AttributeError):
  46. string.name = 'foo'
  47. class TestNavigableStringSubclasses(SoupTest):
  48. def test_cdata(self):
  49. # None of the current builders turn CDATA sections into CData
  50. # objects, but you can create them manually.
  51. soup = self.soup("")
  52. cdata = CData("foo")
  53. soup.insert(1, cdata)
  54. assert str(soup) == "<![CDATA[foo]]>"
  55. assert soup.find(string="foo") == "foo"
  56. assert soup.contents[0] == "foo"
  57. def test_cdata_is_never_formatted(self):
  58. """Text inside a CData object is passed into the formatter.
  59. But the return value is ignored.
  60. """
  61. self.count = 0
  62. def increment(*args):
  63. self.count += 1
  64. return "BITTER FAILURE"
  65. soup = self.soup("")
  66. cdata = CData("<><><>")
  67. soup.insert(1, cdata)
  68. assert b"<![CDATA[<><><>]]>" == soup.encode(formatter=increment)
  69. assert 1 == self.count
  70. def test_doctype_ends_in_newline(self):
  71. # Unlike other NavigableString subclasses, a DOCTYPE always ends
  72. # in a newline.
  73. doctype = Doctype("foo")
  74. soup = self.soup("")
  75. soup.insert(1, doctype)
  76. assert soup.encode() == b"<!DOCTYPE foo>\n"
  77. def test_declaration(self):
  78. d = Declaration("foo")
  79. assert "<?foo?>" == d.output_ready()
  80. def test_default_string_containers(self):
  81. # In some cases, we use different NavigableString subclasses for
  82. # the same text in different tags.
  83. soup = self.soup(
  84. "<div>text</div><script>text</script><style>text</style>"
  85. )
  86. assert [NavigableString, Script, Stylesheet] == [
  87. x.__class__ for x in soup.find_all(string=True)
  88. ]
  89. # The TemplateString is a little unusual because it's generally found
  90. # _inside_ children of a <template> element, not a direct child of the
  91. # <template> element.
  92. soup = self.soup(
  93. "<template>Some text<p>In a tag</p></template>Some text outside"
  94. )
  95. assert all(
  96. isinstance(x, TemplateString)
  97. for x in soup.template._all_strings(types=None)
  98. )
  99. # Once the <template> tag closed, we went back to using
  100. # NavigableString.
  101. outside = soup.template.next_sibling
  102. assert isinstance(outside, NavigableString)
  103. assert not isinstance(outside, TemplateString)
  104. # The TemplateString is also unusual because it can contain
  105. # NavigableString subclasses of _other_ types, such as
  106. # Comment.
  107. markup = b"<template>Some text<p>In a tag</p><!--with a comment--></template>"
  108. soup = self.soup(markup)
  109. assert markup == soup.template.encode("utf8")
  110. def test_ruby_strings(self):
  111. markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>"
  112. soup = self.soup(markup)
  113. assert isinstance(soup.rp.string, RubyParenthesisString)
  114. assert isinstance(soup.rt.string, RubyTextString)
  115. # Just as a demo, here's what this means for get_text usage.
  116. assert "漢字" == soup.get_text(strip=True)
  117. assert "漢(kan)字(ji)" == soup.get_text(
  118. strip=True,
  119. types=(NavigableString, RubyTextString, RubyParenthesisString)
  120. )