for ic in list_icon:.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. for ic in list_icon:
  2. url = 'http://iconclass.org/rdk/' + str(ic)
  3. html = urlopen(url).read()
  4. soup = BeautifulSoup(html, 'html.parser')
  5. # kill all script and style elements
  6. for script in soup(["script", "style"]):
  7. script.extract() # rip it out
  8. # get text
  9. text = soup.get_text()
  10. pretty = soup.prettify()
  11. ff = soup.find("div", {"id": "ic_current"})
  12. dd = ff.find("a", {"class", "ic_notation"})
  13. ss = dd.text
  14. x = ss.find(' ')
  15. icon_label = ss[x + 1:]
  16. ur = ic.replace("(", "%28")
  17. urr = ur.replace(")", "%29")
  18. line = triple(datplaceHolder,
  19. cidocCoords.prefix + 'P62_depicts',
  20. iconCoords.prefix + urr) + closeLine
  21. output.write(line)
  22. line = triple(iconCoords.prefix + urr,
  23. nsCoords.prefix + 'type',
  24. cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
  25. output.write(line)
  26. # P2 Opera d'arte
  27. line = triple(datplaceHolder,
  28. cidocCoords.prefix + 'P2_has_type',
  29. '\"Opera d\'Arte\"') + closeLine
  30. output.write(line)