1234567891011121314151617181920212223242526272829303132333435363738 |
# For every Iconclass notation in list_icon: fetch its page from
# iconclass.org, read the notation text from the page, and write the triples
# that link the subject held in datplaceHolder to that notation.
for ic in list_icon:
    url = 'http://iconclass.org/rdk/' + str(ic)
    # Close the response explicitly so each iteration does not leave an open
    # connection behind (try/finally works whether or not the response
    # object supports the context-manager protocol).
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html, 'html.parser')
    # Remove <script> and <style> elements so they cannot contribute to any
    # text extracted from the tree below.
    for script in soup(["script", "style"]):
        script.extract()
    current = soup.find("div", {"id": "ic_current"})
    # attrs must be a dict; the previous set literal {"class", "ic_notation"}
    # only worked because BeautifulSoup treats a non-dict attrs value as a
    # CSS-class search, and it would also have matched class="class".
    notation_link = current.find("a", {"class": "ic_notation"})
    notation_text = notation_link.text
    # Keep what follows the first space; if there is no space the whole text
    # is kept (same result as the former find(' ') + slice).
    # NOTE(review): icon_label is not used by the statements visible here --
    # presumably consumed elsewhere; confirm before removing the lookup.
    icon_label = notation_text.split(' ', 1)[-1]
    # Percent-encode parentheses so the notation can be appended to the
    # iconCoords prefix.
    encoded_ic = ic.replace("(", "%28").replace(")", "%29")
    line = triple(datplaceHolder,
                  cidocCoords.prefix + 'P62_depicts',
                  iconCoords.prefix + encoded_ic) + closeLine
    output.write(line)
    line = triple(iconCoords.prefix + encoded_ic,
                  nsCoords.prefix + 'type',
                  cidocCoords.prefix + 'E1_CRM_Entity') + closeLine
    output.write(line)

# P2 Opera d'arte
# NOTE(review): the original indentation was lost; this triple does not depend
# on `ic`, so it is written once after the loop -- confirm against the
# upstream file.
line = triple(datplaceHolder,
              cidocCoords.prefix + 'P2_has_type',
              '\"Opera d\'Arte\"') + closeLine
output.write(line)
|