{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every Iconclass notation in `list_icon`, fetch its iconclass.org page,\n",
    "# read the label shown for the current entry, and write two triples to `output`:\n",
    "#   triple(datplaceHolder, cidocCoords.prefix + 'P62_depicts', <notation ref>)\n",
    "#   triple(<notation ref>, nsCoords.prefix + 'type', cidocCoords.prefix + 'E1_CRM_Entity')\n",
    "# NOTE(review): this cell depends on names defined outside it (list_icon,\n",
    "# urlopen, BeautifulSoup, triple, datplaceHolder, cidocCoords, iconCoords,\n",
    "# nsCoords, closeLine, output) -- confirm they exist on a fresh kernel.\n",
    "for ic in list_icon:\n",
    "    notation = str(ic)\n",
    "    url = 'http://iconclass.org/rdk/' + notation\n",
    "\n",
    "    # Read inside a context manager so the HTTP response is always closed.\n",
    "    with urlopen(url) as response:\n",
    "        html = response.read()\n",
    "    soup = BeautifulSoup(html, 'html.parser')\n",
    "\n",
    "    # Locate <div id=\"ic_current\"> and, inside it, <a class=\"ic_notation\">.\n",
    "    # `attrs` must be a dict mapping attribute name to value.\n",
    "    current = soup.find(\"div\", {\"id\": \"ic_current\"})\n",
    "    anchor = None if current is None else current.find(\"a\", {\"class\": \"ic_notation\"})\n",
    "    if anchor is None:\n",
    "        # Fail with the offending notation instead of an opaque AttributeError.\n",
    "        raise ValueError(\n",
    "            f\"No 'ic_notation' link found for Iconclass notation {notation!r} at {url}\"\n",
    "        )\n",
    "\n",
    "    # The label is everything after the first space of the link text\n",
    "    # (the whole text when it contains no space).\n",
    "    # NOTE(review): icon_label is computed but never written to `output`.\n",
    "    icon_label = anchor.text.split(' ', 1)[-1]\n",
    "\n",
    "    # Percent-encode parentheses before appending the notation to the prefix.\n",
    "    encoded_notation = notation.replace(\"(\", \"%28\").replace(\")\", \"%29\")\n",
    "    icon_ref = iconCoords.prefix + encoded_notation\n",
    "\n",
    "    output.write(triple(datplaceHolder,\n",
    "                        cidocCoords.prefix + 'P62_depicts',\n",
    "                        icon_ref) + closeLine)\n",
    "    output.write(triple(icon_ref,\n",
    "                        nsCoords.prefix + 'type',\n",
    "                        cidocCoords.prefix + 'E1_CRM_Entity') + closeLine)\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
  },
  "kernelspec": {
   "display_name": "Python 3.9.0 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}