{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "ename": "URLError", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mSSLCertVerificationError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py\u001b[0m in \u001b[0;36mdo_open\u001b[0;34m(self, http_class, req, **http_conn_args)\u001b[0m\n\u001b[1;32m 1341\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1342\u001b[0;31m h.request(req.get_method(), req.selector, req.data, headers,\n\u001b[0m\u001b[1;32m 1343\u001b[0m encode_chunked=req.has_header('Transfer-encoding'))\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1254\u001b[0m \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1255\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1300\u001b[0m \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1301\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1302\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1250\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1009\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1010\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1011\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 949\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_open\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 950\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 951\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1424\u001b[0;31m self.sock = self._context.wrap_socket(self.sock,\n\u001b[0m\u001b[1;32m 1425\u001b[0m server_hostname=server_hostname)\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py\u001b[0m in \u001b[0;36mwrap_socket\u001b[0;34m(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)\u001b[0m\n\u001b[1;32m 499\u001b[0m \u001b[0;31m# ctx._wrap_socket()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 500\u001b[0;31m return self.sslsocket_class._create(\n\u001b[0m\u001b[1;32m 501\u001b[0m \u001b[0msock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msock\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py\u001b[0m in \u001b[0;36m_create\u001b[0;34m(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"do_handshake_on_connect should not be specified for non-blocking sockets\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1040\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_handshake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1041\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mOSError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py\u001b[0m in \u001b[0;36mdo_handshake\u001b[0;34m(self, block)\u001b[0m\n\u001b[1;32m 1308\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msettimeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1309\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_handshake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1310\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mSSLCertVerificationError\u001b[0m: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mURLError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_10523/3987068014.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1065\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mic\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlist_icon\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1066\u001b[0m \u001b[0murl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'https://iconclass.org/rdk/'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mic\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1067\u001b[0;31m \u001b[0mhtml\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0murlopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1068\u001b[0m \u001b[0msoup\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBeautifulSoup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhtml\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'html.parser'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1069\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[0m\n\u001b[1;32m 212\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[0mopener\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_opener\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 214\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 215\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minstall_opener\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, fullurl, data, timeout)\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 516\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maudit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'urllib.Request'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 517\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 518\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 519\u001b[0m \u001b[0;31m# post-process response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py\u001b[0m in \u001b[0;36m_open\u001b[0;34m(self, req, data)\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 533\u001b[0m \u001b[0mprotocol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 534\u001b[0;31m result = self._call_chain(self.handle_open, protocol, protocol +\n\u001b[0m\u001b[1;32m 535\u001b[0m '_open', req)\n\u001b[1;32m 536\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py\u001b[0m in \u001b[0;36m_call_chain\u001b[0;34m(self, chain, kind, meth_name, *args)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhandler\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mhandlers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandler\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 494\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 495\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 496\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py\u001b[0m in \u001b[0;36mhttps_open\u001b[0;34m(self, req)\u001b[0m\n\u001b[1;32m 1383\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1384\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mhttps_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1385\u001b[0;31m return self.do_open(http.client.HTTPSConnection, req,\n\u001b[0m\u001b[1;32m 1386\u001b[0m context=self._context, check_hostname=self._check_hostname)\n\u001b[1;32m 1387\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py\u001b[0m in \u001b[0;36mdo_open\u001b[0;34m(self, http_class, req, **http_conn_args)\u001b[0m\n\u001b[1;32m 1343\u001b[0m encode_chunked=req.has_header('Transfer-encoding'))\n\u001b[1;32m 1344\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# timeout error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1345\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mURLError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1346\u001b[0m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1347\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mURLError\u001b[0m: " ] } ], "source": [ "# Utilities to read/write csv files\n", "import csv\n", "# Utilities to handle character encodings\n", "import unicodedata\n", "# Ordered Dicts\n", "from collections import OrderedDict\n", "\n", "from urllib.request import urlopen\n", "from bs4 import BeautifulSoup\n", "\n", "import json\n", "\n", "# OPZIONAL IMPORTS\n", "\n", "# For timestamping/simple speed tests\n", "from datetime import datetime\n", "# Random number generator\n", "from random import *\n", "# System & command line utilities\n", "import sys\n", "# Json for the dictionary\n", "import json\n", "\n", "import_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/corretti/'\n", "export_dir = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/RDF/'\n", "\n", "\n", "# Custom class to store URIs + related infos for the ontologies/repositories\n", "\n", "class RDFcoords:\n", " def __init__(self, uri, prefix, code=None):\n", " self.uri = uri\n", " self.prefix = prefix\n", " self.code = code\n", "\n", "\n", "# Repositories\n", "museoCoords = RDFcoords('', 'mpp:')\n", "autCoords = RDFcoords('', 'aut:')\n", "cidocCoords = RDFcoords('', 'crm:')\n", "aatCoords = RDFcoords('', 'aat:')\n", "nsCoords = RDFcoords('', 'rdf:')\n", "schemaCoords = RDFcoords('', 'rdfs:')\n", "xsdCoords = RDFcoords('', 'xsd:')\n", "iconCoords = RDFcoords('', 'ico:')\n", "\n", "\n", "# Basic functions for triples / shortened triples in TTL format\n", "\n", "def triple(subject, predicate, object1):\n", " line = subject + ' ' + predicate + ' ' + object1\n", " return line\n", "\n", "\n", "def doublet(predicate, object1):\n", " line = ' ' + predicate + ' ' + object1\n", " return line\n", "\n", "\n", "def singlet(object1):\n", " line = ' ' + object1\n", " return line\n", "\n", "\n", "# Line endings in TTL format\n", "continueLine1 = ' ;\\n'\n", "continueLine2 = ' ,\\n'\n", "closeLine = ' .\\n'\n", "\n", "\n", "def writeTTLHeader(output):\n", " output.write('@prefix ' + museoCoords.prefix + ' ' + museoCoords.uri + closeLine)\n", " output.write('@prefix ' + cidocCoords.prefix + ' ' + cidocCoords.uri + closeLine)\n", " output.write('@prefix ' + aatCoords.prefix + ' ' + aatCoords.uri + closeLine)\n", " output.write('@prefix ' + schemaCoords.prefix + ' ' + schemaCoords.uri + closeLine)\n", " output.write('@prefix ' + nsCoords.prefix + ' ' + nsCoords.uri + closeLine)\n", " output.write('@prefix ' + autCoords.prefix + ' ' + autCoords.uri + closeLine)\n", " output.write('@prefix ' + xsdCoords.prefix + ' ' + xsdCoords.uri + closeLine)\n", " output.write('@prefix ' + iconCoords.prefix + ' ' + iconCoords.uri + closeLine)\n", "\n", " output.write('\\n')\n", "\n", "\n", "filePrefix = '00_SR20OA_'\n", "fileType = 'Martini'\n", "max_entries = 1000000000\n", "\n", "\n", "def get_aut_url(code):\n", " aut_file = open(import_dir + 'AR20AUT_' + fileType + '.csv', newline=\"\")\n", " reader = csv.DictReader(aut_file)\n", " for row in reader:\n", " auth = int(row['AUTH'])\n", " cod = int(code)\n", " role = ''\n", " if row['AUTQ'] != '':\n", " role = row['AUTQ']\n", " else:\n", " role = ''\n", " if auth == cod:\n", " return [row['URL'], role]\n", "\n", "def get_role(role):\n", " role_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/AAT_RUOLI.csv', newline=\"\")\n", " reader = csv.DictReader(role_file)\n", " for row in reader:\n", " if row['Label'] == role:\n", " return row['AAT']\n", "\n", "def get_elem(mtc):\n", " mtc_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/MPP/CSV/AAT_MTC.csv', newline=\"\")\n", " reader = csv.DictReader(mtc_file)\n", " for row in reader:\n", " if row['MTC'] == mtc:\n", " return [row['AAT'], row['Type']]\n", "\n", "with open(import_dir + filePrefix + fileType + '.csv', newline=\"\") as csv_file, open(\n", " export_dir + filePrefix + fileType + '.ttl', 'w') as output:\n", " reader = csv.DictReader(csv_file)\n", " writeTTLHeader(output)\n", " first = True\n", " ii = 0\n", " for row in reader:\n", " # The index ii is used to process a limited number of entries for testing purposes\n", " ii = ii + 1\n", "\n", " if row['RVEL'] == '' or row['RVEL'] == '0':\n", "\n", " sb = ''\n", " subj = ''\n", " #pp = row['OGTD'] + ' (' + row['ACC'] + ') '\n", " if row['SGTI'] != '':\n", " sb = row['SGTI']\n", "\n", " # Triplify the 'codice' -- should exist for every entry\n", " codice = ''\n", " if (row['NCTR'] != '' and row['NCTN'] != ''):\n", " codice = row['NCTR'] + row['NCTN']\n", "\n", " codiceP = ''\n", " if (row['AUTH'] != ''):\n", " codiceP = row['AUTH']\n", "\n", " place = ''\n", " if (row['PRVC'] != ''):\n", " place = row['PRVC']\n", "\n", " columnName = list(row)\n", " url = row['URL']\n", "\n", " # placeHolders\n", " datplaceHolder = museoCoords.prefix + url\n", " e1placeHolder = museoCoords.prefix + url + '_E1'\n", " e3placeHolder = museoCoords.prefix + url + 'E3'\n", " e10placeHolder = museoCoords.prefix + url + '_E10'\n", " e12placeHolder = museoCoords.prefix + url + '_E12'\n", " e13placeHolder = museoCoords.prefix + url + '_E13'\n", " e21placeHolder = museoCoords.prefix + url + '_InE21'\n", " e25placeHolder = museoCoords.prefix + url + '_E25'\n", " e34placeHolder = museoCoords.prefix + url + '_E34'\n", " e35placeHolder1 = museoCoords.prefix + url + '_E35'\n", " e42placeHolder = museoCoords.prefix + url + '_E42'\n", " e42CplaceHolder = museoCoords.prefix + url + '_E42_MPP'\n", " e65placeHolder = museoCoords.prefix + url + '_InE65'\n", " e73placeHolder = museoCoords.prefix + url + '_E73'\n", " e74placeHolder = museoCoords.prefix + url + '_E74'\n", "\n", " if (codice != ''):\n", " line = triple(datplaceHolder, cidocCoords.prefix + 'P1_is_identified_by', e42placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e42placeHolder, nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E42_Identifier') + closeLine\n", " output.write(line)\n", "\n", " line = triple(e42placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"0' + codice + '\\\"') + closeLine\n", " output.write(line)\n", " ###\n", " line = triple(e42placeHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"Codice univoco del bene (NCT)\\\"') + closeLine\n", " output.write(line)\n", "\n", "\n", " # Write E22 Man Made Object & E73 Information Object -- should exist for every entry?\n", " line = triple(datplaceHolder, nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E22_Man-Made_Object') + closeLine\n", " output.write(line)\n", " # Added by AS\n", " line = triple(datplaceHolder, schemaCoords.prefix + 'label', '\\\"' + sb + '\\\"') + closeLine\n", " output.write(line)\n", " # End AS\n", " line = triple(datplaceHolder, cidocCoords.prefix + 'P128_carries', e73placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e73placeHolder, nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E73_Information_Object') + closeLine\n", " output.write(line)\n", "\n", " # AS\n", " ss = ''\n", " if row['SGTI'] != '':\n", " ss = row['SGTI']\n", " else:\n", " ss = 'senza titolo'\n", "\n", " line = triple(e73placeHolder, schemaCoords.prefix + 'label',\n", " '\\\"Opera d\\'arte raffigurante ' + ss + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E73 - P2 - E55\n", "\n", " tt = ''\n", " typeLabel = ''\n", "\n", " if row['OGTD'] == 'dipinto':\n", " tt = aatCoords.prefix + \"300033618\"\n", " elif row['OGTD'] == 'rilievo':\n", " tt = aatCoords.prefix + \"300047230\"\n", " elif row['OGTD'] == 'polittico':\n", " tt = aatCoords.prefix + \"300178235\"\n", " elif row['OGTD'] == 'predella':\n", " tt = aatCoords.prefix + \"300003745\"\n", "\n", " line = triple(e73placeHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " tt) + closeLine\n", " output.write(line)\n", " line = triple(tt, schemaCoords.prefix + 'label',\n", " '\\\"' + row['OGTD'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E73 - P1 - E35\n", "\n", " if row['SGTT'] != '':\n", " line = triple(e73placeHolder, cidocCoords.prefix + 'P1_is_identified_by', e35placeHolder1) + closeLine\n", " output.write(line)\n", " line = triple(e35placeHolder1, nsCoords.prefix + 'type', cidocCoords.prefix + 'E35_Title') + closeLine\n", " output.write(line)\n", " line = triple(e35placeHolder1, schemaCoords.prefix + 'label', '\\\"' + row['SGTT'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E22 - P62 - E1\n", "\n", " if row['SGTI'] != '':\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P62_depicts',\n", " e1placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e1placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E1_CRM_Entity') + closeLine\n", " output.write(line)\n", " line = triple(e1placeHolder,\n", " schemaCoords.prefix + 'label', '\\\"' +\n", " row['SGTI'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(e1placeHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"Identificazione Iconografica\\\"') + closeLine\n", " output.write(line)\n", "\n", " # Attention: these triples are identified only for C100005 Museo di Palazzo Pretorio\n", "\n", " if row['ESC'] == 'C100005':\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P52_has_current_owner',\n", " '') + closeLine\n", " output.write(line)\n", " line = triple('',\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E74_Group') + closeLine\n", " output.write(line)\n", " line = triple('',\n", " schemaCoords.prefix + 'label',\n", " '\\\"Museo di Palazzo Pretorio\\\"') + closeLine\n", " output.write(line)\n", "\n", " currentLocation = ''\n", "\n", " # E22 - P54 - E53\n", " if row['LDCN'] != '':\n", " if row['LDCS'] != '':\n", " currentLocation = row['LDCS']\n", " else:\n", " currentLocation = currentLocation\n", " if row['LDCM'] != '':\n", " currentLocation = currentLocation + ', ' + row['LDCM']\n", " else:\n", " currentLocation = currentLocation\n", " if row['LDCN'] != '':\n", " currentLocation = currentLocation + ', ' + row['LDCN']\n", " else:\n", " currentLocation = currentLocation\n", "\n", " currentLocation = currentLocation + ', ' + row['PVCC'] + ' (' + row['PVCP'] + ')'\n", "\n", " line = triple(datplaceHolder, cidocCoords.prefix + 'P54_has_current_permanent_location',\n", " '\\\"' + currentLocation + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " e12FplaceHolder = ''\n", " if row['DTSI'] != row['DTSF']:\n", " e12FplaceHolder = museoCoords.prefix + url + '_E12F'\n", "\n", " # Write E12 Production -- should exist for every entry?\n", " # E12 P108 E22\n", " line = triple(e12placeHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine\n", " output.write(line)\n", " line = triple(e12placeHolder, nsCoords.prefix + 'type', cidocCoords.prefix + 'E12_Production') + closeLine\n", " output.write(line)\n", " # E73 P108i E12\n", " line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12placeHolder) + closeLine\n", " output.write(line)\n", "\n", " if e12FplaceHolder != '':\n", " line = triple(e12FplaceHolder, cidocCoords.prefix + 'P108_has_produced', datplaceHolder) + closeLine\n", " output.write(line)\n", " line = triple(e12FplaceHolder, nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E12_Production') + closeLine\n", " output.write(line)\n", " line = triple(e12FplaceHolder, schemaCoords.prefix + 'label',\n", " '\\\"Fine produzione di ' + row['SGTI'] + '\\\"') + closeLine\n", " output.write(line)\n", " # E73 P108i E12\n", " line = triple(e73placeHolder, cidocCoords.prefix + 'P108i_was_produced_by', e12FplaceHolder) + closeLine\n", " output.write(line)\n", " # E12 P140i E13\n", " line = triple(e12FplaceHolder,\n", " cidocCoords.prefix + 'P140i_was_attributed_by',\n", " e13placeHolder) + closeLine\n", " output.write(line)\n", " # E12 P2\n", " line = triple(e12FplaceHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"Fine\\\"^^xsd:string') + closeLine\n", " output.write(line)\n", " line = triple(e12placeHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"Inizio\\\"^^xsd:string') + closeLine\n", " output.write(line)\n", " line = triple(e12placeHolder, schemaCoords.prefix + 'label',\n", " '\\\"Inizio produzione di ' + row['SGTI'] + '\\\"') + closeLine\n", " output.write(line)\n", " else:\n", " line = triple(e12placeHolder, schemaCoords.prefix + 'label',\n", " '\\\"Produzione di ' + row['SGTI'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " tcl = []\n", " for name in columnName:\n", " if 'TCL' in name:\n", " tcl.append(name)\n", "\n", " # E12 - P7 - E53\n", " for el in tcl:\n", " i = 0\n", " if row[el] == 'luogo di produzione':\n", " pl = ''\n", " if i == 0:\n", " pl = row['PRVC']\n", " else:\n", " pl = row['PRVC' + i]\n", " line = triple(e12placeHolder,\n", " cidocCoords.prefix + 'P7_took_place_at',\n", " museoCoords.prefix + pl) + closeLine\n", " output.write(line)\n", " if e12FplaceHolder != '':\n", " line = triple(e12FplaceHolder,\n", " cidocCoords.prefix + 'P7_took_place_at',\n", " museoCoords.prefix + pl) + closeLine\n", " output.write(line)\n", " i = i + 1\n", "\n", " # E12 - PC14 - E21\n", " if row['AUTH'] != '':\n", " aut = get_aut_url(row['AUTH'])\n", " aut_url = aut[0]\n", " aut_role = aut[1]\n", " ll = row['AUTN'] + '_' + aut_role\n", " lab = ll.replace(' ', '')\n", " label = lab.replace(',', '')\n", "\n", " AuthorPlaceholder = autCoords.prefix + aut_url\n", " line = triple(museoCoords.prefix + '_' + label,\n", " cidocCoords.prefix + 'P01_has_domain',\n", " e12placeHolder) + closeLine\n", " output.write(line)\n", " if e12FplaceHolder != '':\n", " line = triple(museoCoords.prefix + '_' + label,\n", " cidocCoords.prefix + 'P01_has_domain',\n", " e12FplaceHolder) + closeLine\n", " output.write(line)\n", "\n", " if 'AUTH1' in columnName:\n", " if row['AUTH1'] != '':\n", " aut = get_aut_url(row['AUTH1'])\n", " aut_url = aut[0]\n", " aut_role = aut[1]\n", " ll = row['AUTN1'] + '_' + aut_role\n", " lab = ll.replace(' ', '')\n", " label = lab.replace(',', '')\n", "\n", " AuthorPlaceholder = autCoords.prefix + aut_url\n", " line = triple(museoCoords.prefix + '_' + label,\n", " cidocCoords.prefix + 'P01_has_domain',\n", " e12placeHolder) + closeLine\n", " output.write(line)\n", " if e12FplaceHolder != '':\n", " line = triple(museoCoords.prefix + '_' + label,\n", " cidocCoords.prefix + 'P01_has_domain',\n", " e12FplaceHolder) + closeLine\n", " output.write(line)\n", "\n", " # E12 - PC14 - E21\n", " if 'CMMN' in columnName:\n", " if row['CMMN'] != '':\n", " cc = row['CMMN']\n", " cm = cc.replace(' ', '')\n", " cmmn = cm.replace(',', '')\n", "\n", " cmmPlaceholder = museoCoords.prefix + '_' + cmmn\n", " line = triple(museoCoords.prefix + '_commit_' + cmmn,\n", " cidocCoords.prefix + 'P01_has_domain',\n", " e12placeHolder) + closeLine\n", " output.write(line)\n", " if e12FplaceHolder != '':\n", " line = triple(museoCoords.prefix + '_commit_' + cmmn,\n", " cidocCoords.prefix + 'P01_has_domain',\n", " e12FplaceHolder) + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + '_commit_' + cmmn,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'PC14_carried_out_by') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + '_commit_' + cmmn,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['CMMN'] + ' nel ruolo di committente\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(museoCoords.prefix + '_commit_' + cmmn,\n", " cidocCoords.prefix + 'P02_has_range',\n", " cmmPlaceholder) + closeLine\n", " output.write(line)\n", " line = triple(cmmPlaceholder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E39_Actor') + closeLine\n", " output.write(line)\n", " line = triple(cmmPlaceholder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['CMMN'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(museoCoords.prefix + '_commit_' + cmmn,\n", " cidocCoords.prefix + 'P14.1_in_the_role_of',\n", " museoCoords.prefix + '_client') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + '_client',\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E55_Type') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + '_client',\n", " schemaCoords.prefix + 'label',\n", " '\\\"Committente\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E12 - P4 - E52\n", " if row['DTSI'] != '':\n", " line = triple(e12placeHolder,\n", " cidocCoords.prefix + 'P4_has_time-span',\n", " museoCoords.prefix + row['DTSI']) + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + row['DTSI'],\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E52_Time-Span') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + row['DTSI'],\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['DTSI'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if e12FplaceHolder != '':\n", " line = triple(e12FplaceHolder,\n", " cidocCoords.prefix + 'P4_has_time-span',\n", " museoCoords.prefix + row['DTSF']) + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + row['DTSF'],\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E52_Time-Span') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + row['DTSF'],\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['DTSF'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " tcl = []\n", " for name in columnName:\n", " if 'TCL' in name:\n", " tcl.append(name)\n", "\n", " j = 0\n", "\n", " for el in tcl:\n", " if row[el] != '':\n", " j = j + 1\n", "\n", " last = str(j - 1)\n", "\n", " n = len(tcl) - 1\n", "\n", " for i in range(n):\n", "\n", " k = str(i + 1)\n", "\n", " if i + 1 == 1:\n", " w = ''\n", " else:\n", " w = i\n", "\n", " f = str(w)\n", "\n", " if row['TCL' + k] != '':\n", "\n", " pastActor = ''\n", " newActor = ''\n", " pl = ''\n", "\n", " if row['PRCD' + k] != '':\n", " newActor = ' a ' + row['PRCD' + k]\n", "\n", " if row['PRCD' + f] != '':\n", " pastActor = ' da ' + row['PRCD' + f]\n", " pl = row['PRCD' + f].replace(' ', '')\n", "\n", " newe10placeHolder = museoCoords.prefix + url + \"_E10_\" + k\n", "\n", " line = triple(newe10placeHolder,\n", " cidocCoords.prefix + 'P30_transferred_custody_of',\n", " datplaceHolder) + closeLine\n", " output.write(line)\n", " line = triple(newe10placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine\n", " output.write(line)\n", " line = triple(newe10placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"Passaggio di ' + row['SGTI'] + pastActor +\n", " newActor + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if row['PRDI' + f] != '':\n", " timespan = row['PRDI' + f]\n", "\n", " tt = timespan.replace(' ', '')\n", " tp = tt.replace('.', '')\n", " ts = tp.replace('/', '')\n", "\n", " timespanPlaceholder = museoCoords.prefix + '_' + ts\n", "\n", " # E10 P4 E52\n", "\n", " line = triple(newe10placeHolder,\n", " cidocCoords.prefix + 'P4_has_time-span',\n", " timespanPlaceholder) + closeLine\n", " output.write(line)\n", " line = triple(timespanPlaceholder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E52_Time-Span') + closeLine\n", " output.write(line)\n", " line = triple(timespanPlaceholder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + timespan + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " pastActorPlaceholder = museoCoords.prefix + '_' + pl\n", " newLoc = row['PRCD' + k].replace(' ', '')\n", " newActorPlaceholder = museoCoords.prefix + '_' + newLoc\n", "\n", " # E10 P26 E74 (moved to)\n", "\n", " if newActorPlaceholder != '':\n", " line = triple(newe10placeHolder,\n", " cidocCoords.prefix + 'P29_custody_received_by',\n", " newActorPlaceholder) + closeLine\n", " output.write(line)\n", "\n", " # E10 P27 E74\n", "\n", " pastActorLabel = row['PRCD' + f]\n", "\n", " line = triple(newe10placeHolder,\n", " cidocCoords.prefix + 'P28_custody_surrendered_by',\n", " pastActorPlaceholder) + closeLine\n", " output.write(line)\n", " line = triple(pastActorPlaceholder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E39_Actor') + closeLine\n", " output.write(line)\n", " line = triple(pastActorPlaceholder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + pastActorLabel + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P49_has_former_or_current_keeper',\n", " pastActorPlaceholder) + closeLine\n", " output.write(line)\n", "\n", " # E74 P74 E53\n", "\n", " pastResidenceLabel = row['PRVC' + f]\n", " pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + f]\n", "\n", " line = triple(pastActorPlaceholder,\n", " cidocCoords.prefix + 'P74_has_current_or_former_residence',\n", " pastResidencePlaceHolder) + closeLine\n", " output.write(line)\n", "\n", " ####\n", "\n", " pastActor = ''\n", " newActor = ''\n", " pl = ''\n", "\n", " if row['LDCN'] != '':\n", " newActor = ' a ' + row['LDCN']\n", "\n", " if row['PRCD' + last] != '':\n", " pastActor = ' da ' + row['PRCD' + last]\n", " pl = row['PRCD' + last].replace(' ', '')\n", "\n", " line = triple(e10placeHolder,\n", " cidocCoords.prefix + 'P30_transferred_custody_of',\n", " datplaceHolder) + closeLine\n", " output.write(line)\n", " line = triple(e10placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E10_Transfer_of_Custody') + closeLine\n", " output.write(line)\n", " line = triple(e10placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"Passaggio di ' + row['SGTI'] + pastActor +\n", " newActor + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if row['PRDU' + last] != '':\n", " timespan = row['PRDU' + last]\n", "\n", " tt = timespan.replace(' ', '')\n", " ts = tt.replace('/', '')\n", "\n", " timespanPlaceholder = museoCoords.prefix + '_' + ts\n", "\n", " # E10 P4 E52\n", "\n", " line = triple(e10placeHolder,\n", " cidocCoords.prefix + 'P4_has_time-span',\n", " timespanPlaceholder) + closeLine\n", " output.write(line)\n", " line = triple(timespanPlaceholder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E52_Time-Span') + closeLine\n", " output.write(line)\n", " line = triple(timespanPlaceholder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + timespan + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " pastActorPlaceholder = museoCoords.prefix + '_' + pl\n", " newLocPlaceholder = e74placeHolder\n", "\n", " # E10 P26 E74 (moved to)\n", "\n", " if newLocPlaceholder != '':\n", " line = triple(e10placeHolder,\n", " cidocCoords.prefix + 'P29_custody_received_by',\n", " newLocPlaceholder) + closeLine\n", " output.write(line)\n", "\n", " # E10 P27 E74\n", "\n", " pastActorLabel = row['PRCD' + last]\n", "\n", " line = triple(e10placeHolder,\n", " cidocCoords.prefix + 'P28_custody_surrendered_by',\n", " pastActorPlaceholder) + closeLine\n", " output.write(line)\n", " line = triple(pastActorPlaceholder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E39_Actor') + closeLine\n", " output.write(line)\n", " line = triple(pastActorPlaceholder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + pastActorLabel + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P49_has_former_or_current_keeper',\n", " pastActorPlaceholder) + closeLine\n", " output.write(line)\n", "\n", " # E74 P74 E53\n", "\n", " pastResidenceLabel = row['PRVC' + last]\n", " pastResidencePlaceHolder = museoCoords.prefix + row['PRVC' + last]\n", "\n", " if row['PRVP' + last] != '':\n", " pastResidenceLabel = pastResidenceLabel + ' (' + row['PRVP' + last] + ')'\n", "\n", " if row['PRVR' + last] != '':\n", " pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVR' + last]\n", "\n", " if row['PRVS' + last] != '':\n", " pastResidenceLabel = pastResidenceLabel + ', ' + row['PRVS' + last]\n", "\n", " line = triple(pastActorPlaceholder,\n", " cidocCoords.prefix + 'P74_has_current_or_former_residence',\n", " pastResidencePlaceHolder) + closeLine\n", " output.write(line)\n", " line = triple(pastResidencePlaceHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E53_Place') + closeLine\n", " output.write(line)\n", "\n", " # E22 P44 E3\n", "\n", " if row['STCC'] != '':\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P44_has_condition',\n", " e3placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e3placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E3_Condition_State') + closeLine\n", " output.write(line)\n", " line = triple(e3placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"Condizione di: ' + row['SGTI'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(e3placeHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"' + row['STCC'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E22 P65 E34\n", " if (row['ISRI'] != ''):\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P56_bears_feature',\n", " e25placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e25placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E25_Man-Made_Feature') + closeLine\n", " output.write(line)\n", " line = triple(e25placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"Iscrizione su ' + subj + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(e25placeHolder,\n", " cidocCoords.prefix + 'P128_carries',\n", " e34placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e34placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E34_Inscription') + closeLine\n", " output.write(line)\n", " line = triple(e34placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"Iscrizione: ' + row['ISRI'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " pl = row['ISRI'].replace(' ', '-')\n", " pla = pl.replace('.', '')\n", " line = triple(e34placeHolder,\n", " cidocCoords.prefix + 'P3_has_note',\n", " '\\\"' + row['ISRI'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E34 P2 E55\n", "\n", " if (row['ISRT'] != ''):\n", " rr = row['ISRT'].replace(' ', '')\n", " line = triple(e34placeHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"' + row['ISRT'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E34 P72 E56\n", "\n", " if (row['ISRL'] != ''):\n", " line = triple(e34placeHolder,\n", " cidocCoords.prefix + 'P72_has_language',\n", " museoCoords.prefix + '_' + row['ISRL']) + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + '_' + row['ISRL'],\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E56_Language') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + '_' + row['ISRL'],\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['ISRL'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if (row['ISRA'] != '') or (row['ISRS'] != ''):\n", " line = triple(e34placeHolder,\n", " cidocCoords.prefix + 'P92i_was_brought_into_existence_by',\n", " e65placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e65placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E65_Creation') + closeLine\n", " output.write(line)\n", " line = triple(e65placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"Creazione dell\\'Iscrizione ' + row['ISRI'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if row['ISRA'] != '':\n", " line = triple(e65placeHolder,\n", " cidocCoords.prefix + 'P14_carried_out_by',\n", " e21placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e21placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E21_Person') + closeLine\n", " output.write(line)\n", " line = triple(e21placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['ISRA'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if row['ISRS']:\n", " ss = row['ISRS'].replace(' ', '')\n", " tecPlaceholder = museoCoords.prefix + url + '_' + ss\n", " line = triple(e65placeHolder,\n", " cidocCoords.prefix + 'P32_used_general_technique',\n", " tecPlaceholder) + closeLine\n", " output.write(line)\n", " line = triple(tecPlaceholder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E55_Type') + closeLine\n", " output.write(line)\n", " line = triple(tecPlaceholder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['ISRS'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if row['ISRP'] != '':\n", " line = triple(e25placeHolder,\n", " cidocCoords.prefix + 'P3_has_note',\n", " '\\\"' + row['ISRP'] + '\\\"^^xsd:string') + closeLine\n", " output.write(line)\n", "\n", " unit = ''\n", " if (row['MISU'] != ''):\n", " unit = row['MISU']\n", "\n", " valueA = ''\n", " valueL = ''\n", "\n", " if (row['MISA'] != ''):\n", " value = row['MISA']\n", " valueA = value.replace(',', 'v')\n", "\n", " if (row['MISL'] != ''):\n", " value = row['MISL']\n", " valueL = value.replace(',', 'v')\n", "\n", " # Altezza\n", " # E22 P43 E54\n", " if (row['MISA'] != ''):\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P43_has_dimension',\n", " museoCoords.prefix + url + '_Altezza') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + url + '_Altezza',\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E54_Dimension') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + url + '_Altezza',\n", " schemaCoords.prefix + 'label',\n", " '\\\"Altezza: ' + row['MISA'] + row['MISU'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E54 P90 E60\n", " line = triple(museoCoords.prefix + url + '_Altezza',\n", " cidocCoords.prefix + 'P90_has_value',\n", " '\\\"' + row['MISA'] + '\\\"^^xsd:integer') + closeLine\n", " output.write(line)\n", "\n", " # E54 P2 E55\n", "\n", " line = triple(museoCoords.prefix + url + '_Altezza',\n", " cidocCoords.prefix + 'P2_has_type',\n", " aatCoords.prefix + '300055644') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + '300055644',\n", " schemaCoords.prefix + 'label',\n", " '\\\"altezza\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E54 P91 E58\n", " if (row['MISU'] != ''):\n", " line = triple(museoCoords.prefix + url + '_Altezza',\n", " cidocCoords.prefix + 'P91_has_unit',\n", " aatCoords.prefix + '300379098') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + '300379098',\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + '300379098',\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['MISU'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # Larghezza\n", " # E22 P43 E54\n", " if (row['MISL'] != ''):\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P43_has_dimension',\n", " museoCoords.prefix + url + '_Larghezza') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + url + '_Larghezza',\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E54_Dimension') + closeLine\n", " output.write(line)\n", " line = triple(museoCoords.prefix + url + '_Larghezza',\n", " schemaCoords.prefix + 'label',\n", " '\\\"Larghezza: ' + row['MISL'] + row['MISU'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E54 P90 E60\n", " line = triple(museoCoords.prefix + url + '_Larghezza',\n", " cidocCoords.prefix + 'P90_has_value',\n", " '\\\"' + row['MISL'] + '\\\"^^xsd:integer') + closeLine\n", " output.write(line)\n", "\n", " # E54 P2 E55\n", "\n", " line = triple(museoCoords.prefix + url + '_Larghezza',\n", " cidocCoords.prefix + 'P2_has_type',\n", " aatCoords.prefix + '300055647') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + '300055647',\n", " schemaCoords.prefix + 'label',\n", " '\\\"larghezza\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E54 P91 E58\n", " if (row['MISU'] != ''):\n", " line = triple(museoCoords.prefix + url + '_Larghezza',\n", " cidocCoords.prefix + 'P91_has_unit',\n", " aatCoords.prefix + '300379098') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + '300379098',\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E58_Measurement_Unit') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + '300379098',\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + row['MISU'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " if row['MTC'] != '':\n", " mtcs = []\n", " if '/' in row['MTC']:\n", " mtcs = row['MTC'].split('/')\n", " else:\n", " mtcs.append(row['MTC'])\n", "\n", "\n", " for tc in mtcs:\n", " mtc = tc.lstrip()\n", " el = get_elem(mtc)\n", " if el[1] == 'MTC/M':\n", "\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P45_consists_of',\n", " aatCoords.prefix + el[0]) + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + el[0],\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E57_Material') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + el[0],\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + mtc + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " else: #E12 Production - P32 used technique - E55 Type\n", " line = triple(e12placeHolder,\n", " cidocCoords.prefix + 'P32_used_general_technique',\n", " aatCoords.prefix + el[0]) + closeLine\n", " output.write(line)\n", " if e12FplaceHolder != '':\n", " line = triple(e12FplaceHolder,\n", " cidocCoords.prefix + 'P32_used_general_technique',\n", " aatCoords.prefix + el[0]) + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + el[0],\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E55_Type') + closeLine\n", " output.write(line)\n", " line = triple(aatCoords.prefix + el[0],\n", " schemaCoords.prefix + 'label',\n", " '\\\"' + mtc + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " # E12 P140i E13\n", " if row['AUTM'] != '':\n", " mot = row['AUTM'].replace(' ', '_')\n", " e55placeHolder = museoCoords.prefix + url + '_' + mot\n", " line = triple(e12placeHolder,\n", " cidocCoords.prefix + 'P140i_was_attributed_by',\n", " e13placeHolder) + closeLine\n", " output.write(line)\n", " line = triple(e13placeHolder,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E13_Attribute_Assignment') + closeLine\n", " output.write(line)\n", " line = triple(e13placeHolder,\n", " schemaCoords.prefix + 'label',\n", " '\\\"Motivazione attribuzione\\\"') + closeLine\n", " output.write(line)\n", "\n", " line = triple(e13placeHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"' + row['AUTM'] + '\\\"') + closeLine\n", " output.write(line)\n", "\n", " aut = get_aut_url(row['AUTH'])\n", " aut_url = aut[0]\n", " AuthorPlaceholder = autCoords.prefix + aut_url\n", "\n", " line = triple(e13placeHolder,\n", " cidocCoords.prefix + 'P141_assigned',\n", " AuthorPlaceholder) + closeLine\n", " output.write(line)\n", "\n", " # E22 P44 E62\n", "\n", " if row['NSC'] != '':\n", " ph = row['NSC'].replace(' \"', ' «')\n", " phr = ph.replace('\"', '»')\n", "\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P3_has_note',\n", " '\\\"' + phr + '\\\"^^xsd:string') + closeLine\n", " output.write(line)\n", "\n", " iconclass = row['DESI']\n", " icon = iconclass.replace(' ', '')\n", "\n", " list_icon = []\n", "\n", " if ':' in icon:\n", " list_icon = icon.split(':')\n", " else:\n", " list_icon.append(icon)\n", "\n", " for ic in list_icon:\n", " url = 'https://iconclass.org/rdk/' + str(ic)\n", " html = urlopen(url).read()\n", " soup = BeautifulSoup(html, 'html.parser')\n", "\n", " # kill all script and style elements\n", " for script in soup([\"script\", \"style\"]):\n", " script.extract() # rip it out\n", "\n", " # get text\n", " text = soup.get_text()\n", "\n", " pretty = soup.prettify()\n", "\n", " ff = soup.find(\"div\", {\"id\": \"ic_current\"})\n", " dd = ff.find(\"a\", {\"class\", \"ic_notation\"})\n", " ss = dd.text\n", " x = ss.find(' ')\n", "\n", " icon_label = ss[x + 1:]\n", "\n", " ur = ic.replace(\"(\", \"%28\")\n", " urr = ur.replace(\")\", \"%29\")\n", "\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P62_depicts',\n", " iconCoords.prefix + urr) + closeLine\n", " output.write(line)\n", " line = triple(iconCoords.prefix + urr,\n", " nsCoords.prefix + 'type',\n", " cidocCoords.prefix + 'E1_CRM_Entity') + closeLine\n", " output.write(line)\n", "\n", " # P2 Opera d'arte\n", " line = triple(datplaceHolder,\n", " cidocCoords.prefix + 'P2_has_type',\n", " '\\\"Opera d\\'Arte\\\"') + closeLine\n", " output.write(line)\n", "\n", " output.write('\\n')\n", " #\n", " #\n", " # Limit number of entries processed (if desired)\n", " if (ii > max_entries):\n", " break" ] } ], "metadata": { "interpreter": { "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" }, "kernelspec": { "display_name": "Python 3.9.0 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }