{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import xml.etree.ElementTree as ET\n", "import os\n", "import csv\n", "from collections import OrderedDict\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_18830/2994982029.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mn_iperlemma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'n_iperlemma'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mcommento\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'commento'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 36\u001b[0;31m \u001b[0mid\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetIdAspo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msigla_ovi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 37\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0mline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msigla_ovi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/var/folders/_n/1ldwyw1s547dcpvn3485fr7r0000gn/T/ipykernel_18830/2994982029.py\u001b[0m in \u001b[0;36mgetIdAspo\u001b[0;34m(sigla_ovi)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mlink_file\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/SIGLA-IDASPO.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDictReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlink_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 20\u001b[0m \u001b[0msigla_aspo\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sigla'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0msa\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msigla_ovi\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/csv.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0;31m# Used only for its side effect.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfieldnames\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 111\u001b[0;31m \u001b[0mrow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 112\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mline_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mline_num\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "import csv\n", "import codecs\n", "import pandas as pd\n", "import re\n", "import os\n", "import io\n", "import tokenize\n", "\n", "confronto_data = open('data_lemmi_iperlemmi_codice', 'w')\n", "csvwriter = csv.writer(confronto_data)\n", "\n", "params = ['sigla', 'n_lemma', 'lemma', 'pos', 'iperlemma', 'n_iperlemma', 'commento', 'id']\n", "\n", "csvwriter.writerow(params)\n", "\n", "def getIdAspo(sigla_ovi):\n", " link_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/SIGLA-IDASPO.csv')\n", " reader = csv.DictReader(link_file)\n", " for row in reader:\n", " sigla_aspo = row['sigla']\n", " sa = sigla_ovi\n", " if (sigla_aspo == sa):\n", " return row['id']\n", "merge_file = open('/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/LEMMI.csv')\n", "reader = csv.DictReader(merge_file)\n", "\n", "for row in reader:\n", " line = []\n", " sigla_ovi = row['sigla']\n", " n_lemma = row['n_lemma']\n", " lemma = row['lemma']\n", " pos = row['pos']\n", " iperlemma = row['iperlemma']\n", " n_iperlemma = row['n_iperlemma']\n", " commento = row['commento']\n", " id = getIdAspo(sigla_ovi)\n", " line.append(sigla_ovi)\n", " line.append(n_lemma)\n", " line.append(lemma)\n", " line.append(pos)\n", " line.append(iperlemma)\n", " line.append(n_iperlemma)\n", " line.append(commento)\n", "\n", " if id is not None:\n", " line.append(id)\n", " else:\n", " line.append(\"\")\n", "\n", " csvwriter.writerow(line)\n", "\n", "confronto_data.close()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" } }, "nbformat": 4, "nbformat_minor": 2 }