{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Add the ASPO `merce` column to the OVI lemma table\n",
    "\n",
    "For every row of the OVI lemma/iperlemma CSV, look the lemma up in the *Lemmi con Merce* CSV and write the row, followed by the matching `merce` value (empty when there is no match), to a new CSV.\n",
    "\n",
    "Input and output locations are set in the configuration cell below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import codecs\n",
    "import csv\n",
    "import io\n",
    "import os\n",
    "import re\n",
    "import tokenize\n",
    "import xml.etree.ElementTree as ET\n",
    "from collections import OrderedDict\n",
    "from functools import lru_cache\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): these are absolute, machine-specific paths, kept exactly as they were.\n",
    "# Edit them here (and only here) to run the notebook on another machine.\n",
    "MERCE_LOOKUP_CSV = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/OVI_lemmi_iperlemmi_OLD - Lemmi con Merce copia.csv'\n",
    "LEMMI_CSV = '/Users/federicaspinelli/TEAMOVI/Parser/OVI/Lemmi/data_lemmi_iperlemmi_thing_id copia.csv'\n",
    "OUTPUT_CSV = '/Users/federicaspinelli/TEAMOVI/Parser/DATA/OVI/CSV/data_lemmi_iperlemmi_thing_id_merci_ASPO.csv'\n",
    "\n",
    "# Header row of the output file.\n",
    "OUTPUT_COLUMNS = ['sigla', 'lemma', 'n_lemma', 'pos', 'iperlemma', 'n_iperlemma', 'commento', 'id', 'cosa', 'merce_ASPO']\n",
    "\n",
    "# Input column that feeds each of the first nine output columns, in header order.\n",
    "# The output column 'cosa' is filled from the input column 'thing'; 'merce_ASPO' comes from the lookup.\n",
    "INPUT_COLUMNS = ['sigla', 'lemma', 'n_lemma', 'pos', 'iperlemma', 'n_iperlemma', 'commento', 'id', 'thing']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Lookup and merge helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@lru_cache(maxsize=None)\n",
    "def _load_merce_by_lemma(path):\n",
    "    \"\"\"Read the lookup CSV at ``path`` once and return a ``{lemma: merce}`` dict.\n",
    "\n",
    "    The file must have ``lemma`` and ``merce`` columns. When a lemma occurs on\n",
    "    several rows the first row wins, matching a top-to-bottom scan of the file.\n",
    "    The result is cached per path; re-running this cell discards the cache.\n",
    "    \"\"\"\n",
    "    merce_by_lemma = {}\n",
    "    # newline='' is what the csv module expects for files it reads or writes.\n",
    "    with open(path, newline='', encoding='utf-8') as link_file:\n",
    "        for row in csv.DictReader(link_file):\n",
    "            merce_by_lemma.setdefault(row['lemma'], row['merce'])\n",
    "    return merce_by_lemma\n",
    "\n",
    "\n",
    "def getIfMerce(lemma):\n",
    "    \"\"\"Return the ``merce`` value recorded for ``lemma`` in ``MERCE_LOOKUP_CSV``.\n",
    "\n",
    "    Returns ``None`` when the lemma does not appear in the lookup file.\n",
    "    \"\"\"\n",
    "    return _load_merce_by_lemma(MERCE_LOOKUP_CSV).get(lemma)\n",
    "\n",
    "\n",
    "def write_lemmi_with_merce(lemmi_csv, output_csv):\n",
    "    \"\"\"Copy the rows of ``lemmi_csv`` to ``output_csv`` with a trailing ``merce_ASPO`` column.\n",
    "\n",
    "    The output starts with ``OUTPUT_COLUMNS`` as its header. Each data row holds\n",
    "    the ``INPUT_COLUMNS`` values of the input row followed by the looked-up merce,\n",
    "    or an empty string when the lemma has no match.\n",
    "\n",
    "    Returns ``(rows_written, rows_matched)``.\n",
    "    Raises ``KeyError`` if the input file lacks one of ``INPUT_COLUMNS``.\n",
    "    \"\"\"\n",
    "    rows_written = 0\n",
    "    rows_matched = 0\n",
    "    with open(lemmi_csv, newline='', encoding='utf-8') as merge_file:\n",
    "        with open(output_csv, 'w', newline='', encoding='utf-8') as confronto_data:\n",
    "            csvwriter = csv.writer(confronto_data)\n",
    "            csvwriter.writerow(OUTPUT_COLUMNS)\n",
    "            for row in csv.DictReader(merge_file):\n",
    "                merce = getIfMerce(row['lemma'])\n",
    "                # One value per header column, so the data lines up with OUTPUT_COLUMNS.\n",
    "                line = [row[name] for name in INPUT_COLUMNS]\n",
    "                line.append(merce if merce is not None else '')\n",
    "                csvwriter.writerow(line)\n",
    "                rows_written += 1\n",
    "                if merce is not None:\n",
    "                    rows_matched += 1\n",
    "    return rows_written, rows_matched"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the output file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rows_written, rows_matched = write_lemmi_with_merce(LEMMI_CSV, OUTPUT_CSV)\n",
    "# A single summary line instead of printing the lookup result for every row.\n",
    "print(f'{rows_matched} of {rows_written} rows matched a merce')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}