# Date.py
import csv
import os
import re
import sys
import xml.etree.ElementTree as Xet
from typing import Dict, Any
from xml.dom import minidom

import pandas as pd
  9. AspoDate_data = open('AspoDate.csv', 'w')
  10. csvwriter = csv.writer(AspoDate_data)
  11. params = ['URL', 'Label', 'Estremo temporale inferiore', 'Estremo temporale superiore']
  12. csvwriter.writerow(params)
  13. '''
  14. date_file = open('/Users/alessiaspadi/Documents/RESTORE/temp_ASPO/Cronologia/date_aspo.csv')
  15. reader = csv.DictReader(date_file)
  16. for row in reader:
  17. date = row['label']
  18. dd = date.replace('Data di partenza: ', '')
  19. dt = dd.replace('Data di arrivo: ', '')
  20. ss = dt.split('/')
  21. norm = ss[2] + '-' + ss[1] + '-' + ss[0]
  22. year = ss[2]
  23. rr = [row['o'], date, norm, year]
  24. res.encode('utf8')
  25. csvwriter.writerow(rr)
  26. '''
  27. date_file = open('/Users/alessiaspadi/Documents/RESTORE/temp_ASPO/Cronologia/Date2.csv')
  28. reader = csv.DictReader(date_file)
  29. for row in reader:
  30. lab = row['label']
  31. lk = lab.encode("iso-8859-1")
  32. ll = lk.decode("utf8")
  33. kk = ll.replace("\uFFFD", "")
  34. pr = kk.replace(" ", "")
  35. periodo = pr.replace("Periodo:", "")
  36. date = periodo.split("-")
  37. mesi = ['gen.', 'feb.', 'mar.', 'apr.', 'mag.', 'giu.', 'lug.', 'ago.', 'set.', 'ott.', 'nov.', 'dic.']
  38. link = row['s']
  39. riga = [link, kk]
  40. for data in date:
  41. dat = ''
  42. if 'gen.' not in data and 'feb.' not in data and 'mar.' not in data and 'apr.' not in data and 'mag.' not in data and 'giu.'not in data and 'lug.' not in data and 'ago.' not in data and 'set.' not in data and 'ott.' not in data and 'nov.' not in data and 'dic.' not in data:
  43. dat = data
  44. if 'gen.' in data:
  45. dat = data.replace('gen.', "/1/")
  46. if 'feb.' in data:
  47. dat = data.replace('feb.', "/2/")
  48. if 'mar.' in data:
  49. dat = data.replace('mar.', "/3/")
  50. if 'apr.' in data:
  51. dat = data.replace('apr.', "/4/")
  52. if 'mag.' in data:
  53. dat = data.replace('mag.', "/5/")
  54. if 'giu.' in data:
  55. dat = data.replace('giu.', "/6/")
  56. if 'lug.' in data:
  57. dat = data.replace('lug.', "/7/")
  58. if 'ago.' in data:
  59. dat = data.replace('ago.', "/8/")
  60. if 'set.' in data:
  61. dat = data.replace('set.', "/9/")
  62. if 'ott.' in data:
  63. dat = data.replace('ott.', "/10/")
  64. if 'nov.' in data:
  65. dat = data.replace('nov.', "/11/")
  66. if 'dic.' in data:
  67. dat = data.replace('dic.', "/12/")
  68. tt = ''
  69. if dat.endswith("/"):
  70. i = len(dat)
  71. tt = dat[:i-1]
  72. else:
  73. tt = dat
  74. anno = ''
  75. if '/' in tt:
  76. ss = tt.split('/')
  77. if len(ss) == 2:
  78. anno = ss[0]
  79. if len(ss) == 3:
  80. if len(ss[0]) < 4:
  81. anno = ss[2]
  82. else:
  83. anno = ss[0]
  84. else:
  85. anno = tt
  86. riga.append(anno)
  87. csvwriter.writerow(riga)
  88. AspoDate_data.close()