From 50b5fae6ea62ff16ae6d298689428e86655ce3bc Mon Sep 17 00:00:00 2001
From: daniel_shazura <daniel@shazura.com>
Date: Thu, 25 Nov 2021 10:27:12 +0100
Subject: [PATCH] fix mongo queries + readmes

---
 README.md                                     | 33 +++++++++++++++++++
 data_gathering/cities/README.md               |  2 ++
 website/README.md                             | 21 ++++++++++++
 website/app/models/mongodb.py                 | 31 ++++++++---------
 .../app/templates/utils/modal_contratos.html  |  9 ++---
 website/app/views/main.py                     |  7 ++--
 6 files changed, 81 insertions(+), 22 deletions(-)
 create mode 100644 README.md
 create mode 100644 data_gathering/cities/README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..afc75ca
--- /dev/null
+++ b/README.md
@@ -0,0 +1,33 @@
+# Contrataciones Madrid
+
+Bienvenido/a a Contrataciones Madrid, tu portal agregador
+de contratos, listas electorales y listas de evasores fiscales favoritos.
+
+
+En Contrataciones Madrid puedes encontrar 2 tipos distintos de carpetas:
+- `data_gathering`: Aquí encontrarás archivos auxiliares y herramientas para obtener y generar
+los datos de la misma manera que lo hemos hecho nosotros
+- `website`: Aquí encontrarás la página web y todo lo necesario para ejecutarla, incluyendo los links
+para que te puedas bajar los datos e inicializar el mongo igual que nosotros.
+
+
+## Esto funciona gracias a...
+
+[LibreBOR](https://librebor.me/)
+
+
+## Algunas estadísticas
+
+Actualmente contamos con 137649 contratos, de los casi 3M que componen la base de datos
+de la Comunidad de Madrid. ¿Por qué no tenemos todos? Para esta prueba de concepto no era necesario,
+además, 137649 contratos ocupan 500MB, por lo que los 3M se podrían salir un poco de nuestro ___scope___.
+
+
+De los 130K de contratos, alrededor de 100K han sido adjudicados.
+
+
+De los 100K de contratos adjudicados, solo hay 6000 empresas diferentes, agrupando alguna hasta 4000 contrataciones.
+
+
+Los papeles de paraísos fiscales utilizados han sido solamente los "Panama Papers", pero se pueden añadir nuevos papeles
+de forma sencilla con incluir una nueva colección en el MongoDB de paraísos fiscales y los nombres de personas/empresas en la misma.
diff --git a/data_gathering/cities/README.md b/data_gathering/cities/README.md
new file mode 100644
index 0000000..6d05cfe
--- /dev/null
+++ b/data_gathering/cities/README.md
@@ -0,0 +1,2 @@
+Script auxiliar para generar una colección de empresas, de forma que podamos obtener las ciudades/localidades
+a las que pertenecen. Adicionalmente, guarda en base de datos los contratos con su petición a LibreBOR ya calculada.
diff --git a/website/README.md b/website/README.md
index d8a4a40..1db10d3 100644
--- a/website/README.md
+++ b/website/README.md
@@ -15,3 +15,24 @@ python wsgi.py
 
 Te instalas mongo: https://docs.mongodb.com/manual/administration/install-community/
 Y lo inicias según te toque
+
+
+## Inserción de datos en el Mongo
+
+Actualmente existen 2 versiones de la base de datos.
+
+1. Contrataciones según se han extraído del portal de contrataciones de la CAM (`data.zip` en releases)
+2. Backup de MongoDB DESPUÉS de haber buscado en LibreBOR la información relevante a las 200 empresas más frecuentes. (`data_librebor.zip` en releases)
+
+La segunda versión de la base de datos nos asegura tener siempre información para un gran subset de contrataciones y además nos permite
+no realizar peticiones a LibreBOR. Especialmente útil cuando no tienes una API Key o se han consumido las peticiones a la misma.
+
+### Con data.zip
+
+Ejecuta el script `get_data_zip.sh` dentro de la carpeta `app/models` y esto debería descargar el zip y extraerlo en la misma carpeta para generarte
+todos los datos suficientes para que cuando inicies la app por primera vez se realicen todas las inserciones en MongoDB.
+
+### Con data\_librebor.zip
+
+Ejecuta el script `backup_mongo.sh` dentro de la carpeta `app/models` y esto debería borrarte la base de datos `dme` en caso de existir y aplicar la copia
+guardada dentro de `data_librebor.zip`. No contiene los jsons originales (a diferencia de `data.zip`)
diff --git a/website/app/models/mongodb.py b/website/app/models/mongodb.py
index 9c17d8f..b4b527f 100644
--- a/website/app/models/mongodb.py
+++ b/website/app/models/mongodb.py
@@ -66,16 +66,16 @@ def __init__(self):
     def init_db(self):
         """ Inicializa la bbdd si alguna colección no existe
         """
-        if len(self.get_all_contracts()) == 0:
+        if self.get_all_contracts().count() == 0:
             self.add_contracts()
 
-        if len(self.get_all_electoral_lists()) == 0:
+        if self.get_all_electoral_lists().count() == 0:
             self.add_electoral_lists()
 
-        if len(self.get_all_offshore_papers()) == 0:
+        if self.get_all_offshore_papers().count() == 0:
             self.add_offshore_papers()
 
-        if len(self.get_all_companies()) == 0:
+        if self.get_all_companies().count() == 0:
             self.add_company_dataset()
 
     def add_contracts(self):
@@ -96,20 +96,19 @@ def delete_all_contracts(self):
         self.database[CONTRACT_COLLECTION].drop()
 
     def get_all_contracts(self):
-        return list(self.database[CONTRACT_COLLECTION].find({}))
+        return self.database[CONTRACT_COLLECTION].find({})
 
     def get_contracts_by_title(self, title):
-        return list(
-            self.database[CONTRACT_COLLECTION].find({'titulo': {'$regex': ".*" + title + ".*", '$options': 'i'}}))
+        return self.database[CONTRACT_COLLECTION].find({'titulo': {'$regex': ".*" + title + ".*", '$options': 'i'}})
 
     def get_contract_by_id(self, id):
         return self.database[CONTRACT_COLLECTION].find_one({'_id': id})
 
     def get_contracts_categories(self):
-        return list(self.database[CONTRACT_COLLECTION].distinct('categoria'))
+        return self.database[CONTRACT_COLLECTION].distinct('categoria')
 
     def get_contracts_by_category(self, category):
-        return list(self.database[CONTRACT_COLLECTION].find({'categoria': category}))
+        return self.database[CONTRACT_COLLECTION].find({'categoria': category})
 
     def add_electoral_lists(self):
         self.__insert_jsons(ELECTORAL_LISTS_COLLECTION, ELECTORAL_LISTS_PATH)
@@ -118,11 +117,13 @@ def delete_all_electoral_lists(self):
         self.database[ELECTORAL_LISTS_COLLECTION].drop()
 
     def get_all_electoral_lists(self):
-        return list(self.database[ELECTORAL_LISTS_COLLECTION].find({}))
+        return self.database[ELECTORAL_LISTS_COLLECTION].find({})
 
     def get_party_by_name(self, name):
-        return list(self.database[ELECTORAL_LISTS_COLLECTION].find(
-            {'candidatos': {'$regex': ".*" + query_format(name) + ".*", '$options': 'xsi'}}, {'partido': 1, '_id': 0}))
+        return self.database[ELECTORAL_LISTS_COLLECTION].find(
+            {'candidatos': {'$regex': ".*" + query_format(name) + ".*", '$options': 'xsi'}},
+            {'partido': 1, '_id': 0}
+        )
 
     def add_offshore_papers(self):
         self.__insert_jsons(OFFSHORE_COLLECTION, OFFSHORE_PATH)
@@ -131,7 +132,7 @@ def delete_offshore_papers(self):
         self.database[OFFSHORE_COLLECTION].drop()
 
     def get_all_offshore_papers(self):
-        return list(self.database[OFFSHORE_COLLECTION].find({}))
+        return self.database[OFFSHORE_COLLECTION].find({})
 
     def __insert_jsons(self, collection, path):
         if not os.path.exists(path): return
@@ -166,7 +167,7 @@ def add_company(self, company:dict):
     def get_all_companies(self):
         """ Devuelve todas las companía indexadas
         """
-        return list(self.database[COMPANY_COLLECTION].find({}))
+        return self.database[COMPANY_COLLECTION].find({})
 
     def add_company_dataset(self):
         """ Crea la colección de compañías a partir de los JSON
@@ -177,4 +178,4 @@ def filter_company(self, filters:dict):
         """ Devuelve las compañía que cumple los filtros.
             filters es un diccionario key:valç
         """
-        return list(self.database[COMPANY_COLLECTION].find(filters))
+        return self.database[COMPANY_COLLECTION].find(filters)
diff --git a/website/app/templates/utils/modal_contratos.html b/website/app/templates/utils/modal_contratos.html
index cf6fd3f..2364bb0 100644
--- a/website/app/templates/utils/modal_contratos.html
+++ b/website/app/templates/utils/modal_contratos.html
@@ -15,7 +15,8 @@ <h5>Información del contrato:</h5>
                         <tbody>
                             {% set keys = contrato.keys()|sort %}
                             {% for k in keys %}
-                                {% if k == 'adjudicacion' %}
+                              {% if k == "librebor" %}
+                              {% elif k == "adjudicacion" %}
                                 {% if contrato[k] is mapping %}
                                 <tr>
                                   <th scope='row'>{{ k|replace('-', ' ')|title }}</th>
@@ -33,19 +34,19 @@ <h5>Información del contrato:</h5>
                                   </td>
                                 </tr>
                                 {% endif %}
-                                {% elif k == 'url' %}
+                              {% elif k == 'url' %}
                                 <tr>
                                     <th scope='row'>{{ k|replace('-', ' ')|title }}</th>
                                     <td>
                                         <a href="{{ contrato[k] }}" target="_blank">{{ contrato[k] }}</a>
                                     </td>
                                 </tr>
-                                {% elif k != '_id' %}
+                              {% elif k != '_id' %}
                                 <tr>
                                     <th scope='row'>{{ k|replace('-', ' ')|title }}</th>
                                     <td>{{ contrato[k] }}</td>
                                 </tr>
-                                {% endif %}
+                              {% endif %}
                             {% endfor %}
                         </tbody>
                     </table>
diff --git a/website/app/views/main.py b/website/app/views/main.py
index be2ec20..5ecdb9f 100644
--- a/website/app/views/main.py
+++ b/website/app/views/main.py
@@ -21,16 +21,17 @@ def index():
     """ Página principal
     """
     all_contracts = app.mongo.get_all_contracts()
-    category_list = [(i, len(app.mongo.get_contracts_by_category(i))) for i in
+    category_list = [(i, app.mongo.get_contracts_by_category(i).count()) for i in
                      app.mongo.get_contracts_categories()]
     category_list.sort(key=lambda x: x[1], reverse=True)
 
+    # TODO cambiar a query de mongo
     total_money = sum(map(lambda j: j['presupuesto'] if 'presupuesto' in j.keys() else 0, all_contracts))
 
     tweets = tw_query("comunidad madrid contrato", 10, TW_AUTH)
 
     return render_template(
-        'index.html', numero_contratos=len(all_contracts), categorias=list(category_list),
+        'index.html', numero_contratos=all_contracts.count(), categorias=list(category_list),
         dinero_total=locale.format_string("%.2f", total_money, grouping=True), tweets=tweets
     )
 
@@ -44,7 +45,7 @@ def results():
     """
     busqueda = request.form['busqueda']
     t0 = time.time()
-    search = app.mongo.get_contracts_by_title(busqueda)
+    search = list(app.mongo.get_contracts_by_title(busqueda).limit(50))
     tiempo = time.time() - t0
     return render_template('results.html', busqueda=busqueda, contratos=search, tiempo=tiempo)