Skip to content

Commit

Permalink
feat: add init script in order to load amp data in database
Browse files Browse the repository at this point in the history
  • Loading branch information
Ronan committed Feb 11, 2024
1 parent 30e9c7c commit be4a194
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 16 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
zones_subset_02022024.csv


# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ launch-dev-db:
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} alembic upgrade head
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 alembic/init_script/load_vessels_data.py

# Load the AMP (marine protected area) zones into the dev database by
# running the init script inside the dev docker image.
load-amp-data:
	$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 alembic/init_script/load_amp_data.py

launch-dev-container:
$(BLOOM_DEV_DOCKER) -dti d4g/bloom:${VERSION} /bin/bash

Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,9 @@
"""
This script presents a method to load geometry data in a local DB.
First, you will need 4 shape files present in the data directory :
data/Nonterrestrial_WDPA_Jan2023.dbf
data/Nonterrestrial_WDPA_Jan2023.prj
data/Nonterrestrial_WDPA_Jan2023.shp
data/Nonterrestrial_WDPA_Jan2023.shx
The, you will have to spawn a database and a pgadmin containers locally,
using the db.yaml docker compose file.
#! docker compose up -d postgres pgadmin
Once images are built and running, you can run the following
python script from the root of the bloom project.
"""
import logging
import os

import geopandas as gpd
import pandas as pd
from shapely import wkb
from sqlalchemy import create_engine

logging.basicConfig()
Expand All @@ -42,6 +30,20 @@
+ postgres_db
)
engine = create_engine(db_url, echo=False)
gdf = gpd.read_file("data/Nonterrestrial_WDPA_Jan2023.shp")

gdf.to_postgis("mpa", con=engine, if_exists="append", index=False)
df = pd.read_csv("zones_subset_02022024.csv")
df = df.rename(columns={"Geometry": "geometry",
"Index": "index", "Wdpaid": "WDPAID",
"Name": "name",
"Desig Eng": "DESIG_ENG",
"Desig Type": "DESIG_TYPE",
"Iucn Cat": "IUCN_CAT",
"Parent Iso": "PARENT_ISO",
"Iso3": "ISO3",
"Beneficiaries": "BENEFICIARIES"})

df['geometry'] = df['geometry'].apply(wkb.loads)
gdf = gpd.GeoDataFrame(df, crs='epsg:4326')
gdf.head()

gdf.to_postgis("mpa_fr_with_mn", con=engine, if_exists="replace", index=False)
43 changes: 43 additions & 0 deletions alembic/versions/961cee5426d6_create_amp_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""create amp table
Revision ID: 961cee5426d6
Revises: 1fd83d22bd1e
Create Date: 2024-02-11 22:10:19.010986
"""
from alembic import op
import sqlalchemy as sa
import geoalchemy2

# revision identifiers, used by Alembic.
revision = '961cee5426d6'
down_revision = '1fd83d22bd1e'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the "mpa_fr_with_mn" table holding AMP zone records."""
    # NOTE(review): "benificiaries" looks like a misspelling of
    # "beneficiaries" — confirm before renaming, since this is a live
    # column name once the migration has been applied.
    columns = [
        sa.Column("index", sa.Integer, primary_key=True),
        sa.Column("wdpaid", sa.Integer),
        sa.Column("name", sa.String, nullable=False),
        sa.Column("desig_eng", sa.String),
        sa.Column("desig_type", sa.String),
        sa.Column("iucn_cat", sa.String),
        sa.Column("parent_iso", sa.String),
        sa.Column("iso3", sa.String),
        # Generic geometry type in WGS84 (EPSG:4326).
        sa.Column("geometry", geoalchemy2.types.Geometry(geometry_type="GEOMETRY", srid=4326)),
        sa.Column("benificiaries", sa.String),
    ]
    op.create_table("mpa_fr_with_mn", *columns)


def downgrade() -> None:
    """Drop the "mpa_fr_with_mn" table if it exists."""
    conn = op.get_bind()
    # BUG FIX: the original referenced `Inspector` without importing it,
    # so any downgrade raised NameError. `sa.inspect(conn)` returns the
    # same Inspector object via the already-imported sqlalchemy module.
    inspector = sa.inspect(conn)
    sql_tables = inspector.get_table_names()
    # Drop only when present, so the downgrade is idempotent.
    for table in ("mpa_fr_with_mn",):
        if table in sql_tables:
            op.drop_table(table)

0 comments on commit be4a194

Please sign in to comment.