diff --git a/src/imagery/i.eodag/i.eodag.html b/src/imagery/i.eodag/i.eodag.html index 35c3fe859f..d49e70cfae 100644 --- a/src/imagery/i.eodag/i.eodag.html +++ b/src/imagery/i.eodag/i.eodag.html @@ -63,24 +63,56 @@

EXAMPLES

v.extract input=urbanarea where="NAME = 'Durham'" output=durham i.eodag -l start=2022-05-25 end=2022-06-01 \ - map=durham dataset=S2_MSI_L2A provider=cop_dataspace + map=durham dataset=S2_MSI_L2A provider=cop_dataspace \ + sort=cloudcover,ingestiondate order=asc,desc

 
-Download all available scenes in the tmp directory, with Cloud Coverage not exceeding 50%:
+Search and list the available Sentinel 2 scenes in the Copernicus Data Space
+Ecosystem, with at least 70% of the AOI covered:
+
+v.extract input=urbanarea where="NAME = 'Durham'" output=durham
+
+i.eodag -l start=2022-05-25 end=2022-06-01 \
+    dataset=S2_MSI_L2A provider=cop_dataspace \
+    clouds=50 map=durham minimum_overlap=70
+
+ +Sort results, in descending order, by cloudcover, and then by ingestiondate. +Note that sorting with cloudcover uses +unrounded values, while they are rounded to the nearest integer when listing. + +
+i.eodag -l start=2022-05-25 end=2022-06-01 \
+    dataset=S2_MSI_L2A provider=cop_dataspace \
+    sort=cloudcover,ingestiondate order=desc
+
+ +Search for scenes using a text file with a list of IDs, and filter the results with the +provided parameters: + +
+i.eodag -l file=ids_list.txt \
+    start=2022-05-25 \
+    area_relation=Contains clouds=3
+
+ +Download all available scenes with cloud coverage not exceeding 50% +in the tmp directory:
 i.eodag start=2022-05-25 end=2022-06-01 \
     dataset=S2_MSI_L2A provider=cop_dataspace clouds=50
 
-Download only selected scenes from a text file of IDs, using the Copernicus Data Space Ecosystem as the provider: +Download only selected scenes from a text file of IDs, using the Copernicus Data +Space Ecosystem as the provider:
 i.eodag file=ids_list.txt provider=cop_dataspace
 
-Download and extract only selected scenes into the download_here directory, -using a custom config file: +Download and extract only selected scenes into the download_here +directory, using a custom config file:
 i.eodag -e provider=cop_dataspace \
@@ -95,8 +127,6 @@ 

REQUIREMENTS

SEE ALSO

diff --git a/src/imagery/i.eodag/i.eodag.py b/src/imagery/i.eodag/i.eodag.py index 13369789ec..38843c374c 100755 --- a/src/imagery/i.eodag/i.eodag.py +++ b/src/imagery/i.eodag/i.eodag.py @@ -31,7 +31,12 @@ # FLAGS # %flag # % key: l -# % description: List the search result without downloading +# % description: List filtered products and exit +# %end + +# %flag +# % key: j +# % description: Print extended metadata information in JSON style # %end # %flag @@ -82,6 +87,23 @@ # % required: no # %end +# %option +# % key: area_relation +# % type: string +# % description: Spatial relation of footprint to AOI +# % options: Intersects,Contains,IsWithin +# % required: no +# % guisection: Region +# %end + +# %option +# % key: minimum_overlap +# % type: integer +# % description: Minimal AOI area covered by the scene [0, 100] +# % required: no +# % guisection: Region +# %end + # %option # % key: id # % type: string @@ -101,11 +123,29 @@ # %option # % key: provider # % type: string -# % description: The provider to search within. Providers available by default: https://eodag.readthedocs.io/en/stable/getting_started_guide/providers.html +# % label: The provider to search within. 
+# % description: Providers available by default: https://eodag.readthedocs.io/en/stable/getting_started_guide/providers.html # % required: no # % guisection: Filter # %end +# %option +# % key: sort +# % description: Field to sort values by +# % multiple: yes +# % options: ingestiondate,cloudcover +# % answer: cloudcover,ingestiondate +# % guisection: Sort +# %end + +# %option +# % key: order +# % description: Sort order (see sort parameter) +# % options: asc,desc +# % answer: asc +# % guisection: Sort +# %end + # %option # % key: start # % type: string @@ -120,31 +160,56 @@ # % guisection: Filter # %end +# %option +# % key: save +# % type: string +# % description: File name to save in (the format will be adjusted according to the file extension) +# % label: Supported files extensions [geojson: Rreadable by i.eodag | json: Beautified] +# % guisection: Filter +# %end + # %rules # % exclusive: file, id +# % exclusive: -l, -j # %end import sys import os import getpass +import pytz +import json from pathlib import Path from subprocess import PIPE -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone +from functools import cmp_to_key import grass.script as gs from grass.pygrass.modules import Module -from grass.exceptions import ParameterError def create_dir(directory): + """Creates directory.""" try: Path(directory).mkdir(parents=True, exist_ok=True) except: - gs.fatal(_("Could not create directory {}").format(dir)) + gs.fatal(_("Could not create directory {}").format(directory)) def get_bb(proj): + """Gets the bounding box of the current computational + region in geographic coordinates. + + :param proj: Projection information from 'gs.parse_command("g.proj", flags="j")' + :type proj: str + + :return: Bounding box of the current computational region. 
+ Format: + {"lonmin" : lonmin, "latmin" : latmin, + "lonmax" : lonmax, "latmax" : latmax} + :rtype: dict + """ + gs.verbose("Generating AOI from bounding box...") if proj["+proj"] != "longlat": info = gs.parse_command("g.region", flags="uplg") return { @@ -163,7 +228,18 @@ def get_bb(proj): def get_aoi(vector=None): - """Get the AOI for querying""" + """Parses and returns the AOI. + + :param vector: Vector map (if None, returns the boudning box) + :type vector: str + + :return: Either a WKT when using a Vector map, or a dict representing + the current computational region bounding box. + The latter format: + {"lonmin" : lonmin, "latmin" : latmin, + "lonmax" : lonmax, "latmax" : latmax} + :rtype: str | dict + """ proj = gs.parse_command("g.proj", flags="j") if "+proj" not in proj: @@ -220,22 +296,37 @@ def get_aoi(vector=None): def search_by_ids(products_ids): - gs.message("Searching for products...") + """Search for products based on their ids. + + :param products_ids: List of products' ids. + :type products_ids: list + + :return: EO products found by searching with 'search_parameters' + :rtype: class:'eodag.api.search_result.SearchResult' + """ + gs.verbose("Searching for products...") search_result = [] for query_id in products_ids: - gs.message(_("Searching for {}".format(query_id))) + gs.verbose(_("Searching for {}".format(query_id))) product, count = dag.search(id=query_id, provider=options["provider"] or None) if count > 1: - gs.message(_("Could not be uniquely identified.")) + gs.warning( + _("{}\nCould not be uniquely identified. Skipping...".format(query_id)) + ) elif count == 0 or not product[0].properties["id"].startswith(query_id): - gs.message(_("Not found.")) + gs.warning(_("{}\nNot Found. 
Skipping...".format(query_id))) else: - gs.message(_("Found.")) + gs.verbose(_("Found.")) search_result.append(product[0]) - return search_result + return SearchResult(search_result) def setup_environment_variables(env, **kwargs): + """Sets the eodag environment variables based on the provided options/flags. + + :param kwargs: options/flags from gs.parser + :type kwargs: dict + """ provider = kwargs.get("provider") extract = kwargs.get("e") delete_archive = kwargs.get("d") @@ -255,37 +346,62 @@ def setup_environment_variables(env, **kwargs): if extract: gs.warning( _( - """Ignoring 'e' flag... - 'extract' option in the config file will be used. - If you wish to use the 'e' flag, please specify a provider.""" + "Ignoring 'e' flag...\n \ + 'extract' option in the config file will be used.\n \ + If you wish to use the 'e' flag, please specify a provider." ) ) if delete_archive: gs.warning( _( - """Ignoring 'd' flag... - 'delete_archive' option in the config file will be used. - If you wish to use the 'd' flag, please specify a provider.""" + "Ignoring 'd' flag...\n \ + 'delete_archive' option in the config file will be used.\n \ + If you wish to use the 'd' flag, please specify a provider." ) ) if output: gs.warning( _( - """Ignoring 'output' option... - 'output' option in the config file will be used. - If you wish to use the 'output' option, please specify a provider.""" + "Ignoring 'output' option...\n \ + 'output' option in the config file will be used.\n \ + If you wish to use the 'output' option, please specify a provider." 
) ) def normalize_time(datetime_str: str): + """Unifies the different ISO formats into 'YYYY-MM-DDTHH:MM:SS' + + :param datetime_str: Datetime in ISO format + :type datetime_str: str + + :return: Datetime converted to 'YYYY-MM-DDTHH:MM:SS' + :rtype: str + """ normalized_datetime = datetime.fromisoformat(datetime_str) + if normalized_datetime.tzinfo is None: + normalized_datetime = normalized_datetime.replace(tzinfo=timezone.utc) + # Remove microseconds normalized_datetime = normalized_datetime.replace(microsecond=0) + # Convert time to UTC + normalized_datetime = normalized_datetime.astimezone(pytz.utc) + # Remove timezone info normalized_datetime = normalized_datetime.replace(tzinfo=None) return normalized_datetime.isoformat() def no_fallback_search(search_parameters, provider): + """Search in only one provider (fallback is disabled). + + :param search_parameters: Queryables to which searching will take place + :type search_parameters: dict + + :param provider: Provider to use for searching + :type provider: str + + :return: EO products found by searching with 'search_parameters' + :rtype: class:'eodag.api.search_result.SearchResult' + """ try: server_poke = dag.search(**search_parameters, provider=provider) if server_poke[1] == 0: @@ -300,82 +416,237 @@ def no_fallback_search(search_parameters, provider): search_result = dag.search_iter_page(**search_parameters) # TODO: Would it be useful if user could iterate through - # the pages manually, and look for the product themselves? + # the pages manually, and look for the product themselves? 
try: # Merging the pages into one list with all products - return [j for i in search_result for j in i] + return SearchResult([j for i in search_result for j in i]) except Exception as e: - gs.verbose(e) + gs.debug(e) gs.fatal(_("Server error, please try again.")) -def create_products_dataframe(eo_products): - result_dict = {"id": [], "time": [], "cloudCover": [], "productType": []} - for product in eo_products: - for key in result_dict: - if key == "time": - if ( - "startTimeFromAscendingNode" in product.properties - and product.properties["startTimeFromAscendingNode"] is not None - ): +def list_products(products): + """Lists products on the Standard Output stream (shell). + + :param products: EO poducts to be listed + :type products: class:'eodag.api.search_result.SearchResult' + """ + columns = ["id", "startTimeFromAscendingNode", "cloudCover", "productType"] + columns_NA = ["id_NA", "time_NA", "cloudCover_NA", "productType_NA"] + for product in products: + product_line = "" + for i, column in enumerate(columns): + product_attribute_value = product.properties[column] + # Display NA if not available + if product_attribute_value is None: + product_attribute_value = columns_NA[i] + else: + if column == "cloudCover": + # Special formatting for cloud cover + product_attribute_value = f"{product_attribute_value:2.0f}%" + elif column == "startTimeFromAscendingNode": + # Special formatting for datetime try: - result_dict["time"].append( - normalize_time( - product.properties["startTimeFromAscendingNode"] - ) + product_attribute_value = normalize_time( + product_attribute_value ) except: - result_dict["time"].append( - product.properties["startTimeFromAscendingNode"] - ) - else: - result_dict["time"].append(None) - else: - if key in product.properties and product.properties[key] is not None: - result_dict[key].append(product.properties[key]) - else: - result_dict[key].append(None) + product_attribute_value = product.properties[column] + if i != 0: + product_line += " " + 
product_line += product_attribute_value + print(product_line) - df = pd.DataFrame().from_dict(result_dict) - return df +def list_products_json(products): + """Lists products on the Standard Output stream (shell) in JSON format. -def list_products(products): - df = create_products_dataframe(products) - for idx in range(len(df)): - product_id = df["id"].iloc[idx] - if product_id is None: - time_string = "id_NA" - time_string = df["time"].iloc[idx] - if time_string is None: - time_string = "time_NA" - else: - time_string += "Z" - cloud_cover_string = df["cloudCover"].iloc[idx] - if cloud_cover_string is not None: - cloud_cover_string = f"{cloud_cover_string:2.0f}%" - else: - cloud_cover_string = "cloudCover_NA" - product_type_string = df["productType"].iloc[idx] - if product_type_string is None: - product_type_string = "productType_NA" - print(f"{product_id} {time_string} {cloud_cover_string} {product_type_string}") + :param products: EO poducts to be listed + :type products: class:'eodag.api.search_result.SearchResult' + """ + print(json.dumps(products.as_geojson_object(), indent=4)) -def apply_filters(search_result): +def remove_duplicates(search_result): + """Removes duplicated products, in case a provider returns a product multiple times.""" filtered_result = [] + is_added = set() for product in search_result: - valid = True - if ( - options["clouds"] - and "cloudCover" in product.properties - and product.properties["cloudCover"] is not None - and product.properties["cloudCover"] > int(options["clouds"]) - ): - valid = False - if valid: - filtered_result.append(product) - return filtered_result + if product.properties["id"] in is_added: + continue + is_added.add(product.properties["id"]) + filtered_result.append(product) + return SearchResult(filtered_result) + + +def dates_to_iso_format(): + """Converts the start/end options to the isoformat and save them in-place. + + If options['end'] is not set, options['end'] will be today's date. 
+ If options['start'] is not set, options['start'] will be 60 days prior + to options['end'] date. + """ + end_date = options["end"] + if not options["end"]: + end_date = datetime.now(timezone.utc).isoformat() + try: + end_date = normalize_time(end_date) + except Exception as e: + gs.debug(e) + gs.fatal(_("Could not parse 'end' time.")) + + start_date = options["start"] + if not options["start"]: + delta_days = timedelta(60) + start_date = (datetime.fromisoformat(end_date) - delta_days).isoformat() + try: + start_date = normalize_time(start_date) + except Exception as e: + gs.debug(e) + gs.fatal(_("Could not parse 'start' time.")) + + if end_date < start_date: + gs.fatal( + _( + "End Date ({}) can not come before Start Date ({})".format( + end_date, start_date + ) + ) + ) + options["start"] = start_date + options["end"] = end_date + + +def filter_result(search_result, geometry, **kwargs): + """Filter results to comply with options/flags. + :param search_result: Search Result to filter + :type search_result: class:'eodag.api.search_result.SearchResult' + + :param geometry: WKT String with the geometry to filter with respect to + :type geometry: str, optional + + :param kwargs: options/flags from gs.parser, with the crietria that will + be used for filtering. + :type kwargs: dict + + :returns: A collection of EO products matching the filters criteria. + :rtype: class:'eodag.api.search_result.SearchResult' + """ + prefilter_count = len(search_result) + area_relation = kwargs["area_relation"] + minimum_overlap = kwargs["minimum_overlap"] + cloud_cover = kwargs["clouds"] + start_date = kwargs["start"] + end_date = kwargs["end"] + + # If neither a geometry is provided as a parameter + # nor a vector map is provided through "options", + # then none of the geometry filtering will take place. 
+ if geometry is None and (area_relation is not None or minimum_overlap is not None): + geometry = get_aoi(kwargs["map"]) + gs.verbose(_("Filtering results...")) + + if area_relation: + # Product's geometry intersects with AOI + if area_relation == "Intersects": + search_result = search_result.filter_overlap( + geometry=geometry, intersects=True + ) + # Product's geometry contains the AOI + elif area_relation == "Contains": + search_result = search_result.filter_overlap( + geometry=geometry, contains=True + ) + # Product's geometry is within the AOI + elif area_relation == "IsWithin": + search_result = search_result.filter_overlap(geometry=geometry, within=True) + + if minimum_overlap: + # Percentage of the AOI area covered by the product's geometry + search_result = search_result.filter_overlap( + geometry=geometry, minimum_overlap=int(minimum_overlap) + ) + + if cloud_cover: + search_result = search_result.filter_property( + operator="le", cloudCover=int(cloud_cover) + ) + + search_result = search_result.filter_date(start=start_date, end=end_date) + search_result = remove_duplicates(search_result) + + postfilter_count = len(search_result) + gs.verbose( + _("{} product(s) filtered out.".format(prefilter_count - postfilter_count)) + ) + + return search_result + + +def sort_result(search_result): + """Sorts search results according to options['sort'] and options['order'] + + options['sort'] parameters and options['order'] are matched correspondingly. + If options['order'] parameters are not suffcient, + 'asc' will be used by default. 
+ + :param search_result: EO products to be sorted + :type search_result: class'eodag.api.search_result.SearchResult' + + :return: Sorted EO products + :rtype: class:'eodag.api.search_result.SearchResult' + """ + gs.verbose(_("Sorting...")) + + sort_keys = options["sort"].split(",") + sort_order = options["order"] + + # Sort keys and sort orders are matched respectively + def products_compare(first, second): + for sort_key in sort_keys: + if sort_key == "ingestiondate": + first_value = first.properties["startTimeFromAscendingNode"] + second_value = second.properties["startTimeFromAscendingNode"] + elif sort_key == "cloudcover": + first_value = first.properties["cloudCover"] + second_value = second.properties["cloudCover"] + if first_value < second_value: + return 1 if sort_order == "desc" else -1 + elif first_value > second_value: + return -1 if sort_order == "desc" else 1 + return 0 + + search_result.sort(key=cmp_to_key(products_compare)) + return search_result + + +def save_search_result(search_result, file_name): + """Save search results to files. + + If the file is a json file, + the search result is saved in a beautified JSON format. + If the file is a geojson file, + the search result is saved using EODAG serialize method, + saving it in a format that can be read again by i.eodag, + to restore the search results. 
+ + :param search_result: EO products to be sorted + :type search_result: class'eodag.api.search_result.SearchResult' + + :param file_name: EO products to be sorted + :type file_name: str + """ + if file_name[-5:].lower() == ".json": + gs.verbose(_("Saving searchin result in '{}'".format(file_name))) + with open(file_name, "w") as f: + f.write( + json.dumps( + search_result.as_geojson_object(), ensure_ascii=False, indent=4 + ) + ) + if file_name[-8:].lower() == ".geojson": + gs.verbose(_("Saving searchin result in '{}'".format(file_name))) + dag.serialize(search_result, filename=file_name) def main(): @@ -387,8 +658,10 @@ def main(): if options["provider"]: dag.set_preferred_provider(options["provider"]) + dates_to_iso_format() + # Download by IDs - # Searching for additional products won't take place + # Searching for additional products will not take place ids_set = set() if options["id"]: # Parse IDs @@ -396,7 +669,7 @@ def main(): elif options["file"]: # Read IDs from file if Path(options["file"]).is_file(): - gs.message(_('Reading file "{}"'.format(options["file"]))) + gs.verbose(_('Reading file "{}"'.format(options["file"]))) ids_set = set( Path(options["file"]).read_text(encoding="UTF8").strip().split("\n") ) @@ -404,9 +677,12 @@ def main(): gs.fatal(_('Could not open file "{}"'.format(options["file"]))) if len(ids_set): + # Remove empty string ids_set.discard(str()) gs.message(_("Found {} distinct ID(s).".format(len(ids_set)))) gs.message("\n".join(ids_set)) + + # Search for products found from options["file"] or options["id"] search_result = search_by_ids(ids_set) else: items_per_page = 40 @@ -414,60 +690,41 @@ def main(): # could be handled by catching exceptions when searching... 
product_type = options["dataset"] - # HARDCODED VALUES FOR TESTING { "lonmin": 1.9, "latmin": 43.9, "lonmax": 2, "latmax": 45, } # hardcoded for testing + # HARDCODED VALUES FOR TESTING { "lonmin": 1.9, "latmin": 43.9, "lonmax": 2, "latmax": 45, } + geometry = get_aoi(options["map"]) + gs.verbose(_("AOI: {}".format(geometry))) - geom = get_aoi(options["map"]) - gs.verbose(_("Region used for searching: {}".format(geom))) search_parameters = { "items_per_page": items_per_page, "productType": product_type, - "geom": geom, + "geom": geometry, } if options["clouds"]: search_parameters["cloudCover"] = options["clouds"] - end_date = options["end"] - if not options["end"]: - end_date = datetime.utcnow().isoformat() - try: - end_date = normalize_time(end_date) - except Exception as e: - gs.debug(e) - gs.fatal(_("Could not parse 'end' time.")) - - start_date = options["start"] - if not options["start"]: - delta_days = timedelta(60) - start_date = (datetime.fromisoformat(end_date) - delta_days).isoformat() - try: - start_date = normalize_time(start_date) - except Exception as e: - gs.debug(e) - gs.fatal(_("Could not parse 'start' time.")) - - if end_date < start_date: - gs.fatal( - _( - "End Date ({}) can not come before Start Date ({})".format( - end_date, start_date - ) - ) - ) - - # TODO: Requires further testing to make sure the isoformat works with all the providers - search_parameters["start"] = start_date - search_parameters["end"] = end_date + search_parameters["start"] = options["start"] + search_parameters["end"] = options["end"] if options["provider"]: search_result = no_fallback_search(search_parameters, options["provider"]) else: search_result = dag.search_all(**search_parameters) - gs.message(_("Applying filters...")) - search_result = apply_filters(search_result) + search_result = filter_result( + search_result, geometry if "geometry" in locals() else None, **options + ) + search_result = sort_result(search_result) + print(type(search_result)) + 
gs.message(_("{} product(s) found.").format(len(search_result))) + # TODO: Add a way to search in multiple providers at once + # Check for when this feature is added https://github.com/CS-SI/eodag/issues/163 + if options["save"]: + save_search_result(search_result, options["save"]) if flags["l"]: list_products(search_result) + elif flags["j"]: + list_products_json(search_result) else: # TODO: Consider adding a quicklook flag # TODO: Add timeout and wait parameters for downloading offline products... @@ -479,7 +736,8 @@ def main(): gs.warning(_("Experimental Version...")) gs.warning( _( - "This module is still under development, and its behaviour is not guaranteed to be reliable" + "This module is still under development, \ + and its behaviour is not guaranteed to be reliable" ) ) options, flags = gs.parser() @@ -490,11 +748,9 @@ def main(): from eodag.api.search_result import SearchResult except: gs.fatal(_("Cannot import eodag. Please intall the library first.")) - try: - import pandas as pd - except: - gs.fatal(_("Cannot import pandas. Please intall the library first.")) + # To disable eodag logs, set DEBUG to 0 + # with " g.gisenv 'set=DEBUG=0' " if "DEBUG" in gs.read_command("g.gisenv"): debug_level = int(gs.read_command("g.gisenv", get="DEBUG")) if not debug_level: @@ -503,5 +759,4 @@ def main(): setup_logging(2) else: setup_logging(3) - sys.exit(main())