From af687727ebb9a06d95ad8ae896b265c644c5617b Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 15 Mar 2023 20:42:26 +0100 Subject: [PATCH 1/3] Remove all trailing whitespace Signed-off-by: Stefan Weil --- .github/workflows/docker-image.yml | 1 - CHANGELOG.md | 2 +- ocrd/ocrd/processor/base.py | 8 +- ocrd/ocrd/resolver.py | 2 +- ocrd/ocrd/resource_manager.py | 6 +- ocrd_models/README.md | 2 +- ocrd_models/ocrd_models/ocrd_mets.py | 34 ++++---- .../ocrd_models/ocrd_page_generateds.py | 64 +++++++------- .../extend_AllIndexed.py | 2 +- ocrd_utils/ocrd_logging.conf | 6 +- ocrd_utils/ocrd_utils/__init__.py | 8 +- ocrd_utils/ocrd_utils/image.py | 20 ++--- ocrd_utils/ocrd_utils/os.py | 2 +- ocrd_validators/ocrd_validators/page.xsd | 4 +- .../ocrd_validators/page_validator.py | 2 +- ocrd_validators/ocrd_validators/xlink.xsd | 84 +++++++++---------- tests/model/mets_bench_extreme.py | 12 +-- tests/model/mets_bench_extreme_additional.py | 4 +- tests/model/test_agent.py | 2 +- tests/model/test_ocrd_mets.py | 2 +- tests/model/test_ocrd_mets_bench.py | 24 +++--- 21 files changed, 145 insertions(+), 146 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 25999a199e..707ed53257 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -38,4 +38,3 @@ jobs: run: | docker push ${{ env.DOCKER_TAG }}:latest docker push ${{ env.DOCKER_TAG }}-cuda:latest - diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b3385518f..5d99b9cee2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1294,7 +1294,7 @@ Added: * Workspace validation will check cardinality of images per file is 1, #243, OCR-D/spec#132 Changed: - + * bashlib will no longer warn about "non-conformant" file group names, #365 * Invalid `file:/` URL will now raise exceptions, #373 * image_from_*: increase tolerance for size mismatch after rotation to 2px, #371 diff --git a/ocrd/ocrd/processor/base.py b/ocrd/ocrd/processor/base.py index 38b7848a03..ae5a492d08 100644 --- a/ocrd/ocrd/processor/base.py +++ b/ocrd/ocrd/processor/base.py @@ -44,7 +44,7 @@ class Processor(): for run-time data processing. That is, it executes a single workflow step, or a combination of workflow steps, on the workspace (represented by local METS). It reads input files for all or requested physical pages of the input fileGrp(s), - and writes output files for them into the output fileGrp(s). It may take + and writes output files for them into the output fileGrp(s). It may take a number of optional or mandatory parameters. """ @@ -166,12 +166,12 @@ def verify(self): def process(self): """ - Process the :py:attr:`workspace` + Process the :py:attr:`workspace` from the given :py:attr:`input_file_grp` to the given :py:attr:`output_file_grp` for the given :py:attr:`page_id` under the given :py:attr:`parameter`. - + (This contains the main functionality and needs to be overridden by subclasses.) """ raise Exception("Must be implemented") @@ -282,7 +282,7 @@ def input_files(self): - Otherwise raise an error (complaining that only PAGE-XML warrants having multiple images for a single page) Algorithm _ - + Returns: A list of :py:class:`ocrd_models.ocrd_file.OcrdFile` objects. """ diff --git a/ocrd/ocrd/resolver.py b/ocrd/ocrd/resolver.py index 25f7507f12..171ad6a3cc 100644 --- a/ocrd/ocrd/resolver.py +++ b/ocrd/ocrd/resolver.py @@ -175,7 +175,7 @@ def workspace_from_url( src_baseurl (string, None): Base URL for resolving relative file locations **kwargs (): Passed on to ``OcrdMets.find_files`` if download == True - Download (clone) :py:attr:`mets_url` to ``mets.xml`` in :py:attr:`dst_dir`, unless + Download (clone) :py:attr:`mets_url` to ``mets.xml`` in :py:attr:`dst_dir`, unless the former is already local and the latter is ``none`` or already identical to its directory name. Returns: diff --git a/ocrd/ocrd/resource_manager.py b/ocrd/ocrd/resource_manager.py index 9d1e6ac596..20782bf8db 100644 --- a/ocrd/ocrd/resource_manager.py +++ b/ocrd/ocrd/resource_manager.py @@ -158,9 +158,9 @@ def list_installed(self, executable=None): resdict = resdict_list[0] elif str(res_filename.parent) == moduledir: resdict = { - 'name': res_name, - 'url': str(res_filename), - 'description': 'Found at module', + 'name': res_name, + 'url': str(res_filename), + 'description': 'Found at module', 'type': res_type, 'size': res_size } diff --git a/ocrd_models/README.md b/ocrd_models/README.md index 57b0cbe7c7..8f1bc67f91 100644 --- a/ocrd_models/README.md +++ b/ocrd_models/README.md @@ -36,7 +36,7 @@ Let's say you want to add a method `get_FirstTextRegion` on the `pc:Page` elemen Would add the method `exportChildren` from a file `exportChildren_PageType.py`. - > **Note**: + > **Note**: > The method name in the file must match the method name passed to > `_add_method`. This is *not* checked automatically, so double-check manually! diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/ocrd_models/ocrd_models/ocrd_mets.py index 3319f8f6ff..d06d5f18d7 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/ocrd_models/ocrd_models/ocrd_mets.py @@ -137,7 +137,7 @@ def _clear_caches(self): self._file_cache = None self._page_cache = None self._fptr_cache = None - + def refresh_caches(self): if self._cache_flag: # Cache for the files (mets:file) - two nested dictionaries @@ -158,11 +158,11 @@ def refresh_caches(self): # The inner dictionary's Key: 'fptr.FILEID' # The inner dictionary's Value: a 'fptr' object at some memory location self._fptr_cache = {} - + # Note, if the empty_mets() function is used to instantiate OcrdMets # Then the cache is empty even after this operation self._fill_caches() - + @property def unique_identifier(self): """ @@ -173,7 +173,7 @@ def unique_identifier(self): found = self._tree.getroot().find('.//mods:identifier[@type="%s"]' % t, NS) if found is not None: return found.text - + @unique_identifier.setter def unique_identifier(self, purl): """ @@ -268,8 +268,8 @@ def find_files( local_filename (string) : ``@xlink:href`` local/cached filename of ``mets:Flocat`` of ``mets:file`` mimetype (string) : ``@MIMETYPE`` of ``mets:file`` local (boolean) : Whether to restrict results to local files in the filesystem - include_fileGrp (list[str]) : Whitelist of allowd file groups - exclude_fileGrp (list[str]) : Blacklist of disallowd file groups + include_fileGrp (list[str]) : Whitelist of allowd file groups + exclude_fileGrp (list[str]) : Blacklist of disallowd file groups Yields: :py:class:`ocrd_models:ocrd_file:OcrdFile` instantiations """ @@ -303,7 +303,7 @@ def find_files( mimetype = re.compile(mimetype[REGEX_PREFIX_LEN:]) if url and url.startswith(REGEX_PREFIX): url = re.compile(url[REGEX_PREFIX_LEN:]) - + candidates = [] if self._cache_flag: if fileGrp: @@ -315,7 +315,7 @@ def find_files( candidates = [el_file for id_to_file in self._file_cache.values() for el_file in id_to_file.values()] else: candidates = self._tree.getroot().xpath('//mets:file', namespaces=NS) - + for cand in candidates: if ID: if isinstance(ID, str): @@ -404,7 +404,7 @@ def rename_file_group(self, old, new): if el_fileGrp is None: raise FileNotFoundError("No such fileGrp '%s'" % old) el_fileGrp.set('USE', new) - + if self._cache_flag: self._file_cache[new] = self._file_cache.pop(old) @@ -452,7 +452,7 @@ def remove_file_group(self, USE, recursive=False, force=False): if self._cache_flag: # Note: Since the files inside the group are removed - # with the 'remove_one_file' method above, + # with the 'remove_one_file' method above, # we should not take care of that again. # We just remove the fileGrp. del self._file_cache[el_fileGrp.get('USE')] @@ -591,7 +591,7 @@ def physical_pages(self): """ if self._cache_flag: return list(self._page_cache.keys()) - + return [str(x) for x in self._tree.getroot().xpath( 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]/@ID', namespaces=NS)] @@ -604,7 +604,7 @@ def get_physical_pages(self, for_fileIds=None): if for_fileIds is None: return self.physical_pages ret = [None] * len(for_fileIds) - + if self._cache_flag: for pageId in self._fptr_cache.keys(): for fptr in self._fptr_cache[pageId].keys(): @@ -657,14 +657,14 @@ def set_physical_page_for_file(self, pageId, ocrd_file, order=None, orderlabel=N if el_seqdiv is None: el_seqdiv = ET.SubElement(el_structmap, TAG_METS_DIV) el_seqdiv.set('TYPE', 'physSequence') - + el_pagediv = None if self._cache_flag: if pageId in self._page_cache: el_pagediv = self._page_cache[pageId] else: el_pagediv = el_seqdiv.find('mets:div[@ID="%s"]' % pageId, NS) - + if el_pagediv is None: el_pagediv = ET.SubElement(el_seqdiv, TAG_METS_DIV) el_pagediv.set('TYPE', 'page') @@ -676,10 +676,10 @@ def set_physical_page_for_file(self, pageId, ocrd_file, order=None, orderlabel=N if self._cache_flag: # Create a new entry in the page cache self._page_cache[pageId] = el_pagediv - # Create a new entry in the fptr cache and + # Create a new entry in the fptr cache and # assign an empty dictionary to hold the fileids self._fptr_cache[pageId] = {} - + el_fptr = ET.SubElement(el_pagediv, TAG_METS_FPTR) el_fptr.set('FILEID', ocrd_file.ID) @@ -756,7 +756,7 @@ def remove_physical_page_fptr(self, fileId): if self._cache_flag: for page_id in self._fptr_cache.keys(): if fileId in self._fptr_cache[page_id].keys(): - mets_fptrs.append(self._fptr_cache[page_id][fileId]) + mets_fptrs.append(self._fptr_cache[page_id][fileId]) else: mets_fptrs = self._tree.getroot().xpath( 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]/mets:fptr[@FILEID="%s"]' % fileId, namespaces=NS) diff --git a/ocrd_models/ocrd_models/ocrd_page_generateds.py b/ocrd_models/ocrd_models/ocrd_page_generateds.py index d95ea4b321..888f5968a6 100644 --- a/ocrd_models/ocrd_models/ocrd_page_generateds.py +++ b/ocrd_models/ocrd_models/ocrd_page_generateds.py @@ -1234,14 +1234,14 @@ def id(self): def get_AllAlternativeImagePaths(self, page=True, region=True, line=True, word=True, glyph=True): """ Get all the ``pc:AlternativeImage/@filename`` paths referenced in the PAGE-XML document. - + Arguments: page (boolean): Get images on ``pc:Page`` level region (boolean): Get images on ``pc:*Region`` level line (boolean): Get images on ``pc:TextLine`` level word (boolean): Get images on ``pc:Word`` level glyph (boolean): Get images on ``pc:Glyph`` level - + Returns: a list of image filename strings """ @@ -1278,7 +1278,7 @@ def get_AllAlternativeImagePaths(self, page=True, region=True, line=True, word=T ret += doc.xpath('//page:Word/page:AlternativeImage/@filename', namespaces=NAMESPACES) if glyph: ret += doc.xpath('//page:Glyph/page:AlternativeImage/@filename', namespaces=NAMESPACES) - + return ret def prune_ReadingOrder(self): """ @@ -3120,7 +3120,7 @@ def id(self): # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring def _region_class(self, x): # pylint: disable=unused-argument return x.__class__.__name__.replace('RegionType', '') - + def _get_recursive_regions(self, regions, level, classes=None): from .constants import PAGE_REGION_TYPES # pylint: disable=relative-beyond-top-level,import-outside-toplevel if level == 1: @@ -3146,7 +3146,7 @@ def _get_recursive_regions(self, regions, level, classes=None): ret.append(r) ret += self._get_recursive_regions(more, level - 1 if level else 0, classes) return self._get_recursive_regions(ret, 1, classes) - + def _get_recursive_reading_order(self, rogroup): if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable elements = rogroup.get_AllIndexed() @@ -3158,12 +3158,12 @@ def _get_recursive_reading_order(self, rogroup): if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable regionrefs.extend(self._get_recursive_reading_order(elem)) return regionrefs - + def get_AllRegions(self, classes=None, order='document', depth=0): """ Get all the ``*Region`` elements, or only those provided by `classes`. Return in document order, unless `order` is ``reading-order``. - + Arguments: classes (list): Classes of regions that shall be returned, \ e.g. ``['Text', 'Image']`` @@ -3174,7 +3174,7 @@ def get_AllRegions(self, classes=None, order='document', depth=0): omitted (``reading-order-only``) depth (int): Recursive depth to look for regions at, set to `0` for \ all regions at any depth. Default: 0 - + Returns: a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \ :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \ @@ -3184,7 +3184,7 @@ def get_AllRegions(self, classes=None, order='document', depth=0): :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \ :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \ and/or :py:class:`CustomRegionType` - + For example, to get all text anywhere on the page in reading order, use: :: '\\n'.join(line.get_TextEquiv()[0].Unicode @@ -3218,14 +3218,14 @@ def get_AllRegions(self, classes=None, order='document', depth=0): def get_AllAlternativeImages(self, page=True, region=True, line=True, word=True, glyph=True): """ Get all the ``pc:AlternativeImage`` in a document - + Arguments: page (boolean): Get images on ``pc:Page`` level region (boolean): Get images on ``pc:*Region`` level line (boolean): Get images on ``pc:TextLine`` level word (boolean): Get images on ``pc:Word`` level glyph (boolean): Get images on ``pc:Glyph`` level - + Returns: a list of :py:class:`AlternativeImageType` """ @@ -3245,11 +3245,11 @@ def get_AllAlternativeImages(self, page=True, region=True, line=True, word=True, if glyph: ret += this_glyph.get_AlternativeImage() return ret - + def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -3289,7 +3289,7 @@ def set_Border(self, Border): def get_AllTextLines(self, region_order='document', respect_textline_order=True): """ Return all the TextLine in the document - + Arguments: region_order ("document"|"reading-order"|"reading-order-only"): Whether to \ return regions sorted by document order (``document``, default) or by \ @@ -3297,7 +3297,7 @@ def get_AllTextLines(self, region_order='document', respect_textline_order=True) returned list (``reading-order``) or regions not in the reading order \ omitted (``reading-order-only``) respect_textline_order (boolean): Whether to respect `@textLineOrder` attribute - + Returns: a list of :py:class:`TextLineType` """ @@ -3311,7 +3311,7 @@ def get_AllTextLines(self, region_order='document', respect_textline_order=True) lo = reg.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom' ret += lines if lo in ['top-to-bottom', 'left-to-right'] else list(reversed(lines)) return ret - + def set_orientation(self, orientation): """ Set deskewing angle to given `orientation` number. @@ -3981,7 +3981,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -4473,7 +4473,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -4917,7 +4917,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -6234,12 +6234,12 @@ def __hash__(self): def get_AllIndexed(self, classes=None, index_sort=True): """ Get all indexed children sorted by their ``@index``. - + Arguments: classes (list): Type of children (sans ``Indexed``) to return. \ Default: ``['RegionRef', 'OrderedGroup', 'UnorderedGroup']`` index_sort (boolean): Whether to sort by ``@index`` - + Returns: a list of :py:class:`RegionRefIndexedType`, \ :py:class:`OrderedGroupIndexedType`, and \ @@ -6259,13 +6259,13 @@ def clear_AllIndexed(self): self.set_OrderedGroupIndexed([]) self.set_UnorderedGroupIndexed([]) return ret - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring def extend_AllIndexed(self, elements, validate_continuity=False): """ Add all elements in list `elements`, respecting ``@index`` order. With `validate_continuity`, check that all new elements come after all old elements - (or raise an exception). + (or raise an exception). Otherwise, ensure this condition silently (by increasing ``@index`` accordingly). """ if not isinstance(elements, list): @@ -6304,7 +6304,7 @@ def sort_AllIndexed(self, validate_uniqueness=True): elif isinstance(element, UnorderedGroupIndexedType): # pylint: disable=undefined-variable self.add_UnorderedGroupIndexed(element) return self.get_AllIndexed() - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring,missing-function-docstring def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments namespaceprefix_ = 'pc:' @@ -6706,7 +6706,7 @@ def get_UnorderedGroupChildren(self): """ # TODO: should not change order return self.get_RegionRef() + self.get_OrderedGroup() + self.get_UnorderedGroup() - + # end class UnorderedGroupIndexedType @@ -7166,12 +7166,12 @@ def __hash__(self): def get_AllIndexed(self, classes=None, index_sort=True): """ Get all indexed children sorted by their ``@index``. - + Arguments: classes (list): Type of children (sans ``Indexed``) to return. \ Default: ``['RegionRef', 'OrderedGroup', 'UnorderedGroup']`` index_sort (boolean): Whether to sort by ``@index`` - + Returns: a list of :py:class:`RegionRefIndexedType`, \ :py:class:`OrderedGroupIndexedType`, and \ @@ -7191,13 +7191,13 @@ def clear_AllIndexed(self): self.set_OrderedGroupIndexed([]) self.set_UnorderedGroupIndexed([]) return ret - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring def extend_AllIndexed(self, elements, validate_continuity=False): """ Add all elements in list `elements`, respecting ``@index`` order. With `validate_continuity`, check that all new elements come after all old elements - (or raise an exception). + (or raise an exception). Otherwise, ensure this condition silently (by increasing ``@index`` accordingly). """ if not isinstance(elements, list): @@ -7236,7 +7236,7 @@ def sort_AllIndexed(self, validate_uniqueness=True): elif isinstance(element, UnorderedGroupIndexedType): # pylint: disable=undefined-variable self.add_UnorderedGroupIndexed(element) return self.get_AllIndexed() - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring,missing-function-docstring def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments namespaceprefix_ = 'pc:' @@ -7620,7 +7620,7 @@ def get_UnorderedGroupChildren(self): """ # TODO: should not change order return self.get_RegionRef() + self.get_OrderedGroup() + self.get_UnorderedGroup() - + # end class UnorderedGroupType @@ -9710,7 +9710,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ diff --git a/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py b/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py index 594d664277..c1ad330f91 100644 --- a/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py +++ b/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py @@ -3,7 +3,7 @@ def extend_AllIndexed(self, elements, validate_continuity=False): """ Add all elements in list `elements`, respecting ``@index`` order. With `validate_continuity`, check that all new elements come after all old elements - (or raise an exception). + (or raise an exception). Otherwise, ensure this condition silently (by increasing ``@index`` accordingly). """ if not isinstance(elements, list): diff --git a/ocrd_utils/ocrd_logging.conf b/ocrd_utils/ocrd_logging.conf index 3595d64ccd..cca4614a17 100644 --- a/ocrd_utils/ocrd_logging.conf +++ b/ocrd_utils/ocrd_logging.conf @@ -5,7 +5,7 @@ # into your CWD, HOME or /etc. These directories are searched # in said order, and the first find wins. When no config file # is found, the default logging configuration applies (cf. ocrd.logging.py). -# +# # mandatory loggers section # configure loggers with corresponding keys "root", "" # each logger requires a corresponding configuration section below @@ -43,8 +43,8 @@ handlers=consoleHandler,fileHandler # as separate configuration sections like below # # example logger "ocrd_workspace" uses fileHandler and overrides -# default log level "INFO" with custom level "DEBUG" -# "qualname" must match the logger label used in the corresponding +# default log level "INFO" with custom level "DEBUG" +# "qualname" must match the logger label used in the corresponding # ocrd module # see in the module-of-interest (moi) # diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 1e7565afe9..bb15198980 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -8,11 +8,11 @@ levels below page (i.e. region, line, word, glyph) between relative coordinates w.r.t. a corresponding image and absolute coordinates w.r.t. the top-level image. This includes rotation and offset correction, based on affine transformations. - (Used by :py:class:`ocrd.workspace.Workspace` methods - :py:meth:`ocrd.workspace.Workspace.image_from_page` and + (Used by :py:class:`ocrd.workspace.Workspace` methods + :py:meth:`ocrd.workspace.Workspace.image_from_page` and :py:meth:`ocrd.workspace.Workspace.image_from_segment`.) -* :py:func:`rotate_coordinates`, +* :py:func:`rotate_coordinates`, :py:func:`shift_coordinates`, :py:func:`transpose_coordinates`, :py:func:`transform_coordinates` @@ -22,7 +22,7 @@ used to pass down the coordinate system along with images (both invariably sharing the same operations context) when traversing the element hierarchy top to bottom. (Used by :py:class:`ocrd.workspace.Workspace` methods - :py:meth:`ocrd.workspace.Workspace.image_from_page` and + :py:meth:`ocrd.workspace.Workspace.image_from_page` and :py:meth:`ocrd.workspace.Workspace.image_from_segment`.) * :py:func:`rotate_image`, diff --git a/ocrd_utils/ocrd_utils/image.py b/ocrd_utils/ocrd_utils/image.py index 3bc14e6612..10dfa1872a 100644 --- a/ocrd_utils/ocrd_utils/image.py +++ b/ocrd_utils/ocrd_utils/image.py @@ -41,12 +41,12 @@ def adjust_canvas_to_rotation(size, angle): """Calculate the enlarged image size after rotation. - + Given a numpy array ``size`` of an original canvas (width and height), and a rotation angle in degrees counter-clockwise ``angle``, calculate the new size which is necessary to encompass the full image after rotation. - + Return a numpy array of the enlarged width and height. """ angle = np.deg2rad(angle) @@ -58,11 +58,11 @@ def adjust_canvas_to_rotation(size, angle): def adjust_canvas_to_transposition(size, method): """Calculate the flipped image size after transposition. - + Given a numpy array ``size`` of an original canvas (width and height), and a transposition mode ``method`` (see ``transpose_image``), calculate the new size after transposition. - + Return a numpy array of the enlarged width and height. """ if method in [Image.ROTATE_90, @@ -159,7 +159,7 @@ def coordinates_for_segment(polygon, parent_image, parent_coords): - ``parent_coords``, its corresponding affine transformation, ...calculate the absolute coordinates within the page. - + That is, apply the given transform inversely to ``polygon`` The transform encodes (recursively): @@ -209,7 +209,7 @@ def rotate_coordinates(transform, angle, orig=np.array([0, 0])): by pure rotation, and subsequent translation back. However, since rotation necessarily increases the bounding box, and thus image size, do not translate back the same amount, but to the enlarged offset.) - + Return a numpy array of the resulting affine transformation matrix. """ LOG = getLogger('ocrd.utils.coords.rotate_coordinates') @@ -295,7 +295,7 @@ def shift_coordinates(transform, offset): ``offset`` of the translation vector, calculate the affine coordinate transform corresponding to the composition of both transformations. - + Return a numpy array of the resulting affine transformation matrix. """ LOG = getLogger('ocrd.utils.coords.shift_coordinates') @@ -312,7 +312,7 @@ def scale_coordinates(transform, factors): ``factors`` of the scaling factors, calculate the affine coordinate transform corresponding to the composition of both transformations. - + Return a numpy array of the resulting affine transformation matrix. """ LOG = getLogger('ocrd.utils.coords.scale_coordinates') @@ -438,7 +438,7 @@ def transpose_image(image, method): columns become rows (but counted from the bottom), i.e. all pixels get mirrored at the opposite diagonal; width becomes height and vice versa - + Return a new PIL.Image. """ LOG = getLogger('ocrd.utils.transpose_image') @@ -497,7 +497,7 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): Images which already have an alpha channel will have it shrunk from the polygon mask (i.e. everything outside the polygon will be transparent, in addition to existing transparent pixels). - + Return a new PIL.Image. """ if fill == 'none' or fill is None: diff --git a/ocrd_utils/ocrd_utils/os.py b/ocrd_utils/ocrd_utils/os.py index a416ccb12e..ecb4d827f2 100644 --- a/ocrd_utils/ocrd_utils/os.py +++ b/ocrd_utils/ocrd_utils/os.py @@ -156,7 +156,7 @@ def list_all_resources(executable, moduled=None, xdg_data_home=None): # code and data; `is_resource()` only singles out # files over directories; but we want data files only # todo: more code and cache exclusion patterns! - ['*.py', '*.py[cod]', '*~', 'ocrd-tool.json', + ['*.py', '*.py[cod]', '*~', 'ocrd-tool.json', 'environment.pickle', 'resource_list.yml', 'lib.bash']): continue candidates.append(resource) diff --git a/ocrd_validators/ocrd_validators/page.xsd b/ocrd_validators/ocrd_validators/page.xsd index edeac039d1..f096f34825 100644 --- a/ocrd_validators/ocrd_validators/page.xsd +++ b/ocrd_validators/ocrd_validators/page.xsd @@ -545,7 +545,7 @@ - The secondary script used in the text line + The secondary script used in the text line @@ -629,7 +629,7 @@ - The secondary script used in the word + The secondary script used in the word diff --git a/ocrd_validators/ocrd_validators/page_validator.py b/ocrd_validators/ocrd_validators/page_validator.py index 41ce0b9f94..456186913e 100644 --- a/ocrd_validators/ocrd_validators/page_validator.py +++ b/ocrd_validators/ocrd_validators/page_validator.py @@ -178,7 +178,7 @@ def compare_without_whitespace(a, b): def page_get_reading_order(ro, rogroup): """Add all elements from the given reading order group to the given dictionary. - + Given a dict ``ro`` from layout element IDs to ReadingOrder element objects, and an object ``rogroup`` with additional ReadingOrder element objects, add all references to the dict, traversing the group recursively. diff --git a/ocrd_validators/ocrd_validators/xlink.xsd b/ocrd_validators/ocrd_validators/xlink.xsd index f55eb6dae1..8283fe6697 100644 --- a/ocrd_validators/ocrd_validators/xlink.xsd +++ b/ocrd_validators/ocrd_validators/xlink.xsd @@ -1,75 +1,75 @@ - + - + - - - - - + + + + + - - - - + + + + - - - + + + - - - - - - - + + + + + + + - - - + + + - - - - - + + + + + - - - - - - - + + + + + + + - - - - + + + + - + - + diff --git a/tests/model/mets_bench_extreme.py b/tests/model/mets_bench_extreme.py index 63b30e31db..799e6f610e 100644 --- a/tests/model/mets_bench_extreme.py +++ b/tests/model/mets_bench_extreme.py @@ -50,7 +50,7 @@ def benchmark_find_files(number_of_pages, mets): benchmark_find_files_filegrp(number_of_pages, mets) benchmark_find_files_fileid(number_of_pages, mets) benchmark_find_files_physical_page(number_of_pages, mets) - # This is not really useful to measure. + # This is not really useful to measure. # We iterate all files in both cached and non-cached in the same routine # When no specific search parameters are provided # benchmark_find_files_all(number_of_pages, mets) @@ -94,7 +94,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s50(benchmark): @benchmark - def ret(): + def ret(): global mets_50 benchmark_find_files(50, mets_50) del mets_50 @@ -130,7 +130,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s500(benchmark): @benchmark - def ret(): + def ret(): global mets_500 benchmark_find_files(500, mets_500) del mets_500 @@ -168,7 +168,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s1000(benchmark): @benchmark - def ret(): + def ret(): global mets_1000 benchmark_find_files(1000, mets_1000) del mets_1000 @@ -205,7 +205,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s2000(benchmark): @benchmark - def ret(): + def ret(): global mets_2000 benchmark_find_files(2000, mets_2000) del mets_2000 @@ -242,7 +242,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s5000(benchmark): @benchmark - def ret(): + def ret(): global mets_5000 benchmark_find_files(5000, mets_5000) del mets_5000 diff --git a/tests/model/mets_bench_extreme_additional.py b/tests/model/mets_bench_extreme_additional.py index e699454e2b..67802a2da5 100644 --- a/tests/model/mets_bench_extreme_additional.py +++ b/tests/model/mets_bench_extreme_additional.py @@ -49,7 +49,7 @@ def benchmark_find_files(number_of_pages, mets): benchmark_find_files_filegrp(number_of_pages, mets) benchmark_find_files_fileid(number_of_pages, mets) benchmark_find_files_physical_page(number_of_pages, mets) - # This is not really useful to measure. + # This is not really useful to measure. # We iterate all files in both cached and non-cached in the same routine # When no specific search parameters are provided # benchmark_find_files_all(number_of_pages, mets) @@ -88,7 +88,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s500(benchmark): @benchmark - def ret(): + def ret(): global mets_500 benchmark_find_files(500, mets_500) del mets_500 diff --git a/tests/model/test_agent.py b/tests/model/test_agent.py index 57c741655e..6494b0c8db 100644 --- a/tests/model/test_agent.py +++ b/tests/model/test_agent.py @@ -35,7 +35,7 @@ def test_init_othertype(): def test_set_name(): ag = OcrdAgent(name='foobar') assert ag.name == 'foobar' - ag.name = 'barfoo' + ag.name = 'barfoo' assert ag.name == 'barfoo' diff --git a/tests/model/test_ocrd_mets.py b/tests/model/test_ocrd_mets.py index 64ea1eccfe..eba8d362da 100644 --- a/tests/model/test_ocrd_mets.py +++ b/tests/model/test_ocrd_mets.py @@ -164,7 +164,7 @@ def test_add_file_id_already_exists(sbb_sample_01): assert len(list(sbb_sample_01.find_files(ID='best-id-ever'))) == 1 if sbb_sample_01._cache_flag else 2 if sbb_sample_01._cache_flag: - # Does not work with caching + # Does not work with caching with pytest.raises(FileExistsError) as val_err: sbb_sample_01.add_file('OUTPUT', ID='best-id-ever', mimetype="beep/boop", force=True) else: diff --git a/tests/model/test_ocrd_mets_bench.py b/tests/model/test_ocrd_mets_bench.py index ace6387336..3acc29b2b3 100644 --- a/tests/model/test_ocrd_mets_bench.py +++ b/tests/model/test_ocrd_mets_bench.py @@ -65,7 +65,7 @@ def benchmark_find_files_fileid(number_of_pages, mets): assert_len(1, mets, dict(ID='FULL_0001_TIF')) # Worst case - does not exist assert_len(0, mets, dict(ID='FULL_0001_TIF-NOTEXISTS')) - + def benchmark_find_files_physical_page(number_of_pages, mets): # Best case - first physical page assert_len(1, mets, dict(pageId='PHYS_0001')) @@ -117,28 +117,28 @@ def result(): @mark.benchmark(group="search") def test_s5(benchmark): @benchmark - def ret(): + def ret(): global mets_5 benchmark_find_files(5, mets_5) @mark.benchmark(group="search") def test_s10(benchmark): @benchmark - def ret(): + def ret(): global mets_10 benchmark_find_files(10, mets_10) @mark.benchmark(group="search") def test_s20(benchmark): @benchmark - def ret(): + def ret(): global mets_20 benchmark_find_files(20, mets_20) @mark.benchmark(group="search") def test_s50(benchmark): @benchmark - def ret(): + def ret(): global mets_50 benchmark_find_files(50, mets_50) @@ -196,24 +196,24 @@ def ret(): @mark.benchmark(group="search") def test_s10_c(benchmark): @benchmark - def ret(): + def ret(): global mets_c_10 benchmark_find_files(10, mets_c_10) @mark.benchmark(group="search") def test_s20_c(benchmark): @benchmark - def ret(): + def ret(): global mets_c_20 benchmark_find_files(20, mets_c_20) @mark.benchmark(group="search") def test_s50_c(benchmark): @benchmark - def ret(): + def ret(): global mets_c_50 benchmark_find_files(50, mets_c_50) - + del mets_c_5 del mets_c_10 del mets_c_20 @@ -221,7 +221,7 @@ def ret(): def manual_t(): mets = _build_mets(2, cache_flag=False) - mets_cached = _build_mets(2, cache_flag=True) + mets_cached = _build_mets(2, cache_flag=True) # print("METS>--------------------------------------------------------------------") # print(mets) @@ -233,11 +233,11 @@ def manual_t(): benchmark_find_files(2, mets) print("-----Cached-Bench-------------------------------------------------------------") benchmark_find_files(2, mets_cached) - + print("-----Regular------------------------------------------------------------------") print("len=%d" % len(mets.find_all_files(fileGrp='SEG-REG'))) print(mets.find_all_files(fileGrp='SEG-REG')) - + print("-----Cached-------------------------------------------------------------------") print("len=%d" % len(mets_cached.find_all_files(fileGrp='SEG-REG'))) print(mets_cached.find_all_files(fileGrp='SEG-REG')) From e524f58ab4c21c743eac4c1c70d38e13b04353e9 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 26 Jun 2023 14:03:39 +0200 Subject: [PATCH 2/3] Remove newly introduced trailing whitespace Signed-off-by: Stefan Weil --- .circleci/config.yml | 2 +- Makefile | 4 ++-- tests/cli/test_bashlib.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 080052146b..03f4f89ad7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,7 @@ jobs: steps: - checkout - run: sudo apt-get -y update - - run: sudo make deps-ubuntu + - run: sudo make deps-ubuntu - run: make install deps-test - run: make test benchmark diff --git a/Makefile b/Makefile index f0ff387b20..5c1b9e3ae1 100644 --- a/Makefile +++ b/Makefile @@ -264,7 +264,7 @@ pyclean: .PHONY: docker docker-cuda # Additional arguments to docker build. Default: '$(DOCKER_ARGS)' -DOCKER_ARGS = +DOCKER_ARGS = # Build docker image docker: DOCKER_BASE_IMAGE = ubuntu:20.04 @@ -277,7 +277,7 @@ docker-cuda: DOCKER_FILE = Dockerfile.cuda docker-cuda: docker -docker docker-cuda: +docker docker-cuda: docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) . # Build wheels and source dist and twine upload them diff --git a/tests/cli/test_bashlib.py b/tests/cli/test_bashlib.py index fc043b5fd7..185a115096 100644 --- a/tests/cli/test_bashlib.py +++ b/tests/cli/test_bashlib.py @@ -49,7 +49,7 @@ def invoke_bash(self, script, *args, executable=None): return -1, "", str(e) finally: os.remove(scriptfile.name) - + def setUp(self): self.maxDiff = None super().setUp() From ca599d4e355581d75dc8f357da7fc4bcc28da6e7 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 6 Dec 2023 17:31:13 +0100 Subject: [PATCH 3/3] Remove newly introduced trailing whitespace Signed-off-by: Stefan Weil --- ocrd/ocrd/cli/workspace.py | 2 +- ocrd_models/ocrd_models/ocrd_file.py | 2 +- ocrd_validators/ocrd_validators/workspace_validator.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py index 250cc08b23..c75f3caa3b 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/ocrd/ocrd/cli/workspace.py @@ -56,7 +56,7 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup): A workspace comprises a METS file and a directory as point of reference. - Operates on the file system directly or via a METS server + Operates on the file system directly or via a METS server (already running via some prior `server start` subcommand). """ initLogging() diff --git a/ocrd_models/ocrd_models/ocrd_file.py b/ocrd_models/ocrd_models/ocrd_file.py index e8205a33d9..fd8cdd0684 100644 --- a/ocrd_models/ocrd_models/ocrd_file.py +++ b/ocrd_models/ocrd_models/ocrd_file.py @@ -235,7 +235,7 @@ def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filena mimetype (string): ``@MIMETYPE`` of this ``mets:file`` pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file`` loctype (string): ``@LOCTYPE`` of this ``mets:file`` - url (string): ignored XXX the remote/original file once we have proper mets:FLocat bookkeeping + url (string): ignored XXX the remote/original file once we have proper mets:FLocat bookkeeping local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping ID (string): ``@ID`` of this ``mets:file`` """ diff --git a/ocrd_validators/ocrd_validators/workspace_validator.py b/ocrd_validators/ocrd_validators/workspace_validator.py index 4061cd8871..9fc9ec7f60 100644 --- a/ocrd_validators/ocrd_validators/workspace_validator.py +++ b/ocrd_validators/ocrd_validators/workspace_validator.py @@ -115,10 +115,10 @@ def validate(*args, **kwargs): resolver (:class:`ocrd.Resolver`): Resolver mets_url (string): URL of the METS file src_dir (string, None): Directory containing mets file - skip (list): Validation checks to omit. One or more of - 'mets_unique_identifier', 'mets_file_group_names', + skip (list): Validation checks to omit. One or more of + 'mets_unique_identifier', 'mets_file_group_names', 'mets_files', 'pixel_density', 'dimension', 'url', - 'multipage', 'page', 'page_xsd', 'mets_xsd', + 'multipage', 'page', 'page_xsd', 'mets_xsd', 'mets_fileid_page_pcgtsid' download (boolean): Whether to download remote file references temporarily during validation (like a processor would)