From c5d3250d1c50430d789db09ace07f6d1d3e78e21 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Tue, 14 Mar 2023 17:31:14 -0400 Subject: [PATCH 01/22] pred2bq: Update schema parsing from prediction results. --- .../predictions_to_bigquery/executor.py | 200 +++++++++++------- .../predictions_to_bigquery/executor_test.py | 190 ++++++++++++----- .../7/prediction_logs-00000-of-00001.gz | Bin 0 -> 27429 bytes 3 files changed, 263 insertions(+), 127 deletions(-) create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/BulkInferrer/inference_result/7/prediction_logs-00000-of-00001.gz diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index 763e226f..a271d733 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -19,8 +19,7 @@ import datetime import os import re -from collections.abc import Mapping, Sequence -from typing import Any, List, Optional, Tuple, Union +from typing import Any, Optional, Union import apache_beam as beam import numpy as np @@ -31,7 +30,7 @@ from tensorflow_serving.apis import prediction_log_pb2 from tfx import types from tfx.dsl.components.base import base_beam_executor -from tfx.types import artifact_utils +from tfx.types import Artifact, artifact_utils # TODO(cezequiel): Move relevant functions in utils module here. 
from tfx_addons.predictions_to_bigquery import utils @@ -41,32 +40,50 @@ _DEFAULT_TIMESTRING_FORMAT = '%Y%m%d_%H%M%S' _REQUIRED_EXEC_PROPERTIES = ( 'bq_table_name', - 'bq_dataset', 'filter_threshold', - 'gcp_project', 'gcs_temp_dir', - 'vocab_label_file', ) _REGEX_CHARS_TO_REPLACE = re.compile(r'[^a-zA-Z0-9_]') +_REGEX_BQ_TABLE_NAME = re.compile(r'^[\w-]*:?[\w_]+\.[\w_]+$') -def _check_exec_properties(exec_properties: Mapping[str, Any]) -> None: +def _check_exec_properties(exec_properties: dict[str, Any]) -> None: for key in _REQUIRED_EXEC_PROPERTIES: if exec_properties[key] is None: raise ValueError(f'{key} must be set in exec_properties') -def _get_labels(transform_output_uri: str, vocab_file: str) -> Sequence[str]: - tf_transform_output = tft.TFTransformOutput(transform_output_uri) - tft_vocab = tf_transform_output.vocabulary_by_name(vocab_filename=vocab_file) +def _get_prediction_log_path(inference_results: list[Artifact]) -> str: + inference_results_uri = artifact_utils.get_single_uri(inference_results) + return f'{inference_results_uri}/*.gz' + + +def _get_tft_output( + transform_graph: Optional[list[Artifact]] = None +) -> Optional[tft.TFTransformOutput]: + if transform_graph is None: + return None + + transform_graph_uri = artifact_utils.get_single_uri(transform_graph) + return tft.TFTransformOutput(transform_graph_uri) + + +def _get_labels(tft_output: tft.TFTransformOutput, + vocab_file: str) -> list[str]: + tft_vocab = tft_output.vocabulary_by_name(vocab_filename=vocab_file) return [label.decode() for label in tft_vocab] -def _get_bq_table_name( - basename: str, - timestamp: Optional[datetime.datetime] = None, - timestring_format: Optional[str] = None, -) -> str: +def _check_bq_table_name(bq_table_name: str) -> None: + if _REGEX_BQ_TABLE_NAME.match(bq_table_name) is None: + raise ValueError('Invalid BigQuery table name.' 
+ ' Specify in either `PROJECT:DATASET.TABLE` or' + ' `DATASET.TABLE` format.') + + +def _add_bq_table_name_suffix(basename: str, + timestamp: Optional[datetime.datetime] = None, + timestring_format: Optional[str] = None) -> str: if timestamp is not None: timestring_format = timestring_format or _DEFAULT_TIMESTRING_FORMAT return basename + '_' + timestamp.strftime(timestring_format) @@ -74,37 +91,67 @@ def _get_bq_table_name( def _get_additional_bq_parameters( - expiration_days: Optional[int] = None, - table_partitioning: bool = False, -) -> Mapping[str, Any]: + table_expiration_days: Optional[int] = None, + table_partitioning: Optional[bool] = False, +) -> dict[str, Any]: output = {} if table_partitioning: time_partitioning = {'type': 'DAY'} logging.info('BigQuery table time partitioning set to DAY') - if expiration_days: - expiration_time_delta = datetime.timedelta(days=expiration_days) + if table_expiration_days: + expiration_time_delta = datetime.timedelta(days=table_expiration_days) expiration_milliseconds = expiration_time_delta.total_seconds() * 1000 logging.info( - f'BigQuery table partition expiration time set to {expiration_days}' - ' days') + f'BigQuery table expiration set to {table_expiration_days} days.') time_partitioning['expirationMs'] = expiration_milliseconds output['timePartitioning'] = time_partitioning return output -def _get_features( - *, - schema_uri: Optional[str] = None, +# TODO(cezequiel): Move to a separate module with called functions. 
+# pylint: disable=protected-access +def _parse_features_from_prediction_results( + prediction_log_path: str) -> dict[str, Any]: + filepath = tf.io.gfile.glob(prediction_log_path)[0] + compression_type = utils._get_compress_type(filepath) + dataset = tf.data.TFRecordDataset([filepath], + compression_type=compression_type) + + for bytes_record in dataset.take(1): + prediction_log = prediction_log_pb2.PredictionLog.FromString( + bytes_record.numpy()) + + example_bytes = ( + prediction_log.predict_log.request.inputs['examples'].string_val[0]) + example = tf.train.Example.FromString(example_bytes) + features = {} + + for name, feature_proto in example.features.feature.items(): + feature_dtype = utils._get_feature_type(feature=feature_proto) + feature = tf.io.VarLenFeature(dtype=feature_dtype) + features[name] = feature + + return features + + +def _get_schema_features( + schema: Optional[list[Artifact]] = None, + tft_output: Optional[tft.TFTransformOutput] = None, prediction_log_path: Optional[str] = None, -) -> Mapping[str, Any]: - if schema_uri: +) -> dict[str, Any]: + if schema is not None: + schema_uri = artifact_utils.get_single_uri(schema) schema_file = os.path.join(schema_uri, _SCHEMA_FILE_NAME) return utils.load_schema(schema_file) - if not prediction_log_path: - raise ValueError('Specify one of `schema_uri` or `prediction_log_path`.') + if tft_output is not None: + return tft_output.raw_feature_spec() - return utils.parse_schema(prediction_log_path) + if prediction_log_path is None: + raise ValueError( + 'Specify one of `schema`, `tft_output` or `prediction_log_path`.') + + return _parse_features_from_prediction_results(prediction_log_path) def _get_bq_field_name_from_key(key: str) -> str: @@ -112,8 +159,7 @@ def _get_bq_field_name_from_key(key: str) -> str: return re.sub('_+', '_', field_name).strip('_') -def _features_to_bq_schema(features: Mapping[str, Any], - required: bool = False): +def _features_to_bq_schema(features: dict[str, Any], required: bool = 
False): bq_schema_fields_ = utils.feature_to_bq_schema(features, required=required) bq_schema_fields = [] for field in bq_schema_fields_: @@ -128,8 +174,7 @@ def _features_to_bq_schema(features: Mapping[str, Any], def _tensor_to_native_python_value( - tensor: Union[tf.Tensor, tf.sparse.SparseTensor] -) -> Optional[Union[int, float, str]]: + tensor: Union[tf.Tensor, tf.sparse.SparseTensor]) -> Optional[Any]: """Converts a TF Tensor to a native Python value.""" if isinstance(tensor, tf.sparse.SparseTensor): values = tensor.values.numpy() @@ -139,7 +184,7 @@ def _tensor_to_native_python_value( return None values = np.squeeze(values) # Removes extra dimension, e.g. shape (n, 1). values = values.item() # Converts to native Python type - if isinstance(values, Sequence) and isinstance(values[0], bytes): + if isinstance(values, list) and isinstance(values[0], bytes): return [v.decode('utf-8') for v in values] if isinstance(values, bytes): return values.decode('utf-8') @@ -147,34 +192,35 @@ def _tensor_to_native_python_value( @beam.typehints.with_input_types(str) -@beam.typehints.with_output_types(beam.typehints.Iterable[Tuple[str, str, +@beam.typehints.with_output_types(beam.typehints.Iterable[tuple[str, str, Any]]) class FilterPredictionToDictFn(beam.DoFn): """Converts a PredictionLog proto to a dict.""" def __init__( self, - labels: List, - features: Any, + features: dict[str, tf.io.FixedLenFeature], timestamp: datetime.datetime, filter_threshold: float, + labels: Optional[list[str]] = None, score_multiplier: float = 1., ): super().__init__() - self._labels = labels self._features = features + self._timestamp = timestamp self._filter_threshold = filter_threshold + self._labels = labels self._score_multiplier = score_multiplier - self._timestamp = timestamp - def _parse_prediction(self, predictions: npt.ArrayLike): + def _parse_prediction( + self, predictions: npt.ArrayLike) -> tuple[Optional[str], float]: prediction_id = np.argmax(predictions) logging.debug("Prediction 
id: %s", prediction_id) logging.debug("Predictions: %s", predictions) - label = self._labels[prediction_id] + label = self._labels[prediction_id] if self._labels is not None else None score = predictions[0][prediction_id] return label, score - def _parse_example(self, serialized: bytes) -> Mapping[str, Any]: + def _parse_example(self, serialized: bytes) -> dict[str, Any]: parsed_example = tf.io.parse_example(serialized, self._features) output = {} for key, tensor in parsed_example.items(): @@ -191,17 +237,18 @@ def process(self, element, *args, **kwargs): # pylint: disable=missing-function del args, kwargs # unused parsed_prediction_scores = tf.make_ndarray( - element.predict_log.response.outputs["outputs"]) + element.predict_log.response.outputs['outputs']) label, score = self._parse_prediction(parsed_prediction_scores) if score >= self._filter_threshold: output = { - "category_label": label, # Workaround to issue with the score value having additional non-zero values # in higher decimal places. # e.g. 
0.8 -> 0.800000011920929 - "score": round(score * self._score_multiplier, _DECIMAL_PLACES), - "datetime": self._timestamp, + 'score': round(score * self._score_multiplier, _DECIMAL_PLACES), + 'datetime': self._timestamp, } + if label is not None: + output['category_label'] = label output.update( self._parse_example( element.predict_log.request.inputs['examples'].string_val)) @@ -212,9 +259,9 @@ class Executor(base_beam_executor.BaseBeamExecutor): """Implements predictions-to-bigquery component logic.""" def Do( self, - input_dict: Mapping[str, List[types.Artifact]], - output_dict: Mapping[str, List[types.Artifact]], - exec_properties: Mapping[str, Any], + input_dict: dict[str, list[types.Artifact]], + output_dict: dict[str, list[types.Artifact]], + exec_properties: dict[str, Any], ) -> None: """Do function for predictions_to_bq executor.""" @@ -223,36 +270,41 @@ def Do( # Check required keys set in exec_properties _check_exec_properties(exec_properties) - # get labels from tf transform generated vocab file - labels = _get_labels( - artifact_utils.get_single_uri(input_dict['transform_graph']), - exec_properties['vocab_label_file'], - ) - logging.info(f"found the following labels from TFT vocab: {labels}") - - # set BigQuery table name and timestamp suffix if specified. 
- bq_table_name = _get_bq_table_name(exec_properties['bq_table_name'], - timestamp, - exec_properties['table_suffix']) - - # set prediction result file path and decoder - inference_results_uri = artifact_utils.get_single_uri( - input_dict["inference_results"]) - prediction_log_path = f"{inference_results_uri}/*.gz" + # Get prediction log file path and decoder + prediction_log_path = _get_prediction_log_path( + input_dict['inference_results']) prediction_log_decoder = beam.coders.ProtoCoder( prediction_log_pb2.PredictionLog) + tft_output = _get_tft_output(input_dict.get('transform_graph')) + # get schema features - features = _get_features(schema_uri=artifact_utils.get_single_uri( - input_dict["schema"]), - prediction_log_path=prediction_log_path) + features = _get_schema_features( + schema=input_dict.get('schema'), + tft_output=tft_output, + prediction_log_path=prediction_log_path, + ) + + # get label names from TFTransformOutput object, if applicable + if tft_output is not None and 'vocab_label_file' in exec_properties: + labels = _get_labels(tft_output, exec_properties['vocab_label_file']) + logging.info(f'Found the following labels from TFT vocab: {labels}.') + else: + labels = None + logging.info('No TFTransform output given; no labels parsed.') + + # set BigQuery table name and timestamp suffix if specified. 
+ _check_bq_table_name(exec_properties['bq_table_name']) + bq_table_name = _add_bq_table_name_suffix( + exec_properties['bq_table_name'], timestamp, + exec_properties['table_time_suffix']) # generate bigquery schema from tf transform features bq_schema = _features_to_bq_schema(features) logging.info(f'generated bq_schema: {bq_schema}.') additional_bq_parameters = _get_additional_bq_parameters( - exec_properties.get('expiration_time_delta'), + exec_properties.get('table_expiration_days'), exec_properties.get('table_partitioning')) # run the Beam pipeline to write the inference data to bigquery @@ -262,14 +314,12 @@ def Do( prediction_log_path, coder=prediction_log_decoder) | 'Filter and Convert to Dict' >> beam.ParDo( FilterPredictionToDictFn( - labels=labels, features=features, timestamp=timestamp, - filter_threshold=exec_properties['filter_threshold'])) - | 'Write Dict to BQ' >> beam.io.gcp.bigquery.WriteToBigQuery( + filter_threshold=exec_properties['filter_threshold'], + labels=labels)) + | 'Write Dict to BQ' >> beam.io.WriteToBigQuery( table=bq_table_name, - dataset=exec_properties['bq_dataset'], - project=exec_properties['gcp_project'], schema=bq_schema, additional_bq_parameters=additional_bq_parameters, create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED, diff --git a/tfx_addons/predictions_to_bigquery/executor_test.py b/tfx_addons/predictions_to_bigquery/executor_test.py index 38447fb4..d1f01ebc 100644 --- a/tfx_addons/predictions_to_bigquery/executor_test.py +++ b/tfx_addons/predictions_to_bigquery/executor_test.py @@ -15,7 +15,8 @@ """Tests for executor.py.""" import datetime -from typing import Mapping, Sequence, Union +import pathlib +from typing import Union from unittest import mock import apache_beam as beam @@ -35,7 +36,7 @@ def _create_tf_example( - features: Mapping[str, Union[bytes, float, int]]) -> tf.train.Example: + features: dict[str, Union[bytes, float, int]]) -> tf.train.Example: tf_features = {} for key, value in 
features.items(): if isinstance(value, bytes): @@ -59,8 +60,8 @@ def _create_model_spec() -> model_pb2.ModelSpec: def _create_predict_request( - features: Mapping[str, Union[bytes, float, int]] -) -> predict_pb2.PredictRequest: + features: dict[str, Union[bytes, float, + int]]) -> predict_pb2.PredictRequest: tf_example = _create_tf_example(features) request_tensor_proto = tf.make_tensor_proto( values=tf_example.SerializeToString(), dtype=tf.string, shape=(1, )) @@ -73,7 +74,7 @@ def _create_predict_request( def _create_predict_response( - values: Sequence[float]) -> predict_pb2.PredictResponse: + values: list[float]) -> predict_pb2.PredictResponse: response_tensor_proto = tf.make_tensor_proto(values=values, dtype=tf.float32, shape=(1, len(values))) @@ -103,10 +104,10 @@ def setUp(self): self.filter_threshold = 0.5 self.dofn = executor.FilterPredictionToDictFn( - labels=self.labels, features=self.features, timestamp=self.timestamp, filter_threshold=self.filter_threshold, + labels=self.labels, ) def test_process(self): @@ -138,6 +139,30 @@ def test_process_below_threshold(self): with self.assertRaises(StopIteration): _ = next(self.dofn.process(element)) + def test_process_no_labels(self): + features = { + 'bytes_feature': tf.io.FixedLenFeature([], dtype=tf.string), + } + dofn = executor.FilterPredictionToDictFn( + features=features, + timestamp=self.timestamp, + filter_threshold=self.filter_threshold, + labels=None, + ) + element = _create_prediction_log( + request=_create_predict_request(features={ + 'bytes_feature': b'a', + }), + response=_create_predict_response([0.9]), + ) + expected = { + 'bytes_feature': 'a', + 'score': 0.9, + 'datetime': mock.ANY, + } + output = next(dofn.process(element)) + self.assertEqual(expected, output) + def _make_artifact(uri) -> types.Artifact: artifact = types.Artifact(metadata_store_pb2.ArtifactType()) @@ -146,7 +171,7 @@ def _make_artifact(uri) -> types.Artifact: def _make_artifact_mapping( - data_dict: Mapping[str, str]) -> 
Mapping[str, Sequence[types.Artifact]]: + data_dict: dict[str, str]) -> dict[str, list[types.Artifact]]: return {k: [_make_artifact(v)] for k, v in data_dict.items()} @@ -168,23 +193,31 @@ def setUp(self): 'gcs_temp_dir': 'gs://bucket/temp-dir', 'expiration_time_delta': 1, 'filter_threshold': 0.5, - 'table_suffix': '%Y%m%d', + 'table_time_suffix': '%Y%m%d', 'table_partitioning': True, 'vocab_label_file': 'vocab_file', } - self.executor = executor.Executor() - + self.enter_context( + mock.patch.object(executor, '_get_prediction_log_path', autospec=True)) + self.enter_context( + mock.patch.object(executor, + '_get_tft_output', + autospec=True, + return_value=object())) + self.enter_context( + mock.patch.object(executor, '_get_schema_features', autospec=True)) self.enter_context( mock.patch.object(executor, '_get_labels', autospec=True)) self.enter_context( - mock.patch.object(executor, '_get_bq_table_name', autospec=True)) + mock.patch.object(executor, '_check_bq_table_name', autospec=True)) + self.enter_context( + mock.patch.object(executor, '_add_bq_table_name_suffix', + autospec=True)) self.enter_context( mock.patch.object(executor, '_get_additional_bq_parameters', autospec=True)) - self.enter_context( - mock.patch.object(executor, '_get_features', autospec=True)) self.enter_context( mock.patch.object(executor, '_features_to_bq_schema', autospec=True)) @@ -193,15 +226,15 @@ def setUp(self): self.mock_pardo = self.enter_context( mock.patch.object(beam, 'ParDo', autospec=True)) self.mock_write_to_bigquery = self.enter_context( - mock.patch.object(beam.io.gcp.bigquery, - 'WriteToBigQuery', - autospec=True)) + mock.patch.object(beam.io, 'WriteToBigQuery', autospec=True)) self.enter_context( mock.patch.object(types.Artifact, 'set_string_custom_property', autospec=True)) + self.executor = executor.Executor() + def test_Do(self): self.executor.Do(self.input_dict, self.output_dict, self.exec_properties) @@ -215,30 +248,67 @@ def test_Do(self): class 
ExecutorModuleTest(parameterized.TestCase): """Tests for executor module-level functions.""" + def test_get_prediction_log_path(self): + inference_results = [_make_artifact('inference_results')] + expected = 'inference_results/*.gz' + output = executor._get_prediction_log_path(inference_results) + self.assertEqual(expected, output) + + @parameterized.named_parameters([ + ('no_inference_results', False), + ('inference_results', True), + ]) + def test_get_tft_output(self, has_transform_graph): + if has_transform_graph: + transform_graph = [_make_artifact('transform_graph')] + mock_tftransform_output = self.enter_context( + mock.patch.object(tft, 'TFTransformOutput', autospec=True)) + + _ = executor._get_tft_output(transform_graph) + + mock_tftransform_output.assert_called_once() + + else: + output = executor._get_tft_output(None) + self.assertIsNone(output) + def test_get_labels(self): mock_tftransform_output = self.enter_context( mock.patch.object(tft, 'TFTransformOutput', autospec=True)) mock_vocabulary_by_name = ( mock_tftransform_output.return_value.vocabulary_by_name) mock_vocabulary_by_name.return_value = [b'a', b'b'] - - transform_output_uri = '/path/to/transform_output' vocab_file = 'vocab' + tft_output = tft.TFTransformOutput('uri') - output = executor._get_labels(transform_output_uri, vocab_file) + output = executor._get_labels(tft_output, vocab_file) self.assertEqual(['a', 'b'], output) - mock_tftransform_output.assert_called_once_with(transform_output_uri) mock_vocabulary_by_name.assert_called_once_with(vocab_file) + @parameterized.named_parameters([ + ('project_dataset_table', 'gcp_project:bq_dataset.bq_table_name', True), + ('dataset_table', 'bq_dataset.bq_table_name', True), + ('table_only', 'bq_table_name', False) + ]) + def test_check_bq_table_name(self, bq_table_name, is_ok): + if is_ok: + try: + executor._check_bq_table_name(bq_table_name) + except ValueError: + self.fail('ValueError was raised unexpectedly.') + else: + with 
self.assertRaises(ValueError): + executor._check_bq_table_name(bq_table_name) + @parameterized.named_parameters([('no_timestamp', None, None), ('timestamp_no_format', _TIMESTAMP, None), ('timestamp_format', _TIMESTAMP, '%Y%m%d')]) - def test_get_bq_table_name(self, timestamp, timestring_format): + def test_add_bq_table_name_suffix(self, timestamp, timestring_format): basename = 'bq_table' - output = executor._get_bq_table_name(basename, timestamp, - timestring_format) + output = executor._add_bq_table_name_suffix(basename, timestamp, + timestring_format) if timestamp is None: expected = basename @@ -258,8 +328,8 @@ def test_get_bq_table_name(self, timestamp, timestring_format): ('table_partitioning_only', None, True), ('expiration_table_partitioning', 2, True), ]) - def test_get_additiona_bq_parameters(self, expiration_days, - table_partitioning): + def test_get_additional_bq_parameters(self, expiration_days, + table_partitioning): output = executor._get_additional_bq_parameters(expiration_days, table_partitioning) @@ -278,44 +348,60 @@ def test_get_additiona_bq_parameters(self, expiration_days, } self.assertEqual(expected, output) + def test_parse_features_from_prediction_results(self): + test_data_dir = pathlib.Path( + 'tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output') + prediction_log_path = (test_data_dir / + 'BulkInferrer/inference_result/7/*.gz') + output = executor._parse_features_from_prediction_results( + str(prediction_log_path)) + self.assertIn('Culmen Depth (mm)', output) + self.assertEqual(tf.float32, output['Culmen Depth (mm)'].dtype) + @parameterized.named_parameters([ - ('error_no_input', None, None), - ('schema_uri_only', 'uri', None), - ('prediction_log_path', None, 'path'), - ('schema_uri_prediction_log_path', 'uri', 'path'), + ('error_no_inputs', False, False, False), + ('schema', True, False, False), + ('tft_output', False, True, False), + ('prediction_log_path', False, False, True), ]) - def test_get_features(self, schema_uri, 
prediction_log_path): - schema = { - 'feature': tf.io.FixedLenFeature([], dtype=tf.int64), - } + def test_get_schema_features(self, has_schema, has_tft_output, + has_prediction_log_path): mock_load_schema = self.enter_context( mock.patch.object(utils, 'load_schema', autospec=True, - return_value=schema)) - mock_parse_schema = self.enter_context( - mock.patch.object(utils, - 'parse_schema', - autopspec=True, - return_value=schema)) + return_value=has_schema)) + mock_raw_feature_spec = self.enter_context( + mock.patch.object(tft.TFTransformOutput, + 'raw_feature_spec', + autospec=True)) + mock_parse_features_from_prediction_results = self.enter_context( + mock.patch.object(executor, + '_parse_features_from_prediction_results', + autospec=True, + return_value=has_schema)) - if schema_uri is None and prediction_log_path is None: + if (has_schema is None and has_tft_output is None + and has_prediction_log_path is None): with self.assertRaises(ValueError): - _ = executor._get_features(schema_uri=schema_uri, - prediction_log_path=prediction_log_path) + _ = executor._get_schema_features(has_schema, has_tft_output, + has_prediction_log_path) + return - else: - output = executor._get_features(schema_uri=schema_uri, - prediction_log_path=prediction_log_path) + if has_schema: + schema = [_make_artifact('schema_uri')] + _ = executor._get_schema_features(schema, None, None) + mock_load_schema.assert_called_once() - if schema_uri: - mock_load_schema.assert_called_once_with(mock.ANY) - mock_parse_schema.assert_not_called() - elif prediction_log_path: - mock_load_schema.assert_not_called() - mock_parse_schema.assert_called_once_with(prediction_log_path) + elif has_tft_output: + tft_output = tft.TFTransformOutput('uri') + _ = executor._get_schema_features(None, tft_output, None) + mock_raw_feature_spec.assert_called_once() - self.assertEqual(schema, output) + else: + prediction_log_path = 'path' + _ = executor._get_schema_features(None, None, prediction_log_path) + 
mock_parse_features_from_prediction_results.assert_called_once() def test_features_to_bq_schema(self): mock_feature_to_bq_schema = self.enter_context( diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/BulkInferrer/inference_result/7/prediction_logs-00000-of-00001.gz b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/BulkInferrer/inference_result/7/prediction_logs-00000-of-00001.gz new file mode 100644 index 0000000000000000000000000000000000000000..058b96b6c399491666376283b00ba089ae13a23c GIT binary patch literal 27429 zcmV)5K*_%!iwFP!000001H_$oT$IbN1Q%?(5!vk{gBRyl2jw zna|8b=5n(B&w#Tly^U85%2I<;xRm?gh(Sa94CvLhXGCPTA^oB~el{q1BEIU@e_+3e z!5$+GdE7h<9=Y;3vu7^KV;99!DZrw74)bl+ZSY{QR_WhA^Y7a1tdvv=G#=2i z&(J@;?N}K<6 zzah~*di$zTmj61sPt*VpH^o(P=@c>8{8!~}9ns73j}*oHvxE;A8Zl_FQbNf)IC@CW zVa>bskMPK+F3| zkZ+&CQPEKYBL>$}d=!t&-zWPQwEjYok)>I-^D_CW+0V;eacL9r)w@sHEWGIl9(By8 z<`y+1df*WAUtKDXv-uA(^I-lzmvZ@k`a7952|db69*+mWCf#EEX{rSM2mmX1^)w!3t(0-+ZV9(B2SB)-O_g z&XITyHr|j)TL0sPYW{HKHhNKtWbBhK_OgZkyV!zGwP{tXMLvX_mA;5eHUeLEN|4SuT^C&)=f!U6vs$%A+dBk9|FN!dq zq;FJr%MTyX)A!3hX67P?^y}x_!}>SXBF>m%`R8!rU;E4MoBX#IXqtIB|G!RN$nl98RD>5`mkr6|K}0ir?)hoG$?aG4o79@ z@M+eqd&fn&=|aU<&`D4C!cQ9hYGllthjH#2&unL8CDwdKp_U_};&`z*DbVcXWsEH= zdg2>1Me3Ar#QgFqRkLB`ZMLM6&7(5k74IThGh5N!`V6o{v~&%r}OS3dr_ZswkC|3fO0{KEDP> zd}aP2Yw-_BYp_hXrSY2%({5=z79U+JNnZN+;ciEt-a%%tVY2cOjyK9j375LRO(su< z+A9g*cQv=OYTY-p(B+dM0Iyf?|4I*6hksw*ZzW)d|HcFj)h2}Fu8lhFP0oCdVZff0 z#yK(KP7@`8wif#yxH+yO*bBgD=O;>loP*8gEShkFzfl4FQ`4u4M@;%TcZrJ~cmQZH z5`=5mdPbC_1DOhp9-&b*yXbB#66*?wxw&4@U8TRjuSe<*4~X*o zcqrZ`FGZg$@(n`G?vH%Ym14a2uMCCwS&H0Bg<)fY6J>-kIXTS15 z-g?GKn9Ml5m(9Dhi!Cb+CQL4W)aZP>OvV!H`?WNt8In_R_DtdE7 zB!YavoFv1kC*<%V;O|MxYm|CIF^+#?D*v0~`CSnBkh_Kxw8l~w$82%GhYGjVaWC4Up@I*;4nn{4E zJmpO)J7TbV1-xx>c6K=n`16b@MKnZKu#Mof$*@b_+3xf^v-F`{LHQXZ?C=Z)3FgOTT6 zfFWOrio9oIH>CL-1%lw5m_53>m%^+}?;<>vnv93AH6RQ`z+>WU8YScbJDd^rO5tWr z66Vr7m8I-WWs?hn9_(rQ(lUB@F!+b|BF@rBamVf@DrE^VcfpHJ1eId~AyjhG3?5@R 
zzqM{2(Ga!6=4RtQ^$pZ8PAzG0ipW3chvduh``Nflds&MGV6n5n(mzJ&8-wNKs9iJX zaN?IUl5T7=@2It$rMQXfn>Zt1&E*hXhLKK@{Qe{nOOcEyi2$rwq*pqpIg*4;Jh++V z8?=sW%&j5rmVzlZ#uEz_ zMDkYNsI`VjDn|hD@EDlVPmvr>*}I-aU0BV|o*4($IuACF7@Emp>#N5U<70!e3FL5o zb`GazarlEl8EpWgXgD}S2pH9?V7M%HJXZo+!2=)SNR4*a-*H=rzi)nj%ccFu(3aHL ziLmZNSejfsXZ$;Z1$m!l3og!wD4f)4_g+2XIZ`l-oW|+v=F;Z2LU+OiJ7-WTLRJ~fO5e6;ifurcaB-mUH92uk?p7mN$vn|8q`PxBO&!)%Oy2BZ)%mDB;8oOa# zC4K0^@F*IWpH7f(wM}x8AyIjTe5P{f(*XPC>DA1Uoy{3zkXhQ1^nFKgE=5V5oY{gv zTR_>Dr>lSO6YN6!4AvzTyc^Ff+P4!mJZGAKWRbUn{b*PSf7%n#9XSbOLe zU-YOfsUSHq>OKwO5l@zn8-ro62bO{n$(O7IQorDl%Vt&bHp~0j7NH_4C#Qv!rUSFE0B#`w z3H)%=+l^&nD{{*ysI8ND`JO+;s;xZDdUad`;kEqst=LxoYa4KW(ZqPvpiBnw`X@VH z_p3Id58MS%1i_C%ScWi z(6t4F^8|tIp$c-gt15_+X{7J4GR!t)z^#JUk%|V6bHbIj*-Z39rJ>S5lZpCrwC7UV zZr=;f+0L>~>h0MZ!2IsE21oV1e{=e*F_thFJ2Oj}PUh)my_R9(k!Kl1*xV9Q_)F?6 zsuM15L75W#@+{fir%5goig-OTzgUPC6J05=fmK|wihb=r0ZgsU%5_PP_k)e)(q9V) zj(W7hn5-`+g$W3vaP|~tsRo(8org;hFeq^Q4G-Q&RpzE87NNuIePrGb{kh|S&yOSOfnPimW&r=o=nU==fAV#o`x|@0ocbaDoEIE-2j0BFN zzJ{o+5{fqXA!wTX!u?6WJ~$Q6-mkKotq+_50XDPj^U8W$nJ_;_;>V^`Yq2rEQf$30 zQjlzhm3J95z>MzWS*?E6!+%ja2if0k$Drf!IE45i90|^Y&36jW zr#41{>Pt7p@5JOxNo8eQQAAV3)>^%`js~-dka&$Bdy8F8y2tw7{}sZk?}=}Wdf=t4 z0*%Qa@xwR-d%4isoP^n2sxP>+2f`2x+ z(Sr{5u#0SD#s%iw5xgG1jvSHV*n;P>4Y}pCV)*AmfzyVwNZfn8IIqn>G=HgRxP@Vm zsq~3eiY9waEd1;^o)b55!uwhISF#HU3)pYv#zKH~3!k^vv4V{Ob<(g@7ss|@V=BrG zZpyOjt(h=3sp2y8;tj%v7i$^E2**b8|eHzkp zL`w}zV^?3MuxAHBIpfr1pUHZ(*4d#Fjw*1B9?v<3=lNl{7Z=z0>9&p)#-DB+p@x%_ zhi(5TOkZ8Of#BE_1k3_)NNsO?lCZurma-*LbJ;&Jqaeashy2!A4-dwx5ZO4O?S=F7}#Y7yb1y4AK(MFF*qDKJrOkp4II?CLF z=0SYT+WK)jJv!zA_#qbiU6ec7B8!>c z=!WTJWg<^`RnntR!qelukw4?OYEgo#bosum`OBpugDD*tHN}xnBSNsU99I=P#gN z91Wv31%LH~gj0o0vl-D3wryBAgEOOkl$VOjcWz4s+e+==*N&G{ajf5~zGRa1_P&Fx zV6g)%<~}&?v**j*-#L23)E?^o(c%Si9@z)U$8D$75+gjG<(13BiOwyGjhTuGzUO$&2B9mAQW%kZ-Q9hj{TX+G#XnjK@wMvabkG0f6-miQaFXUcNTD=!=Fd;1 zDqudyv|C}hv>W=4l@fZ(u3{9AWUU)C0Ou+)pk+s;W$dpBbJ?QLM?-|I9=`mn9v)1i zBK=(!9_dpf79j0T(NNc+F2*V0;=DyhR-!U-3)UCF-Wgc$h`cm<53-re1ZBT)YnDm^ 
zl6;M*gw7&`TXagUplwP{zHj`Q_iol|`BwIRf$0!nM?90OeVqwd<=KCR8~-*a+d#mU zWS6$i%IZj(Xi&z}6Q;&UkL;0K6sLWVp~;AD&eS(YTY8xrb(h2ZVu92hPjt;N6}Z-1 zXbm?=oh4_UEY}l|C~oV&juk$>g1spT_K4I;xVlM?;r}xGsGOW~Yp4&4%2lk=V)T&; zo?!rehA;D-kZb3azdF%g@kXds1psrsarlLjaVw)WR~Y{#81 z)AShr&)M7{DLLh(q<1^xbi;@AA)SHr(ep*H({@&ZP`b+$@J<_yu;1zuG(tvp5dOn` zMUi0)784{9t1&Wz`OV8<=OvGS$0$BW1 zXZJs@;cbG>58#Xs=d`3@5uK2UHI^CM;Ln3B=DUNedhG-@yb8FD>9hPZ0`y322>z~3 z#akBz276iid1Xs^`lXf06n}LJ<|vbtFCd~TJ!v-u|h%JlS>bs4Mw$G&y9$~w_ zzR5QIcAG`j{0$&_OSs>_$BrovLyI**IP+rA~ubr8eCCKs^L_B*psY|Hbw$$BuR;rW>007Bm4jLRjORN7%|lV z-q3-yJyY3;v8gPgEjR=F&WW}K_2^2H@%L@^b6$;m`R>l9VfQl+tcKlQuerhK$uCAD zr~rSoAJQDF%W2DkWo6+4z48_TC5cyL=s|X^Xae(0n*-t1?!zh3`i@x72~ps|RJ3I; zz{nlc%jwL;v9@6;=U26(MBlhv-f);r!5xQk*D##9k}FAdpy?`1)79pmR90?7DqCC> zRP^uo*mJl(J#WabtZA)+$W}Sov?%;>JBkJBzYZmpuOTZ^Fs=A%FFVSW=0^=~pVa6X zYr7^HsTHxa{=Ms;1a?1l4>NQG6BYk_`^|7Yycc0o=!(;@!rB(vYA@ym@W+;uwpx;w z(wmPNG#41%P9g$3Vm`tflv#LAJhir1Zzb8-6{pycRj1j)*I-q$WP{0nIj-MLx3Pbu z0eWHj^v)d8G}b)>0<^TMNe@%Jam$su<59as6(rI_)4E(wfn;|iUrup{*pOle*!ds9 z*sIqNQ;g#VJ_)Y2y8VS4CwC8M?9hPO5zU21un?vTacn+=!DHhocjW^90G<0l0|Gg3 z$FvoP*r>M&%()o2FPaq_y-ufz7Su2gZuwfWdH-YLTnqemlzy5f<3UXzwvUU~wLxgC zTIZ+yl!V$LAXB=llg<(wLq+0sXZ~I0dhH$?`3&rKi(dG?yB_`ve>}_YF5p!mr3#1v zA*l8hCb+AltJ{HuYPdaA>6HfhIqh9nhA{=HO+1m26Z8+jKJ2!geYtisYcPK@1X$_F z2Q&5YU-+j^!ETk$umO{TgHQrOv_wc!W8H8Z7B2=m?$A+mxB-ex!!w!6^Xy7rh4{cN zoQY!11Macv1@5!k+rbg&4L0}4?dV?0-SE)45KZHh$OjkoCWndcX6N^}nS|TJ$r*20o;JDEF-5mQ zczsu{{W*PfhWK&+LCMDpd{IPtk@4sf#FwN1;%Q`yYt_h#H1_r)j`8zkc%q72D=<7C zjh6!YxD^5=^q~ctj*$nal|@bavRCl&<^|Axk3rdeH$pD*mUI5Wj#EY-d1xEoLA44?xWKry5`3d zAttort1tb0FQsh)f}|hL3Ekg!(ruHv`o{X$xLlI3&)!dE-Nzqg(U-x&)M>|x1?oW< z{=4YS-exT40T2muLzQYy`5n>FMF9a~^uynPvd(DHw$iv^Sen{lByN@v5bs#0L~^pi zBw@WqZDfBmTFW;0f@57{j{Y`Er?IZcnU$KCh&5>p&{$_#QIR+H1V06B5*y@%FBB;T zUZVtGIu)JeC5ij7v7ux%QG?-5g5+!Ji?{5fFI?28CBZz!kPjZ-)Wd@bi9z4XffdW=WjLZ$O4Ey&{l9gu6(!#%*YSYF#PN!T~TkF#Z4 z(^##3pwuw^(yP`wNeyKpcl_6Wu_#Th9R?>0KJS5Y38KJ%cfRH{qHl*m&0yMkA-VPx 
zL_J=t%_bs2*5Bnh0~$!azCQnqIqi7G?)KOS@%7=XbMtiS=%$uD%E{&e@u1SUTAezd zyN!07SX$pyG*yCw4t!%-;q9?xXoH_Z-{bf*HOR6D7>0Kc0bs?Z>2^-Coy#d$Q~~m^ z;-mFLl-q1ZgS+f$oz-}^{!5HG!c@9KJw%#r(n*85?{31pJ~V!4XI6!-!hqsVRfN6xz<-vo<&kr zOT+PL1Q@I)GpxX{J~FFqS3l&0(P?bUrzvb#l?4!D8+Y#LqR$*nvW*bT9a-OYuq;bt z1h_r69~#Hq9wn*S7!MV@F*9@>;Eo4z0oDWw!kLWieyMxSdqdt`?AGM1tj#A=A;@;c zRoJP={Bv?3Il4Ry{s_Zl$ED*}g{ZGV4^_|GG004hP!{p|xu-G&(@n5a5A@8tE6lV>NE@*saQ2qc>T$CNf8CfX)Xi$DI z$W5i3A5J686M5l>rtXIpUg}pdUhT!po#JMJlW7#LHo~?%+#cXI^fJePa=w7p0j=K8 z*8I7Lt;+?r@^?i9sdCSN0Hy(mQ-PSGn#a>-=UlzeHu?|Ovh zAYPSYJobLVOhAkBorKIKAdc-Qzl(iZ6ufvVuBe-=?*k&|0bTB$NDPYPOraM2SXADA z%IK0B@+X3m?O~F?5t1<;p)w!(dFUlRwBW8H!=a{Uq)$G{3RXYC@JsFOU zQTW}WZ-Z2+K6$98vdw}eBRU?4Gn*jaq1$ZgihHb-2`m>x7hAdZn@q+|sJDWR{}`0* zAY)6jzYJz)y$mK9lphVmkN$9R5?a^`Hjy%8OIsH2`HPNPg}&A6lCtk#!J(qNsp8eW zJ??V4R?=)rVXF;6Gc?ilJuSUHfQ62xi|-AK?_|>sZ(%?F0LE=TT^g?*p2?zn)n#tR z=LTgf$l~JcEY8egae_e^XRyzq?I+*)k&!j<3(1n@Bx%J@0zNq#Z>RUJJDkys3`y;^ zXuZq)#)erML+Q$IdUNt6F?7DfZNPrE>_r@V|1kJWZ6DY6k{%^4tZO{fo~L`?N9l5} zLx~}=!mSEUPGrVruSZ^N{tu4G%W|m61;K>{^8E$l_Jgy~`{p3+=_<|si;b)BoRxgN z9^xxJ{8*eGMSASB1fxr~piBr)3<;mgWeqh5(P=H#035h#b(;Z*Th+k#th*l|G{#Yv z`H8RcaTCYbeqc_sCrQ{RK6_Y1hn?(Ud2spa>w$%a{0B2zC{vrntFh`a@)w!z)J`D&LFI&c}cvkTz8| zRM27@+5P)UJD`Jv!ilKBN2HqgU^>D}~n$Tm=b4{Tf+;^66V(F=L5Q{#I9 zJR`}s-2^Hg|LvDFN^(~8V|R`=(w9ioN;a#EKYf->b3V(;7hMMNHSqk`ONVCi)%xl& z#rW8uYy$b3pZ&EtHS4wcgFzW>5O{R+%xXlLqSUOqnWfAQk$hirPge5%$jQ*~#OKI} zMpBjy&42-*oTPV|0khapbnQhfoqucSeUo+fy2HBWSp`wp^T_=T`ZRrFypflN6I~PB zNk@2JX%E(MtW&ROS(XL3TmXm3_I5MnG|3|SaGiOGKesCZCgzT(X!4mP>~6kwYuEgIS@LmTXn+kT^i_V04nsQVw zi-W`Zhi75_j$@f(=+f`ku&(EqvUaP$;X%EJWVF$PFw#?bP|J}$mPvk0j9(M^io=f} z{;fg{2R50eV{4eFYuKIy^Ed;0=!oRAw(_>1X?65+KJ5D&@oY@jIF_p(n9XR=;PGdA zyal)&j_6WV3=~Vxxo){w9Po9Plmq5s@KZfZg-7T?k3&>( zNAh*a`#c+5{2V*oWI4pwqAd&0w*TK-AlbR0G2Nid0Qq{8ov%k(eBE$Su96ld}1F%8xbA;CR3l?r{thuO=omzlBRJ3S)DNr3&usp?C3CXgqno4!0s@l8H zfWgs&x()Ol2pQOb=x&30MEB{@jY{@<_T9^QN@2w+y7hD5KGwc&99vchtSa@(JL#Mr 
zq9K8WYP2os@o95nly^l@PN3F|@*LLvMfkz3&UXNAvM~l(n8)o)TqB3EYP3og$j2*; zvh8d!7_z;kNyujXkjP@L9$+Wkz*Tb1?tlECLndTrI&XYwJZ?~Cf{?w<4%uH>kll1q zu2G5EL5BVD_MwBG{Y*NgyVEvAx8uY~;b0)P56U1XISR6Dh|?ZJVStM|^~#+UEa2Qv z`?AGX_OaE6_ptOfv%!@)1@f-d!>YO$$UkEuad{A6!^kcA4(ahE0iV*LX~i9G1DsYH+zt!XEl8_<76+eU9cG**1fTa6Z%fc zji9jPhkVqtuErM!$mqh!={*gk;9)cMy|;nsd`G-*tgGP!mXK&WzT6(BKlZ;`}Y zypFJ!3|x4^oS%p2;mc%x@PedtVhZ9C+mo_WWxA=u;g*od1 zf@zqnJMtg`o_nFAMuN3vmND6bAFNQ*`vP?*O?o^(FHO%B{q`Vh-zR}>YaRn(cI5U~ zef98V_&0=WiSLdF{vd_WxCZ@7bLrd}S_*pi^g`!iTq}(wkj#FE>)Yf(5qpzEbM-_U z7e1mY%qb`K(<_{OdR}DH2cKuB8R*MeyA^+|M=vKCPB&(BQA7STAbw4LbRNUH`bJKH z2pdIVvK1I-cw=i{D){X1**y? zib-|N;-7Krl6@LU6IJ9pIrYXA`Q#SW;>|qZX7hJ;1)G2eZQxpU zNhD?4Mklgf?;m1i>du8I3p{)Bj2_n|nUrk+j7rsVec5PcEAEH$Nbf$fL`NK_86G~i zqNq(7NhP>7!+|5zvtvE*-P#`<( zw_I*|cryIQLvk1)wCsxWtdfx6ZSf_}^Q-*{bkFD<&Q|i4nCz#N+8 zoI@YE`~(%*m`aQ($m@>f@yxfQ=4-@DQ#zTXEU`sA`@Z%zcK;_(B-`z)4WH`a$+T5f zLO2T?4w2JidogNOxXwOoZvZS=eZFqzMI?hyRx&P*L zc7NG1mUq}ZJh&6#n3%4;lSp1?G)Xhp_b}}lNt?P7zEjbQm%JL1K@1{ql4FAOVuM_5`B{hTt z;WuqRHgNDjdE)S36tSH)G$J5xBAZA|rZ#k!(n9U}-FJyP+zgZL=u{yg3-El-f?xm5 zPJZ(T1li@UCSTPDGVDmPv6IMI``V?vPfqG+yM2==IwYC>maz2jFcm2+zQSD&qqpDJ z+Y-07W!gQ+gAd;?v8L2=HoyTnzw-Nb)fC!4OsveFPjmoYU zIK_-zPcV<(pxVD_vtNemftmIPCj9j)^1qhMErbV)WP3tA${{2+*iXGMty9EtYU_Pm zz#jAAP9KM?eS(jW&dsV`7Q}3W#|d^aHjNE=u?WI!(-*~b3I5^VZ%;*$E@1p*R}9lu zpdJ7BV$3yPlNdlbm}Rg7NU{;BYV!syupX?+-9To1EG2Yox>Fkk-DhNX7jh6cGb^5U73Kz{}#$%^QeuQ zM=*a+mi#J-zXr##ae20}{VPEQ`~zJ6+S&Uh*K!y~U%li;Qvn*j)U zyf7Uv?aavVp*gjjU1-$64r&C)?Jp9L(i%23goJFi+g7$WdOfQd4(|T?wR>tA2hU(J znHr&LosbYt@NyWok=@L&4$I?zb&C%;+omT^yzSp%w=?(_Z|hm2r^DdLLxx#e}_Gwm3&j|!FbwtCVQT2gwk$jDiU1l{`D!KgNA8H_(ziqC0} zqXO|ctqy?EoK!Y9G=NmWR(w$sdn0i&)B$uMv;NMl>~{YREbe_U{GY$>mx7L)ji*&# z7ivj$!@qqU-W7=6ESl!;;HQ6l?>B%*Hj zlx6AXiDlPOSPeK+}_&9s>(=pb#))EM^m6NwrboemYAgmm?-|PRw$p4XJJZTZo zUg~;ls_qA`H@lw8zaW7c{zk=7#H|^-EZ);$*3W5|1MJ%EJ*-_mFfOam_Nyi>GBF!? 
zbKnhQl0o?i#O!%?%foK_XX7$ljD{w(8G(_v&QD#tBU%V zV;qx&6wMah=v|64=eRB*-;ji_o*i?FcA;XcP)i+FyUMM`nFLymQFz-CcCqV0w!|A8 zBm1Uto>a&6mHa!Ane)`@H}taAhhpoLC=RWQC&c&5;9qreBhFNcW4vUGNOmwxc*2Fr z)Dp>`2k0Ot0_~VZ6tv8EF4#_vDqQ~{eCLbeq~)5Otd=2mp3{=RK%h3 zv{58wM_XTIKQ+I^7F1pXQFeC8=*}H8DVu-S{k8FgL74?oHX{4d^2b?}-EvW`V_hY2 z-gfrxVrn4#*@Yk4pN)%SsmKNnNnnJ!?gbyBB7UDM6Oh?`Y!$8$W^a^uUM{Se##$FU z%8q{yb}uDYsC8VAg{#Q5WAf)b3)8`+40hljUP?5Nzxrwy@GKi_MBkd8KS4 zGdWRFAeaNY4IJ{$-DO&a_0MO02KR`|(@HULhu(YAGWZgGC zWpkfAW1CNHf%qDq)L=sMOun{us`Rlj(V$EM`FfU}uiIICU2sv(QltGFRZ8Tn6frw1 z@uPwdJ(uX_oM2*vqRF4s!uI|+7j~KLt0Cm9Za5?4QE#15(i3OJQcWaXt4t--pHfS! z?>9UP(bY4keh)po7k&t>I_E-TYLgLOn!t&^WbO@UdLwQUu^SSxzJ75-AmacgyRGMc z?p6rO*qCmZMO-O|En_A5dh_06*7)#ac6;Auh_9FPrwk-HFs#RQ%gvJL2HU z;J{~=v0b!B88@JE_gtvS7O^pX=#oOZ%ss1d){n_jBe!>bEXh~GK4-PU0~hs_x*Osv zZ?Op<>2W*CZnmLv3DMzl0gk4vdl=QYp=|H_&Zhw+W4qdTvs;NwT~3PEFLa?XJ&Oqg zQmPR2@%2J<(sg4#wMKq-^{=Z(Ai54;9UZ5K_Y#R(*Ow!{Vrs2iGEix?=0M4%kaAOP zQ^A)Dj<#LX?%Gost>@E@U`>E|=^RB(+Iv0N_mGW0@rcbcfo)0K+=evPH@3G?PR`B6 zFSy`17!h;@wHMl)8pX;EhF46qtZnT1GVuyec-+Z&#Gp(B@p_sauUlDonQz83 zBwp5m6?Q4^Pl1Yi1emp}oFzjx_z{L!#i)EqNMA@WJt7`vMPf4i+Uf)pz)}hr>HK8$ zJA%%7MSWRBy*#^w8qqo(g6m0o!;kfdypV;iJhxy+&tVXf*bk-8R`aV9$M@QqzrzUZ zerbp1G*H*U@x2&C!9bs?{+^cPt48(LY{|E8SWGXRwvVOeSuQy{9C?i z1E(_(U9LxC#^~X_?5N@-2PWBuURwK(a+25Wizi{onQQKy-Qp{J28D$SO{8b8B2&fS zq@t;cctZv`}6 zDj9_r5xaqLi+Ft9?2CDGs9;7BCIbAhNq|<~g~=uP`h147I`O27>U?$=#8=;9tB*SD zcI3RTV`IL+N)$KvwG;}!3lx64tC{h$xsL#GI$$vW)mrq4m)?XC_-N>z#CC4#OP6aO zcePxRB5Iol#~`|Hw7O7N54zCGuDF-SKB_-ylQ>O^t6;9Zm@buIfw%7JHLobuLUK}1 zGH+-VFj2;K##u}HO(yFV;O?oK7L-;u{&Nzd>tnxSaeB~&ArZh0HDNCARVE$_BC?rM zQtN$-_UFXRe? 
z^*HjLalb+N0i^3;cDk-+(RIp2IpLzOionUqNM&JDCni%B^1Sc9DIRW(@zqv;w2~~- zgy}^#VG?(qrDaO;Davtu;*II2=-sDk^J{Ebxy$U6BcNLK(|Tv~=;5`qf6a>=Npw9c=wlk{tBAXNaoav_nYdBhR@`={4ogni-gB5G6#ueEDMG4<2_Ug}lL zG6!2|?uYews|Q^ob?mj(MH+d&8he;`0OGXZ$YN&_j5ezJ(6=rn0|q*9Lz=-%5-Fzbd8`9oopElc=k>rcbc*C69IjjBGfTiomLj{NHQM!g!RBOH<7uS}0^W*eBicThV z{E?9ZWHR8CYs@+~>%Z_f);8hN+H&^H_If$XTHGWh`kYDg7)(GTHwl*#;;Ysb(POv za2H$RjxkL$>AD>d|EKY=L74#3^&~r8H?!z6d#=+O=%PO*a_GFh8!5JaO5w4o9i;Y+T!S(Zd-4dE)!u9p9+ix2W8I&JExE^PR>qZt_<~#8e zC8gg}Hn;8L8$JZ`PHRXnS|5R_%c~I>U)z;IT`(xJfYwuT?#jLzGV)Y?Gr*UfQQttK zwRESax*)N%TK&jr2(6{weLizs_Ea|Oyv2~4t8V9>B?3n9ySj>otRk5d3B+c3C6z@O zudYFFg_ywJG;@rFUJc@)iEN~!JFvUlV;{V@%Us%SfaqGi;&ByydVuf`cN#Vf>)T4$ zYgooskq)jFm9hT4gF$Nwb7>_6LI=D(ajqH$WzZX)KLvApGIh{e5|y7ha}8>*Yq{0# zHNbd2OdYz_(RASpN&WrFWlY$&%ZUfJ0?Gjb1JNMA$O<>AuGsg{ao8zYVrijl;VobK zCgt}e$oQ_l`){(m5jWVg^tBLOD>kejp~q{H{LMrj==e!=_~p2U_1yy3o0HQ=^ah0Q zut3gFWS_S<<1jySsu4zN#2MwpTexL>G)|#25$PujsTH~wRr`FJ2C?-+(${tLNTXtF z%yJ&J*v_EX7!$K&G(cNS;(D(#!SLTmRc$Em(MAXFsZKy@fV=C`*p=mDzlrQ}i%8eP@%-^PLE>-x*@ z=;6C?P@a628?Bf6YueJJv4=PSK}GXYT58Z0ukeSXgdAL0rwsZ6J|{D#0(FGA8IW-) zqi2$gbuXy?x~#C;&hsb;*MX-c_3&MKCX?^9u=oBWTlFF%Be82N5TDXR+J68;Uv~&~ z+Sk0x-Em04RP-J7cg0h*?<)D?A)B_~5lcw|N8ZHTF5g`b-zC@>Bl@mykUNi_qaesP zlZUr?&#SKS11H`L zx&GHUJ$#p>qCJ?Ggq8A%l+(fvAkf;kMg;W8(L7s{VcUd@TS??mw{Y`s%cNp7#q3)VgE!kJ`~KpV~Da*jeV{?6l6YvO#@|ttrmv_DjvP zY@{@3)~Xd*cdSh(WS;o^f6I&B%SLWb9f{XClasowhO>I5I5L;q2nyjM|NHew-WL<&I%DcKnHW-4dMpYS=f?`3Ed#ho#;$ z^u9}o2YrG{s2`pwt+omQ7il3An_g4Jyjt~Mnt2 zZh`8Grcj16hykbMqGXy3TUPk+1a?!dtu7Yce1!FY2>a$h6PoF1$nU%bCy*hnbM%1c z;ziX@hnG+vuL7q=Bz?W{Lw%l%WKvXOl4BiAJ&;mG0v?$UPUxk`E=%~(KOzinQJ1_L z3{HcZ3AYip+Z;_|s0LCljb7a{GM^ge=B~On0~;9|SGst^QSCrMbVbdsIQRzd8q;g; z0?bNNf&Bd033;*10D8BQH6KPjN~-*R0vMr4bzReaFm#vf_odXso64wvPXuqq8oiAX zj!kz(H_gx>D8Po`2eee0WB z{>k{+lYdxht5@ts{sHeQ5Y=s9OHk7ngB&tILa+;8Xu@n@2qN_5cuVRLHp(H=D$$Zw zlZqMC6oACRxp`tBn9Ak|;ty-lZn7z@z-d~|M=klXg8HETB?zvlE$PSf@LhVPJ$XV- zvEqj2f#E+_B!j7s(QHwWpO&R`#8~_~C$K*27I@$%lez!$F=r{-NhCZb@!I>3lR9Hc 
zF7-S?9)Tj`Qcb4O;RG>vtGHkh z9ZYOXoLJcGcGsdO4Yu@FdzG%BZr^YLf@|OC5I=oV^}vHRq&D7_=ufsC4BqZat*6)Q zlcrV>WCT8xR{CFo!-LW|!=K0zK-_ZYamj7ng1+n0ikGbC*w@VM1UR_s+KlV3^zdCM zL}c57lT&Nz{quW38g_5ACV=LY{p?>GW+7Z4!}tAk1EwWBQcVzlDlI+C5uqbJ6>e@} zb>5_6YM<+1$Jw}L-;Z%z*G3!SbmYfH9(RVx3akxJ6(L*N-lym;`ZzV7o_#{@5whc@ z+^ryb=jm08dRjr-yaN)i9c7)=3+tTKggoFJ|jzHE(Rq z4;?*a6W{(wWEy|`H;&yk+r$i6>~DgMD7NAz`Kqv3 zEtRO5K&MlqcNSBZzAC9!Y!{1;x%3#wOEnj2R*mZA^#3leF88_)dYhog;cLb3eUF7qrUdvyyLbYBq_oHB3zrnNhXq^^eQ~6-{ zcsmjw#Qv3r;)#tj9Ia3@OyEK_Z0gvSNW_S7Tt6yf38%lzMSHJ~7YeBDBMYko-NBZ| zA={E0{wM1g_3Emi-szu#QbPRWrMu;_tux_AIOl3Ark9vliCa^G4Xl&&(+8x*`nI9k z962GP_>iSdQ(V;9$6eJt7x#cNoV3bc>EXTLuh$uuuh=0-|^PT+8XDebE@h% zBxGKzCG~3E`5*rn@uZ(WLvL{(&=K}IF|^NjDDVM&MzD-iNzcN+f?<|Yg`lWpdjpnh=e~3ulDV?Zspn!C_jHPK}2XLv|%ZCa9jEW+4o zm@muO$3~{p>e5oLC*K>?_ujawg{CDye9hnf;cPwl(jJPCzf3r$tLN$-I0;gmlm9ZO zf|#IbMJ~+eQDh`y0}y&Z%wfR)Tep(aR(yRpMWL>5|6#6QzGe}XwnBVeKX5ZqkK0kk z&Y%31CUb8}0wG_$xs0|}8~trJgNbM?(9Mh-@hHLSd(F{WIQ1AGgN24YNxJIxE}%}R zUPxWE3e2lkc(NTDXPr=t>v>A4E|^J&gvgKKt)cV$J_@FdpY_+F}0rI ze8DNXfzSm$pMEkbs&*Q-e;}yDY)-?*-&&BwO~)t^Jf5X`WUcEPq}@KP_nPjZEnnrQVBYfsitkWrRL}e=>wy~&d(^P=bJyB zYPrwwy*6UVA`w3!Dv0S&}7IIhRK}<;5LPS%c?18ettx%!KfuZ_1ZSk@ZRm=E-$c7m|moEUZqd>8ZLuI0nJh&^YCYz78X`+EgqI8pxXs#;q^vJ3ak7B;V(!sVX5K*B3kG zt=8LDUcK7nGDMeCz2FxPRN9*Y(e$2dfycUPg*O*eubWu72V1LdB#?e6aAsJ+$WzHR zHN!w%t)12$!(_O#%aMHDFZqZys`8M9b=?f{rPgbk=BSZz)J|*sOVn!btW5$XT=fxF z51a4_{Lwm(Pp1chMn-;2IbdxuAWqlA!KldlOig;q_yRBWx1W90d4<5yC0)ICTy$*C z%QiAG*7m0hk@J_P+D>4HPx(vXBzfgsZ^KZWLO9B?E})iQa#sF-xM)3tlcsqOOVjMC zMAEhUnnCplcT=CP0P8`stHhOeT*h9T>Ivp%7*Q6{b>NQ4RF{cX1IgWwA9|%~7-J?U zZS9Fgv42&RUST+CCcc@@6w1lik8bFq)~f5Ou6hJ&39o&<{g|V2UK*P_IYKy_TbE2y zHKy(mz15wtVGJkE3TyX$R)gTRRofIQ=Ma-_py!xez$R*F8m&h6|HqV2pA_{`XMcJI zqHFCR*NeB!q^rk<$QQ;`gE9@I>s5BT{>-B5nu~H7R6^Uc$jiyehnLF~9TVR7=lxiC zampLc2%E3)U||09{|>5vVrI8()SzDGzc^xmV$5Q zyUzW|Dd~~^?lex@vDiNsVtT=xU8NK0+{CT&scg%`6!!k2xe#M5eaB4GgE8b|S+pVK z_1i*)XOH)dA3 zi7&cli9$W8dv`wqjHiVW*;`uUs7=IBO!OcrlgXR$k5I;9WSmZN(o#gBYc(p~t}2*( 
zlCU33C9)&;4zZf)F%V&w4=1L5kxAIX=LZeO2L@#wNLY*^8)1_S|3}zJgYpK8^SR$W z?>k0tv1hP#cpjJ`4@S6EFjDwkYr5Wo#1x$J9^pzrHt4G;m6Qt2qkOyfi;C*$JNVu2 z6xBVtTb}_DJ$)mi2Kf$-_{w}3wRo0{o8;NJL!`vZ$*BtVl8h}&rX&~9E^g|=8|;_k zx7o0teuV&x{rU8Y$V>nSoL%W{ylPOEf&l)U9l()U0Gb~NkCEbcpoJ1o=p1r+@HfLD zEO#}Y&Qj^D6f?b<8q3x;?q1T+BafA)R11i zGjX=y+h=f8bkx9z!L=ZfBWCwx(ykOONPKNdV*>`Ivh+RkAnHbB9mwY3morT4Zgtp{Obl!34}!{=~3$-#cw8!gjNB!G`&hV7gueXq_gM-{BU!h^ov zssS^Kg?w(PwOupVZP{7EO;{iCl+uJr42?xiqBzM{ty>4!ubu~3 zzTq<=zP>VcZ=(la_y@+n68!6w;FBUyD@{(Cf<;|2JA)tikY~G1?&`%DG?KZrU;fHX zJwj?p#MjuqH5WTpRvj7nGiMW>58b-4Q$zMA z=Xa!*AY_!MYf^=o6gk?uaOpgkwWSY#HsA=lob{kQ*GH!gW95$5C1n&!Bw#yV?_^D? z{lVO(P6Kx(57{zZ55Pzd_ zJmIeL=1>msFfzvGuvDSRG!9b?ZS|yYq@r}@^|zqa?Cp`I?CSh+5MZm%sCU231niT| zYYQ7+7?d3#V9T=mv^iP%{+|rW1Oxpenjibkv$4aJFr{7_YX!x3;GjPJ&20Jh?9zAl2lHng7zOi zLnY8JO4&2%pp~x+sOlxx!V7kF?sg^tb2$^w_P5^6dKhLvfITa8C0vgx00~iPtY{L- zIeFb`L4ovEM)>S6c%gO^@UC-4vvSzZ!E@WGa)^`DBns}J=_z*YCcaoHFa1XNWYvD= zRwjWxGtPntJFk4QP>=RrOgrjF^g^S}s$UzNeNyjK1Goh})@jeZGN0<)cYRmFUa%_9a!`woaO|Ixlt-7P#)tD3@ID zBNSgAvtKT{*ElQVBjht~@s%Ejv1m;?Qkt|Lo3_L+y=J-Yt6ZEJ!ogHR)B3)lItJ~^ zeVhrq%063tl|4EDO3^x0?fr=!if;Jao|Hxox{2WONGGR)=wyCV5Fl$I6w{=bP~WF3 zCyr80)<=Tv#UCQO@%#H``5e>iN-~z#GMSYulf;S)iG>*3a(wn|Jst$wB-4Nd8;Pta z6t;4Irs#t z_&S4KEIuD1?AHyi*6Bf*9;HK~7jk1dmJ_(;%R3gFH*tLkOj}z@{%dPO!L`XG^Vg9D zF{KG*ck!IQ_58eMGKBm9^P7$&EceZ8Y_b0>*673vh%oo=E{hzPH4CuJ>Bf(kT-cKn zv(6sraw=Rhgek_v%l>iB1R!PKb4AMKR0;V;xv**dcCo2Fwz3mJ zU|~HbWY$9mEwX{EH?TITrmZ*S1SZY-M63EIAc&U<&VzMd94ZCFW|vNUJm!z|lN!;% zS^9fUyxHA!wU6X$-rU`+!{i-oNj0#oZe#6H!yP+TYV0dF^c@orgyuZJgC^#n=0tUP z&vZD5<|Jsfl>k02>ZNOE5o1cO1YmnPz@{J@Uz%QNr&cmrU2cdB5G|5!^n+{KSyU!lalJS%#G;+x^w%o z?V`)I(LMHZ)t{{7XppWkt$UBwBj2y>0ZB~FYLbH8iam9H5P~O*a2n``&bk=mOTXu8 zQ17{lKez)UK`7++Xh@;Twq97_#m4=Hka%6rdx=#HxXcRwvK+!|aqZ#h|34=6%ceA| zu1uy|opy8PCzZg8r~OdGOr-XH&G3@p$Q*9|_Tat7#c*>OUE5 z{xeYYUu#H2j^DY@MIFDX_h%T&Fd3} zi0OHPOhB?%%P7&D$X2-GLG2CW^x;Z%%N#FG81<*Mj?$~RR##iersQ70n$-INf^1dp zp{w<%6;TZBvYn!eGKHOA0GWYIPMU#*AYbV?;zeFf&#R;3v$^!Y2c{=MF(i(B;cCU` 
zy7;|YJ6J~dEo}5w@Cn&Mx$CWmAJh5Gl$?|wyU)HA4)G2wI!52E+wB^TW1S0(pHP{? z1<)sSkUdl1aYqN6!W~bcF+@4M0twl{-`2A4(^jyjXU0R2MK#)gS`R-)8frOe#rq%U zlK`{Azo1k;I_TYeI$cMv;LskV#a?r;@&YmpR%YrAY@pu}#^Ccy5iFm(undXV?EU9h z{rAqXwi7^o&&(g^CA7~(taY+;Lu0x@nE@j9COcw}vJkuBqFhCD`N0+itjq~&kSPlX zaMrY^zT@|fUr)A{vHpFW)^izY8zA6zBekQ#s{BNK77o*>d`0NYS)8+Ir(Dsw>)7>A zR$>ioph@A4x$>}~l;!el>Nt^evz?*wm(V?K!`uCsL z`+k*4*nC$OV0>y&wt$2!%&vu=;H)VZ6bAQM<%Dpof`g~>YUF@-w)fI zW%ae3#xlGFDwG%rwmc8y(qTchWl9NS!fA!C;BESB>TagE?qK~Az_+Ke<5=@&^rZgMnB1{_Tz zLekH+PXlVq){mqR(fWdS$hA$``VLxv5#D_-^IW@|&AtKhRV|=zZwGE(3h^g8^(j?K z?^}g1VGPchmQi^C1XJid1ox?`O)H|u^MG{fWcn!nzpL?$A!56pYhK{u6J=<{xhy73l8Ny@-f}AOSV#D95%lf;9DRj z7Q%f}lYAw5A7KUe9Aa*L=0JRPOb$HfxGFhmZ>4$9O1ol-8FCd)O;1@98d}3ZOn2rL z_BA%h(Z$cBdkbT!)pvuh5z?!Daug3sdHd|I&AP$dYu{mA3xTKOjue@bzFqM+*@+LV?36tC59b0O5tr05fPAu1Yofv?{G~fPurEz4hqak zQ7jDDEUE$FNh~05qi5q)Phkz`A7%aDf)UuLNB0iv;la>eN}iQzx{|C%`k#zy!asf6 zCk4?ucZg+$tepB^^c8~}nCn!s-QJpjpr48ek+bO-fBWtvHv35;Yw-uTxn$ClL&x=~ zDGASi4)Uir&Nw-Ji1Y`642IGJF)QvUs3|{0YOl3t8=RcxhASai_}5gbD^=CreFhAU z9@K3hxU#O>faq?6dPMi>(XA>jBos*aRJ$6F^=wIuQOHFy_U(+5%-!<@JN9@9I6~HG zVOobw#?Ex!_|kaXpv(jrdz+oHzp@y+>7rcYjPD7-RtNG&YsnuJ{IT-NCCzLqN$vN- zDLbP-4SV1uylo~Zn6u$^ww-p8fc1-P@zpS3h{Vk6$`$r~>{a$#eK6dwu(b|4w@Ug?J*alE}-Lgdf@_P6(Vtja@J10|J2W_0iINESz9~FF9AD@zXt9+7b zHOur*@rt(g6q_;R-8flfd6PWj_+K{ z7o*f;O1b@LN;TL>hJV)7wQw1Hc2Yc>ID98N<_8LX%6!qqM-Oo<{y~FvkZa4#n&K&OBaRV0i_89|+I(;1t8p|G9#B1ygO7&z?+IL`HsuwCaRVo0#yUZNC2?T>^~PT)7EGM;e1?B+rvwlhl9Vn6P#+ zP^FPYv0BtAZ;~-n|1{S9lVhy*dho)mK4s!;#|HmjbS{i_tCi6*pBMhEN0j8bC6L?z z%g6(v(8{DmF^Dt&dCz_JxHHjN2hy{PVq?nF&u)$@%fDkYJKA_1>*De=gxOU0A*~#X z*}e0{s51vZvttf1rJ5MaO?qS7yof;<^Ab2anyaCcAAmTf{H#i@vVfWp^db}}3W`rk-S0?}L7LSIrL6%!t@ANHyt_}H`n*~g`z)cMu zvLQ1Ls`Qp3oK1aAk<^jA#-~BE?@BOm$v~RGux&(gMLz%*ZATGG=Q+ zLl*wcLAGpQJYx-i0b6{V{Jm43pp5*0T+-yfFpf*ZHu1uOHS4w-TnK?Zql2tsHB3YT zn*_F43`e->>*C}^Xn{d6EQ6UlTUxX$DOc@eHnGJa7BL*0J3MDuyLo!}GGxsY0l_d! 
z*et$O96RV>;D)5wKaOpl73Altth}gg~VY>LfF#T zlb(AvhbI%pgb3md&<}h_$UfbFhOysIvcq@58F>3TC%L)oVNSj2_Xu=26OexJv zvjXW5j6tKu1Q_~yoIqWwf>r;e0}o)sB?G0#dFv5-*eJ`F6sLVzP19i(b8{biz8hQx zvANKImwI4^j`a~IV1X3(;(R6Txwv@QF`TLM7dT3i2^wl#VpFAzn7$Y@ zLK7nQB{2(GypwJF{0~;7B$zA?$|7#mA8KO_RjJ(lcU>IHC zY={XnGKKGGl7A|tRxmE#uR&hJowXy7e6z+vWGYMdEJkQ3I!`&yTU7BlzK17$WBSxf zW!(!NW#eyv+Opcg`JMHctPcM^`)Y7i{j$?3cLpS4l6EbP9KiBTjjq?aUy(|M3=UWP zvY4r;lvm0&GXJ4bQ6Mk_yA2$IJ}wNT?Vh3D4nD;VSo9bS=bPcnHkhRA%OalY$lpq< zKi)bCPK{mJ#77UhNLN%(Zd~r1YO=9OFlV{fNqP)HZ>%{S0}h%=4D;eDK^``ejS`)W z-s1v}ZpVz3bxDes7<~QWrn`FZtDO1@qN>{OPrnT#iS8JVxKy#MD$YNrIjjyguGkpZe`(R_FZSF zZcF%RrGHrjWO3y1!&`1lqW4gLEx0n|H1@tJgz$W(ra(YTged7YdqGf!3Sp{xYF;Od9Fv~1%Esb@|&NhyjSaP2277R*B>Dk`$nM}winytBD*2R z8rchqpM(D$N9DwQsLP>v8{DK^xaiEA26X+qa=D7?km#D~=A@etU7b!BKcmyI9^kk+ zu{FNq659c%-JoCAc@o9}MErQj*Gm8sQ~B@QmPmO>>+=FOukn>-+xm(VaLaYPt>2zo zZTnMR_4nTUAikXT&W?2C$|UTU!8$|W54ygbbeFk3cV%lAovX*~hy_>>E!!(goVxZY z6Uj;6U_z}DGDTvmJ>Z`o=$h1Zeo8R8KXyogL{2MwC^ZkTW9ipnoLh#Z>$~f2>JL{H z^}F-?A-eL69C%6(?}h*V{mUg{Lm~3( z^iu6Au`#V|eH$lykjxJ2?9b_k2BcdSm?Wb6&x3R;T+&cdm`_%F#B|| zJ;Nc)bd9x=lftNOVvNjSw9=|MgJUzyrBIS>*bme1jhOl0T*r<3<0Li;kWC#`d zE-(isqB-cYBwp#|-PG6%iW=QJ0mAEQ!+jp*|i?;_ALXB#e^Q6_Ybs((dBj-nQdT%74d*RzL26jFZ~T~xg= zJ_XE7q#fSVG!w4d0r7tt4;z#TAY4zf!*w$YF0V(#*PG?2VVWutN0HZ*3v(HxczukLYG1vK3NeuYPENJ(wfw5f#zN|d<%tkm zH{D+D)#KGD{`*MgZ7S36d_QVHMQltbEZZ4^%4gq{)uL%5Hg~jg!iwWGZ^(fM{wqkx zX;HbpJP{AtacFI{ctTls3jJ#Q_M->v`v(u$v4a~Rx;#q_3ev-KQ9t1+>F1A)K||lo z+8(D@`dVAh&CeAHoO;(caHgjPJzQzF#n!5JCqvsx6aK;$5 z6xan+I6F=pmr>!Q^oJdqKPQkxk-0Krq z14SL(WEVohF#jN)FeXUKqJ&4d+2Oe68r{_VGxMtDu7au5HFFGK>j9U*IoI#;&J`mt zPJEP)HrGqg7JJ(>2&&C35@Go{?A%290p3HQvRr)PIHNZK`C@6TA@f0H4Wntey45bK z?i*V|ZE^+lTqDy*_0S{FC8!@van><;fU$s`ZU~xZf1iNf&`Y77rp}h9DJl=+p0KPS zBRT0~j`}8$@PC@3P7<%^$eXNV_Z#e3D5&q6v*?o#^+?l`1`j?_uyfjyah!5(RooTw zds;heu=XOfM6OF(Ulx(b$WLi+oq^3D!VD-Bn^|DeG#f;OuT~o#vHO!Bu@!|jgI#C^ zkJr;99l?>?5xo56MtOy;J2Gsm9Cj|+g6RuG=$lqU_;8IREjNKNi@MgROr}oAMpsVE 
zCLY#b^6^v^cQ18Rc~C&u=IFDAjWX#v(Bs@a<38)6Wq__fv(t4Yi>{L{%5k}&)2k{F zSUnt@fsOzR^bf~0gn8ec8y1d*YMtzZ*g~(%E*#$(LeG7U6x)(%Idd5i5$OqG>r#cB zR!hIvtY^Qs%ysw<2&);Ra&2|&rHG+nB6EQeqfUUbB)g`V_TUU)t3`=aruR_bv5fCr zfnFi{xe7wLc)MQPFkfPOKYW`xssD6!R$U+Lg81rksL}KPKecO-*HqIWKJ`l;PulD>6^ zGmcGmCC+ulx~T*OU)t;6OMW$fd~xD6)dV=S#l{Sy&+oi5cS8VdV;|ZxrdFzFBal~L ztkl#^UHD^OwN{%05M2GHtsSNhT;%7}*e;JoN!qz;)OocaAco-)z3qz+! z1qPBLqfL)oq&MX>mXR6Kt=O~js-vstS6RSea3ubu3X}D?7||$#XQjZ62Y;F-g8p2( z0yN<_H_X+FLrdw@>=5AK##Lv@Nw|6)q~U%CO`gPS&BeFO!`E4D*%pku?*Eo|hl3OK zHgn4ZT&Sru*18&R`UF11L^QWHt42Cj9%?HmUI^0C-KOsw5U5=U?6D8T=c?VH1vNU&@|LYBq)Aa7Ar^eH%uFXzrs$ai^F!R){#sw=o;`vO!d+55+2=&sXZiw$LLQRKz=>-WXgkltSEw%}6` zT+Jpe{!@=EJvKljlou^&dmIYfy8~x9QTML35e+BG{%Rn?Nd9pth`I$cOrxLK@56-e zs-$mTKQ`tA?48<;q-&a&t9qcSn>uO+sHJ~ZXv+jWvh?_8t$jNcO}85@NgutWF9I#9 zCe!=;nNNtbkaVGacxJ{aQl>gXC^n(NS$ zU;XvC5ydjgXnV$x6p5*TC;|BQ>|fIr`<}0=i7Uy-HbfHdx@bzmY;r~f(a#CUr25T? z_=JQjsji1w_cKqmTBlS9u8(iDa?zs%WY^qn(qgTXJ_NcW{c1~9<79;H+B@V-MzY{l zdw43*Lac4)qlGM(#VEqcIzFaLr4}JD$=9Kj+^TQ+yz0r#pk`3{tlck;D>34^0ezq1 zCwRp)?AQayV-LNO0@gV-4NecH-DlaMyo0T!LGK3x^#v&v;MlQXs>`Fm|Jz@u!!Q0j zJzcsB>l9Wohn!LSzne?*+C*J5d3s(o$)b$BD zbv;F-E?~RsIl||8I*3SI+nBjF<4Ih2&Pl{O zeu*VXG6~_D)}Yq8-4*zclt=2^WA<9HWCf6e#jzzUkc1@m1j!@?sspjbH%JqiyFi7} z^A6A!ys4&YdYf(HgAIGcR z|7d*v{H&SoI0tJ?jPVvu*rU6K;P@DNQtiCNikVcY-Ww(1EJY)Iyi0Px6I>M5gg6f- oAz`PVupNPUL!N#7&J}?o&U0^^! Date: Wed, 15 Mar 2023 13:31:17 -0400 Subject: [PATCH 02/22] pred2bq: Add integration test. 
--- .../integration_test.py | 124 ++++++++ .../transform_graph/5/metadata/schema.pbtxt | 290 ++++++++++++++++++ .../5/transform_fn/assets/Species | 3 + .../5/transform_fn/fingerprint.pb | Bin 0 -> 55 bytes .../5/transform_fn/saved_model.pb | Bin 0 -> 38511 bytes .../variables/variables.data-00000-of-00001 | Bin 0 -> 530 bytes .../5/transform_fn/variables/variables.index | Bin 0 -> 145 bytes .../5/transformed_metadata/asset_map | 1 + .../5/transformed_metadata/schema.pbtxt | 33 ++ 9 files changed, 451 insertions(+) create mode 100644 tfx_addons/predictions_to_bigquery/integration_test.py create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/metadata/schema.pbtxt create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/assets/Species create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/fingerprint.pb create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/saved_model.pb create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/variables/variables.data-00000-of-00001 create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/variables/variables.index create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/asset_map create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/schema.pbtxt diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py new file mode 100644 index 00000000..db6f6558 --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -0,0 +1,124 @@ +# 
Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Integration test for the predictions-to-bigquery component.""" + +import datetime +import logging +import os +import pathlib + +from absl.testing import absltest +from google.api_core import exceptions +from google.cloud import bigquery +from ml_metadata.proto import metadata_store_pb2 +from tfx import types +from tfx.types import artifact_utils + +from tfx_addons.predictions_to_bigquery import executor + +_BQ_TABLE_EXPIRATION_DATE = datetime.datetime.now() + datetime.timedelta( + days=1) + + +def _make_artifact(uri: pathlib.Path) -> types.Artifact: + artifact = types.Artifact(metadata_store_pb2.ArtifactType()) + artifact.uri = str(uri) + return artifact + + +def _make_artifact_mapping( + data_dict: dict[str, pathlib.Path]) -> dict[str, list[types.Artifact]]: + return {k: [_make_artifact(v)] for k, v in data_dict.items()} + + +class ExecutorBigQueryTest(absltest.TestCase): + """Tests executor pipeline exporting predicitons to a BigQuery table. + + Prerequisites: + - 'GOOGLE_CLOUD_PROJECT' environmental variable must be set. + - BigQuery API must be enabled. + - A BigQuery dataset named 'test_dataset' should exist. 
+ """ + def _get_full_bq_table_name(self, generated_bq_table_name): + return f'{self.gcp_project}.{self.bq_dataset}.{generated_bq_table_name}' + + def _assert_bq_table_exists(self, full_bq_table_name): + full_bq_table_name = full_bq_table_name.replace(':', '.') + try: + self.client.get_table(full_bq_table_name) + except exceptions.NotFound as e: + self.fail(f'BigQuery table not found: {full_bq_table_name} . ' + f'Reason: {e} .') + + def _expire_table(self, full_bq_table_name): + try: + table = self.client.get_table(full_bq_table_name) + except (ValueError, exceptions.NotFound): + logging.warning('Unable to read table: %s', full_bq_table_name) + else: + table.expires = _BQ_TABLE_EXPIRATION_DATE + self.client.update_table(table, ['expires']) + + def setUp(self): + super().setUp() + self.test_data_dir = pathlib.Path( + 'tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output') + self.input_dict = _make_artifact_mapping({ + 'transform_graph': + (self.test_data_dir / 'Transform/transform_graph/5'), + 'inference_results': + (self.test_data_dir / 'BulkInferrer/inference_result/7'), + 'schema': + (self.test_data_dir / 'Transform/transform_graph/5/metadata'), + }) + self.temp_dir = self.create_tempdir() + self.output_dict = _make_artifact_mapping( + {'bigquery_export': pathlib.Path(self.temp_dir.full_path)}) + self.gcp_project = os.environ['GOOGLE_CLOUD_PROJECT'] + self.bq_dataset = 'executor_bigquery_test_dataset' + self.bq_table_name = f'{self.gcp_project}:{self.bq_dataset}.predictions' + self.client = bigquery.Client() + self.client.create_dataset(dataset=self.bq_dataset, exists_ok=True) + self.exec_properties = { + 'bq_table_name': self.bq_table_name, + 'table_expiration_days': 5, + 'filter_threshold': 0.5, + 'gcs_temp_dir': 'gs://pred2bq-bucket/temp-dir', + 'table_partitioning': False, + 'table_time_suffix': '%Y%m%d%H%M%S', + 'vocab_label_file': 'Species', + } + self.generated_bq_table_name = None + + self.executor = executor.Executor() + + def tearDown(self): + 
self._expire_table(self.generated_bq_table_name) + + def test_Do(self): + self.executor.Do(self.input_dict, self.output_dict, self.exec_properties) + self.assertIsNotNone(self.output_dict['bigquery_export']) + bigquery_export = artifact_utils.get_single_instance( + self.output_dict['bigquery_export']) + self.generated_bq_table_name = ( + bigquery_export.get_custom_property('generated_bq_table_name')) + # Expected table name format by BigQuery client: project.dataset.table_name + self.generated_bq_table_name = (str(self.generated_bq_table_name).replace( + ':', '.')) + self._assert_bq_table_exists(self.generated_bq_table_name) + + +if __name__ == '__main__': + absltest.main() diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/metadata/schema.pbtxt b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/metadata/schema.pbtxt new file mode 100644 index 00000000..332bbd3c --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/metadata/schema.pbtxt @@ -0,0 +1,290 @@ +feature { + name: "Body Mass (g)" + type: INT + presence { + min_fraction: 1.0 + min_count: 1 + } +} +feature { + name: "Clutch Completion" + type: BYTES + domain: "Clutch Completion" + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "Comments" + type: BYTES + domain: "Comments" + presence { + min_fraction: 1.0 + min_count: 1 + } +} +feature { + name: "Culmen Depth (mm)" + type: FLOAT + presence { + min_fraction: 1.0 + min_count: 1 + } +} +feature { + name: "Culmen Length (mm)" + type: FLOAT + presence { + min_fraction: 1.0 + min_count: 1 + } +} +feature { + name: "Date Egg" + type: BYTES + domain: "Date Egg" + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "Delta 13 C (o/oo)" + type: FLOAT + presence { + min_fraction: 1.0 + min_count: 1 + } +} 
+feature { + name: "Delta 15 N (o/oo)" + type: FLOAT + presence { + min_fraction: 1.0 + min_count: 1 + } +} +feature { + name: "Flipper Length (mm)" + type: INT + presence { + min_fraction: 1.0 + min_count: 1 + } +} +feature { + name: "Individual ID" + type: BYTES + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "Island" + type: BYTES + domain: "Island" + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "Region" + type: BYTES + domain: "Region" + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "Sample Number" + type: INT + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "Sex" + type: BYTES + domain: "Sex" + presence { + min_fraction: 1.0 + min_count: 1 + } +} +feature { + name: "Species" + type: BYTES + domain: "Species" + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "Stage" + type: BYTES + domain: "Stage" + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +feature { + name: "studyName" + type: BYTES + domain: "studyName" + presence { + min_fraction: 1.0 + min_count: 1 + } + shape { + dim { + size: 1 + } + } +} +string_domain { + name: "Clutch Completion" + value: "No" + value: "Yes" +} +string_domain { + name: "Comments" + value: "Adult not sampled." + value: "Nest never observed with full clutch." + value: "Nest never observed with full clutch. Not enough blood for isotopes." + value: "No blood sample obtained for sexing." + value: "No blood sample obtained." + value: "Not enough blood for isotopes." + value: "Sexing primers did not amplify. Not enough blood for isotopes." 
+} +string_domain { + name: "Date Egg" + value: "11/10/07" + value: "11/10/08" + value: "11/10/09" + value: "11/11/07" + value: "11/11/08" + value: "11/12/07" + value: "11/12/09" + value: "11/13/07" + value: "11/13/08" + value: "11/13/09" + value: "11/14/08" + value: "11/15/07" + value: "11/15/08" + value: "11/15/09" + value: "11/16/07" + value: "11/16/09" + value: "11/17/08" + value: "11/17/09" + value: "11/18/09" + value: "11/19/07" + value: "11/19/09" + value: "11/2/08" + value: "11/20/09" + value: "11/21/07" + value: "11/21/09" + value: "11/22/07" + value: "11/22/09" + value: "11/23/09" + value: "11/24/08" + value: "11/25/08" + value: "11/25/09" + value: "11/26/07" + value: "11/27/07" + value: "11/27/09" + value: "11/28/07" + value: "11/29/07" + value: "11/3/08" + value: "11/30/07" + value: "11/4/08" + value: "11/6/08" + value: "11/7/08" + value: "11/8/08" + value: "11/9/07" + value: "11/9/08" + value: "11/9/09" + value: "12/1/09" + value: "12/3/07" + value: "11/14/09" + value: "11/18/07" + value: "11/5/08" +} +string_domain { + name: "Island" + value: "Biscoe" + value: "Dream" + value: "Torgersen" +} +string_domain { + name: "Region" + value: "Anvers" +} +string_domain { + name: "Sex" + value: "FEMALE" + value: "MALE" + value: "." 
+} +string_domain { + name: "Species" + value: "Adelie Penguin (Pygoscelis adeliae)" + value: "Chinstrap penguin (Pygoscelis antarctica)" + value: "Gentoo penguin (Pygoscelis papua)" +} +string_domain { + name: "Stage" + value: "Adult, 1 Egg Stage" +} +string_domain { + name: "studyName" + value: "PAL0708" + value: "PAL0809" + value: "PAL0910" +} diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/assets/Species b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/assets/Species new file mode 100644 index 00000000..d919d4f2 --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/assets/Species @@ -0,0 +1,3 @@ +Adelie Penguin (Pygoscelis adeliae) +Gentoo penguin (Pygoscelis papua) +Chinstrap penguin (Pygoscelis antarctica) diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/fingerprint.pb b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/fingerprint.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8f6e4a825f78de12273b5a6fbf9e13a64d53a2c GIT binary patch literal 55 zcmV-70LcFc@6X$v{PV8b$N>5~81tmmg2Lqc(nTQpj@FTtyVTE^0Vtfl NmigX*^5~iYG638xA=Cf> literal 0 HcmV?d00001 diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/saved_model.pb b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..c03d97d8fbf4b1749df5bee942a8e65fbeec09c1 GIT binary patch literal 38511 zcmeG_ZIBzuRXft|N_yJQ-nA`{?XxVO?X~V~Nh4`jE4bX<`s}lvJNs_e$t5IskfqtR z&RS_BY5j3fMSy&65g_D970DlBC_b*BAmK-ejSy__x>;h|2a#(ISV&o?!`{0zSaC- zrCqPwYG~JY$P`4|TBo(!u4;)zG6NHLD~(+(HBsw6*wIqcU?j-6o|Rz+i|e&YS5rH; 
zD?1t)ztw6r~!T>m~8EKcXqp0JF{lHWU{L@JFT{>Vfcy!@ynIY z?Hd%U4=VB;Om%5ey9*_5EjdKSQimaqsI;}3+N^AA zj-nHAV7H^G%~lP6VIjqtzQ?dm%|q;t_CVEpYj$!0;yMlZhqQO+A$p~zHM{lh0|GEn zZ_@rrH{`h8S?NY92GR|q9rz~+j$LWiyLF`4TbkapOYPS74efsSQl0ja^z@RB#wM0g zWD}9pF$1}Jv!>lA;d-;XBxQf}WAmSsI&L8#e3)g48_%v)Ew`S-I7yskM5$ zWAg>2?Lwu|Xx&q{>nJhJEtL)ntDsY062`B!uJ6FBF#cMldIyEa>V|Y9EnRam#uP}` ztTB{0Hc8FGc;$Y*W78HcK>W2vrK;UVw6u1wEV*QI5he^#H2%or_)T@;4@;pS0dL3m_ zm#+@;n3*EbsCT+cRH>XrHk2s9225^L?hXl!3<~XegodME?UHF-v(!*fTg?QJNsy>m z)4!UZcwdIu4OAyJtwyC;*F7k`w<;Y|dPz(ln074@M$Uld?zXOeYMn$_c8EEAQ-VVq zUDTZB#IbiB`LS3`kIXD($U~LIr}#iJ?{%LrxY9IdRZ3wP?zV^C{}3O)|c@ z+pJ=RNz|m9^(}|4lwrzviUQK=wj5%>bkb2 z-M_TkXuMkKR&P@x=!z2g0eQ&4J^`pwB0H5XYMQ3i7F+|^(6#zilxG~2N&P)!Bq~bm z-AKwQNjNWbw4F-3g2-8I(Rl&Bh*Ju(|7CpwxxPbb&L+B{rW2Rop$PGUmpTcvy2`+r zsHWYeLO`Z;E;lBtvvLS@wXq5W@w(Dsb3@HN56c8*>K(Oi&mOe|AfZze9j$#=OHV52 zbGh?b<;Yzlajsoe&gE9lZPkjmiYwL1YF;arB=X6>li&?FSm~hZR*~Gdxdz*WS2F2? zFrylSW6bNQ=^#`dOUJGGKa`EojL~5%fnSgyYH1h36FPkaCe>;Kp`uLAlIcdPT7JQ* zIXIxSYkmh}gM|v5wHnSi=$Y{#um{m)yD1S~@D|M4AdIyW=~+XuW@G`e)2AVcOPP+k zU1{!C8Y*4Mv|A12lN}8w_hz+@%o19X9Q1~thNCuYs?0MkjqcL$3=L6loXx*%uGWaL zXbbXEEKRJ1Tx=~8jghf+DGXO2a)B=U)Z9Ls8e>f<`)rECD!=ch{zn49(YvkcYf{3m zRE1QV?M&B7sax&J&h1PwbH1x>@91)#>29*(?)FY*V@IPbcIMfnF2K(% zoqaZ`Psz`jx=B5!sONI&d22;!tcOF{7(9T3RO!$qkhOF(6th!-oas>@2VkK*PvTLw z#3Z3;h-blA`lzK*ysAP665gPZ(Hz?AWDjuEJmqNza3|n|1=w8;ehvM1`IZ=};_Nei0J`RhC`^{edq);O_>8&2EcK&n-U%ZhU7gC(mz5+x=!K!6 z55&kmqgr%AAC!}a9Mxqf)JHO^D^3^zM|D(mEjgjTOLSdzLLHv26`|^T7X(sytcp(A zr=p7urRbD*gRYNN(J8|zx)G^a#ii&1s)JEMcPYGKp!=^@{H7S?v8qC4pQ?NV}q_M{Det7 z=exAm@XHD@QQrj>B_`@VHI&~XSWMKBDX9MjvH4bH*Odhy)w1x5R~CPkii>jMB1K%3 z7Z(-8MMZJZvbbnPTvQSltqSQR?v|1jwx@{cr1&%_rjsJ3lOm>*BBqlfrjsJ3lOm>* z827xGPI)n%@``Xs3c`}2uw+?SvLY-g2}@RG(?FTG5{k?j^sI+D*|db_48KYmmXy=6 zSWJpV@Q417|00nH{-S^Im+^2h7DFqw;iW{wlQK=~*)ODz!67vGHmI+uepjO=;L4WP zR_ir1`NCNG&!irk%I`_=JCcX-%w^It>@JV39=aLRNs!exnx@rOv+)2^#zq44GoWwW z=-xS(U+EY*0ucybc32}i7_Q~b*v=)qBNF%hWe>auRjeb6eo+cbCr3hui 
zSx$79xh&rl618`v;9tir4@1GU<)@}IhL5d`j8x7B8q4%=ja$x(FOIK$VLTckV{5-W z4nGeEq7s>}A|H3rcX!v^8`FX5Xo$otp6+0+FqucYdi9RJGo^LwO_b;k%tgn^6!X-z zI{?uMGO2srXz3)GV#fH*Cd?pQuo=3O9)l46pr14vB~q5l#*8d20VDZ7L@7>U)yj@8 z3t7xgv1|^r(=40E>-Fm-beBj#{hl9i+U;>hF6jy!#vuU|+=j(pKojvaKAuej=$T;{Q84t;$R& zSu)3x3QOi$vcQr>mRx4Z6_zZqB!hyF-u}ii(3&dE16HbKFF*uFaE=feV4^zeLl$E`{LrixRP{U zI0N|7!a6Xn<`C}ZE`~Xn$Jy3?CV_^W01s6yu zJun@M1Ar^Kz{S!*>9{-ql;s{ML(y?%0FWy#kVDY1Gyu4g3tVtIt_}cYwFgRII<_Fr zVLU9uBWU`oiheA&DMyAX|JGr ztFq!U*= zz1G_K{ev)kKODfSjD{fB=FWij>Cv0E^$mytP!`nlKxpVkq2Y<}$YBa_8!MatDazpA zOXcg)G4g&0ItM@!7F9fy*H$}pPx5BnI=2@k=7(4LRkX%lfgt)FQi8_tkbq_QD6gXx z_gZlLP70btyQ|e?wtNju!B>N!7@)me`6AkoUj&~S24Q)VjYCVpu1*B+suZNnJhnL* zyvEl&nt{LiJVL9q4V!6>de>hK^(?gbfoL)SJ)sy76V^{_7 z)Rkem+Sn`XMAVl+i$>z@7o+I z7`>#asSQ2ShWST7g}R9WH|{FNiBvmE3i%M+7*WI4VNX3U2coRN`M@oAc5kWo(OdU_ z(Ay}#js`A$cN^MQb{=pxO!M<~2zSRDc-Y@)GM{y~YJGI(vsS~Apneod{y_*X`3+iz zQq6pbU*)N;$j%N#N6P$?7 z1280G6Gg}-ip?eg=NXOn_3}<^8-IU32?&Pqi6Z0^B_N-Clt-C&gWD+Yhf!!?8|LQa z=d$agS@e{R;DE#LD3acDY(G%nb8H87YzOxe3mmFLd&Yyd=iw<1`s%3bd7v`%qoBA2 z1zJ2MZPNIAHsGlbdVTXgmmazJG@ah!RiQuSC z_C+-^80=_p*s;E_VKCV7;IOCq!k!Ken-0-T@alAkW`md~#zIOHEg&XS2>o2}`S#E>OaM2GPav_-ai_ZZI zpQ$yl@fnkUTjB#lvl!7bPdoWI!;7a8`=~p~D!381E4fek3eBx)3^;Fum?p*{K>5SG z`PJRd{tToa@-&b-dT-c8-C)gw5E0R7W%(-iJEW>&QCq& zZ}|eQm7W(lWZ{%xBN|5A0=JCB6BPk$1(xDkwcOwWd*TT9=&s^XzO4-tGQ zd4zwL?+F4>dxoWU@Tzl|(xTBpa@jGfaev{>PQIMUoKK@6*N7V7DQ@D{97l!^TB0_+ z7DrJVTbnqKA+5V|#X9$HMb-_9tE2TJR_kPKPH!Dl8Y(uUgTOe>Rv>iquY8Q|#&<{Z zwuMrj(S!Z6{G6WWH-S42=coJfs=Vn4{mq2|cZ`!fYXoe!A-joz@^y`b5+TtD%CGQY z*v;`0Rp#TxhMi%L!(^2FAIAih`q~1vL?$Gn{!iF6? 
z-KLNHlqY_ye1Qd}w;wu*DnwiUv;+$qdbs|_JqfmO)3Skxdm<5xMBo`&Fa!Pr6&W2O zG5^Z4i_&GP3u)8?G+G_?Ad^jQJVxh@JFDT+iS7Z~*JRc^asHYFr+aVL$aq|>LG)WM ze7hdGY4pyGvni<(Qw=_2BLAuPg&$tCKqeUKjJW)y<;HSfltuMmA6(>2^E^Cx;bt$o zZY26H8eR{)XmB0e60RNOkMN5pPb>rZp%;7XC-jUFe`F`eonB!_f6|BC3ln+*3XM6+ zlIP7Y9*i)OMGS5}lYBOi{ceQb+~Hi~aB*Ptus7rc9P8q_8$F%~F1G4jRSyiK=joLU zxM%D+Sanrltr%N<@YWz&gX)x?EkBM;Hevld{IkyHGFR+tB#f(i9G4Ku|0TgdFA)Dr zjpX4wBm0vo?a5CVom9WU$+!b4o?!PQ;VDw{A}6big`$^-xctCtc9w8W;U(toHIj3q zL|C-wkar!DckbpOBe@Tm^gs7Ez-cm}4(>9eVd-J&Fc1d64Kh4B&2!Ona9o7NyOoJN!#Ui+4q>(>ZL>*#o@gx~hNa}KS zE++SS|L|3NR~tD8vMT_a!D}7-f~K!KuPuS+!QK^;#Z9w|-fU>G*1B?O%9|{PyADe87K9h~Xa<0jSX@zAUt-xzOhUTh0;D62)irQdY<((3V@> zZER~zdiNjR$hf_YXh_4NQ4phn0loz*ta6cjQi3@X4K!13?Hi93F&-GxT!cr7d`5!# z%|>01=*$r5^cj~8kVQ@AvSLhD(NyVu%W657L+*>{5h?H@#*|O{3gSj%@yLc(a}%!! z&X`NG4Bn7it5>_qDclZ}Fw8veTQ_mx8!<2AD}Sd5UzQ$g?BUc=bmnboc%yc`Y6Rsl zf*O z;gZ*W7!n^2wLS#cs@_WB_E+l!lbyoz2Ie!|%5SD{f4rH(t>$J5w{V-ORl+U$?PQKU zRai35k_DD5vg9&LuCQc@B^fka*;&wVjbX zg0_iS59eIru=~g!jZuB#YRvNg=HFbT_k&upsC!&1%`RU>>0c!$sB-*j#B^c{ zD*VR8zC`Iu3Xfs*;0VUQr#Ft% zyD9JjXS^l8edivDzBzg4p8j|SqUO zGS!pU3)B#JkA&R?_?ooVhm)SXK9KkMDW((h^usU!RS7`#Vrp(Lgqqlle@(_PtosZ8 zqSLp9FuByvRecIoezYGZVe*M-@w)imrpQEmDmpzQe>?Ut{H+-LCI0zF4E{X!PWbCF z*nsIZ{Qge9iXTVGrLD7M?bk!kqmTW}!|>B^?1z%i$2f#P9*W6x8@Ok!qZ#+4)7dK- zhL>UfLSwgEy`8*(!8{roonDBt7eXVwIiFYcV%$?`7xx;<5)rrtQEUM}&GF`L3=EDHvAzVX2Y-OAMc}Uxd_ph@DiNl#jP!Jb}w`qW=#^- z@TscdQ%}laPhZj+-AXc-PhLnawKA>NXnM*TE+(&yvf&TI!X?A*kQ|UZn4-AW^Qf?0$YfO%6h2a{JijY_jd&g_MrwvY1Hjc1+%fUm4; zTa@t~ue6P4p06O98x<-L$!ojYx3sns&5mBA;{HHdnCO*bYDa(M`N9gsI^EscgKM}gkEHiPXJF2L z2_MtldR{%nJIbg^{v7`PY#1>73vWUkOiy?-%m;c+f`c{dttCF_Vd5J_yC{>rCcD8J z_NK$L%mQ~}FZ2}4d5mU^ygeJ}oATdhtQqNF;t#{$jT@8f-^bqxeolTc-7NE(X35FDP?FDkw*D-ukt4_VLPz<$t9=^J&;Dvv z$icnP0-x8NuudfME+q2ae9oa=aJ^CJeS;E(L%ZlgyGZ=dUUs3qtRHuwO>#)AxR6-s zi$uwVM9CM4RTmPgHWDZ7(N)NSy%6EDz2rqBfn$s@-)D@`w&oaP%)5vc) zphtLOx&|H1krpw}+oPN(slCu~F58EcXs&S}jro3WXV7(|fd*Ly> zPQ6xi>a`vLjsO#jWv5s$@nzswoZtoH$kb}d3BG3(B@XRu|({D0f!Bq2mk;8 literal 0 
HcmV?d00001 diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/variables/variables.data-00000-of-00001 b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..dfb85a805e12a5eb1287c928d15c88b62fbe7c40 GIT binary patch literal 530 zcmbu6O-{ow5QS;!&t#G|&9o7BU{wSw7D!y77qHyasSK%uY^M@zxB$oDelRAbiP*BR zpxjQN~iv+xqoW5f%@H)!#ziFNEh$)19Mir}_rD+kDjk literal 0 HcmV?d00001 diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/variables/variables.index b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transform_fn/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..b238b806d023886ffcd70b901f2e3ce514d7b2fb GIT binary patch literal 145 zcmZQzVB=tvV&Y(Akl~4U_HcFf4)FK%3vqPvagFzP@^Wn!8r#$E!@Ds$dtff0~acOF(Jy4!;cRnKnR5YZs=Aib-xV&#R(ZK literal 0 HcmV?d00001 diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/asset_map b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/asset_map new file mode 100644 index 00000000..3b7264a4 --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/asset_map @@ -0,0 +1 @@ +{"Species": "Species"} \ No newline at end of file diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/schema.pbtxt b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/schema.pbtxt new file mode 100644 index 00000000..d3845d77 --- /dev/null +++ 
b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Transform/transform_graph/5/transformed_metadata/schema.pbtxt @@ -0,0 +1,33 @@ +feature { + name: "body_mass_g" + type: INT +} +feature { + name: "culmen_depth_mm" + type: FLOAT +} +feature { + name: "culmen_length_mm" + type: FLOAT +} +feature { + name: "flipper_length_mm" + type: INT +} +feature { + name: "species" + type: INT + int_domain { + min: 0 + max: 2 + is_categorical: true + } + presence { + min_fraction: 1.0 + } + shape { + dim { + size: 1 + } + } +} From 9708b46ccc710987e2cce0ef20806c9a6e518f36 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Mon, 27 Feb 2023 13:08:14 -0500 Subject: [PATCH 03/22] pred2bq: Refactor executor.py. - Adds unit tests - Also adds credits to original code author --- data | 1 + 1 file changed, 1 insertion(+) create mode 120000 data diff --git a/data b/data new file mode 120000 index 00000000..2c787f25 --- /dev/null +++ b/data @@ -0,0 +1 @@ +/usr/local/google/home/cezequiel/datasets/pred2bq \ No newline at end of file From 7f256338971b1bdb8768b2516f2d4703aaabe96e Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Fri, 3 Mar 2023 13:31:19 -0500 Subject: [PATCH 04/22] pred2bq: Remove symlink to data folder - not needed. --- data | 1 - 1 file changed, 1 deletion(-) delete mode 120000 data diff --git a/data b/data deleted file mode 120000 index 2c787f25..00000000 --- a/data +++ /dev/null @@ -1 +0,0 @@ -/usr/local/google/home/cezequiel/datasets/pred2bq \ No newline at end of file From f4ca2210b1170eebd551657d6a4ab739278d6c5b Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Mon, 27 Feb 2023 13:08:14 -0500 Subject: [PATCH 05/22] pred2bq: Refactor executor.py. 
--- data | 1 + 1 file changed, 1 insertion(+) create mode 120000 data diff --git a/data b/data new file mode 120000 index 00000000..2c787f25 --- /dev/null +++ b/data @@ -0,0 +1 @@ +/usr/local/google/home/cezequiel/datasets/pred2bq \ No newline at end of file From 6938e586658113accb1abe7857c478d79a1b4d39 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Tue, 21 Feb 2023 12:20:10 -0500 Subject: [PATCH 06/22] pred2bq: Add integration test - executor to BQ Adds a test that runs the executor module's Beam pipeline using a DirectRunner and exports prediction data to an actual BigQuery table. --- data | 1 - 1 file changed, 1 deletion(-) delete mode 120000 data diff --git a/data b/data deleted file mode 120000 index 2c787f25..00000000 --- a/data +++ /dev/null @@ -1 +0,0 @@ -/usr/local/google/home/cezequiel/datasets/pred2bq \ No newline at end of file From cd17b95c897111f6a0c83ab77e3bfb046726d346 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Wed, 8 Mar 2023 16:55:48 -0500 Subject: [PATCH 07/22] pred2bq: Update component spec. --- .../predictions_to_bigquery/component.py | 139 ++++++++---------- .../{test_component.py => component_test.py} | 49 +++--- 2 files changed, 94 insertions(+), 94 deletions(-) rename tfx_addons/predictions_to_bigquery/{test_component.py => component_test.py} (52%) diff --git a/tfx_addons/predictions_to_bigquery/component.py b/tfx_addons/predictions_to_bigquery/component.py index c89c8ca9..f9c204ed 100644 --- a/tfx_addons/predictions_to_bigquery/component.py +++ b/tfx_addons/predictions_to_bigquery/component.py @@ -1,23 +1,20 @@ # Copyright 2023 The TensorFlow Authors. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, +# distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== # This code was originally written by Hannes Hapke (Digits Financial Inc.) # on Feb. 6, 2023. -""" -Digits Prediction-to-BigQuery: Functionality to write prediction results usually - from a BulkInferrer to BigQuery. -""" +"""Predictions-to-bigquery component spec.""" from typing import Optional @@ -26,103 +23,93 @@ from tfx.types import standard_artifacts from tfx.types.component_spec import ChannelParameter, ExecutionParameter -from .executor import Executor as AnnotateUnlabeledCategoryDataExecutor +from tfx_addons.predictions_to_bigquery import executor -_MIN_THRESHOLD = 0.8 -_VOCAB_FILE = "vocab_label_txt" +_MIN_THRESHOLD = 0.5 # pylint: disable=missing-class-docstring -class AnnotateUnlabeledCategoryDataComponentSpec(types.ComponentSpec): +class PredictionsToBigQueryComponentSpec(types.ComponentSpec): PARAMETERS = { - # These are parameters that will be passed in the call to - # create an instance of this component. 
- "vocab_label_file": ExecutionParameter(type=str), - "bq_table_name": ExecutionParameter(type=str), - "filter_threshold": ExecutionParameter(type=float), - "table_suffix": ExecutionParameter(type=str), - "table_partitioning": ExecutionParameter(type=bool), - "expiration_time_delta": ExecutionParameter(type=int), + 'bq_table_name': ExecutionParameter(type=str), + 'table_expiration_days': ExecutionParameter(type=int), + 'filter_threshold': ExecutionParameter(type=float), + 'table_partitioning': ExecutionParameter(type=bool), + 'table_time_suffix': ExecutionParameter(type=str), + 'vocab_label_file': ExecutionParameter(type=str), } INPUTS = { - # This will be a dictionary with input artifacts, including URIs - "transform_graph": - ChannelParameter(type=standard_artifacts.TransformGraph), - "inference_results": - ChannelParameter(type=standard_artifacts.InferenceResult), - "schema": - ChannelParameter(type=standard_artifacts.Schema), + 'inference_results': + (ChannelParameter(type=standard_artifacts.InferenceResult)), + # TODO(cezequiel): Implement schema or transform_graph logic + 'schema': (ChannelParameter(type=standard_artifacts.Schema)), + 'transform_graph': + (ChannelParameter(type=standard_artifacts.TransformGraph)), } OUTPUTS = { - "bigquery_export": ChannelParameter(type=standard_artifacts.String), + 'bigquery_export': ChannelParameter(type=standard_artifacts.String), } -class AnnotateUnlabeledCategoryDataComponent(base_component.BaseComponent): - """ - AnnotateUnlabeledCategoryData Component. 
+class PredictionsToBigQueryComponent(base_component.BaseComponent): - The component takes the following input artifacts: - * Inference results: InferenceResult - * Transform graph: TransformGraph - * Schema: Schema (optional) if not present, the component will determine - the schema (only predtion supported at the moment) - - The component takes the following parameters: - * vocab_label_file: str - The file name of the file containing the - vocabulary labels (produced by TFT). - * bq_table_name: str - The name of the BigQuery table to write the results - to. - * filter_threshold: float - The minimum probability threshold for a - prediction to be considered a positive, thrustworthy prediction. - Default is 0.8. - * table_suffix: str (optional) - If provided, the generated datetime string - will be added the BigQuery table name as suffix. The default is %Y%m%d. - * table_partitioning: bool - Whether to partition the table by DAY. If True, - the generated BigQuery table will be partition by date. If False, no - partitioning will be applied. Default is True. - * expiration_time_delta: int (optional) - The number of seconds after which - the table will expire. - - The component produces the following output artifacts: - * bigquery_export: String - The URI of the BigQuery table containing the - results. 
- """ - - SPEC_CLASS = AnnotateUnlabeledCategoryDataComponentSpec - EXECUTOR_SPEC = executor_spec.BeamExecutorSpec( - AnnotateUnlabeledCategoryDataExecutor) + SPEC_CLASS = PredictionsToBigQueryComponentSpec + EXECUTOR_SPEC = executor_spec.BeamExecutorSpec(executor.Executor) def __init__( self, - inference_results: types.Channel = None, - transform_graph: types.Channel = None, - bq_table_name: str = None, - vocab_label_file: str = _VOCAB_FILE, + inference_results: types.Channel, + bq_table_name: str, + bigquery_export: Optional[types.Channel] = None, + transform_graph: Optional[types.Channel] = None, + schema: Optional[types.Channel] = None, + table_expiration_days: Optional[int] = 0, filter_threshold: float = _MIN_THRESHOLD, - table_suffix: str = "%Y%m%d", table_partitioning: bool = True, - schema: Optional[types.Channel] = None, - expiration_time_delta: Optional[int] = 0, - bigquery_export: Optional[types.Channel] = None, - ): - + table_time_suffix: str = '%Y%m%d', + vocab_label_file: Optional[str] = None, + ) -> None: + """Initialize the component. + + Args: + inference_results: Inference results channel. + bq_table_name: BigQuery table name in either PROJECT:DATASET.TABLE. + or DATASET.TABLE formats. + bigquery_export: Outputs channel containing generated BigQuery table name. + The outputted name may contain a timestamp suffix defined by + `table_suffix`. + transform_graph: TFTransform graph channel. + If specified, and `schema` is not specified, the prediction + input schema shall be derived from this channel. + schema: Schema channel. + If specified, the prediction input schema shall be derived from this + channel. + expiration_days: BigQuery table expiration in number of days from + current time. If not specified, the table does not expire by default. + filter_threshold: Prediction threshold to use to filter prediction scores. + Keep scores that exceed this threshold. + table_partitioning: If True, partition table. 
+ See: https://cloud.google.com/bigquery/docs/partitioned-tables + table_time_suffix: Time format for table suffix in Linux strftime format. + Example: '%Y%m%d + vocab_label_file: Name of the TF transform vocabulary file for the label. + """ bigquery_export = bigquery_export or types.Channel( type=standard_artifacts.String) - schema = schema or types.Channel(type=standard_artifacts.Schema()) + schema = schema or types.Channel(type=standard_artifacts.Schema) - spec = AnnotateUnlabeledCategoryDataComponentSpec( + spec = PredictionsToBigQueryComponentSpec( inference_results=inference_results, + bq_table_name=bq_table_name, + bigquery_export=bigquery_export, transform_graph=transform_graph, schema=schema, - bq_table_name=bq_table_name, - vocab_label_file=vocab_label_file, + table_expiration_days=table_expiration_days, filter_threshold=filter_threshold, - table_suffix=table_suffix, table_partitioning=table_partitioning, - expiration_time_delta=expiration_time_delta, - bigquery_export=bigquery_export, + table_time_suffix=table_time_suffix, + vocab_label_file=vocab_label_file, ) super().__init__(spec=spec) diff --git a/tfx_addons/predictions_to_bigquery/test_component.py b/tfx_addons/predictions_to_bigquery/component_test.py similarity index 52% rename from tfx_addons/predictions_to_bigquery/test_component.py rename to tfx_addons/predictions_to_bigquery/component_test.py index f07bae40..2d78e888 100644 --- a/tfx_addons/predictions_to_bigquery/test_component.py +++ b/tfx_addons/predictions_to_bigquery/component_test.py @@ -1,52 +1,65 @@ # Copyright 2023 The TensorFlow Authors. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, +# distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== # This code was originally written by Hannes Hapke (Digits Financial Inc.) # on Feb. 6, 2023. -""" -Tests around Digits Prediction-to-BigQuery component. -""" +"""Tests for component.py.""" + +import unittest -import tensorflow as tf from tfx.types import channel_utils, standard_artifacts -from . import component +from tfx_addons.predictions_to_bigquery import component -class ComponentTest(tf.test.TestCase): +class ComponentTest(unittest.TestCase): def setUp(self): - super(ComponentTest, self).setUp() + super().setUp() self._transform_graph = channel_utils.as_channel( [standard_artifacts.TransformGraph()]) self._inference_results = channel_utils.as_channel( [standard_artifacts.InferenceResult()]) self._schema = channel_utils.as_channel([standard_artifacts.Schema()]) - def testConstruct(self): - # not a real test, just checking if if the component can be - # instantiated - _ = component.AnnotateUnlabeledCategoryDataComponent( + def testInit(self): + component_instance = component.PredictionsToBigQueryComponent( transform_graph=self._transform_graph, inference_results=self._inference_results, schema=self._schema, - bq_table_name="gcp_project:bq_database.table", - vocab_label_file="vocab_txt", + bq_table_name='gcp_project:bq_database.table', + vocab_label_file='vocab_txt', filter_threshold=0.1, - table_suffix="%Y", table_partitioning=False, + table_time_suffix='%Y%m%d', ) + self.assertCountEqual({ + 'inference_results', 
+ 'schema', + 'transform_graph', + }, component_instance.inputs.keys()) + self.assertCountEqual({'bigquery_export'}, + component_instance.outputs.keys()) + self.assertCountEqual( + { + 'bq_table_name', + 'table_expiration_days', + 'filter_threshold', + 'table_partitioning', + 'table_time_suffix', + 'vocab_label_file', + }, component_instance.exec_properties.keys()) -if __name__ == "__main__": - tf.test.main() +if __name__ == '__main__': + unittest.main() From 60faa2deb0aa090302e34072ca6f40e1868b549d Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Thu, 16 Mar 2023 17:50:37 -0400 Subject: [PATCH 08/22] pred2bq: Update utils.py. --- .../predictions_to_bigquery/executor.py | 77 +----- .../predictions_to_bigquery/executor_test.py | 81 +----- tfx_addons/predictions_to_bigquery/utils.py | 258 ++++++++++-------- .../predictions_to_bigquery/utils_test.py | 156 +++++++++++ 4 files changed, 300 insertions(+), 272 deletions(-) create mode 100644 tfx_addons/predictions_to_bigquery/utils_test.py diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index a271d733..fd2fc7b7 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -17,7 +17,6 @@ """Implements executor to write BulkInferrer prediction results to BigQuery.""" import datetime -import os import re from typing import Any, Optional, Union @@ -32,10 +31,8 @@ from tfx.dsl.components.base import base_beam_executor from tfx.types import Artifact, artifact_utils -# TODO(cezequiel): Move relevant functions in utils module here. 
from tfx_addons.predictions_to_bigquery import utils -_SCHEMA_FILE_NAME = "schema.pbtxt" _DECIMAL_PLACES = 6 _DEFAULT_TIMESTRING_FORMAT = '%Y%m%d_%H%M%S' _REQUIRED_EXEC_PROPERTIES = ( @@ -43,7 +40,6 @@ 'filter_threshold', 'gcs_temp_dir', ) -_REGEX_CHARS_TO_REPLACE = re.compile(r'[^a-zA-Z0-9_]') _REGEX_BQ_TABLE_NAME = re.compile(r'^[\w-]*:?[\w_]+\.[\w_]+$') @@ -108,71 +104,6 @@ def _get_additional_bq_parameters( return output -# TODO(cezequiel): Move to a separate module with called functions. -# pylint: disable=protected-access -def _parse_features_from_prediction_results( - prediction_log_path: str) -> dict[str, Any]: - filepath = tf.io.gfile.glob(prediction_log_path)[0] - compression_type = utils._get_compress_type(filepath) - dataset = tf.data.TFRecordDataset([filepath], - compression_type=compression_type) - - for bytes_record in dataset.take(1): - prediction_log = prediction_log_pb2.PredictionLog.FromString( - bytes_record.numpy()) - - example_bytes = ( - prediction_log.predict_log.request.inputs['examples'].string_val[0]) - example = tf.train.Example.FromString(example_bytes) - features = {} - - for name, feature_proto in example.features.feature.items(): - feature_dtype = utils._get_feature_type(feature=feature_proto) - feature = tf.io.VarLenFeature(dtype=feature_dtype) - features[name] = feature - - return features - - -def _get_schema_features( - schema: Optional[list[Artifact]] = None, - tft_output: Optional[tft.TFTransformOutput] = None, - prediction_log_path: Optional[str] = None, -) -> dict[str, Any]: - if schema is not None: - schema_uri = artifact_utils.get_single_uri(schema) - schema_file = os.path.join(schema_uri, _SCHEMA_FILE_NAME) - return utils.load_schema(schema_file) - - if tft_output is not None: - return tft_output.raw_feature_spec() - - if prediction_log_path is None: - raise ValueError( - 'Specify one of `schema`, `tft_output` or `prediction_log_path`.') - - return _parse_features_from_prediction_results(prediction_log_path) - - -def 
_get_bq_field_name_from_key(key: str) -> str: - field_name = _REGEX_CHARS_TO_REPLACE.sub('_', key) - return re.sub('_+', '_', field_name).strip('_') - - -def _features_to_bq_schema(features: dict[str, Any], required: bool = False): - bq_schema_fields_ = utils.feature_to_bq_schema(features, required=required) - bq_schema_fields = [] - for field in bq_schema_fields_: - field['name'] = _get_bq_field_name_from_key(field['name']) - bq_schema_fields.append(field) - bq_schema_fields.extend( - utils.create_annotation_fields(label_field_name="category_label", - score_field_name="score", - required=required, - add_datetime_field=True)) - return {"fields": bq_schema_fields} - - def _tensor_to_native_python_value( tensor: Union[tf.Tensor, tf.sparse.SparseTensor]) -> Optional[Any]: """Converts a TF Tensor to a native Python value.""" @@ -224,12 +155,12 @@ def _parse_example(self, serialized: bytes) -> dict[str, Any]: parsed_example = tf.io.parse_example(serialized, self._features) output = {} for key, tensor in parsed_example.items(): - field = _get_bq_field_name_from_key(key) value = _tensor_to_native_python_value(tensor) # To add a null value to BigQuery from JSON, omit the key,value pair # with null value. 
if value is None: continue + field = utils.get_bq_field_name_from_key(key) output[field] = value return output @@ -279,7 +210,7 @@ def Do( tft_output = _get_tft_output(input_dict.get('transform_graph')) # get schema features - features = _get_schema_features( + features = utils.get_feature_spec( schema=input_dict.get('schema'), tft_output=tft_output, prediction_log_path=prediction_log_path, @@ -300,7 +231,9 @@ def Do( exec_properties['table_time_suffix']) # generate bigquery schema from tf transform features - bq_schema = _features_to_bq_schema(features) + add_label_field = labels is not None + bq_schema = utils.feature_spec_to_bq_schema( + features, add_label_field=add_label_field) logging.info(f'generated bq_schema: {bq_schema}.') additional_bq_parameters = _get_additional_bq_parameters( diff --git a/tfx_addons/predictions_to_bigquery/executor_test.py b/tfx_addons/predictions_to_bigquery/executor_test.py index d1f01ebc..1b210103 100644 --- a/tfx_addons/predictions_to_bigquery/executor_test.py +++ b/tfx_addons/predictions_to_bigquery/executor_test.py @@ -15,7 +15,6 @@ """Tests for executor.py.""" import datetime -import pathlib from typing import Union from unittest import mock @@ -206,7 +205,7 @@ def setUp(self): autospec=True, return_value=object())) self.enter_context( - mock.patch.object(executor, '_get_schema_features', autospec=True)) + mock.patch.object(utils, 'get_feature_spec', autospec=True)) self.enter_context( mock.patch.object(executor, '_get_labels', autospec=True)) self.enter_context( @@ -219,7 +218,7 @@ def setUp(self): '_get_additional_bq_parameters', autospec=True)) self.enter_context( - mock.patch.object(executor, '_features_to_bq_schema', autospec=True)) + mock.patch.object(utils, 'feature_spec_to_bq_schema', autospec=True)) self.mock_read_from_tfrecord = self.enter_context( mock.patch.object(beam.io, 'ReadFromTFRecord', autospec=True)) @@ -348,82 +347,6 @@ def test_get_additional_bq_parameters(self, expiration_days, } 
self.assertEqual(expected, output) - def test_parse_features_from_prediction_results(self): - test_data_dir = pathlib.Path( - 'tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output') - prediction_log_path = (test_data_dir / - 'BulkInferrer/inference_result/7/*.gz') - output = executor._parse_features_from_prediction_results( - str(prediction_log_path)) - self.assertIn('Culmen Depth (mm)', output) - self.assertEqual(tf.float32, output['Culmen Depth (mm)'].dtype) - - @parameterized.named_parameters([ - ('error_no_inputs', False, False, False), - ('schema', True, False, False), - ('tft_output', False, True, False), - ('prediction_log_path', False, False, True), - ]) - def test_get_schema_features(self, has_schema, has_tft_output, - has_prediction_log_path): - mock_load_schema = self.enter_context( - mock.patch.object(utils, - 'load_schema', - autospec=True, - return_value=has_schema)) - mock_raw_feature_spec = self.enter_context( - mock.patch.object(tft.TFTransformOutput, - 'raw_feature_spec', - autospec=True)) - mock_parse_features_from_prediction_results = self.enter_context( - mock.patch.object(executor, - '_parse_features_from_prediction_results', - autospec=True, - return_value=has_schema)) - - if (has_schema is None and has_tft_output is None - and has_prediction_log_path is None): - with self.assertRaises(ValueError): - _ = executor._get_schema_features(has_schema, has_tft_output, - has_prediction_log_path) - return - - if has_schema: - schema = [_make_artifact('schema_uri')] - _ = executor._get_schema_features(schema, None, None) - mock_load_schema.assert_called_once() - - elif has_tft_output: - tft_output = tft.TFTransformOutput('uri') - _ = executor._get_schema_features(None, tft_output, None) - mock_raw_feature_spec.assert_called_once() - - else: - prediction_log_path = 'path' - _ = executor._get_schema_features(None, None, prediction_log_path) - mock_parse_features_from_prediction_results.assert_called_once() - - def 
test_features_to_bq_schema(self): - mock_feature_to_bq_schema = self.enter_context( - mock.patch.object(utils, 'feature_to_bq_schema', autospec=True)) - mock_create_annotation_fields = self.enter_context( - mock.patch.object(utils, - 'create_annotation_fields', - autospec=True, - return_value={})) - - features = { - 'feature': tf.io.FixedLenFeature([], dtype=tf.int64), - } - required = True - - output = executor._features_to_bq_schema(features, required) - - self.assertIn('fields', output) - mock_feature_to_bq_schema.assert_called_once_with(features, - required=required) - mock_create_annotation_fields.assert_called_once() - if __name__ == '__main__': absltest.main() diff --git a/tfx_addons/predictions_to_bigquery/utils.py b/tfx_addons/predictions_to_bigquery/utils.py index ee79b126..e5629e40 100644 --- a/tfx_addons/predictions_to_bigquery/utils.py +++ b/tfx_addons/predictions_to_bigquery/utils.py @@ -14,41 +14,46 @@ # ============================================================================== # This code was originally written by Hannes Hapke (Digits Financial Inc.) # on Feb. 6, 2023. -""" -Util functions for the Digits Prediction-to-BigQuery component. -""" +"""Schema parsing and conversion routines.""" -import glob -from typing import Any, Dict, List +# TODO(cezequiel): Rename file to schema_utils.py + +import os +import re +from typing import Any, Dict, List, Optional, Union -import numpy as np import tensorflow as tf import tensorflow_transform as tft -from absl import logging from google.protobuf import text_format from tensorflow.python.lib.io import file_io from tensorflow_metadata.proto.v0 import schema_pb2 +from tensorflow_serving.apis import prediction_log_pb2 +from tfx.types import Artifact, artifact_utils +FeatureSpec = dict[str, Union[tf.io.FixedLenFeature, tf.io.VarLenFeature]] +BigQuerySchema = dict[str, Any] -def load_schema(input_path: str) -> Dict: - """ - Loads a TFX schema from a file and returns schema object. 
+_SCHEMA_FILE_NAME = "schema.pbtxt" +_REGEX_CHARS_TO_REPLACE = re.compile(r'[^a-zA-Z0-9_]') - Args: - input_path: Path to the file containing the schema. - Returns: - A schema object. - """ +def _get_feature_spec_from_schema_file(input_path: str) -> FeatureSpec: + """Loads a TFX schema from a file and parses it into a TF feature spec. + + Args: + input_path: Path to the `_SCHEMA_FILE_NAME` file. + Returns: + A `FeatureSpec` object. + """ schema = schema_pb2.Schema() schema_text = file_io.read_file_to_string(input_path) text_format.Parse(schema_text, schema) - return tft.tf_metadata.schema_utils.schema_as_feature_spec( - schema).feature_spec + return ( + tft.tf_metadata.schema_utils.schema_as_feature_spec(schema).feature_spec) -def _get_compress_type(file_path): +def _get_compress_type(file_path: str) -> Optional[str]: magic_bytes = { b'x\x01': 'ZLIB', b'x^': 'ZLIB', @@ -57,7 +62,9 @@ def _get_compress_type(file_path): b'\x1f\x8b': 'GZIP' } - two_bytes = open(file_path, 'rb').read(2) + with open(file_path, 'rb') as input_file: + two_bytes = input_file.read(2) + return magic_bytes.get(two_bytes) @@ -83,86 +90,69 @@ def _get_feature_type(feature=None, type_=None): return None -def parse_schema(prediction_log_path: str, - compression_type: str = 'auto') -> Dict: - """Parses feature schema from predictions.""" - - features = {} +def _get_feature_spec_from_prediction_results( + prediction_log_path: str) -> FeatureSpec: + """Parses a TensorFlow feature spec from BulkInferrer prediction results. - file_paths = glob.glob(prediction_log_path) - if compression_type == 'auto': - compression_type = _get_compress_type(file_paths[0]) + Args: + prediction_log_path: Path containing BulkInferrer prediction results. - dataset = tf.data.TFRecordDataset(file_paths, + Returns: + A `FeatureSpec` object. 
+ """ + filepath = tf.io.gfile.glob(prediction_log_path)[0] + compression_type = _get_compress_type(filepath) + dataset = tf.data.TFRecordDataset([filepath], compression_type=compression_type) - serialized = next(iter(dataset.map(lambda serialized: serialized))) - seq_ex = tf.train.SequenceExample.FromString(serialized.numpy()) - - if seq_ex.feature_lists.feature_list: - raise NotImplementedError("FeatureLists aren't supported at the moment.") + for bytes_record in dataset.take(1): + prediction_log = prediction_log_pb2.PredictionLog.FromString( + bytes_record.numpy()) - for key, feature in seq_ex.context.feature.items(): - features[key] = tf.io.FixedLenFeature((), - _get_feature_type(feature=feature)) - return features + example_bytes = ( + prediction_log.predict_log.request.inputs['examples'].string_val[0]) + example = tf.train.Example.FromString(example_bytes) + features = {} + for name, feature_proto in example.features.feature.items(): + feature_dtype = _get_feature_type(feature=feature_proto) + feature = tf.io.VarLenFeature(dtype=feature_dtype) + features[name] = feature -def convert_python_numpy_to_bq_type(python_type: Any) -> str: - """ - Converts a python type to a BigQuery type. + return features - Args: - python_type: A python type. - Returns: - A BigQuery type. - """ - if isinstance(python_type, (int, np.int64)): - return "INTEGER" - elif isinstance(python_type, (float, np.float32)): - return "FLOAT" - elif isinstance(python_type, (str, bytes)): - return "STRING" - elif isinstance(python_type, (bool, np.bool)): - return "BOOLEAN" - else: - raise ValueError("Unsupported type: {python_type}") +def get_feature_spec( + schema: Optional[list[Artifact]] = None, + tft_output: Optional[tft.TFTransformOutput] = None, + prediction_log_path: Optional[str] = None, +) -> dict[str, Any]: + """Returns a TensorFlow feature spec representing the input data schema. + Specify one of `schema`, `tft_output`, `prediction_log_path` as the source + for the data schema. 
-def convert_single_value_to_native_py_value(tensor: Any) -> str: + Args: + schema: Path to a `_SCHEMA_FILENAME` file. + tft_output: TensorFlow Transform output path. + prediction_log_path: Path to a TFRecord file containing inference results. """ - Converts a Python value to a native Python value. + if schema is not None: + schema_uri = artifact_utils.get_single_uri(schema) + schema_file = os.path.join(schema_uri, _SCHEMA_FILE_NAME) + return _get_feature_spec_from_schema_file(schema_file) - Args: - value: A value. + if tft_output is not None: + return tft_output.raw_feature_spec() - Returns: - Value casted to native Python type. - """ + if prediction_log_path is None: + raise ValueError( + 'Specify one of `schema`, `tft_output` or `prediction_log_path`.') - if isinstance(tensor, tf.sparse.SparseTensor): - value = tensor.values.numpy()[0] - logging.debug(f"sparse value: {value}") - else: - value = tensor.numpy()[0] - logging.debug(f"dense value: {value}") - - if isinstance(value, (int, np.int64, np.int32)): - return int(value) - elif isinstance(value, (float, np.float32, np.float64)): - return float(value) - elif isinstance(value, str): - return value - elif isinstance(value, bytes): - return value.decode("utf-8") - elif isinstance(value, (bool, np.bool)): - return bool(value) - else: - raise ValueError(f"Unsupported value type: {value} of type {type(value)}") + return _get_feature_spec_from_prediction_results(prediction_log_path) -def convert_tensorflow_dtype_to_bq_type(tf_dtype: tf.dtypes.DType) -> str: +def _convert_tensorflow_dtype_to_bq_type(tf_dtype: tf.dtypes.DType) -> str: """ Converts a tensorflow dtype to a BigQuery type string. @@ -184,64 +174,90 @@ def convert_tensorflow_dtype_to_bq_type(tf_dtype: tf.dtypes.DType) -> str: raise ValueError(f"Unsupported type: {tf_dtype}") -def feature_to_bq_schema(features: Dict[str, Any], - required: bool = True) -> List[Dict]: - """ - Convert a list of features to a list of BigQuery schema fields. 
+def get_bq_field_name_from_key(key: str) -> str: + field_name = _REGEX_CHARS_TO_REPLACE.sub('_', key) + return re.sub('_+', '_', field_name).strip('_') - Args: - features: A list of features. - required: Whether the field is required. - Returns: - A list of BigQuery schema fields. - """ +def _feature_spec_to_bq_schema_fields(feature_spec: FeatureSpec, + required: bool = True) -> List[Dict]: + """Convert a TensorFlow feature spec to a list of BigQuery schema fields. + + Args: + feature_spec: TensorFlow feature spec. + required: Whether the field is required. + + Returns: + A list of BigQuery schema fields. + """ return [{ - "name": feature_name, - "type": convert_tensorflow_dtype_to_bq_type(feature_def.dtype), + "name": get_bq_field_name_from_key(feature_name), + "type": _convert_tensorflow_dtype_to_bq_type(feature_def.dtype), "mode": "REQUIRED" if required else "NULLABLE", - } for feature_name, feature_def in features.items()] + } for feature_name, feature_def in feature_spec.items()] -def create_annotation_fields( - label_field_name: str = "category_label", - score_field_name: str = "score", +def _create_annotation_fields( + *, required: bool = True, + add_label_field: bool = False, add_datetime_field: bool = True, ) -> List[Dict]: - """ - Create a list of BigQuery schema fields for the annotation fields. + """Creates a list of annotation fields in BigQuery schema formatkjjjj. - Args: - label_field_name: The name of the label field. - score_field_name: The name of the score field. - required: Whether the fields are required. - add_datetime_field: Whether to add a datetime field. + Args: + label_field_name: The name of the label field. + score_field_name: The name of the score field. + required: Whether the fields are required. + add_datetime_field: Whether to add a datetime field. - Returns: - A list of BigQuery schema fields. - """ + Returns: + A list of BigQuery schema fields. 
+ """ - label_field = { - "name": label_field_name, - "type": "STRING", - "mode": "REQUIRED" if required else "NULLABLE", - } + fields = [] + if add_label_field: + label_field = { + 'name': 'category_label', + 'type': 'STRING', + 'mode': 'REQUIRED' if required else 'NULLABLE', + } + fields.append(label_field) score_field = { - "name": score_field_name, - "type": "FLOAT", - "mode": "REQUIRED" if required else "NULLABLE", + 'name': 'score', + 'type': 'FLOAT', + 'mode': 'REQUIRED' if required else 'NULLABLE', } - - fields = [label_field, score_field] + fields.append(score_field) if add_datetime_field: datetime_field = { - "name": "datetime", - "type": "TIMESTAMP", - "mode": "REQUIRED" if required else "NULLABLE", + 'name': 'datetime', + 'type': 'TIMESTAMP', + 'mode': 'REQUIRED' if required else 'NULLABLE', } fields.append(datetime_field) return fields + + +def feature_spec_to_bq_schema(feature_spec: FeatureSpec, + required: bool = True, + **kwargs: int) -> BigQuerySchema: + """Converts a TensorFlow feature spec into a BigQuery schema. + + Args: + feature_spec: TensorFlow feature spec. + required: If True, mark BigQuery fields as required. + **kwargs: Additional keyword-arguments to pass to + `_create_annotation_fields`. + + Returns: + A `BigQUerySchema` object. + """ + bq_schema_fields = _feature_spec_to_bq_schema_fields(feature_spec, + required=required) + bq_schema_fields.extend( + _create_annotation_fields(required=required, **kwargs)) + return {"fields": bq_schema_fields} diff --git a/tfx_addons/predictions_to_bigquery/utils_test.py b/tfx_addons/predictions_to_bigquery/utils_test.py new file mode 100644 index 00000000..70c3ffb9 --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/utils_test.py @@ -0,0 +1,156 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for utils.py""" + +import pathlib +from unittest import mock + +import tensorflow as tf +import tensorflow_transform as tft +from absl.testing import absltest, parameterized +from ml_metadata.proto import metadata_store_pb2 +from tfx import types + +from tfx_addons.predictions_to_bigquery import utils + + +def _make_artifact(uri) -> types.Artifact: + artifact = types.Artifact(metadata_store_pb2.ArtifactType()) + artifact.uri = uri + return artifact + + +# pylint: disable=protected-access +class UtilsTest(parameterized.TestCase): + """Tests for utils module functions.""" + def test_get_features_from_prediction_results(self): + test_data_dir = pathlib.Path( + 'tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output') + prediction_log_path = (test_data_dir / + 'BulkInferrer/inference_result/7/*.gz') + output = utils._get_feature_spec_from_prediction_results( + str(prediction_log_path)) + self.assertIn('Culmen Depth (mm)', output) + self.assertEqual(tf.float32, output['Culmen Depth (mm)'].dtype) + + @parameterized.named_parameters([ + ('error_no_inputs', False, False, False), + ('schema', True, False, False), + ('tft_output', False, True, False), + ('prediction_log_path', False, False, True), + ]) + def test_get_feature_spec(self, has_schema, has_tft_output, + has_prediction_log_path): + mock_load_schema = self.enter_context( + mock.patch.object(utils, + '_get_feature_spec_from_schema_file', + autospec=True, + return_value=has_schema)) + mock_raw_feature_spec = 
self.enter_context( + mock.patch.object(tft.TFTransformOutput, + 'raw_feature_spec', + autospec=True)) + mock_parse_features_from_prediction_results = self.enter_context( + mock.patch.object(utils, + '_get_feature_spec_from_prediction_results', + autospec=True, + return_value=has_schema)) + + if (has_schema is None and has_tft_output is None + and has_prediction_log_path is None): + with self.assertRaises(ValueError): + _ = utils.get_feature_spec(has_schema, has_tft_output, + has_prediction_log_path) + return + + if has_schema: + schema = [_make_artifact('schema_uri')] + _ = utils.get_feature_spec(schema, None, None) + mock_load_schema.assert_called_once() + + elif has_tft_output: + tft_output = tft.TFTransformOutput('uri') + _ = utils.get_feature_spec(None, tft_output, None) + mock_raw_feature_spec.assert_called_once() + + else: + prediction_log_path = 'path' + _ = utils.get_feature_spec(None, None, prediction_log_path) + mock_parse_features_from_prediction_results.assert_called_once() + + @parameterized.named_parameters([ + ('no_label_field', False), + ('with_label_field', True), + ]) + def test_feature_spec_to_bq_schema(self, add_label_field): + feature_spec: utils.FeatureSpec = { + 'Some Feature': tf.io.FixedLenFeature([], dtype=tf.int64), + } + required = True + if add_label_field: + expected = { + 'fields': [ + { + 'name': 'Some_Feature', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + }, + { + 'name': 'category_label', + 'type': 'STRING', + 'mode': 'REQUIRED', + }, + { + 'name': 'score', + 'type': 'FLOAT', + 'mode': 'REQUIRED', + }, + { + 'name': 'datetime', + 'type': 'TIMESTAMP', + 'mode': 'REQUIRED', + }, + ] + } + else: + expected = { + 'fields': [ + { + 'name': 'Some_Feature', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + }, + { + 'name': 'score', + 'type': 'FLOAT', + 'mode': 'REQUIRED', + }, + { + 'name': 'datetime', + 'type': 'TIMESTAMP', + 'mode': 'REQUIRED', + }, + ] + } + + output = utils.feature_spec_to_bq_schema(feature_spec, + required, + 
add_label_field=add_label_field) + + self.assertEqual(expected, output) + + +if __name__ == '__main__': + absltest.main() From e4d78ceb7a1042796f972fa9ee0b0a0fddac7685 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Fri, 17 Mar 2023 16:08:22 -0400 Subject: [PATCH 09/22] pred2bq: Add component integration test. --- .../predictions_to_bigquery/component.py | 16 +- .../predictions_to_bigquery/executor.py | 5 +- .../integration_test.py | 162 ++++++++++++++++-- .../penguins-dataset/test/test-tiny.csv | 4 + .../model/6/Format-Serving/assets/Species | 3 + .../model/6/Format-Serving/fingerprint.pb | Bin 0 -> 56 bytes .../model/6/Format-Serving/keras_metadata.pb | 13 ++ .../model/6/Format-Serving/saved_model.pb | Bin 0 -> 191307 bytes .../variables/variables.data-00000-of-00001 | Bin 0 -> 9370 bytes .../Format-Serving/variables/variables.index | Bin 0 -> 1495 bytes tfx_addons/predictions_to_bigquery/utils.py | 4 +- 11 files changed, 183 insertions(+), 24 deletions(-) create mode 100644 tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/assets/Species create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/fingerprint.pb create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/keras_metadata.pb create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/saved_model.pb create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/variables/variables.data-00000-of-00001 create mode 100644 tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/variables/variables.index diff --git a/tfx_addons/predictions_to_bigquery/component.py b/tfx_addons/predictions_to_bigquery/component.py index 
f9c204ed..5bc8a1dc 100644 --- a/tfx_addons/predictions_to_bigquery/component.py +++ b/tfx_addons/predictions_to_bigquery/component.py @@ -34,19 +34,21 @@ class PredictionsToBigQueryComponentSpec(types.ComponentSpec): PARAMETERS = { 'bq_table_name': ExecutionParameter(type=str), + 'gcs_temp_dir': ExecutionParameter(type=str), 'table_expiration_days': ExecutionParameter(type=int), 'filter_threshold': ExecutionParameter(type=float), 'table_partitioning': ExecutionParameter(type=bool), - 'table_time_suffix': ExecutionParameter(type=str), - 'vocab_label_file': ExecutionParameter(type=str), + 'table_time_suffix': ExecutionParameter(type=str, optional=True), + 'vocab_label_file': ExecutionParameter(type=str, optional=True), } INPUTS = { 'inference_results': (ChannelParameter(type=standard_artifacts.InferenceResult)), - # TODO(cezequiel): Implement schema or transform_graph logic - 'schema': (ChannelParameter(type=standard_artifacts.Schema)), + 'schema': (ChannelParameter(type=standard_artifacts.Schema, + optional=True)), 'transform_graph': - (ChannelParameter(type=standard_artifacts.TransformGraph)), + (ChannelParameter(type=standard_artifacts.TransformGraph, + optional=True)), } OUTPUTS = { 'bigquery_export': ChannelParameter(type=standard_artifacts.String), @@ -62,13 +64,14 @@ def __init__( self, inference_results: types.Channel, bq_table_name: str, + gcs_temp_dir: str, bigquery_export: Optional[types.Channel] = None, transform_graph: Optional[types.Channel] = None, schema: Optional[types.Channel] = None, table_expiration_days: Optional[int] = 0, filter_threshold: float = _MIN_THRESHOLD, table_partitioning: bool = True, - table_time_suffix: str = '%Y%m%d', + table_time_suffix: Optional[str] = None, vocab_label_file: Optional[str] = None, ) -> None: """Initialize the component. 
@@ -103,6 +106,7 @@ def __init__( spec = PredictionsToBigQueryComponentSpec( inference_results=inference_results, bq_table_name=bq_table_name, + gcs_temp_dir=gcs_temp_dir, bigquery_export=bigquery_export, transform_graph=transform_graph, schema=schema, diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index fd2fc7b7..8e2ae41f 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -196,8 +196,6 @@ def Do( ) -> None: """Do function for predictions_to_bq executor.""" - timestamp = datetime.datetime.now().replace(second=0, microsecond=0) - # Check required keys set in exec_properties _check_exec_properties(exec_properties) @@ -226,9 +224,10 @@ def Do( # set BigQuery table name and timestamp suffix if specified. _check_bq_table_name(exec_properties['bq_table_name']) + timestamp = datetime.datetime.now().replace(second=0, microsecond=0) bq_table_name = _add_bq_table_name_suffix( exec_properties['bq_table_name'], timestamp, - exec_properties['table_time_suffix']) + exec_properties.get('table_time_suffix')) # generate bigquery schema from tf transform features add_label_field = labels is not None diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py index db6f6558..e6102216 100644 --- a/tfx_addons/predictions_to_bigquery/integration_test.py +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -12,25 +12,44 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Integration test for the predictions-to-bigquery component.""" +"""Integration test for the predictions-to-bigquery component. + +Prerequisites: +- 'GOOGLE_CLOUD_PROJECT' environmental variable must be set containing + the GCP project ID to be used for testing. 
+- 'GCS_TEMP_DIR' environmental variable must be set containing the + Cloud Storage URI to use for handling temporary files as part of the + BigQuery export process. e.g. `gs://path/to/temp/dir`. +- BigQuery API must be enabled on the Cloud project. +""" import datetime +import json import logging import os import pathlib +import shutil from absl.testing import absltest from google.api_core import exceptions from google.cloud import bigquery from ml_metadata.proto import metadata_store_pb2 from tfx import types +from tfx import v1 as tfx +from tfx.proto import example_gen_pb2 from tfx.types import artifact_utils +from tfx.types.standard_artifacts import Model, String + +from tfx_addons.predictions_to_bigquery import component, executor -from tfx_addons.predictions_to_bigquery import executor +_GOOGLE_CLOUD_PROJECT = os.environ['GOOGLE_CLOUD_PROJECT'] +_GCS_TEMP_DIR = os.environ['GCS_TEMP_DIR'] _BQ_TABLE_EXPIRATION_DATE = datetime.datetime.now() + datetime.timedelta( days=1) +_TEST_DATA_DIR = pathlib.Path('tfx_addons/predictions_to_bigquery/testdata') + def _make_artifact(uri: pathlib.Path) -> types.Artifact: artifact = types.Artifact(metadata_store_pb2.ArtifactType()) @@ -43,14 +62,9 @@ def _make_artifact_mapping( return {k: [_make_artifact(v)] for k, v in data_dict.items()} +@absltest.skip class ExecutorBigQueryTest(absltest.TestCase): - """Tests executor pipeline exporting predicitons to a BigQuery table. - - Prerequisites: - - 'GOOGLE_CLOUD_PROJECT' environmental variable must be set. - - BigQuery API must be enabled. - - A BigQuery dataset named 'test_dataset' should exist. 
- """ + """Tests executor pipeline exporting predicitons to a BigQuery table.""" def _get_full_bq_table_name(self, generated_bq_table_name): return f'{self.gcp_project}.{self.bq_dataset}.{generated_bq_table_name}' @@ -73,8 +87,7 @@ def _expire_table(self, full_bq_table_name): def setUp(self): super().setUp() - self.test_data_dir = pathlib.Path( - 'tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output') + self.test_data_dir = _TEST_DATA_DIR / 'sample-tfx-output' self.input_dict = _make_artifact_mapping({ 'transform_graph': (self.test_data_dir / 'Transform/transform_graph/5'), @@ -86,7 +99,7 @@ def setUp(self): self.temp_dir = self.create_tempdir() self.output_dict = _make_artifact_mapping( {'bigquery_export': pathlib.Path(self.temp_dir.full_path)}) - self.gcp_project = os.environ['GOOGLE_CLOUD_PROJECT'] + self.gcp_project = _GOOGLE_CLOUD_PROJECT self.bq_dataset = 'executor_bigquery_test_dataset' self.bq_table_name = f'{self.gcp_project}:{self.bq_dataset}.predictions' self.client = bigquery.Client() @@ -95,7 +108,7 @@ def setUp(self): 'bq_table_name': self.bq_table_name, 'table_expiration_days': 5, 'filter_threshold': 0.5, - 'gcs_temp_dir': 'gs://pred2bq-bucket/temp-dir', + 'gcs_temp_dir': _GCS_TEMP_DIR, 'table_partitioning': False, 'table_time_suffix': '%Y%m%d%H%M%S', 'vocab_label_file': 'Species', @@ -105,6 +118,7 @@ def setUp(self): self.executor = executor.Executor() def tearDown(self): + super().tearDown() self._expire_table(self.generated_bq_table_name) def test_Do(self): @@ -120,5 +134,127 @@ def test_Do(self): self._assert_bq_table_exists(self.generated_bq_table_name) +@tfx.dsl.components.component +def _saved_model_component( + model: tfx.dsl.components.OutputArtifact[Model], + saved_model_dir: tfx.dsl.components.Parameter[str], +): + """Creates a component that outputs a TF saved model.""" + target_dir = os.path.join(model.uri, 'Format-Serving') + os.makedirs(target_dir, exist_ok=True) + shutil.copytree(saved_model_dir, target_dir, 
dirs_exist_ok=True) + + +@tfx.dsl.components.component +def _get_predictions_to_bigquery_output( + bigquery_export: tfx.dsl.components.InputArtifact[String], + output_filepath: tfx.dsl.components.Parameter[str], +): + """Checks output of the predictions-to-bigquery component.""" + generated_bq_table_name = bigquery_export.get_custom_property( + 'generated_bq_table_name') + output = { + 'generated_bq_table_name': generated_bq_table_name, + } + with open(output_filepath, 'wt', encoding='utf-8') as output_file: + json.dump(output, output_file) + + +class ComponentIntegrationTest(absltest.TestCase): + """Tests component integration with other TFX components/services.""" + def setUp(self): + super().setUp() + # Pipeline config + self.dataset_dir = _TEST_DATA_DIR / 'penguins-dataset' + self.saved_model_dir = (_TEST_DATA_DIR / + 'sample-tfx-output/Trainer/model/6/Format-Serving') + self.model_channel = types.Channel(type=Model) + self.pipeline_name = 'component_integration_test' + self.pipeline_root = self.create_tempdir() + self.metadata_path = self.create_tempfile() + self.gcs_temp_dir = _GCS_TEMP_DIR + self.output_file = self.create_tempfile() + + # GCP config + self.gcp_project = _GOOGLE_CLOUD_PROJECT + self.bq_dataset = 'component_integration_test_dataset' + self.bq_table_name = f'{self.gcp_project}:{self.bq_dataset}.predictions' + self.client = bigquery.Client() + self.client.create_dataset(dataset=self.bq_dataset, exists_ok=True) + + # Components + test_split = (example_gen_pb2.Input.Split(name='test', + pattern='test/test-tiny.csv')) + self.unlabeled_example_gen = tfx.components.CsvExampleGen( + input_base=str(self.dataset_dir), + input_config=example_gen_pb2.Input( + splits=[test_split])).with_id('UnlabeledExampleGen') + self.saved_model = _saved_model_component(saved_model_dir=str( + self.saved_model_dir)) # type: ignore + self.bulk_inferrer = tfx.components.BulkInferrer( + examples=self.unlabeled_example_gen.outputs['examples'], + 
model=self.saved_model.outputs['model'], + data_spec=tfx.proto.DataSpec(), + model_spec=tfx.proto.ModelSpec(), + ) + + # Test config + self.generated_bq_table_name = None + + def tearDown(self): + super().tearDown() + self._expire_table(self.generated_bq_table_name) + + def _expire_table(self, full_bq_table_name): + full_bq_table_name = full_bq_table_name.replace(':', '.') + try: + table = self.client.get_table(full_bq_table_name) + except (ValueError, exceptions.NotFound): + logging.warning('Unable to read table: %s', full_bq_table_name) + else: + table.expires = _BQ_TABLE_EXPIRATION_DATE + self.client.update_table(table, ['expires']) + + def _create_pipeline(self, component_under_test, output_filepath): + get_output = (_get_predictions_to_bigquery_output( + bigquery_export=component_under_test.outputs['bigquery_export'], + output_filepath=output_filepath)) + components = [ + self.unlabeled_example_gen, + self.saved_model, + self.bulk_inferrer, + component_under_test, + get_output, + ] + return tfx.dsl.Pipeline( + pipeline_name=self.pipeline_name, + pipeline_root=str(self.pipeline_root.full_path), + metadata_connection_config=( + tfx.orchestration.metadata.sqlite_metadata_connection_config( + self.metadata_path.full_path)), + components=components) + + def _run_pipeline(self, component_under_test): + output_tempfile = self.create_tempfile() + pipeline = self._create_pipeline(component_under_test, + output_tempfile.full_path) + tfx.orchestration.LocalDagRunner().run(pipeline) + with open(output_tempfile.full_path, encoding='utf-8') as output_file: + output = json.load(output_file) + return output + + def test_bulk_inferrer_bigquery_integration(self): + """Tests component integration with BulkInferrer and BigQuery.""" + predictions_to_bigquery = component.PredictionsToBigQueryComponent( + inference_results=self.bulk_inferrer.outputs['inference_result'], + bq_table_name=self.bq_table_name, + gcs_temp_dir=self.gcs_temp_dir, + ) + + output = 
self._run_pipeline(predictions_to_bigquery) + self.generated_bq_table_name = output['generated_bq_table_name'] + self.assertStartsWith(self.generated_bq_table_name, self.bq_table_name) + + if __name__ == '__main__': absltest.main() diff --git a/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv b/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv new file mode 100644 index 00000000..0723251e --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv @@ -0,0 +1,4 @@ +studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo) +PAL0708,2,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186,3800,FEMALE,8.94956,-24.69454 +PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195,3250,FEMALE,8.36821,-25.33302 +PAL0708,5,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,11/16/07,36.7,19.3,193,3450,FEMALE,8.76651,-25.32426 diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/assets/Species b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/assets/Species new file mode 100644 index 00000000..d919d4f2 --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/assets/Species @@ -0,0 +1,3 @@ +Adelie Penguin (Pygoscelis adeliae) +Gentoo penguin (Pygoscelis papua) +Chinstrap penguin (Pygoscelis antarctica) diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/fingerprint.pb b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/fingerprint.pb new file mode 
100644 index 0000000000000000000000000000000000000000..04e0508ee9625dcd88a0574b588cdb511a052481 GIT binary patch literal 56 zcmV-80LT9b`rn43!<)3sfB_K9-l5y|TransformFeaturesLayer", "config": {"layer was saved without config": true}, "name": "transform_features_layer", "inbound_nodes": []}], "input_layers": [["culmen_length_mm", 0, 0], ["culmen_depth_mm", 0, 0], ["flipper_length_mm", 0, 0], ["body_mass_g", 0, 0]], "output_layers": [["dense_2", 0, 0]]}, "shared_object_id": 14, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": [{"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}], "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_depth_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "flipper_length_mm"]}, {"class_name": "TypeSpec", 
"type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "body_mass_g"]}]], {}]}, "save_spec": [{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_depth_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "flipper_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "body_mass_g"]}], "keras_version": "2.11.0", "backend": "tensorflow", "model_config": {"class_name": "Functional"}, "training_config": {"loss": {"class_name": "SparseCategoricalCrossentropy", "config": {"reduction": "auto", "name": "sparse_categorical_crossentropy", "from_logits": true, "ignore_class": null}, "shared_object_id": 19}, "metrics": [[{"class_name": "SparseCategoricalAccuracy", "config": {"name": "sparse_categorical_accuracy", "dtype": "float32"}, "shared_object_id": 20}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": false, "is_legacy_optimizer": false, "learning_rate": 0.009999999776482582, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 +ˆ root.layer-0"_tf_keras_input_layer*Ø{"class_name": "InputLayer", "name": "culmen_length_mm", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": 
"float32", "sparse": false, "ragged": false, "name": "culmen_length_mm"}}2 +† root.layer-1"_tf_keras_input_layer*Ö{"class_name": "InputLayer", "name": "culmen_depth_mm", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "culmen_depth_mm"}}2 +Š root.layer-2"_tf_keras_input_layer*Ú{"class_name": "InputLayer", "name": "flipper_length_mm", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "flipper_length_mm"}}2 +þ root.layer-3"_tf_keras_input_layer*Î{"class_name": "InputLayer", "name": "body_mass_g", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "body_mass_g"}}2 +” root.layer-4"_tf_keras_layer*ê{"name": "concatenate", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Concatenate", "config": {"name": "concatenate", "trainable": true, "dtype": "float32", "axis": -1}, "inbound_nodes": [[["culmen_length_mm", 0, 0, {}], ["culmen_depth_mm", 0, 0, {}], ["flipper_length_mm", 0, 0, {}], ["body_mass_g", 0, 0, {}]]], "shared_object_id": 4, "build_input_shape": [{"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 
1]}]}2 +­root.layer_with_weights-0"_tf_keras_layer*ö{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate", 0, 0, {}]]], "shared_object_id": 7, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 4}}, "shared_object_id": 21}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}}2 +¬root.layer_with_weights-1"_tf_keras_layer*õ{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 8}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 9}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense", 0, 0, {}]]], "shared_object_id": 10, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, 
"min_ndim": 2, "axes": {"-1": 8}}, "shared_object_id": 22}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 8]}}2 +²root.layer_with_weights-2"_tf_keras_layer*û{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 3, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 11}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 12}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_1", 0, 0, {}]]], "shared_object_id": 13, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 8}}, "shared_object_id": 23}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 8]}}2 +ö8  root.layer-8"_tf_keras_model*Ì8{"name": "transform_features_layer", "trainable": false, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "TensorFlowTransform>TransformFeaturesLayer", "config": {"layer was saved without config": true}, "build_input_shape": {"Body Mass (g)": {"class_name": "TensorShape", "items": [null, null]}, "Comments": {"class_name": "TensorShape", "items": [null, null]}, "Culmen Depth (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Culmen Length (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Delta 13 C (o/oo)": {"class_name": "TensorShape", "items": [null, null]}, "Delta 15 N (o/oo)": 
{"class_name": "TensorShape", "items": [null, null]}, "Flipper Length (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Sex": {"class_name": "TensorShape", "items": [null, null]}, "Clutch Completion": {"class_name": "TensorShape", "items": [null, 1]}, "Date Egg": {"class_name": "TensorShape", "items": [null, 1]}, "Individual ID": {"class_name": "TensorShape", "items": [null, 1]}, "Island": {"class_name": "TensorShape", "items": [null, 1]}, "Region": {"class_name": "TensorShape", "items": [null, 1]}, "Sample Number": {"class_name": "TensorShape", "items": [null, 1]}, "Species": {"class_name": "TensorShape", "items": [null, 1]}, "Stage": {"class_name": "TensorShape", "items": [null, 1]}, "studyName": {"class_name": "TensorShape", "items": [null, 1]}}, "is_graph_network": false, "full_save_spec": {"class_name": "__tuple__", "items": [[{"Body Mass (g)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Comments": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Culmen Depth (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Culmen Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 13 C (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 15 N (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Flipper Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, 
"int64"]}, "Sex": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Clutch Completion": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Clutch Completion"]}, "Date Egg": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Date Egg"]}, "Individual ID": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Individual ID"]}, "Island": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Island"]}, "Region": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Region"]}, "Sample Number": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "int64", "Sample Number"]}, "Species": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Species"]}, "Stage": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Stage"]}, "studyName": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "studyName"]}}], {}]}, "save_spec": {"Body Mass (g)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Comments": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Culmen Depth (mm)": {"class_name": 
"TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Culmen Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 13 C (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 15 N (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Flipper Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Sex": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Clutch Completion": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Clutch Completion"]}, "Date Egg": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Date Egg"]}, "Individual ID": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Individual ID"]}, "Island": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Island"]}, "Region": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Region"]}, "Sample Number": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "int64", "Sample Number"]}, "Species": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", 
"serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Species"]}, "Stage": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Stage"]}, "studyName": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "studyName"]}}, "keras_version": "2.11.0", "backend": "tensorflow", "model_config": {"class_name": "TransformFeaturesLayer"}}2 +¹vroot.keras_api.metrics.0"_tf_keras_metric*‚{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 24}2 +üwroot.keras_api.metrics.1"_tf_keras_metric*Å{"class_name": "SparseCategoricalAccuracy", "name": "sparse_categorical_accuracy", "dtype": "float32", "config": {"name": "sparse_categorical_accuracy", "dtype": "float32"}, "shared_object_id": 20}2 \ No newline at end of file diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/saved_model.pb b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6395ba7317b134c443033b467e90c01ffdaa82c GIT binary patch literal 191307 zcmeFa36vbkbr{%L)!mg9-Dq?t8bBj)HF|K?%%H2X&MpqgL1Qp87|sC01_s07fn=d8 zyHSJcs%9NHMixa<6h#e1i6TXc9EqX?TDB!w)@j<>)#~t*yq08p4_e1+y?BHnQ{I2@#Dvjh<}KJ^m~6if&S%j=@*Zq>uCC1 zx7%26U8{8(wHrFwqup-GiPIyW76kNBlv-}oy65V336Y9~t5O<`-hj^w!c}J?Y_qCLutkVuY>jf?4t$z4-3(w=?$7oJ2D&{j=dYj-wkJ!!n#>oi*H z^0+j5>G@aBSEtaV@dSU*KmRJaghtQTx;khL zyy36r+bj~VcKWSV=$PFlbQ6i^+xX)(1u!$NV+MC4BU}Q!jn?iqx(lOM8}++vza|l+ zCmXHaX(5|f00>Acb4br^v?(XD$?7;7RT9ZGs-Q7wsoRrxTL>~oV7G}#X}qVky6ui* zNYJYYUAVhhYt^4=Y;-Z&8!ZF9(gaG@8yf&1{AB@jfCT9rkglWAXB*9?B%x8r&|N@{ zR$aTRVmhEl*R$p$h%TWIGt=e2BOvj)TJNUT0qoA8@y%MN2F;|d8V}MQl!6u;t6FzK zylOs4yG*Q)UB$?kp_oZdq}akMV>~JJ^ZNN1nf6q_AKm2>D6|tS8KGijzz2q 
zX=LF5ntS5sTXqSfgl@N2Yd6#`0AP&Q#2hLAZ2_i`{jxBJ z(sixb*l6@91ScgS4LBNaudQ{po;Bnc9e2>oi|zKU{-)70fPvei2{Z}_?k*&OBBZ@& zS6y4H^_x9aM=?wwpfTB=|N8p^%r2bU7!F_tIFs>)8~z9yf2r1cso#{wkx&Dcfq!pc z4mgQYo1J#OzpAsq{b-`sskOSBZJ>b~HReph4JLuGljy`**4 zVbIRs)K+h8wt)k6rTu8ns`aXFcC@v|U9CHplJ+5~u628jR;|})x6Bk6(1U0<;8+6= z-)`1n7InH->v%Z#)SAuq9d)At3rTBT#gt%Y&{u(XqompKyJpAl;f@0W-hf{q7^?#k znpUQ82S&$d#*+K?LpMGL$OEQ$seO49y@Wi(QwVi6SPC)m*8wMKG=|9Qx4ESNOl1LTF7Wxqp)PHlZ1 z=Cqk%4ow*uO!Da}<5%?$@)0y;)&&%ZIoe!0gIfI!)%YqOM!U>(V~J5F%~w3*yHv8i z<~mF7Lz70kW?|CmV`!HNpqU`iuR0^|%BRqb`LzS1+TCn6dR>KN)L)H^F!E--5^6TO zz0(-9GpB{B&NYQ}3Rn`=C6f~@oIp%hyvja~NRv9KMw4EdGgf8_K1XJAT?Jw@absXk zn$Tg_UjTS?9!LZ4NiaWgDdd%wD^L7cT^2h0gI{{^Es9m!R}z7SHHe zG_Chmh4owQHuZ^_0+}LZWV3 zmiR0E4J=t>`4~t8`20lINKhMfN@QXOJ6PoQL@s(abQBhb)gI6(mUHSLHndxF2rq4T zF~h$ardvkHx5*2=jK;3$n+8fsl_g7r6JZ6_XFkov4|;Ja4c6 zhWx(^jqAjNxv_8wx-n%)qN>H>;oC0ro56rFZh#8SZ+huDEV&>+CTudW#H#a6*s1V0 z5a{m-XpaFU^tX9oW3zW}rG2LhqP`6*yx2Pxyo?!(gV`L2^A7O_fC>vd$PyZEQ^jkA z-p?9Lv^ga2g;hkmt>M)YKEp0yqqYfMqsIPs1hoH354Klp{U$8JAfN*6X!Y|TO#r~7 zwa$81!2%VQvkU|&hMcv>kh7RYXJOWqx7e@H+?q7H2HQBuFb+svYpgS}wu~l?uORKT zdu=A|Aljwf)mHn^A)N^6M40A(3ugM$Xcl*&vAUwIYj>aRH=7{it=_~$rf<6DQZg_c z1Nu1x1Do2c^*~Z>k-E_TgQ*Pmut2a^Fu$Q?bfB=Y8`^rKwJ>%O;}FOG4j!&c0w086L3lsbiyu%BN@!6*VBy-SfW2*%_q zhZB7XHisYqJg;vBFK=S*0;{-rtTNEX(6Q(Nb?8&rlsy(6L%VfJ9OTcsb{nr<(xf5W z=x9D8Cy*|UFChuwy4baMd3EQh51#)pnhk3oSf4{c@(W`i8{gJ4oQO6Xdihkxk(jqr;GbJ`RTkM8J}0L}w17adou`t-?~eTiVrZuU4NXIioj}aqQHQ zWHpOw=nScNk-_I82f-Yq%T`rhaG^KQZmS97q)u|3*xA*{9>~s|MDuVcf#+wdUu&ws z^Lm|j6X;|UX1dl|Z2++(P74ga(TC6>t8FaLE;@u-hmdQ7;nf?q`sd6OF3C9P3R5VZ zk;vg!dO0PgrI8gOi7ukld3;o+=7y|l#I8yivMLIy{Loc>D&f6Udaa?Ew6RRWq|d+^ znSK~ChLVafn>md30|hj(;MTaUVVbC|YaJ{kN;tRIT%1R;+5s|9+pQ)n_$DudISr?a zM#(1~tLb40I3!A*fBBVZ0Y5wk43@2IzWgTt{BVe zbrmn$`a#5vcKx1O&K1gVTs^kh?zejEg!clNVjenWTg(Hx$eg~VB%@m3RtNO@bxIE+ zccY9ay6Dz1;JtRQ29gcTD6%AkD#~y@4&ZvV7;ya|9N>D^P_3gID5YZr=GKs)&#^d9 zOQd4hr5-ff2n9YV7rFy_cekPgX2rMz=3}@Hn3=STb? 
zAe(+^ygDi+#AN0;J0g1}UPEwVdWzG2sE+dL%fOA1W2qMrfryziq~jag)p52VtK;nF z2pwk&hVM9>+wOIs{1)sPv_ZJX9wIZLstALQ|4V{2z_7pl62Ce0J?IPZ;{glog0M$&H-oz9gsbgP z9e39ieiD!ln`qnW4%5w89i~4<=rCO@e1|^)=eZ1;?wwqSs53=s^lpCe#^Ef@_0Ewrnt)jBPFn-1rk+5oaXVAS629${xU zJ`N9*Vq^C46>**d~)W!6v{>uKn>mytV_W_Q9e+o=i3t{SQ6u+>he&e&zcbR>;7 z$t7}po3X=ef2Jh!n%ZaqHPAf0{R`DArUoAtlw>8YyU5g9P|S%Rd8mX8ciRD)KR}e0 z&eOEyXa{s;7Dy+JNgt}4j#_-?nz8#~xQ#WE@yV55BWX&~)<_!fp}J^}q*20WBprg2 zHIhW`if1InjMqq#cI=TPUqW@$97$3pU?d%#ZJOFRvB?MrS#q6}3~e91ex#uzI;rY! zp*lyE4nKlDlhMRB6O9GegG9;tzhN?)C=0(qqGk!7k>jlRT{MU9mGE*$^z1Z4Aq$SV zC~cg~W3ICUxZ$bpk|so3Bar50smvT%2c6l_A0^QNkLzbRD)c&4mpvhs%{z!Zc~E4} zO2~DEY^wM&xRWVDksUKaW*7lUC48Es8cRlIzi|ODji!{zR60Ei|JVPq*8xJ%b-*bX z(m^%5I>R7cKUF&Idq04XD36cBoei4EM7$?>9N!b1Tse*w=z9XJwXSPMsdqZH%}uz# zC@HxmbQ{iMvHm1h5Lb!rl)Clw^z~71dQ!aU$q!gUNdu5nX2w*KK%z2k3_e_B8n-~Y zo>&_G3HtxxA&3NiA2hujhj!l((60(^Do8kHqkhtyczE&&nb>c!LQ`kHP=Xs{$EB&{ zc(*T6<>~9z?W=L~8%8!wp>HO<2{G?Yh)y`Xm=|ylUCminuuILd%p@5^Nn!7yy{`0P zAH9>9w;WqJiVoAfg;rF6n@1&&s)hzKn3aGYvMgWsS~~LXVME`1id;|^%&XvqU2vy? zRWzz_H(Jex8raMaK1J@nKk?M^x8EVAq><&ndhOP;Uz&{a`2KQ zFABVjl9xQZjFFcDniI#RS-k@FP6PBb?r4qmo4u}H+c?QqglwQ9$;pP>Ns(+N$R={L zVF*N$tqd=42WUWDLAyjGrEi-?NL{pFoRIba9|v7@Y)@e)91tg^y&Y}6--Ntx7>er6 zHfWf1p^9A+m_>9z&7rp0K$GH>B&zBv=)|Zh%80wAlln7O?d)ivQrl8-5~xBLYKx#2 zVODfdoR;=mqDU81Il&acTx_6~87bY~?19O_8(Ifm(^9&()>HM4peYg5>8#A%OhOb; zA~VAL_d^1d+QasC)~mOBy?eWl?d^W1w{u~83&8yi4A21lR(E5e@P92jvpvPE17rX#{%p1_-ETAVCKu<;h^b`f?X(ym(SU@vg zfIjXG=o2iUPcnd>jR5E(1!$H6^w;pv=U6}$FF^C&fEHLliwvNp(10En%hJ4xXB?;> zw!!EGreqED@Ks<1tGBRXTVu5c6nhqp0g(t<_?ef49Q>S;1OVy+DKc2F`lt2f?inPB4rZ7BCgv2FjT%Yw_U}aS)wV{EY-D(#d72hEpG`J+6QO$@? z!4Vv0*D5I!1N&q7oi7iI!VFIW<6LkuPB+uk(Od#Vyi-cy!O~Q`61|JO;1%NCsu-BOh?NW%7c{ohL82+%x0_m&3yZdVo{n1qo4W#BT9fX+pJ50K1^F_nb7P zGH0ArYfZpS7T?Gs;;BtV8^GhzZxmW3a|bau&)&Ms4Q_A*Up?+v1}6*rv? 
zz3@zm1Fkgaq&OFfltvKfsEb#~NM`|I_`_BD1Ly(L96%oi-ZzZpFJKJ6;)mgD6oy}= zG5o$D48P{a@X#yiq&Vo(c~U$Q3d2||!6@rLA`eF z#aAwvr_$yzk6bdi7%MZ9Q=_xzu3lZaxcti13s)A|TJ+I+pS0qtoj&>~aO=jhz+ynn zm=0h)z^enS2RVy2oZl?k_@Ezce2Air>ojesLA3E(cvNqvt%V;!y`V+YK7S_e)n@^|OaxJo+$?)I-n*NC=odKtizT!}-mkk5xbV zs8jTz(e$wvL?7#J`WRYfoD>HY5>JX}BhklAnjA2FH0TdL^kE%W^5}yk4nZHr@u$qA z4`QyeQDm=e<_8t_O;f5m50hFj=61r^8GthYyfYXzb_Qd{&R`sN2JkT@2_^Uu zC7}#IX-TM9i^XlDk~@S!&_v<)yap9LYm~if0C3L$;0*(S?=k@RYX$({Z2<651Ay-V z0MNMjG3dY&N{JsAq(m+Y51+sfIe7S9{GjMCUXfmMF-ZFwTh3ZX6pNH397Op%uFGEZ z%Xz9|MxqX?8zT*~T)+zc0d(Mf8{h$t4j>?J_PhYU&|!1uhO+_!@eg1QmjIOE5`ZgQ z0&oOdy>W~gvjlEu8Kn773Q~Htwu$Kq_99c__fhXTc>fgjo`?7Er``)O)!|1d+m-gP z;Ns^6nC_nzq?uJPA_1O2ZP=#79{}VNi;^zndlT~Mdw%`+uYKcnuvO`z8SyiMG-+F< zb@0yn2L)+dH!;Iow$BRm@f$iW{*WLYRl8bqO+QtjYE#$1lD^%+8@p*>PLtF=t+fTC z{}fQz4?`#IlgugcbMS7TS0eF81j(=xfg`Zb!?SJ_p-*<0>f#q1rGL~>`ioTQFOkxK zk{<(4}^ggv0f@=S;^nMrLHk4#d*i|ATd%#~DyalHQuSW}|pccHx*Y1N4j>?1A zZ}-5b{z_2&!DB$gP!9|(1+`Fetye*o`d7K9~WF_Ze}Sk_Q%{5#D;Oq zNzFaptdj@p%JgQyHF6xOv!^5rCG;`xut0x9ki+cu~Uhf%j zd8%j3`rs)d8Vd{$fLyqK@#IcprQ%l_s#kcVufg&TBRvT+J@G!)fqlbI2yp0Xhzyp^ zDXiFE74*|vk`qLRp9GNsF7045(YCEqq23CNI#5kocT9yHf`C-;wP2~hq@*~cC-Cqg z{uJEMn&;wBGyd|^t}pkQFJyLrDZUQBCCi$S0oCG*l8)6cR#(dF2A$G=bXe4XJ?*VH zQ1DS4V4;u=JymiD$gvxj12pGkQwfx;gFNjzvlUn?F$d!22%$}TjKdz)~}-e&1UNvx1LVJPMtL5CDMCFuXs#t-#3 z#VYKif)sYV*vNJ%;^;gRd~snVjIvU!px+qt=2FG*Txv^HE@kQf#&K-INM>aF!!wqV zIU5M)@Qa}Q{H5HMTAgGq9WG&vMGs} zyWHaj@g7D8l>;e5Pu2bRVWr(OSc&ZT$^tl90qKP}ve1S}cDZMG;83%3AU#=cIp$4a-tuz- zc1HxRqjbNp@b(g#FhE$uZk!@GEJ?B+S(4I7RS@LQfXhy!KFRWzEL(6|PdV#Jv7Yks zECor`IQRt>xfJ$*mY5sG4_Bo`R>*`@OdgeArCi`z&T!@3Ml(zc(JBUCk`@l7nTsYi z?Fvt$y;!}>INrt{RpF%vesm?Uz`At-`&?N8UnY$WEu*4c#!3ZwFXeJ?n)0dDG0Gu-wWsLu~wObDiiTmsX&#< zc&p5#e>n#BU*TNV305FL-I)%HQ(wmgiZB zv@F}L2B_~o=;fZ;`>DlBQIW56=kC0R`mHthPnx>*oBtCpmgV1aK1pUrgY;=-6>LhOk%&2Nq`D=b)-6?DABz`RSy9}m_r`J zjlPmVkE4V10!4P#z!Qpq^l{DtEJcetm{V=ih9Zs4fKhw~XH&PUMqkD(=W zfWeJ^JON%J0W&2AGqd`nM&NnvrmA~}mwzUKs8@-0AMZt@nccLvckBr7m%z0kZAl4- 
z3ttuNM%3Qg*{ix^+tW8rZ8kQ+KYB~+E`qDK^?sxEWEXB_o7&=4qL{X5Uwy2DCC$x6 zIAQLAS5Mt#-y-NXlZl~PHol&1u)F*Sg z;wB&@U&dHbz$u@HQ+w=nZicvo*DHd>JuP2h=HOliY;Id&}sQYv_M z!+*#*T8+3x)BE5jxx8yl*ytec{G$ag&aEOWgt%QVAd zEv+(M96YnvcFhc%oa^0ub#d`*Mt(o*qcm)7%pp5?U)H^SOS_h_nbgSg*jeyj+>>j> zDvY=-9jNw$q;_d}TCW|cc=2A;n*il_;-TIAl@IeAR$HQ#>=3o&X{y*W_&Wp4+_n}WP*3jN7*e|^8M}84YT=l^X_;>>E zi2Mam#C9|!IX}pIu2_z;E&Rww9K{Uy?DRdhZ+q(_J9wUUhw@(`@G1{N!Zl@)j*#Hd zd2f%72Uu(Tuag*H(pS*(mk^LE_6BfZnWqSvB{v}6%Dl@w70;}3Bji(x*P3;g;Zu&+ zTA;!)w{PV9)aSC7+c)#p!3W03YJbav=<_Y}HEaAd3OtJV=?Hif@zdY&DB`D-<59#< z%f_RKpW2K^5kE~Bk0O4`EgnVuv{jZplsEj;PrS>2 z-6Zb0?gp2GDtwLWy&-)sLgcO%Jbl?Mxo>bC=fB=netHa0zJ zrIXrmdQxu#w=<%g<_0L`&}sEN+*iFDdEHl?@VKuA*KpL~cl9`2Sc>CFaq#yO)KU^CLJE;<9^C%; zwT8!fHXz3#aj(y49uoKYrtbGmo%2mS;GK&8Mhg871de8IMV)u~(lK-xtH0<9%et1g zs@rYp?>IadC`h~$EjhB-wdi+qYY>YX=fKk%uCVC)I<7*+6Kr*h7cJ%TtJoNf4Q(y| zM*&4DAo+*NmIv>Q?2oDpcL@b|mwH@q-K&W-F=t2J264d*4^Gl-kD2k^-uIbh@X6~@ za6kA0DZj`S9GoZFAvHqcyUX>|*Z5#WB8x-Fpe;s zjmzG^Q&ad+k{I2(?}Ny7=&$ynpGXcT+XL5^;L6!BJz^}`0<7HNEX{B!sg_C=dS7X5 zIzhGC(1C_jynnQ}qen=9ryMR>a_lWdNoLY3{IUv^U7}mXyV;~&R!^tfkuw_ z7`)sUjv7741?Et^a>)M+_(z31G-80b_%diGf@6htORJv;7wOV&#|POQ3IFXam@Vkr zQ;6=XjNz1vBRX8d8`6s_&&6S)Su7~>e@Y<#QS>%jKJGIv*7c7WF-E6x z>N%g8GDaYU#*`$S4s6W;;Glwjf#JbQNlplfcuoZjVmG`d20NuzOT}VQ{!|hLO%m?` zIO#k6>>(h&Np*HX0`cg~qw+`(!wYyWTv$1e&e4Z6CeMjrs$489@~5qV@Z?3_193Wl z&$))e;J#!i3|_lELxCf!Z^4Z48ww@LP*f_QP;cFPQ#aryjy*@m#xZ-C9^u67=LF`C z;K+5~`6bUA>EN=!V*#{scmh56#X@{n0o%GQdsonE4RXW7MBVIN~lg# zw>`Nz`b}Zp@^DFOJDDgjX+q#YJ*>^l$%lb(0H7r+q19X$iX$CpMF#UGhJ|Vuz8P?5KLA|)DY8Qov!GM{zUN~V5Zck^Os?}hbMBL8Iq zJ?l4UixizLUcz+tqE2Uv#*d)}0(5pwk!D<<Gyis!Ht%d9Zx zq+Cuh&3Fu`pBZocm7KBggW*h4k)yal)71-D|u?X4)80P|TJT!msI1YDzjZ*cwU ze#L^a>;+}nhB72ONyzII$axOPMq}O}hi=HGj*1uj6#~ByD)cc!9R&{TMnhh(hit^A zjwLU+mk8WxA2oHBIv{EVQEZk8br|(~f$Lwlz1-wHmz$im+(ekvH!1okQ9u)(ZZ_u) zb!Y;)Vy&1t7ylqDX3kJmdou#1$UI!!Qw> zOcYNtQHCXxB@WbPb6$9kK#(?>D4t}ZSY(n8$4N9aL8hac%ktp&r<0J)*mR)r8- zKTDa)Qs!r=P+2PcEK5|DB`XUk;e>S@`R1rot?w_B=vzs*Vqe(6>>P)ZVT;C{@8Jf8 
z16{aJVbK(>Hf-ZX{`Vvtn4SrHV4A3&f#c8<+X7@C4WAu@ zIu;&Oe`_fWYARaWV)(YxzScXqZ4|BTiSTXvTUOz=cZF{o57AWk?0ATFhX>W)=n8{6 z6RmA2eB1seSh(%kXl?He-}b&h^4K4~Z9GJC(Xtzdp6?D>-x1w_V zMN+^UL%=J!WRxUc96Sauk50JSghLCaQAg)RiCZYgw&yDJ4s^!dlESv+Nder#dA3E* z!bNn--EzU%vI}5tv7)n0S1~TACAKwBD&!U}vu%17<^t>>AjA0w5D?LBOiJ8h4(j0) zJAn?munOlGrmIPdsJYqjaRE5H2QQC!o?zO43>|RS1{S^TUaje$DR0z8GBGz;3S6%0 zCVkWAbe49l)@fiv-^-g2UCH@L&D|IFxIbpOU-@|zZr(iiYk~W<$o*R4el2sqR=8i6 zcpc_~t8hE4a67EeC+x9vE#c-+;L=h?&{T*i_5QEy>Mmm+PM{) zMEMxoab%cX40JBYaew%L92wphkZkkcht!whk0(&t0#dRcS;tdHtt^Z~T8a|jOR-`N)v;p5R{s~?x!DJ}dJ=oA&Q=Zgz;JEh?3@**$y zuH9~|YSr^EzXAcZwg@Zq)ffjW$^cmT`#h{D9ION%G*FnKE}6CmGqg4d2Q&ErF!K!_ zX7U`&1f8Z(c%g3Awg)e?rV0lyg#qxwrI-Q-FK&u)9ve}Zp)M4+2Q#!%3g#wff{x2W5L7h3OzgO`#AUee$n%Ne1Is4`-gnJ&&-;^4*i5RgI&b#J^? zNJ%rx5v|R`K}y*JDS{7DK)=(Clrje?fhV36VyN5ltwIdDPSZ+59K=)xK+J!`LrjH( zn2^W26nZF4f$c#Lt)0X{&r)3ISqg<7$0?+7hE1Wz){=0-6{9?((z-sh_S39e@XZY( z`TnMWc;}8B423(Zm-(7HTSq_)hG;>UhoM0vVLpa(;V{Ixn4mGVEiy5KAzCQrVQ3Jk z_#bd-h$j|@zlEW(v@P;6gC$x(=3!|N37L;2o`}rBlE-xujiqgomKiM3;xZ3QgUHLj z&7~!tz#R5ojmFWo$juCnXu+9>qd_ERK8|>zGY3c9OGFw++ag0VIHHAU9*(@EXg`6G zKiha>G-tMX+?di>+6JLokQpq|QZ)}tgNW69Eb-)O4wgKwhG{Hqi-gT!i59VWSQf(WehqnEW89AgUd|%#V~O{%%==j3eOxkw2sz0wFmCRvG=VW28rx3F-dV}1 z>zCS>H&x5f&pTMrKORNnMqX(Y|LzcJ?mM1HL%Cew#vEbH0hJ8IFx+RO!~nOXrU zmfaGuUkdzHj)zegz)_`~To(d;alR=Tj%Wzxu5Nh|T*Gqn=6HE?{Jdb{=mra{TX`^m zsiQk@o}U+N9^H8h{JdcN=+0Z@=LIWBcis{|FPKBR^OpH}!7kFBx5Cd029oZ)OWYAw zI3vu*E(rhbS{42XgXqo4%O7D7M>%;tVBn3glIQj;&*>RIXMvNmz^${$$ywy)EOBy{ zxH-$5oMmp#3MXfUn{$bib19Z+aOg`2{X2nmOl(-agm*8xlq*s`)C4Juj%?I=8~vu* z(Q0+doTj67+x^a}20H5`w2fiM0Y)g;o}gB*`xGIX`y%6P$_QPvZyAc8tJfD-pha5S zeeX-P-b?+a2rgn1w5B_i9s+bCrYgG3(WX`^G+%}%@CU+sZm{}KLB4bj(^(ddfS>`VGz zdD^VsQaBs{do`}YpM+bf)>eCs+coSM+U=$wvsyze@fGKP>; z4kNJvGaP(d-;>;0gAuImGfrvd0NU?1xgW&>j*|5xM-MVp&U(#I{=|*uCvy}9`pHDU zmPBQAiuM@^>de1Kt|-u9Sd*-L{d;;*orO1)LAi zBk4!6&!MsNz{7fy@#6{V1RAUN?rmxiO*om6(3tL_MM^KHz}LY@72G6V;F5q6jw?Tp(16Tphw z0OYIon}D6I44V3OiQDiRxFLHbtQ#@`Wjx|GNRmsIslE?9M_dhG^%;+x9M(<4gag^`Nz 
z31KAj$i`Iz8n@(gpy)?hFyu#Dk9zU(0OG0vyC6cL&OC~b^e{Zq@;Pu{G87=61Fzkl zq1chncjWV17w{?b3b+z$Y-pLCxN7~T<1%6=u37``samxSO`;>e#T-}d8x{0RDYQc0 zIP^NTR(Gx4*-+QCTCWcm5auyUXmx^ecBw?k<8%-|-D z*O|yA}W5G{@Q)X~-r+}h>4HH>n#)!(?Gb(nVD@!{yeY!DwVElF04jP9nk+R(bt z+BBLp0qFLC&%qbCc(K0p>-R8S!`BIv>bGvS+ILz;tLBSp2B{;j;A|1&g1nkYC$BjC z7^VgDN*vzk;A1#682t`~nfHe_c-eiyGs^uJQ^+gLJMgSuuxrmLXobekTTcb+sc1cw ztf#W|RI#3xWGhxm)_%;%&Z%XmDfVODek|CJMfAI@wPQ+^4XzXt>&_h*gRb zeLogW-8m7g?p%EjTq-{oDh^hN%9i0C`CZ6|m4~RK2KU7u3V&a$c-$AulIQjmkT@#6 zCu|epkT@16632%|;$?{P6>|^8ID43mp@%5$9!|vB!^xq0_&$hO7LU4zd3r$(BZ2;> z;>O+XIB_>UH0~}#2(_5$Uy8Gbdt&I}Y}`HE8)pyq4c)^JiTkD3!g6T=o%L7<_DS5+ zyXrLvAGZ>|`cod&@0QqOK&baXy!9IAiPaB@a=hqZ4km#o9E_Wu4#i1Nhli%8SH&aJ zmGA?8+GD_{CC3qM^%V%H_ulXopY*7BN@C7>tFMXkF=-My?@%BN--8L#_Zunn1p%F; zPeKjd8--TNUTdkPr9y$a3>2gT_MMMY^KHL<$CIU0=6GsvUiTmJA19*A;+7FF=LmjB zB?McMR4OqYZilw7O+N5rn+R5n;S6a--H>+6_ljch)(rjjQS{|8)@Q&*yRJ3q$qN_9 zV9O6004=zahwCzYB@Z{u&33noEsH8(PG?`i)}S8H&}h(bVi?Yw0*^f&(yLkIo21_t4jRc^ z!}R;Y5wp{t#SfDV{uL-e6)20`3gp+&exSHj(70&9e-J7PKQGqqHo8Nbx_%uz-n;9T z#PfO^*A(m0I%_@UtS37)FVDMc(IK>M_3*L(B`GmCO3wd7v%XRJV*>IUB9{kX5wEhxl|}%laYt4Co=~ zy;O5f@|&}J?{MT{4mv5}a2PWL%0D5X0G8;&O4xNHJPG~~csU>_aS@?`6bo4t8Is|N zpAbK6@&{;@B1H?5Sc%bNp>Uzy>}}Ax8$gUnmKZ;45hDS^;)OC95sqIOlN?EzxX3Xn z+vMoAz`F2al4GD$;X{rNq}b$W0b%n*lL$!H-Q;MMi4QC5T-_TB5+^x2uE|5M%}M$D z63lh-;p+xNG8=GoNIqmH^97xwMk-7pkK#wH4E_~(Ayg(6kg;&Uw`F(U5jIcTs{H;` zfF$87XDC&FvCL0qQ=Sf7M2-+{!Y2lW7}i>R-Z7Yx|I%$k;6*Kx34vF-4S@$a5ePhz ztM3Jq+LywguPu0-uOUe^55lrgr4VnGIjAxbZHtcN6G1%^F>& zoA_#?1iF=5;1P^_!(n(7@VMa+rGxg>2Uxo>9sxv&DusQ05qAJgDW?yDBwZ@V1DNM+ z%0EmFaGT=tcM;il*Fd%qwpm0Wv)3?-m_m~Z65!vg{*U$2C}aX$|AkaAO@gc|>t1iw zcB^?0o(vzksv)Dm<(DXrkB*I}#B}CKGzrINYmIdk+^P23(qS0*6iPxQ>uCW$JjZ$( z;2XKSXgT1d9^7N>+}`My7(MN7Pl9{?2WWje$o0>TCmPlb;}&_M`NlX}Lx<^A1Ew%s zd)1naH^2*IIR_DGD2qki5|UVA!F@GjgqZxaAoy7D{p29G)V$m2T?V6r@XtyjHOn%% zWDtXUSbdx3igIkeO<&}m3psq7I!=Q;U7S8jchdWv&Yf|HaO}x>5Ds(*HtuuCgI=%+ zIh@#Epl1$sNfhBu8N5|8x+0-(l$p6F$S3O%&4Aa6@@b??~Tyr!&m(Hq_@GhYjLzz2-)w{n8#28v7R0!6F0F5~z 
z?g|reR~WP7hrgdfKO(SOB;)jv^75c-Z)HPhZ~5im!h>y}b8BTX=@TD^nw+a*1AclA^;P#AJXQLeZ8k7C) zc1yPw` zHw)p5nrge>1B#Lor-ckZXy-PE!g^$H#Qvy+{$f0CKYyh{B~OWwv}ZlHpFjI3Cl8L_ z>^JNq&weX1X--dmbj}K6aKO6915bX8^E_;hK>5;hIqjo;+T~nM`^9M z3zhD)&o1kBtw+}kuJvfB6zuSjDvR+%y|Pd#EL2K`&CEiT&oZg`9Q&%kD|P0rgJ_?5 zGp{>d`Z^Gj4~%5A`uSS3DSvZ(fV)$2mO)Ms`Njt%H9lW_#BW{Bc43!`+4UZ2!qZ`% zmBDuOr`V4E6x-3CVmta%Y)5~J?dT7}`H{-(u4?ps*HaESr=l*qaG1{@U|y} z`pA349glhhWg*bDEDrBc?=pDQi<#_q9O;oU3{Cbsj`Rr1LXrLOj`S{rBfXf(elG3` z55!%e9Cw9pclUUg!98BgRB|-#3g_dk@Ikr3`;-0X*OSBf(JF&3))p9{9WwpIsHJkA zjxlSfZ_U}M1#E9-o0J`~+{XjtzylHq$HENae9$% zm*>OEux4NM@Ya7{I7-argK{Wdb;y5}K#V&hXAHXCjt}zirfr$shDNH0#+6ZQnzl76 zsiX0q%|EhXZ(#yhVQhVXLUgsD zOxh3{u|~x%GkoJ1N-Dx^#=|Bve(`Sshd(^Lau6M$k)p14AWCq%qZTWLl6;+OoEW6K z;EY-3*{KI_Rs@lC4Yv+z_hxMq`1wHQwCKKwIw(Wq(Xijt{oT^`ve*Q#QYldyl93!O zGju7)-%KFqjz)5Ru$Z}v~uovxdUy=$a*t1<8ed}H5-z3=(3!K>Om_y{VMa&_zyLudN9Dw^6I?Tyy*!Gr` z5>^5B&r-%3+yPm@LU*5G0JBj(1YTfSq>Gdab{UEKfS26*2%Y-XS8OoHlNGSpPxV7UTn9| zER#>g`Ag)}o@bj4ymy+XizcsVcja$FC?C3;&ORF_?cutdX1jck^muc*W%f4>-D>uG zt2gJNpPNk$@1>T-XCTzW{Dt-Pi4!b$JSA$c{pQh14 z9}G%UbRrHjK^AK{L#H?quhX<8I?7qUycA*DC724C7a(POSnUG7O;9MaOR)Xcr=Oj8 zn1NVNRCE5xwtl3{D{bpWa+7VnNN#e`zwMHLa@jw*VkKwxqw%XcdcjOp{=be^E|l+H4hOgsH8`Q zHuP2_{LM@w)aq}j#?*&kGa@W}k~vGeo6SZ~?cKx26!_rCPSFKKD&-KGG?|BydlDf< zOsDYeWI84P)-;MN%6i`%J;5*p`sJEZHKAz00OND5zU&I8I)2S?Ux*Q7<8tT;{c}Y0A_$ep)HUHrqqf9 zjV2}BkbjWD z1jRwNL_YJWz65@`FQPCD zt>67OIT)`V0yy5iheik5m-&3HFE-Dt}GQh@aq!CVFJUH;j6;=&IZ1VK*svkOxX!Jv@5DKfb(=z~G3JdK;z8 zVX(uaP8;~A!lEAG57oeKB$Kkr-EmEu?N}VgA&G?c@ST+JFw5Qk>--+b`>u0|ZOazB z2rM+5TiLpnsJ>3$&79x3S);_4*x6z0;}bTC}BnL02v$On1lIpCJNawWHm% zOawk7{ndJZa5TDFFLevdiCtjFGXocP#OETo=Od4KP!~yFZM1NJF=afJPS2*(nFFSF zFq}9jyVCGa(Ekq)LBW82AHrNal-Ir?pkL)&nT7T7@u2bv@t<)mV&&(|%IUD`crnFW zi?Chapw4sX-B^(FwR_JMMu217g3(*Apyu2gEWa(@j+?qBTz~>gvb>6tl{?zO5{*at zTq$U@hB!W=>5)D?3Tkq|Ln7L3B7H~{)LgW)AR3SKQBY7TL!9`u>0=i9z~`8U*)yS# zLCIg8PSEn6jv z!A^-fsj}6wm@H)~i>;c)WT{YDZ1pTA%Mz7kiOimCgpEYBvzS4`@4ZbwLrzD9NtZdeA25K 
zy(2c8Od(;=P?m6k6Fl0kv$+Tp+2jiHuJP~9VI;X*#+ zF21;;)#}%3oks0OQ@gx5@QI#??;%4>AQ!r#a4w%?O~#Nxklzx}9=fyUY_`uh57DUx z2C4H-V+?&c!4R0biKgk+SF~nd8pVHQ1}YO~Yxp7?W{UFX1QdqJxCSNCn&3ml`WnGf z@mB>jOZ6Vuqk(;kj*n2OK%LEgwFcJgw`+KhscX?v=~n6Z$oPjK>L}X+e<`iD*(GUdPWEc}bK#{Zqwk;rPkHyY4Knzg_$r=Gf z?YKqxnuDkbNET7gvm+Qp)}(w~WSx|qWbGwLxUj~Mb)bahL)H$IImy}r$b+m2O1#M0 zDjy%tHn?|G3_)A1T3mYY8>^p9Fq{9!u2VGQA@<1&OeVh2cY@9XlatTSgOd&->nf9WGin^U?6(t(%z+A33OgTVos&|f%3h@^j#5d9DusQP5`U(=BaHnX z2gdT>PHy}4uYD5?@zThpFvLqEj}R}7JVLxQ@(A(L$m0@Nsp=QV(`ZVWG%k>5^?$6B zDIw_UIOOqr64nh_1%FX>wcTpogQsr0-&xfRk4jy9k1danr^Gaxh#EKSEWV#TvvLZZ zpbrO=OxLp!)MXy@#Y2Q8yW(830jr*>g|?C*tiRvEN87(P ziGDYMj?>hqiwFFZl_((aBXOY6kTx$ZWcC`obP7!>2wP4+a85cphIwgJPWn9Nq~}-84d>Ln zv{c^Vu|^zg?QT!nE$w=Mo=*p1%R55&wg}<>qJ(~PJWxq93nm!|nW2HfqnaHr8+&Q9 z<3K-5aj$I{*>KwrQxt3)MmE}Zh?v-R(f9(G$F;M1d%Dx}+zgLF`bu+OJ;d1>?f zpe6^j!l!k4X)FAo=5}25{a?5G(#-CtqWF0 zH+Dxw`R$>i{N||hQ1>sB=v!Q;lMV-uE)PN;Qj05J{DKxf&*XZ*bb#!Uj|0%apN2rf zmbd8vn{`n>&@p}Nk+fd|>;B=L1vFZ~*1OwJ^$>W@1ndEWGK+M3a|%;mu6jWnmChj_o0u@wDA3ih@LdH3#o(^y zq0aH}b%sJ;0nnF<5q(8KpEx}FO3>AG%v~)*S5b^ztw2{NhVSZ9^%;n?{j@K^1;?Sl z^tLNrgeiDxO~r>W1y8NrLnBPVU7Zf!)i6^_!BcA{<~j>drxbIYMW}O6%yr_qHXFXq zFlo8cv;j(6CH|A;wS=hJv=mXEkQqz#M{p@^z&$Z{j5Mg=ZEg+61ZzR z=IwF`QWtzvkNc*c@J>aa919)BFRL&YCp%&M@Ya{|Fc@_F_6UXXJB{|LV93?&b^1mm zM|kORFY03o`1czr^aY5qLaPTEmZ&+Wg3yw}Iu7G%_87LIl-?qv*~5^r;P1BPj}rsv zEt)scJe8U42_^gpM0IX+jT@rQq0kzJ;)9Qk8Gcel@w_#N@=Bz`3K7kF2oIuU5_&u? 
z0gIIirNtpgJ1zkYHkJ?_rb|060UVPu_q$n*osIqWQS{}p;S@V71>GLR+*CyhMl6-k@qyfLHp1H!A2Pk+3#=-IY3&m_@0GPLYxWH5g?2>S!=&HyCLC8K6^rNQc|Q0P((X&`9PQ z9^My@n4R`4ewbwNufWSj?7m=D;9>VAzlQd^oh!uSHskBy+QD5nIUBA4lKsM+LWXsz1^u-*f}$;2d0C6X5wEhxjI*&Mf-C#&&1C9>KhsCnWuHTz~3EXnbpI$$+ES_&~ zY(QAO?()oe-M7d5GuXGs{OOI2Gs}|s=|$bKhw*LZ8Lio?&FAv-=jTti7u)SK%j8pW z{u24L=UKxi$UI#%c}2S`e-o4p{Q7LHrQy1qX8F-Q(&NqLmf7DlbgS9#t=^o6er`53 z>_T)|d)LX<+w0fwT>@H=U-uu#PHlZ16u9YL4bEf#G>s1WU{IQ(6R#;$ z2%ij{;v_bmrmZtNIqR2~B22pkQz7#LGsYfPyMS*K6w2%pY`^uz%1%7YK&&T%+I_p< z!**3!nU`z_eB>tQ{Yw}8lkKPh+{*2U0o-IeS^ziMjuZecvc|9K=mkFu6G}RT|KY!x zkB_?6@xMr-f6cY2aT&*O%~-ZLm%~bC+g=TGobZ2LXc*XHW~&Qko+qcF-J7*dShFgq zRK*0|n420YF(-nUD4DagyV-2?)ZRVZ0$g`fDY}42r5r+&Ci5_IPa>p< z=@fQHlupUNH9bHWP*1lDPcRIDez~SpO(+_$0IX`dS7|zfgME?QdT+vJHZTsH$B*q+ z4aR6y90lf_{CIen!(}#M&dE{WP@=#QF0%=T5($oc6ga|VHsQ!e7{h3|D@1`gTxJvI zLL}T3qred^vk6Br5*(!{aD>Zj!cmGahDwCIQD~zOq26eCD@VawxXdQr$`N3Wg5xOl zR%HK!LEmHuGv-%1r}!rq{F6(WoZ(h4gH9_aA&Mo)US>?oloi?%OJq0!{{+Oq+zE1- zLdo+lzcMW#cz6yTMwG-0BNgEj!U#mo1lNBDR}P^4w4VBAr{99zRdK0Mkl}_N8K-;a zYjB`7ivP;hsFCi8V4uro5E^}^+q)_yvI2g-(pkmN=}Z`-TN&cVOxn=EIhX;@(!lTx z2hPXG(GdfjRVw<=mCahGtHFK=;sHsM^qV{|pryf!<_9mjGk40@X2Ka z_kJUp(GmD3Lv-tWP-7ebq&=LE@}mFDHCYyTswNwnv*8L@TMr}7hZzja2nh!OQY4v;s|Q5w}?I0YVL>T0(M+q=bXueN%NjHve! 
zYJxsbOoC5wb5v)SCAcTm&j%JnJhB2F-3}XpfzzUW@exCtiI8ntcOpZ!iTkMc^Uo9L zfG28U!7wm=1tVI1hR(>LV%~fsgk@9iX}T5;UM-@MnJ`_Pob&-;U|#z)5Yqu14QqE{ zsZSEh4Y%fFmU9D+k6RGRuzL%$956T&wHCyWB=7ssboXYb(YmG9?l!vQdqM_n&NAI| z=MAKJ_5#$rm9!1RK7vEJ8F$-e1hfdC#7gu!0S*I9?6U!!f9E0((w5AUrz3-5@Iia_ zEDu-%J*x(;<3Z0hxFBOVf)|Ou(rSF9uSw&^ugoYIFf-A@tIaSJ*HSl+jyNL~uI`ZJkDeT(rj~FbYVh(rJWZ(Mc=tVTHt~TM0QG%sidRMc( zS`9_5&ZZWzs!S%N&m`soRFFTNK<^Ag->x=^zd6dVe4vV4^dMv&3|{2l6VSVdLIv=2 zk)TC>cBra2SP476^3Mv05e=L}M;1tW>mLSKk7;ag{gctR{@rlPgfUm)f)tAaL_!U* z=SXHGBAup}Qy`s=RD}`DJLpJ{E*&O)brnuGY8!AZ1?M-R3GMZ8bhc7!S-Xk>$1EY= zLQGIzCyL3Cb^A9K?+r;Hg8U88I0=JnXiZ99>EDn;$vPMh*1VRLkPR%}v&xVaGnw%~ zqufKGs=aV`6BIa}X>4>C@rK`2JM*ZN-R<$_(;o_m+2(o;j@_(;>}T1CY#by)_it3+ zfJyLLdnlYo0KJMsEMp2&>Ev~YzPmoU8F)IKLkRok{ILo5HdkfpY%a}LjK zbgt7z%6y20I%MX2(Z82W2aOAjkPD(9|C5mG#zwy>C12_{{!Fg9`f-!HiY5wO=RA= z7_5^rUp~a?g9X(9ME5zC=q$5XhwYndkPK{@gOr2w!X8kW9;Nbjt<$L8Xlm_ED^z8G z4XI#P*_zVC?`dH<1kRdBq6imSph(&Qn>m@{Bx)}UJUE@I)vuZKdU;c3h&o=w=&w15 znt)^x^}HKDeq>F`JLc)YLzI-AWbI|x=ug&xwo*PE+hI2CBx?&G53(jG@gi%he0(_D z;2Kgh1Z}ly5%l2Z)c=+>r+(}@MKc~w3*;h`$x9k@ewR+1qNO=(*r^wTjgrrcp6o@Fm z*2bkYDNH6OWM1^hLgtmr37H4kvXFUXQtAV!nn$*?1Xsr+F}OM=(ZRtDvEc1W7*Z3y zsiVHBW8SIgTNU)h6e`nNcV3*q;V!nERf?2TNoQzKJ4z)E!g&bInSR?=b*n9Kut%)6 z=2BEp(LCg}OBR^4B9n&gwC$W_2t;kaVS94>Z3*6(K&1lxio=*f{A&}sIv9|t+WI;) zOci0ni01Op7?p~R%BfcjCiPlkGnd&^3@MeF1yzq%I)`RW*zi4^VbZ2H>Hr}42PP@H zbsO`kqZ_)xC%b%Cw|P^+)&p$bn06I5maC#L-B<9SNNCDzOb=V4bs@_%nk3(_*&KX< z@Z#1Nu%N@1XYh3=xad7K2Pl>yb~@-Twe+}7IUp*3|H%I~HH3d;;#DpulP5*?oPt)K zy!BMDo{H8}$$BbVPZjIQ4g?B%*;Xi8=IzIV{aCagOZH>geyrG!cJmO2 zlsjc)cDh)v##FJ&Fw?}U-!r1AV_L|}l#pH1f#IkIi0h zV2kBi6I_aPyINN*6)O3iD5YTa2FwuN1>Iy$kuU^5D=X)f#9&ekgogJH;cGiY$3S9EZQvS0MiBd&5_J(xc)ji8)2Cz6NnnUkP7r#-mzEaveuM zsNkpzW8$aK^rZYESj^UL!{PH#0~H@itU@b z>bi7iIxWl0!5Z_j@Tzyelxog-K)B9Qy2{JZK`f;05HPfz!dy| z$@>9Ql)yjDvMM7`9V{WKo4zZ&G6K!+Th#^7^&CLA+PBLBjqzQmFkl0 zS2PDKJ7r!ROL^Z`6oAo1=rE?i|0;&RwgY z|K0@pyGb+w+uT&Mc2Da(nT3}GdC9>`lDsJJGD=?Z@G?eT3TRFomuB?})H@Am?vB=2 
zzuD{RwT+W(MaTyC;hb!^ofOGdf@~r;8-_q6*~;*eCNH1`B_b(JRD`=|zc?Z7ftlWD zVU=OkPBbzC$Hr87;xaiWLl^H4B-t09t@L<4eT1xlUdWI$m zni4^+z{=cRIDlr<-WoX5#Za}h-ko;mmNY5~;4$hb1X|o{C+bQDMlIWy;b$Nwm$-*T z{Hzz^_j)6KAB*_?4C3d)B7R{%+HE65r!x~3|22m501Kz=1?NF;I1jOK9%kS?5*AK` zKqeZ8F>Ps0B!VC_imed>pg+b!zu*P^W8TmoXQ4mAKz}j<^rtB3Pltm33=4h63;M^s zp?`vf{z(SEwXT-H;yH*Eb z5qRC|EfDjtn8Q+%F8G=A-~}`Wq$gzIXI>I=@N-HM6!Qgl!OBOWF?`va0Kns zE7cijvjIN3?sRIKn_6cJ>Y5L1?GF;bRx8r}7Vv8xUq|ASG_Fs1xDOMnQffm3&oQfA zbX0tYbkJCH_-ubhd`jA7WFTc?SWMW}!*@l9vb3CpWoW^<47rz}g+s94S=U+`pg`?| z6s)#yybii8=Ft5fUh>~5rSSM_Dqiy6MPBey{BH81z{}I*C9nKh0v)GSJd7=cfjS+g zwU$~c<+34iRkERSDPeMz^NRFa0{VvVE6HC0IsDmvvw5zwj{Vn5IW$`9taqg|ZzWHn z@z+PYT5}DFuaiLH=yg%2CD^;*RNZM2n9+l@pqB#g&Y-9Vez7{Y?qFBX(taeGTb!;m z@m5kqiOms&6J`*)b*J5G-qXt^-%5^v-rtn*$tr;}N@$|iYPI147zB^5l>E8m$h44w zlsD)Bffsu~;Kd#gc(Dfr{qiUb&^a3m(Vw&AR%gNuv>gqnot`(khS&MiNIrsGzELbTBn6iH{i2|c8kyB zOxRzVzScr57onDoHnyDYZ*(EU3y|Tq27Z8G%cXVEtoR;jTJ3M@deq&XwyEB2*0AiZ z-G#-pfy>pJm&t|zICKJp`)AM|@kPmbu!8o9FG-vOjD@{um$hktGl)%-&j&B7apd>~ zyfk0(TbeIZOY_V0()`|_rFq4@G!MPqn1rKqdSe%Qe}PWIsrc}#?_a=l5U&t+oK^k@ zaLz?fN5hGL?v&DDUr?%4O45A+eFY{Jxb$Ks)nRi|?R_gLxtBW7sP|sh;+$O8YG!v3 z{XhabeAVvozY)-d5FHL$g_eqs3NGxvqq87hQxbTf+#5B2@PyW<>nncK^%^x@U!|w( z`+}zHYwqbf^sakS9CVv~67DpHpRWHWp04k=sTX#Hj1+A66L3huOWDGsLY#Kj5S#{1 zh##=K`sd(Fjh?8kt_DoerQ9PkMeSaL(?V={h9jb5oiA}tP$mtR0OpC!Ty-C$lQc6D zJU+qECpT=U@%9=v)VWZTJ4*hLlly~yllw!|C*2gEbFRZn$|Ezq?am?aVl*M%usaK!?OZ@-gC=wjjc%z=VW-2!#P>FNggGC$VtBHH_7YNB-iLk z4iP2%&%)N-lYHp2+evZI!`hSL*|;Y8O`GodlU#94^1>rC$?eV=le}SfmOsgv&IV2L z;v<6xyWfsUp0p>q;-2I<)tuytG0B4`VH`PhQT%nkiF}Ki$W3}8Zv;)`mU|)(eT^|G z4tj5KQY^(Vk^euuTDNW5{ukgxo?gJ4U z4=P9yf+0vGjLW#BaTzyiT*i$VmvQ5883#F2l2C#lQ4-4Vla_=E3j*hc4xb&|HX67S zMp6j|*^z_O?O#eP16N}<$X?e(@tiR~@;5)|i;3mWhd~+KnMuTRf&B!Qg{FTAu>CWi z@^JS49fTLf#)kd$_QgxsDTePwW4Xq8^hwD;8FQB9;z|KhFkE_C^Xof@%7#N3T(T0D z^qqjRKTItDqXc_m8kNf6VtZ%K>IxAk4r~Ge?r=c;zWWmb2!qcW!r)y)+Pr5-n{OD> z=64y==3g_U&F?m(&5s(==J$ZKiN?i`fwa1WQsT!2DFIqODe)8dAqNlNiyxFgfpn(2 
z8zk2eC2d`@qrKHVAOw#nyVz^?U_tXm=nr%RAHzWpuqqpXj#b$J=DZ1P0e%6-5Xx-0 zHoyn24G@8A0}SBWpzF9c=rU-=jpL4nsO`4|Z-6oYf@h+yjK*G1-qP-E^X+X`K!1<`#fDD`QhK$viML9iBsL{}AN8Jt z_fJvpd3gVR>bl%WZ{tUp9;%=6jtTIRICM7{zwq$e-xJgT?{G?k65oC9n$+V zVfjzOU&udV!|*L(`9H*R0sn~g`)xpe%GV*P$} z$nTTmL+`;pJ~sFTsX4CJ2_Ai*?&&?s%AL2K3f2=`9-9{wuv0Og%GQ&8R=6ZtzLK-{ zW6tHO$4*o1$GrUrHg{$#poeHZmh8u}{aCRd?dIWPf$JW{;nl=?HJp)HWf&hKR{aWG z!<%3!Hrz_y={bXSc;WU_F?Z~d5VD5eJ%4TZY48(cTXIvrGegv!88VC|l4&e4?D-O$ zwCxi=El4w~9Z-7#wFYv0*o6h;2jEbyV+Kp_LcTX4AGZ6vrQ7uyW}#g)BYp;=MNnQM zuodkO3evdlLjqeheO90y6QJYb4++vy6&qCQ=EziS>RN5J*Y053r)ki`n549FS^qIDk}D>sMxEbVy}vMdr{8YYgt~IJ2o`0cL-;k(}*~7tN@YyFiZ=2 zntTr44KEwm_XiSxM3BaMV5i%J=g-6QYP;X+EzF^5+rflkEpD3HtwNn&aMbyujyk_c z)%hh-Crqs$gQ=y$Ho~xV^l9X=>M^)1u`CmmJ?HARjl~T!=(4H1%+GY2&2+eW5NgtG z7?c^N7G1~LolMH>$tS9hnQ-U|)XHAE2GEl>A>n5;YqGN)tsb^uVR}R~uH6~zIymqo z9q?ERpDa8i8>!36W;d5)F|3-(q3#TJ9XdS6UPz28lJm%N^o0~M;(mK%a7-7@0pMx4)%r5rYGng&@50sfl~8rzR^JMh^GPtE{c*u% z3LDbomBT@@&@{|$xa%9oGdq+9zk^={tYY|@6YezUlcxp6WFpKbotGfoK5zsb4&VkM z;b3@7EO0o6m4mTsARfT5PQt2cd65DY%byUyuGtVdtca|z;(t}p4Wvm<5IKGlL=JF= z1bUb4dsfbs5p)%JCJ;f|@^lAM#n*zRihy1xG9&1#V?)akZ;b|CUuMDomwOLDmQ+Zz z9_?i^ZV`ZpEw*F?k9f} zd!V|HJy89k@K*BYLGk0w#3_#dQ@10}IdD(^X5ysVaVt0L<8LNT^BkFSGoN@fvA}w! 
zX4v*-zwGE89=>DZfzBlerC9N5fB_hZxU_)|gtn}EHq3+Tt;=f?!}qkzQE zfd<)|iKB?f8d=E&g#gH7^5jC~Q9|U&^YiGKdg6ZajM=K;f>q@}HVTmG4vzj)G##LS z@+e`#pb;{QU84gykp5OIcigK|;?!w*@qY3NbmD&U|8MVVLL(`nFx`z#`o(eAT9got zTV_``{;+W}nMrb5Fi{C;K;zHuX^HMIE=kAlJ;JSgnJ zg9pWf9`>}Cz3fRmZ2YSKtGY7XJ<|gw^d&=uuBzAHd-Y!Rt5@$$5_ilA3ijTKd2yxN z(=wSj*fEm{n3+x7bw{Wue^t{zzce5ICx>@dy=ikOCmKm90y+g(366f z&_Q1n5WoYpj`PfDxhq@Puzb?BlQoD6SoK3q3RXh4ektH~!h4a~y9QPLvq4w!^TR;ObktzQ}U17Ab4lKamI>Dz%nk6@&%Ngps&RdZ;jpiBtxRE@zP=aY6g9 zNRAGkDk=;J_@ECS5cwqHN_GD@gPV6Tdf)f}u7=CThsOA?cGtRQ(@*%hYSWMQuJyf5 zt5kgo6=V4Gh_K*O?ME4s0Gz?G(W$O=k|zE&dX$eLYsa+5kFH)?dvmk(l3VRYWV_}T zJxlj2cDA=(e}gY>tY7N1MAi#7&Y0a&yUjfRj32e*h(czk9P=c})#UUHK$jO=Z6Wwl7e6*rI}6BMb!%1emF@M{P~ku# zQx4!V%k?@`Yu|NH2!_1h>t7W&kBJJ`%I z8_x!Q*&2J$G4GEx<$`W__DKqkq_NdL;g~$$PP~#B{fX#_8awWo55(Kb_y#I8hvUz% z`yBIFyzN%*mWik6JK`3eyqj z8Gv~@oui^_8=$DE_{&6b7+rsDoDy4=&4!xEi~+v^;$#L zYl#J1p=en&nP(&(pg&`GRN!$ufM=2%CzwtV{-{xj`jW zZx<1?5{pszfo@x=y9kkE;t$l9h$tN&fW~qg>3_BlO*ZpwBmb-p<(kyHRY_nin2RlceAe-plR=d0%or2)%vR8tzTJ*0;m@vTwm&KD`?dugl+mG1n# Ry*&gO?wId3aM*7AI|KUmvB*?cTBpZP`gTx+X76lK>8zr7b(pki52mGzm#sHWg41R4Dtd zAjO5j5%`ed6WYAA9R?H_2M0lL5I(^{{DYw=%76vu+?RzHnlz>8eDl69-@WH9=lY;`ho~!);_WR1pS52#9Htb~eeC3SG9_^vE2;4@8*Lln3fS3adrKHJ-0g-OgORcsgdsDs_8&ki7Q%LmVKG;V86 z?;}2`n`Jv?zH;Q_;2zYfkC3fyz`M4Km;Y{?95~Sa&x^H`eo;Mjsb345VvnFse|VIt z+tb4Kc&moW6W_M&Oh}Y8j{A}QZ2cW->HY`o*yG9enQ10!c1wW0KJ_FuH0j5}NR^ze z85U{#de0U%!B|2~jeUperP<4p=Scg5`?o~*5A+o)CdN}mr`EGMU=E4njTiq_U zMpVxRU18bCb%FNd^UhOU((l!4S2j|$N3OCrqFcnuCCOs>Gs|qx^?a2I*m0R!J0`+D z=8N~l#=qXNVR`I1R&VS3eU5nVlz4V~;1b)HKjgBeoxSR_a(mc*zFI?7j=LfzAI@a& z|Krb8!0o+kAHyx1`Yd5@n0B8EUw4iAcGqs|bk>U$I`OB5F%eg(A!EOy@_+i38u3Fn z_WF!&_FqnPw_klN$bLH{+IBBeMeVI!!Cqr?#Im9fY>LuWu@-}XkNmQq=>Cvk39g?H zk=WBNjI@sz?&4_NcJ=*!_ToQorEq=I_lH>K^l|nyljiRKqMBv1zK)RMetU6rmqpgb^*DKC9Hl==jyXlvOb2u8eT{_dfIcwE-9RJ$rVrfao@!~L=EFOyfwd&r}sx4ZeT z7`a(C9KEQ-{VR*EsLtG3s;UeO9D!;4RcQiMvkpuX;X1oR->~hAr47rcU*~At7WT^0 z21Sx7AJ?;6zbcqf@KQm`x(5ZA#^0UYck+`ig-eHC78YXqBTcL*r+HaZq%p;=yU|OM 
zwR1j)4fpx_@;=)wne)ayZVlBfaUuP%XVc=mHSF4aJ7wGxEt?k@B@Ni5;rM@l(;`~A zqf2wk>L{r?cabFJ=UcK1`O~B)ryS?{KfQfQG&f{ib3*Lv(xdffB=sN4+_LbszQ1)hHJt{iS$qAH6$qZ%@`wCHfER^@MffBi9* z{mP*=eK`L6KaW=)+q+h!Ie$}S`L?oXeDKSvO#?Lb=YoD6j`>)vak-}_3T?rw8Bz4D zm;%$dzT#~|78hjPsLjLuyQ5C$N9e~E?tW%p;hL0zdB1EeEL{DsvgUtJA1;5XSFkMc z?DB%hsKH0a4f^y59`E`N$JdD##3eVhCJbyCrhV9Wa?4@qu}u-uBH7WV8wZL>3TTT|_pSKm?;0jsZPaXRV~?GP;UN ztA*nh>>4&4hJ| z)vVSTFik(|NF>5gteL5(*2CBZYOBs@p!G(J1?F@ox}(t9j9G1=)g~PoMua1RrZsB4 zo~Fq`LS}|QHZ#_0vw_B48lyq0!)(-g7o%h%62;S6rd(aEx6&4M4MSUW6$UlT zVPKUg6lyeCbyd0rj2XyK6lyKE(wy|jZ~`IHJ=iiRfDn)YgSvS9Y6UPW+H>E%IPe46A0QKr9Ad)0{qhQdZfzgu$(FX<1 z)Ttek-p#6LUP4NVp>H#$E34r$J0Uhu-(1D&n2l)az*f-E2oEVytGJ#zOWc3Q@AdnR<6rQG^I}cMNdN`3>Bkr@w30 zhdqfiOasT+_i@MB)@gEL&H+J)Q3zHRL*qz9xdD+(d7M=Nmr=l_%2>r1tkqQ(81O6%sA2R*4a5hCFEPX)P$XS#(!zAwVr5M9JiVIR zw~&k`MxzJ|W3Ivcosol#BgP;E!Z29Mjpi!4oZ%vf#R=RM7S>`r8{3|pjpNzbc)p!I z=g-cH+uGSv2c9sZlaYo@BN94fN?phZ(DVeCrqe-SxS@pKfZ%xZJOHN#Z*5r0>#>AO z+mmn^Pr?)V5}xEw!js!d_^Bgl7}43#nMU;Lkc4qsf<-!oFH+nvmESN8m673Pvf@!P z`Di6wCL3LuOjc4-JW5_#qMRV~@zlvn7}42m zC5=dXBHK{&O#$1+On$=*G^vgD!a42H8SPnztJ9wjr=XHH{2jIj{C(Poqst#}Y#)#z zVEcd!;gNkj-sZLsO?&pC<=F?rw~umv_EFK+KAt*f3?n+5B&HF=p2$8b`R0J_L&tAu z$38OKvkzCNKOMGXADO=G!_noBw<|K^l8er>+SC7Rp8oZG{a5+ZzoD)EpE?x}BRZRV zrx7_F(!V1Z8~Mt{+BfkVtZ=KCgF>>;3WruMF4;dCKkg6^2rcli2wy;q(kLpT*-jSE4Y%$VVr=* zv~iH3?wmI%-Tg!%h6&@CuQ*%d-V9@jfI9?E=UCzAP#(Q136?u0RqT! 
z?mQbx*s$~FhKGUxaRHLUGeBf`2FMD}02#qmFBnV4IRmdFhirbS0EKGQCTv%57YQep z@#qYoU**wRKriRf*}oBw_6+5J)%X7*iw^{F+*b%tl*SAfAvo)g9`(YB*FgAY#$v2C zYv868N4yT>xH~`h{AZjwi_w5vh$7w)pfH^Q?qg~_etyF@@09`+tcC|b=#L;)3HZ-% zWC~F$K)q=Tqc7)P6?oj}a<#^4G~S{Gw)p~dWTL*QGvD#qe4m%umV*O*YH#|0bBX9O5*K82UW)KNoZlGt+Rhugu?`YnF zbF@sWt`b%WJ@Weu-eGqm7iRc7>c08cbcBDVKmOh>CQ%;vxeU=e`mD}#ccE{Gd$vFB z&!Z`A{B|rK`=k8XJO1IFU%`pr-wHYYR`9J~r~SBcbniRJ?L7ZSJ7`A-dH(qO*MTby zP4d2je19vb(NrH=cJ08uLZ=R>V9UC4Q8f6DQeojLruccz@H_s10P3cNhGEAa5VU3tWYPV6e<_t;g2 zutU4TS}udo4q6U_eBK?~;m+`;03KW&=>q&^=g1BqTE8XWUT$3@Azj!3=>j~v!bQS3 z-#rre=7Gk+VnXS1ot`nMs~E@#cKXiM6{{mC1)nGAI`2YBYhyq_peo0~uAq^8i8XONOH47>684_p598&F=0n%W9 z^>q$|iBL{rWol7;d1gsQe0geSdPYgHu7Q41W@0gZhq4GUad0R!Fls26?reHIPgGF^ z7({ug1iZ%q)CJb5(ZFb0u&&%ySW(E3XxkPr18v)@pb;giC{CEm^Bhi0}^D{GZ^sY7nEe?W>%#Z>Bnc5 z0Mk)PW_}*t(B%ZCn>8#N9H!3kGWWSP_;OMci}Et_(&K@8Q}IU&gTfXTVBB)NI{ArP zO}s3zC^IoBC$(7L5WhBfyeRBo0Yy#dJl`TQ88HLGxu;fRK*yQm*9FP6Ke&L7tN#Aw6i|Z&F&buY12yajUVcwhM$8hw2AFS{6?Sj~ zlZ1K@KLZFPF*7hQff)UfZ`XooMi6n})4~l5jLZcLjofgtdEXgz+qgL Date: Tue, 21 Mar 2023 16:33:06 -0400 Subject: [PATCH 10/22] Add Vertex AI Pipelines test. Changes: - Refactors the component integrate test and adds a test to run the component on Vertex AI Pipelines. - Adds a Dockerfile to package the component code into a Docker image based on OSS TFX image. The image can then be used as the base image when running a pipeline in Vertex AI. - Updates the `bigquery_export` output of the predictions-to-bigquery to store the generated BigQuery table name. This aids with checking the output of the component during testing, but also allows any downstream component receive this component's output. 
--- tfx_addons/predictions_to_bigquery/Dockerfile | 10 + .../predictions_to_bigquery/component.py | 4 +- .../predictions_to_bigquery/executor.py | 32 +- .../predictions_to_bigquery/executor_test.py | 1 + .../integration_test.py | 318 ++++++++++++++---- tfx_addons/predictions_to_bigquery/utils.py | 10 +- 6 files changed, 283 insertions(+), 92 deletions(-) create mode 100644 tfx_addons/predictions_to_bigquery/Dockerfile diff --git a/tfx_addons/predictions_to_bigquery/Dockerfile b/tfx_addons/predictions_to_bigquery/Dockerfile new file mode 100644 index 00000000..74f1212a --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/Dockerfile @@ -0,0 +1,10 @@ +ARG PLATFORM=cpu + +FROM gcr.io/tfx-oss-public/tfx:latest + +WORKDIR /tfx-addons +RUN mkdir -p /tfx-addons/tfx_addons +ADD __init__.py /tfx-addons/tfx_addons +COPY ./ ./tfx_addons/predictions_to_bigquery + +ENV PYTHONPATH="/tfx-addons:${PYTHONPATH}" diff --git a/tfx_addons/predictions_to_bigquery/component.py b/tfx_addons/predictions_to_bigquery/component.py index 5bc8a1dc..00467797 100644 --- a/tfx_addons/predictions_to_bigquery/component.py +++ b/tfx_addons/predictions_to_bigquery/component.py @@ -101,7 +101,9 @@ def __init__( """ bigquery_export = bigquery_export or types.Channel( type=standard_artifacts.String) - schema = schema or types.Channel(type=standard_artifacts.Schema) + # schema = schema or types.Channel(type=standard_artifacts.Schema) + # transform_graph = (transform_graph or + # types.Channel(type=standard_artifacts.TransformGraph)) spec = PredictionsToBigQueryComponentSpec( inference_results=inference_results, diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index 8e2ae41f..e7c58fbc 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -18,7 +18,7 @@ import datetime import re -from typing import Any, Optional, Union +from typing import Any, Dict, List, Optional, Tuple, Union import 
apache_beam as beam import numpy as np @@ -43,21 +43,21 @@ _REGEX_BQ_TABLE_NAME = re.compile(r'^[\w-]*:?[\w_]+\.[\w_]+$') -def _check_exec_properties(exec_properties: dict[str, Any]) -> None: +def _check_exec_properties(exec_properties: Dict[str, Any]) -> None: for key in _REQUIRED_EXEC_PROPERTIES: if exec_properties[key] is None: raise ValueError(f'{key} must be set in exec_properties') -def _get_prediction_log_path(inference_results: list[Artifact]) -> str: +def _get_prediction_log_path(inference_results: List[Artifact]) -> str: inference_results_uri = artifact_utils.get_single_uri(inference_results) return f'{inference_results_uri}/*.gz' def _get_tft_output( - transform_graph: Optional[list[Artifact]] = None + transform_graph: Optional[List[Artifact]] = None ) -> Optional[tft.TFTransformOutput]: - if transform_graph is None: + if not transform_graph: return None transform_graph_uri = artifact_utils.get_single_uri(transform_graph) @@ -65,7 +65,7 @@ def _get_tft_output( def _get_labels(tft_output: tft.TFTransformOutput, - vocab_file: str) -> list[str]: + vocab_file: str) -> List[str]: tft_vocab = tft_output.vocabulary_by_name(vocab_filename=vocab_file) return [label.decode() for label in tft_vocab] @@ -89,7 +89,7 @@ def _add_bq_table_name_suffix(basename: str, def _get_additional_bq_parameters( table_expiration_days: Optional[int] = None, table_partitioning: Optional[bool] = False, -) -> dict[str, Any]: +) -> Dict[str, Any]: output = {} if table_partitioning: time_partitioning = {'type': 'DAY'} @@ -123,16 +123,16 @@ def _tensor_to_native_python_value( @beam.typehints.with_input_types(str) -@beam.typehints.with_output_types(beam.typehints.Iterable[tuple[str, str, +@beam.typehints.with_output_types(beam.typehints.Iterable[Tuple[str, str, Any]]) class FilterPredictionToDictFn(beam.DoFn): """Converts a PredictionLog proto to a dict.""" def __init__( self, - features: dict[str, tf.io.FixedLenFeature], + features: Dict[str, tf.io.FixedLenFeature], timestamp: 
datetime.datetime, filter_threshold: float, - labels: Optional[list[str]] = None, + labels: Optional[List[str]] = None, score_multiplier: float = 1., ): super().__init__() @@ -143,7 +143,7 @@ def __init__( self._score_multiplier = score_multiplier def _parse_prediction( - self, predictions: npt.ArrayLike) -> tuple[Optional[str], float]: + self, predictions: npt.ArrayLike) -> Tuple[Optional[str], float]: prediction_id = np.argmax(predictions) logging.debug("Prediction id: %s", prediction_id) logging.debug("Predictions: %s", predictions) @@ -151,7 +151,7 @@ def _parse_prediction( score = predictions[0][prediction_id] return label, score - def _parse_example(self, serialized: bytes) -> dict[str, Any]: + def _parse_example(self, serialized: bytes) -> Dict[str, Any]: parsed_example = tf.io.parse_example(serialized, self._features) output = {} for key, tensor in parsed_example.items(): @@ -190,9 +190,9 @@ class Executor(base_beam_executor.BaseBeamExecutor): """Implements predictions-to-bigquery component logic.""" def Do( self, - input_dict: dict[str, list[types.Artifact]], - output_dict: dict[str, list[types.Artifact]], - exec_properties: dict[str, Any], + input_dict: Dict[str, List[types.Artifact]], + output_dict: Dict[str, List[types.Artifact]], + exec_properties: Dict[str, Any], ) -> None: """Do function for predictions_to_bq executor.""" @@ -262,4 +262,6 @@ def Do( output_dict['bigquery_export']) bigquery_export.set_string_custom_property('generated_bq_table_name', bq_table_name) + with tf.io.gfile.GFile(bigquery_export.uri, 'w') as output_file: + output_file.write(bq_table_name) logging.info(f'Annotated data exported to {bq_table_name}') diff --git a/tfx_addons/predictions_to_bigquery/executor_test.py b/tfx_addons/predictions_to_bigquery/executor_test.py index 1b210103..d788fc2a 100644 --- a/tfx_addons/predictions_to_bigquery/executor_test.py +++ b/tfx_addons/predictions_to_bigquery/executor_test.py @@ -231,6 +231,7 @@ def setUp(self): 
mock.patch.object(types.Artifact, 'set_string_custom_property', autospec=True)) + self.enter_context(mock.patch.object(tf.io.gfile, 'GFile', autospec=True)) self.executor = executor.Executor() diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py index e6102216..9e4a298c 100644 --- a/tfx_addons/predictions_to_bigquery/integration_test.py +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -29,13 +29,19 @@ import os import pathlib import shutil +import subprocess +from typing import List +import tensorflow as tf from absl.testing import absltest from google.api_core import exceptions -from google.cloud import bigquery +from google.cloud import aiplatform, bigquery +from google.cloud.aiplatform import pipeline_jobs from ml_metadata.proto import metadata_store_pb2 from tfx import types from tfx import v1 as tfx +from tfx.dsl.component.experimental import container_component, placeholders +from tfx.dsl.components.base import base_node from tfx.proto import example_gen_pb2 from tfx.types import artifact_utils from tfx.types.standard_artifacts import Model, String @@ -44,6 +50,8 @@ _GOOGLE_CLOUD_PROJECT = os.environ['GOOGLE_CLOUD_PROJECT'] _GCS_TEMP_DIR = os.environ['GCS_TEMP_DIR'] +_GCP_SERVICE_ACCOUNT_EMAIL = os.environ.get('GCP_SERVICE_ACCOUNT_EMAIL') +_GCP_COMPONENT_IMAGE = os.environ['GCP_COMPONENT_IMAGE'] _BQ_TABLE_EXPIRATION_DATE = datetime.datetime.now() + datetime.timedelta( days=1) @@ -62,7 +70,6 @@ def _make_artifact_mapping( return {k: [_make_artifact(v)] for k, v in data_dict.items()} -@absltest.skip class ExecutorBigQueryTest(absltest.TestCase): """Tests executor pipeline exporting predicitons to a BigQuery table.""" def _get_full_bq_table_name(self, generated_bq_table_name): @@ -119,7 +126,8 @@ def setUp(self): def tearDown(self): super().tearDown() - self._expire_table(self.generated_bq_table_name) + if self.generated_bq_table_name: + 
self._expire_table(self.generated_bq_table_name) def test_Do(self): self.executor.Do(self.input_dict, self.output_dict, self.exec_properties) @@ -134,8 +142,17 @@ def test_Do(self): self._assert_bq_table_exists(self.generated_bq_table_name) +def _gcs_path_exists(gcs_path: str) -> bool: + files = tf.io.gfile.glob(gcs_path + '/*') + return bool(files) + + +def _copy_local_dir_to_gcs(local_dir: str, gcs_path: str): + subprocess.check_call(f'gsutil -m cp -r {local_dir} {gcs_path}', shell=True) + + @tfx.dsl.components.component -def _saved_model_component( +def _saved_model_function_component( model: tfx.dsl.components.OutputArtifact[Model], saved_model_dir: tfx.dsl.components.Parameter[str], ): @@ -145,35 +162,115 @@ def _saved_model_component( shutil.copytree(saved_model_dir, target_dir, dirs_exist_ok=True) +def _create_saved_model_container_component_class(): + return container_component.create_container_component( + name='SavedModelContainerComponent', + inputs={}, + outputs={ + 'model': Model, + }, + parameters={ + 'saved_model_dir': str, + }, + image='google/cloud-sdk:latest', + command=[ + 'sh', + '-exc', + ''' + saved_model_dir="$0" + model_uri="$1" + gsutil cp -r $saved_model_dir $model_uri/ + ''', + placeholders.InputValuePlaceholder('saved_model_dir'), + placeholders.OutputUriPlaceholder('model'), + ], + ) + + +def _saved_model_component(saved_model_dir: str): + if saved_model_dir.startswith('gs://'): + saved_model_component_class = ( + _create_saved_model_container_component_class()) + saved_model = saved_model_component_class(saved_model_dir=saved_model_dir) + else: + saved_model = _saved_model_function_component( + saved_model_dir=saved_model_dir) + return saved_model + + @tfx.dsl.components.component -def _get_predictions_to_bigquery_output( +def _get_output_function_component( bigquery_export: tfx.dsl.components.InputArtifact[String], output_filepath: tfx.dsl.components.Parameter[str], ): - """Checks output of the predictions-to-bigquery component.""" - 
generated_bq_table_name = bigquery_export.get_custom_property( - 'generated_bq_table_name') - output = { - 'generated_bq_table_name': generated_bq_table_name, - } - with open(output_filepath, 'wt', encoding='utf-8') as output_file: + """Copies component-under-test output to `output_filepath`.""" + with tf.io.gfile.GFile(bigquery_export.uri) as input_file: + bq_table_name = input_file.read() + with tf.io.gfile.GFile(output_filepath, 'w') as output_file: + output = { + 'generated_bq_table_name': bq_table_name, + } json.dump(output, output_file) +def _create_get_output_container_component_class(): + return container_component.create_container_component( + name='BigQueryExportContainerComponent', + inputs={ + 'bigquery_export': String, + }, + parameters={ + 'output_path': str, + }, + image='google/cloud-sdk:latest', + command=[ + 'sh', + '-exc', + ''' + apt install -y jq + bigquery_export_uri="$0" + local_bigquery_export_path=$(mktemp) + local_output_path=$(mktemp) + output_path="$1" + gsutil cp $bigquery_export_uri $local_bigquery_export_path + bq_table_name=$(cat $local_bigquery_export_path) + jq --null-input \ + --arg bq_table_name "$bq_table_name" \ + '{"generated_bq_table_name": $bq_table_name}' \ + > $local_output_path + gsutil cp -r $local_output_path $output_path + ''', + placeholders.InputUriPlaceholder('bigquery_export'), + placeholders.InputValuePlaceholder('output_path'), + ], + ) + + +def _get_output_component(output_channel, output_file): + if output_file.startswith('gs://'): + get_output_class = _create_get_output_container_component_class() + output_component = get_output_class(bigquery_export=output_channel, + output_path=output_file) + else: + output_component = _get_output_function_component( + bigquery_export=output_channel, output_filepath=output_file) + return output_component + + class ComponentIntegrationTest(absltest.TestCase): """Tests component integration with other TFX components/services.""" def setUp(self): super().setUp() # Pipeline 
config - self.dataset_dir = _TEST_DATA_DIR / 'penguins-dataset' - self.saved_model_dir = (_TEST_DATA_DIR / - 'sample-tfx-output/Trainer/model/6/Format-Serving') - self.model_channel = types.Channel(type=Model) - self.pipeline_name = 'component_integration_test' - self.pipeline_root = self.create_tempdir() - self.metadata_path = self.create_tempfile() + self.pipeline_name = 'component-integration-test' + self.test_file = 'test-tiny.csv' self.gcs_temp_dir = _GCS_TEMP_DIR - self.output_file = self.create_tempfile() + self.dataset_name = 'penguins-dataset' + self.saved_model_path = 'sample-tfx-output/Trainer/model/6/Format-Serving' + + # Vertex Pipeline config + self.service_account = _GCP_SERVICE_ACCOUNT_EMAIL + self.location = os.environ.get('GCP_REGION') or 'us-central1' # GCP config self.gcp_project = _GOOGLE_CLOUD_PROJECT @@ -182,28 +279,13 @@ def setUp(self): self.client = bigquery.Client() self.client.create_dataset(dataset=self.bq_dataset, exists_ok=True) - # Components - test_split = (example_gen_pb2.Input.Split(name='test', - pattern='test/test-tiny.csv')) - self.unlabeled_example_gen = tfx.components.CsvExampleGen( - input_base=str(self.dataset_dir), - input_config=example_gen_pb2.Input( - splits=[test_split])).with_id('UnlabeledExampleGen') - self.saved_model = _saved_model_component(saved_model_dir=str( - self.saved_model_dir)) # type: ignore - self.bulk_inferrer = tfx.components.BulkInferrer( - examples=self.unlabeled_example_gen.outputs['examples'], - model=self.saved_model.outputs['model'], - data_spec=tfx.proto.DataSpec(), - model_spec=tfx.proto.ModelSpec(), - ) - # Test config self.generated_bq_table_name = None def tearDown(self): super().tearDown() - self._expire_table(self.generated_bq_table_name) + if self.generated_bq_table_name is not None: + self._expire_table(self.generated_bq_table_name) def _expire_table(self, full_bq_table_name): full_bq_table_name = full_bq_table_name.replace(':', '.') @@ -215,46 +297,140 @@ def _expire_table(self, 
full_bq_table_name): table.expires = _BQ_TABLE_EXPIRATION_DATE self.client.update_table(table, ['expires']) - def _create_pipeline(self, component_under_test, output_filepath): - get_output = (_get_predictions_to_bigquery_output( - bigquery_export=component_under_test.outputs['bigquery_export'], - output_filepath=output_filepath)) - components = [ - self.unlabeled_example_gen, - self.saved_model, - self.bulk_inferrer, - component_under_test, - get_output, - ] + def _create_gcs_tempfile(self) -> str: + timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + self.gcs_temp_file = os.path.join(_GCS_TEMP_DIR, 'pipeline-outputs', + f'output-{timestamp}') + return self.gcs_temp_file + + def _create_upstream_components(self, use_gcs=False): + if use_gcs: + gcs_test_data_dir = os.path.join(_GCS_TEMP_DIR, _TEST_DATA_DIR.stem) + if not _gcs_path_exists(gcs_test_data_dir): + # Copy test files to GCS + _copy_local_dir_to_gcs(str(_TEST_DATA_DIR), _GCS_TEMP_DIR) + dataset_dir = os.path.join(gcs_test_data_dir, self.dataset_name) + saved_model_dir = os.path.join(gcs_test_data_dir, self.saved_model_path) + else: + dataset_dir = str(_TEST_DATA_DIR / self.dataset_name) + saved_model_dir = str(_TEST_DATA_DIR / self.saved_model_path) + + test_split = example_gen_pb2.Input.Split(name='test', + pattern=f'test/{self.test_file}') + unlabeled_example_gen = tfx.components.CsvExampleGen( + input_base=dataset_dir, + input_config=example_gen_pb2.Input( + splits=[test_split])).with_id('UnlabeledExampleGen') + + saved_model = _saved_model_component(saved_model_dir) + + bulk_inferrer = tfx.components.BulkInferrer( + examples=unlabeled_example_gen.outputs['examples'], + model=saved_model.outputs['model'], + data_spec=tfx.proto.DataSpec(), + model_spec=tfx.proto.ModelSpec(), + ) + + return { + 'unlabeled_example_gen': unlabeled_example_gen, + 'saved_model': saved_model, + 'bulk_inferrer': bulk_inferrer, + } + + def _create_pipeline(self, + component_under_test: base_node.BaseNode, + 
upstream_components: List[base_node.BaseNode], + output_file: str, + pipeline_dir: str, + metadata_connection_config=None): + output_component = _get_output_component( + component_under_test.outputs['bigquery_export'], output_file) + components = (upstream_components + + [component_under_test, output_component]) return tfx.dsl.Pipeline( pipeline_name=self.pipeline_name, - pipeline_root=str(self.pipeline_root.full_path), - metadata_connection_config=( - tfx.orchestration.metadata.sqlite_metadata_connection_config( - self.metadata_path.full_path)), - components=components) - - def _run_pipeline(self, component_under_test): - output_tempfile = self.create_tempfile() - pipeline = self._create_pipeline(component_under_test, - output_tempfile.full_path) - tfx.orchestration.LocalDagRunner().run(pipeline) - with open(output_tempfile.full_path, encoding='utf-8') as output_file: - output = json.load(output_file) - return output - - def test_bulk_inferrer_bigquery_integration(self): - """Tests component integration with BulkInferrer and BigQuery.""" - predictions_to_bigquery = component.PredictionsToBigQueryComponent( - inference_results=self.bulk_inferrer.outputs['inference_result'], - bq_table_name=self.bq_table_name, - gcs_temp_dir=self.gcs_temp_dir, - ) + pipeline_root=pipeline_dir, + components=components, + metadata_connection_config=metadata_connection_config) + + def _run_local_pipeline(self, pipeline): + assert pipeline.metadata_connection_config is not None + return tfx.orchestration.LocalDagRunner().run(pipeline) + + def _run_vertex_pipeline(self, pipeline): + pipeline_definition_file = os.path.join( + '/tmp', f'{self.pipeline_name}-pipeline.json') + runner = tfx.orchestration.experimental.KubeflowV2DagRunner( + config=tfx.orchestration.experimental.KubeflowV2DagRunnerConfig( + default_image=_GCP_COMPONENT_IMAGE), + output_filename=pipeline_definition_file) + runner.run(pipeline) + + aiplatform.init(project=_GOOGLE_CLOUD_PROJECT, location=self.location) + job = 
pipeline_jobs.PipelineJob(template_path=pipeline_definition_file, + display_name=self.pipeline_name) + return job.run(service_account=self.service_account, sync=True) + + def _check_output(self, output_file: str): + with tf.io.gfile.GFile(output_file) as output_file_handler: + output = json.load(output_file_handler) - output = self._run_pipeline(predictions_to_bigquery) self.generated_bq_table_name = output['generated_bq_table_name'] self.assertStartsWith(self.generated_bq_table_name, self.bq_table_name) + def test_local_pipeline(self): + """Tests component using a local pipeline runner.""" + upstream = self._create_upstream_components() + component_under_test = component.PredictionsToBigQueryComponent( + inference_results=( + upstream['bulk_inferrer'].outputs['inference_result']), + bq_table_name=self.bq_table_name, + gcs_temp_dir=self.gcs_temp_dir, + ) + output_file = self.create_tempfile() + pipeline_dir = self.create_tempdir() + metadata_path = self.create_tempfile() + metadata_connection_config = ( + tfx.orchestration.metadata.sqlite_metadata_connection_config( + metadata_path.full_path)) + pipeline = self._create_pipeline( + component_under_test, + [ + upstream['unlabeled_example_gen'], + upstream['saved_model'], + upstream['bulk_inferrer'], + ], + output_file.full_path, + pipeline_dir.full_path, + metadata_connection_config, + ) + self._run_local_pipeline(pipeline) + self._check_output(output_file.full_path) + + def test_vertex_pipeline(self): + """Tests component using Vertex AI Pipelines.""" + upstream = self._create_upstream_components(use_gcs=True) + component_under_test = component.PredictionsToBigQueryComponent( + inference_results=( + upstream['bulk_inferrer'].outputs['inference_result']), + bq_table_name=self.bq_table_name, + gcs_temp_dir=self.gcs_temp_dir, + ) + output_file = self._create_gcs_tempfile() + pipeline_dir = os.path.join(_GCS_TEMP_DIR, 'pipeline-root') + pipeline = self._create_pipeline( + component_under_test, + [ + 
upstream['unlabeled_example_gen'], + upstream['saved_model'], + upstream['bulk_inferrer'], + ], + output_file, + pipeline_dir, + ) + self._run_vertex_pipeline(pipeline) + self._check_output(output_file) + if __name__ == '__main__': absltest.main() diff --git a/tfx_addons/predictions_to_bigquery/utils.py b/tfx_addons/predictions_to_bigquery/utils.py index 4ffb690b..870f7051 100644 --- a/tfx_addons/predictions_to_bigquery/utils.py +++ b/tfx_addons/predictions_to_bigquery/utils.py @@ -30,8 +30,8 @@ from tensorflow_serving.apis import prediction_log_pb2 from tfx.types import Artifact, artifact_utils -FeatureSpec = dict[str, Union[tf.io.FixedLenFeature, tf.io.VarLenFeature]] -BigQuerySchema = dict[str, Any] +FeatureSpec = Dict[str, Union[tf.io.FixedLenFeature, tf.io.VarLenFeature]] +BigQuerySchema = Dict[str, Any] _SCHEMA_FILE_NAME = "schema.pbtxt" _REGEX_CHARS_TO_REPLACE = re.compile(r'[^a-zA-Z0-9_]') @@ -62,7 +62,7 @@ def _get_compress_type(file_path: str) -> Optional[str]: b'\x1f\x8b': 'GZIP' } - with open(file_path, 'rb') as input_file: + with tf.io.gfile.GFile(file_path, 'rb') as input_file: two_bytes = input_file.read(2) return magic_bytes.get(two_bytes) @@ -123,10 +123,10 @@ def _get_feature_spec_from_prediction_results( def get_feature_spec( - schema: Optional[list[Artifact]] = None, + schema: Optional[List[Artifact]] = None, tft_output: Optional[tft.TFTransformOutput] = None, prediction_log_path: Optional[str] = None, -) -> dict[str, Any]: +) -> Dict[str, Any]: """Returns a TensorFlow feature spec representing the input data schema. Specify one of `schema`, `tft_output`, `prediction_log_path` as the source From c68667b56a85b4b9a5c19c58f6a63f2c9b8ba53f Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Mon, 27 Mar 2023 10:16:22 -0400 Subject: [PATCH 11/22] pred2bq: Add deps to version.py; update pkg version. 
--- tfx_addons/version.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tfx_addons/version.py b/tfx_addons/version.py index 41213317..8c036f61 100644 --- a/tfx_addons/version.py +++ b/tfx_addons/version.py @@ -79,5 +79,6 @@ "huggingface_pusher": [f"tfx{_TFXVERSION_CONSTRAINT}", "huggingface-hub>=0.10.0,<1.0.0"], "model_card_generator": - [f"tfx{_TFXVERSION_CONSTRAINT}", "model-card-toolkit>=2.0.0,<3.0.0"] + [f"tfx{_TFXVERSION_CONSTRAINT}", "model-card-toolkit>=2.0.0,<3.0.0"], + "predictions_to_bigquery": [f"tfx{_TFXVERSION_CONSTRAINT}"], } From 8f485179a506b329275afd1855a9203a157a1955 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Tue, 28 Mar 2023 10:01:51 -0400 Subject: [PATCH 12/22] pred2bq: Add integration test with transform. Adds a test that integrates the transform component into the pipeline. Test is implemented for local runner only. --- .../predictions_to_bigquery/executor.py | 4 +- .../integration_test.py | 56 ++++++++++++++---- .../penguins-dataset/test/test-tiny.csv | 8 +-- .../model/6/Format-Serving/fingerprint.pb | Bin 56 -> 53 bytes .../model/6/Format-Serving/keras_metadata.pb | 12 ++-- .../model/6/Format-Serving/saved_model.pb | Bin 191307 -> 197618 bytes .../variables/variables.data-00000-of-00001 | Bin 9370 -> 9382 bytes .../Format-Serving/variables/variables.index | Bin 1495 -> 1495 bytes 8 files changed, 57 insertions(+), 23 deletions(-) diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index e7c58fbc..54d902b3 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -216,8 +216,10 @@ def Do( # get label names from TFTransformOutput object, if applicable if tft_output is not None and 'vocab_label_file' in exec_properties: - labels = _get_labels(tft_output, exec_properties['vocab_label_file']) + label_key = exec_properties['vocab_label_file'] + labels = _get_labels(tft_output, label_key) 
logging.info(f'Found the following labels from TFT vocab: {labels}.') + _ = features.pop(label_key, None) else: labels = None logging.info('No TFTransform output given; no labels parsed.') diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py index 9e4a298c..cb446193 100644 --- a/tfx_addons/predictions_to_bigquery/integration_test.py +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -33,7 +33,7 @@ from typing import List import tensorflow as tf -from absl.testing import absltest +from absl.testing import absltest, parameterized from google.api_core import exceptions from google.cloud import aiplatform, bigquery from google.cloud.aiplatform import pipeline_jobs @@ -44,7 +44,7 @@ from tfx.dsl.components.base import base_node from tfx.proto import example_gen_pb2 from tfx.types import artifact_utils -from tfx.types.standard_artifacts import Model, String +from tfx.types.standard_artifacts import Model, String, TransformGraph from tfx_addons.predictions_to_bigquery import component, executor @@ -129,6 +129,7 @@ def tearDown(self): if self.generated_bq_table_name: self._expire_table(self.generated_bq_table_name) + @absltest.skip def test_Do(self): self.executor.Do(self.input_dict, self.output_dict, self.exec_properties) self.assertIsNotNone(self.output_dict['bigquery_export']) @@ -151,6 +152,16 @@ def _copy_local_dir_to_gcs(local_dir: str, gcs_path: str): subprocess.check_call(f'gsutil -m cp -r {local_dir} {gcs_path}', shell=True) +@tfx.dsl.components.component +def _transform_function_component( + transform_graph: tfx.dsl.components.OutputArtifact[TransformGraph], + transform_dir: tfx.dsl.components.Parameter[str], +): + """TFX Transform component stub.""" + os.makedirs(transform_graph.uri, exist_ok=True) + shutil.copytree(transform_dir, transform_graph.uri, dirs_exist_ok=True) + + @tfx.dsl.components.component def _saved_model_function_component( model: 
tfx.dsl.components.OutputArtifact[Model], @@ -257,7 +268,7 @@ def _get_output_component(output_channel, output_file): return output_component -class ComponentIntegrationTest(absltest.TestCase): +class ComponentIntegrationTest(parameterized.TestCase): """Tests component integration with other TFX components/services.""" def setUp(self): super().setUp() @@ -267,6 +278,7 @@ def setUp(self): self.gcs_temp_dir = _GCS_TEMP_DIR self.dataset_name = 'penguins-dataset' self.saved_model_path = 'sample-tfx-output/Trainer/model/6/Format-Serving' + self.transform_path = 'sample-tfx-output/Transform/transform_graph/5' # Vertex Pipeline config self.service_account = _GCP_SERVICE_ACCOUNT_EMAIL @@ -303,7 +315,7 @@ def _create_gcs_tempfile(self) -> str: f'output-{timestamp}') return self.gcs_temp_file - def _create_upstream_components(self, use_gcs=False): + def _create_upstream_component_map(self, use_gcs=False): if use_gcs: gcs_test_data_dir = os.path.join(_GCS_TEMP_DIR, _TEST_DATA_DIR.stem) if not _gcs_path_exists(gcs_test_data_dir): @@ -311,9 +323,11 @@ def _create_upstream_components(self, use_gcs=False): _copy_local_dir_to_gcs(str(_TEST_DATA_DIR), _GCS_TEMP_DIR) dataset_dir = os.path.join(gcs_test_data_dir, self.dataset_name) saved_model_dir = os.path.join(gcs_test_data_dir, self.saved_model_path) + transform_dir = os.path.join(gcs_test_data_dir, self.transform_path) else: dataset_dir = str(_TEST_DATA_DIR / self.dataset_name) saved_model_dir = str(_TEST_DATA_DIR / self.saved_model_path) + transform_dir = str(_TEST_DATA_DIR / self.transform_path) test_split = example_gen_pb2.Input.Split(name='test', pattern=f'test/{self.test_file}') @@ -322,6 +336,8 @@ def _create_upstream_components(self, use_gcs=False): input_config=example_gen_pb2.Input( splits=[test_split])).with_id('UnlabeledExampleGen') + transform = _transform_function_component(transform_dir=transform_dir) + saved_model = _saved_model_component(saved_model_dir) bulk_inferrer = tfx.components.BulkInferrer( @@ -333,6 
+349,7 @@ def _create_upstream_components(self, use_gcs=False): return { 'unlabeled_example_gen': unlabeled_example_gen, + 'transform': transform, 'saved_model': saved_model, 'bulk_inferrer': bulk_inferrer, } @@ -378,14 +395,32 @@ def _check_output(self, output_file: str): self.generated_bq_table_name = output['generated_bq_table_name'] self.assertStartsWith(self.generated_bq_table_name, self.bq_table_name) - def test_local_pipeline(self): + @parameterized.named_parameters([ + ('inference_results_only', False), + ('inference_results_transform', True), + ]) + def test_local_pipeline(self, add_transform): """Tests component using a local pipeline runner.""" - upstream = self._create_upstream_components() + upstream = self._create_upstream_component_map() + upstream_components = [ + upstream['unlabeled_example_gen'], + upstream['saved_model'], + upstream['bulk_inferrer'], + ] + if add_transform: + transform_graph = upstream['transform'].outputs['transform_graph'] + upstream_components.append(upstream['transform']) + vocab_label_file = 'Species' + else: + transform_graph = None + vocab_label_file = None component_under_test = component.PredictionsToBigQueryComponent( inference_results=( upstream['bulk_inferrer'].outputs['inference_result']), + transform_graph=transform_graph, bq_table_name=self.bq_table_name, gcs_temp_dir=self.gcs_temp_dir, + vocab_label_file=vocab_label_file, ) output_file = self.create_tempfile() pipeline_dir = self.create_tempdir() @@ -395,11 +430,7 @@ def test_local_pipeline(self): metadata_path.full_path)) pipeline = self._create_pipeline( component_under_test, - [ - upstream['unlabeled_example_gen'], - upstream['saved_model'], - upstream['bulk_inferrer'], - ], + upstream_components, output_file.full_path, pipeline_dir.full_path, metadata_connection_config, @@ -407,9 +438,10 @@ def test_local_pipeline(self): self._run_local_pipeline(pipeline) self._check_output(output_file.full_path) + @absltest.skip('debugging') def test_vertex_pipeline(self): 
"""Tests component using Vertex AI Pipelines.""" - upstream = self._create_upstream_components(use_gcs=True) + upstream = self._create_upstream_component_map(use_gcs=True) component_under_test = component.PredictionsToBigQueryComponent( inference_results=( upstream['bulk_inferrer'].outputs['inference_result']), diff --git a/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv b/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv index 0723251e..5fd8fa83 100644 --- a/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv +++ b/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv @@ -1,4 +1,4 @@ -studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo) -PAL0708,2,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186,3800,FEMALE,8.94956,-24.69454 -PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195,3250,FEMALE,8.36821,-25.33302 -PAL0708,5,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,11/16/07,36.7,19.3,193,3450,FEMALE,8.76651,-25.32426 +studyName,Sample Number,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments +PAL0708,2,Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186,3800,FEMALE,8.94956,-24.69454,No comment +PAL0708,3,Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195,3250,FEMALE,8.36821,-25.33302,No comment +PAL0708,5,Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,11/16/07,36.7,19.3,193,3450,FEMALE,8.76651,-25.32426,No comment diff --git 
a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/fingerprint.pb b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/fingerprint.pb index 04e0508ee9625dcd88a0574b588cdb511a052481..079090b8d405e704f8573e8072af033c96e0b73b 100644 GIT binary patch literal 53 zcmV-50LuRejsKg^zKXo&dl2Z3&zzt2hrC%B)}zhstiSTvnE@ch@vilU-_D~>DD{iR L(%6ui^f593ijpAL literal 56 zcmV-80LT9b`rn43!<)3sfB_K9-l5y|TransformFeaturesLayer", "config": {"layer was saved without config": true}, "name": "transform_features_layer", "inbound_nodes": []}], "input_layers": [["culmen_length_mm", 0, 0], ["culmen_depth_mm", 0, 0], ["flipper_length_mm", 0, 0], ["body_mass_g", 0, 0]], "output_layers": [["dense_2", 0, 0]]}, "shared_object_id": 14, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": [{"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}], "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_length_mm"]}, {"class_name": "TypeSpec", 
"type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_depth_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "flipper_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "body_mass_g"]}]], {}]}, "save_spec": [{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_depth_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "flipper_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "body_mass_g"]}], "keras_version": "2.11.0", "backend": "tensorflow", "model_config": {"class_name": "Functional"}, "training_config": {"loss": {"class_name": "SparseCategoricalCrossentropy", "config": {"reduction": "auto", "name": "sparse_categorical_crossentropy", "from_logits": true, "ignore_class": null}, "shared_object_id": 19}, "metrics": [[{"class_name": "SparseCategoricalAccuracy", "config": {"name": "sparse_categorical_accuracy", "dtype": "float32"}, "shared_object_id": 20}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": false, "is_legacy_optimizer": false, "learning_rate": 0.009999999776482582, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 
+À3root"_tf_keras_network*ž3{"name": "model_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Functional", "config": {"name": "model_1", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "culmen_length_mm"}, "name": "culmen_length_mm", "inbound_nodes": []}, {"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "culmen_depth_mm"}, "name": "culmen_depth_mm", "inbound_nodes": []}, {"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "flipper_length_mm"}, "name": "flipper_length_mm", "inbound_nodes": []}, {"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "body_mass_g"}, "name": "body_mass_g", "inbound_nodes": []}, {"class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_1", "inbound_nodes": [[["culmen_length_mm", 0, 0, {}], ["culmen_depth_mm", 0, 0, {}], ["flipper_length_mm", 0, 0, {}], ["body_mass_g", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": 
"dense_3", "inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_4", "inbound_nodes": [[["dense_3", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_5", "trainable": true, "dtype": "float32", "units": 3, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_5", "inbound_nodes": [[["dense_4", 0, 0, {}]]]}, {"class_name": "TensorFlowTransform>TransformFeaturesLayer", "config": {"layer was saved without config": true}, "name": "transform_features_layer_1", "inbound_nodes": []}], "input_layers": [["culmen_length_mm", 0, 0], ["culmen_depth_mm", 0, 0], ["flipper_length_mm", 0, 0], ["body_mass_g", 0, 0]], "output_layers": [["dense_5", 0, 0]]}, "shared_object_id": 14, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}, {"class_name": "InputSpec", "config": {"dtype": null, "shape": 
{"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": [{"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}], "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_depth_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "flipper_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "body_mass_g"]}]], {}]}, "save_spec": [{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "culmen_depth_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "flipper_length_mm"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "body_mass_g"]}], "keras_version": "2.11.0", "backend": "tensorflow", "model_config": {"class_name": "Functional"}, "training_config": {"loss": {"class_name": "SparseCategoricalCrossentropy", "config": {"reduction": "auto", "name": "sparse_categorical_crossentropy", "from_logits": true, "ignore_class": null}, "shared_object_id": 
19}, "metrics": [[{"class_name": "SparseCategoricalAccuracy", "config": {"name": "sparse_categorical_accuracy", "dtype": "float32"}, "shared_object_id": 20}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": false, "is_legacy_optimizer": false, "learning_rate": 0.009999999776482582, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 ˆ root.layer-0"_tf_keras_input_layer*Ø{"class_name": "InputLayer", "name": "culmen_length_mm", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "culmen_length_mm"}}2 † root.layer-1"_tf_keras_input_layer*Ö{"class_name": "InputLayer", "name": "culmen_depth_mm", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "culmen_depth_mm"}}2 Š root.layer-2"_tf_keras_input_layer*Ú{"class_name": "InputLayer", "name": "flipper_length_mm", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "flipper_length_mm"}}2 þ root.layer-3"_tf_keras_input_layer*Î{"class_name": "InputLayer", "name": "body_mass_g", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "config": 
{"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "body_mass_g"}}2 -” root.layer-4"_tf_keras_layer*ê{"name": "concatenate", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Concatenate", "config": {"name": "concatenate", "trainable": true, "dtype": "float32", "axis": -1}, "inbound_nodes": [[["culmen_length_mm", 0, 0, {}], ["culmen_depth_mm", 0, 0, {}], ["flipper_length_mm", 0, 0, {}], ["body_mass_g", 0, 0, {}]]], "shared_object_id": 4, "build_input_shape": [{"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}]}2 -­root.layer_with_weights-0"_tf_keras_layer*ö{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate", 0, 0, {}]]], "shared_object_id": 7, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 4}}, "shared_object_id": 21}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}}2 
-¬root.layer_with_weights-1"_tf_keras_layer*õ{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 8}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 9}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense", 0, 0, {}]]], "shared_object_id": 10, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 8}}, "shared_object_id": 22}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 8]}}2 -²root.layer_with_weights-2"_tf_keras_layer*û{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 3, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 11}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 12}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_1", 0, 0, {}]]], "shared_object_id": 13, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, 
"min_ndim": 2, "axes": {"-1": 8}}, "shared_object_id": 23}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 8]}}2 -ö8  root.layer-8"_tf_keras_model*Ì8{"name": "transform_features_layer", "trainable": false, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "TensorFlowTransform>TransformFeaturesLayer", "config": {"layer was saved without config": true}, "build_input_shape": {"Body Mass (g)": {"class_name": "TensorShape", "items": [null, null]}, "Comments": {"class_name": "TensorShape", "items": [null, null]}, "Culmen Depth (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Culmen Length (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Delta 13 C (o/oo)": {"class_name": "TensorShape", "items": [null, null]}, "Delta 15 N (o/oo)": {"class_name": "TensorShape", "items": [null, null]}, "Flipper Length (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Sex": {"class_name": "TensorShape", "items": [null, null]}, "Clutch Completion": {"class_name": "TensorShape", "items": [null, 1]}, "Date Egg": {"class_name": "TensorShape", "items": [null, 1]}, "Individual ID": {"class_name": "TensorShape", "items": [null, 1]}, "Island": {"class_name": "TensorShape", "items": [null, 1]}, "Region": {"class_name": "TensorShape", "items": [null, 1]}, "Sample Number": {"class_name": "TensorShape", "items": [null, 1]}, "Species": {"class_name": "TensorShape", "items": [null, 1]}, "Stage": {"class_name": "TensorShape", "items": [null, 1]}, "studyName": {"class_name": "TensorShape", "items": [null, 1]}}, "is_graph_network": false, "full_save_spec": {"class_name": "__tuple__", "items": [[{"Body Mass (g)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Comments": {"class_name": "TypeSpec", "type_spec": 
"tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Culmen Depth (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Culmen Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 13 C (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 15 N (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Flipper Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Sex": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Clutch Completion": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Clutch Completion"]}, "Date Egg": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Date Egg"]}, "Individual ID": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Individual ID"]}, "Island": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Island"]}, "Region": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Region"]}, "Sample Number": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": 
[{"class_name": "TensorShape", "items": [null, 1]}, "int64", "Sample Number"]}, "Species": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Species"]}, "Stage": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Stage"]}, "studyName": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "studyName"]}}], {}]}, "save_spec": {"Body Mass (g)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Comments": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Culmen Depth (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Culmen Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 13 C (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 15 N (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Flipper Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Sex": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Clutch Completion": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", 
"items": [null, 1]}, "string", "Clutch Completion"]}, "Date Egg": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Date Egg"]}, "Individual ID": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Individual ID"]}, "Island": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Island"]}, "Region": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Region"]}, "Sample Number": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "int64", "Sample Number"]}, "Species": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Species"]}, "Stage": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Stage"]}, "studyName": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "studyName"]}}, "keras_version": "2.11.0", "backend": "tensorflow", "model_config": {"class_name": "TransformFeaturesLayer"}}2 +˜ root.layer-4"_tf_keras_layer*î{"name": "concatenate_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "dtype": "float32", "axis": -1}, "inbound_nodes": [[["culmen_length_mm", 0, 0, {}], ["culmen_depth_mm", 0, 0, {}], ["flipper_length_mm", 0, 0, {}], ["body_mass_g", 0, 0, {}]]], "shared_object_id": 4, 
"build_input_shape": [{"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 1]}]}2 +³root.layer_with_weights-0"_tf_keras_layer*ü{"name": "dense_3", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate_1", 0, 0, {}]]], "shared_object_id": 7, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 4}}, "shared_object_id": 21}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}}2 +®root.layer_with_weights-1"_tf_keras_layer*÷{"name": "dense_4", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 8}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 9}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, 
"kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_3", 0, 0, {}]]], "shared_object_id": 10, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 8}}, "shared_object_id": 22}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 8]}}2 +²root.layer_with_weights-2"_tf_keras_layer*û{"name": "dense_5", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_5", "trainable": true, "dtype": "float32", "units": 3, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 11}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 12}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_4", 0, 0, {}]]], "shared_object_id": 13, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 8}}, "shared_object_id": 23}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 8]}}2 +€6  root.layer-8"_tf_keras_model*Ö5{"name": "transform_features_layer_1", "trainable": false, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "TensorFlowTransform>TransformFeaturesLayer", "config": {"layer was saved without config": true}, "build_input_shape": {"Body Mass (g)": {"class_name": "TensorShape", "items": [null, null]}, "Comments": {"class_name": "TensorShape", "items": [null, null]}, "Culmen Depth (mm)": 
{"class_name": "TensorShape", "items": [null, null]}, "Culmen Length (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Delta 13 C (o/oo)": {"class_name": "TensorShape", "items": [null, null]}, "Delta 15 N (o/oo)": {"class_name": "TensorShape", "items": [null, null]}, "Flipper Length (mm)": {"class_name": "TensorShape", "items": [null, null]}, "Sex": {"class_name": "TensorShape", "items": [null, null]}, "Clutch Completion": {"class_name": "TensorShape", "items": [null, 1]}, "Date Egg": {"class_name": "TensorShape", "items": [null, 1]}, "Individual ID": {"class_name": "TensorShape", "items": [null, 1]}, "Island": {"class_name": "TensorShape", "items": [null, 1]}, "Region": {"class_name": "TensorShape", "items": [null, 1]}, "Sample Number": {"class_name": "TensorShape", "items": [null, 1]}, "Stage": {"class_name": "TensorShape", "items": [null, 1]}, "studyName": {"class_name": "TensorShape", "items": [null, 1]}}, "is_graph_network": false, "full_save_spec": {"class_name": "__tuple__", "items": [[{"Body Mass (g)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Comments": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Culmen Depth (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Culmen Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 13 C (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 15 N (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, 
"float32"]}, "Flipper Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Sex": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Clutch Completion": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Clutch Completion"]}, "Date Egg": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Date Egg"]}, "Individual ID": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Individual ID"]}, "Island": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Island"]}, "Region": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Region"]}, "Sample Number": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "int64", "Sample Number"]}, "Stage": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Stage"]}, "studyName": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "studyName"]}}], {}]}, "save_spec": {"Body Mass (g)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Comments": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Culmen Depth (mm)": 
{"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Culmen Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 13 C (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Delta 15 N (o/oo)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "float32"]}, "Flipper Length (mm)": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "int64"]}, "Sex": {"class_name": "TypeSpec", "type_spec": "tf.SparseTensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string"]}, "Clutch Completion": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Clutch Completion"]}, "Date Egg": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Date Egg"]}, "Individual ID": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Individual ID"]}, "Island": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Island"]}, "Region": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Region"]}, "Sample Number": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "int64", "Sample Number"]}, "Stage": {"class_name": "TypeSpec", "type_spec": 
"tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "Stage"]}, "studyName": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "string", "studyName"]}}, "keras_version": "2.11.0", "backend": "tensorflow", "model_config": {"class_name": "TransformFeaturesLayer"}}2 ¹vroot.keras_api.metrics.0"_tf_keras_metric*‚{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 24}2 üwroot.keras_api.metrics.1"_tf_keras_metric*Å{"class_name": "SparseCategoricalAccuracy", "name": "sparse_categorical_accuracy", "dtype": "float32", "config": {"name": "sparse_categorical_accuracy", "dtype": "float32"}, "shared_object_id": 20}2 \ No newline at end of file diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/saved_model.pb b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/saved_model.pb index e6395ba7317b134c443033b467e90c01ffdaa82c..81c8723592e7a5000edb360faa027a73c7c497fa 100644 GIT binary patch delta 25939 zcmeHv34ByVnmARRq`NC2>2&AlJaUI5B#@-fbay}`0rB7v!=<7$4GEYvB!MJ|?u@{A zUmQNQ6lD}0W*lV{rhn2ZDxfneC<3G7Ipejn>#92n>yElR9{;cE9sOSS>m>Lef4eh3 z`Dy39s_(9b>x4=6_z3m1RNp|F|ZL?5Vb2SG_xHp|$Q@QZT1GvaIu>-tLah z)eE}@E}GW4JTlllV9zG$T|Iq+1C{9=1DE!7iW96A;=$aU=u+zkmNK`eaE+(Y?QV26 zgnSJvBHm8VvgVLynL8Ln_Rn68cH2)k;cGD6a2(ANJ2qZR+ypJRPiU_vW^wwPmUcz@$t9$= zwIi~!X=PJKXHS1;m|?lZzA}pz^TA?bU84f*8hfVr^_XhG-*j>3>YmPSvX0bo8gY>0 z?PJ7U&RnslJR9h2DDjIsiy3nXwZt0d)==Vv@;H2kczFfmy`khZRJ!;yvSkC;4J9=++yd5o zu-Fc-Od*Lzp{kTmWJ+lfC8adT50zTQr%Eiyt7_=w8hW{gUap~1Cu!(4){r4c8Zsok zhF*~FY|&9sD3+I71aB%0y*drOpkbxBsoKF?Od(MQk}{>Vu#!?7KDw#;DFlsnpy*7I z);e?LzSTv__6B{;;@gdv;-954?$Q3HKcYk^tJ@n2cu9n0i>=jV;@$36 z^h)$ScLO4|j9g`VIVlx4FD_zkC*TtO-cvH)nMBM}dwco^!ai~G#Vt;A{q2)T`kF}h zV5fcKq_!I-SeKKWu~RjmMc0bZrz$p+zgvsOxY1+96}f>yv&?Zf%~v~8+c>Y@UG 
z`#GKKXJt+d0oXmxnkXnN}}p3;q2Yd zh}Bw@V)2~WPVrM-m@d}M7Q_o@GM_thc8Pd?pTxIrb|vyhXU;xs!n>@ztNPo=TjX)c zI&>9u=2oG|D#&GQC{@8Q(v~sDUIm1qOl+7tRkY2^2CU=l#qgA&dSa=hZ|-Ou<|M`5 zxn}Y1xn|`xXI>7_O~jH)I2LCb$C6$d7UQqyC6#b2Ni3BYe~pIc2X!5N`+_2~+ZS^A z$w6`dg6ZGSSY5ENyggqx3#%PaDpn>|wb+pW&8BieDpX;`{R{J^sl7^1NG;YAlKYdM zkdG5mx{jbHq!#N5$)ih8$S?Mt`!A%Pd6DY&DpDbCUyxU{y0d?vcXekNj2sNu|K~Xi zdHhuJjfvFK?f2s{VR=s&{E^pV&mxx2wUL#5-JShTvwbWnr|;c?iblJkIv8qpS`&+c ziA5oo%bBN#3b{NQC^MrKa{1`4Md)?p;q=A#|5_BgGaFNqcFac9GC^#*t|+#?2)~$) zBtfjQAcF2SsC@+i^sgTIRb_na4Rm&_dn+nI)fLQ2#q9!dhQnPw%R5(h_AKiR_pKi6 z>FfY8{oaspJ))~>Q9hceN=n~-3r)q>B5HdW6>7`Z--S*^>*)Tw^u_N+kcHZ|p+y%A$mnfo$2y!{?YJ38(bl*1;C*kSspwi7xf?mPQgIfpWhj8w ziM#LDQDRlM&BIJE+B8qkg`@3%2P6Xvv~~c^NpTBI*^Z{*4Tx^ot}h1*Sn169Kyf3Y z2cia*pMoWA|4hrUHeICY1DY;EKi-3$Hy@jJ$n+W6J0m_Dt6vsiQ#VI@fr@atI;j_Ky+Y?QnpX-M^w( zJh#IJo>M7`jX! z@nJa96T^`>Dmc=c7DtkU;Yd#mN0Lav@kjTsxPqYD#A&O0a*~y;2SGOX3*v%)7Mthx z*Tb*!&Pdh+xy}9mB<>s>ty8WTO7gjK#p2ui$u-yn?@Qp9(UIzrZd|1yIhCIRzDnlkdXDIf5X+~apP8BXMb1AfC%j#d7hh^mxB~~6+ zNW;pnW93U=#f=Q`vR0DKnO4i`;Tp7XNY(s|9|S5Mk6*ZiPYafDa@%q)bmJUs;|@kJ z2|JO_%at!y%mb|_9=Y8DEvh^*iJju{+fR@-eFIJT3T(wI%eNBV1&TygCUpi@CMnCx z{7i(R*y5Mb3S@q6Ecq%OX762tcnmBdTrMwtVF1l@Od*YYfP~$O4+_)WKSy=+lRA`P zKZP_U)Y3>BE*E!wn3ZQwLiKUfhCE9$o^L2TKgVlI#_}sznY8>1lxM4vg^{;JytLw3 zRA|2!RVS7o+lvaIJOSR90^UcB;r%J#{T#kXSVk%o#=0>>w*>KwF-Z6wBE`l4WG|Xu zP;XqfYA-s+aSI|wO4a5XwpJ;f(t^go?ty)ga6cji3Sqf}HO-0)%o^;rXGnhqm9kFi zCh4uEcW?l<%vxMRLjoDPPux&-cb!L%EwVki~5GOs3K@;i%Nqe zg++Z$sV(YDWl=s{k}N6>dT!A&IwuY17?{;Rj9Gn#X7w|(o}F(JlDjG&(T0ZQuI4O* zUg{s#rizrQZ7MkuX44Qdv7pnLqwgPn+TcSo${)kJ8KQQv9!mCiLQPUR_g_LHyzK9i z;AMZ81TXu$BzVc+(WC#3uB7|Sc%^-gu?sNv&koOQLtj>CQ~i+ar)aJf7bX*zMnfWT zNjU@JT)QOV&^rx@OFbfyxMT?f;@lKT#CeY^jJ@$3%(Tv57Lo^3g`hC=dc8g|`+x!^4MI?_$DfGZD2FL5{`F{?8-$=yT zG+J>pLVw%%(;ZYIH7}RlHEdAmm4m{QjX%Mx3M&?lf)R&h^5i$9sc|54&k zx?eyI^r!{Z&{=1p|4h#yD|8##Jr|yV@5XGQ?iD}2&?7GPl!=WCx{OZSnmuCf`mBx*=mnIys#PwDkmn zy$fH!@1ZxYM-zrRpM+&ez!wr0vt`L?^vwcwr#9O9(SKH&~RK0_(jYrw=0 
zOt$|Vh4@k+y79wX(lV}2C-XJOpS}>60=-GsA4f4##TJ33?ZvX3W-w^LlTv_;8`4RO zsuBYg!;vy{zql?kEiZ~lX8%Qz)v)W<6It15zYp2(5nqeUz;2gVwe-wPAngyphMccC z7#&S7O{hjyaaZ!(bu(JL-Mr6qF+nR`*Bud``tq<_Gk#(o70PbujSd+0tlE~2rY zqGHp9#6f>li~aSeK9fkvCU)#H)fXtov7_5EVPlUCEYyNV31N~?`|5B${jwfcjtmJ& zNaJx}{D_z!;TR&L>DS|NaZ`3&iIz)Y5nbFruo~vdj&Of>*Rsy0{(;D{i{tD5QYKm% zfbW=^S=-egUK#1X*p8h@XeU{me%L*XnVg`ujx%6z z-t<%lt_+Pxtx-aR6ebX%$Ln$Z$k34ydha+~zivd_k?Ko(jp;#sCj<*Q|(L&H)*5Mk~EcGdyWe&-{taEj5e^Vc{Zf8dm>?1eh z)>8*bmbTcQHV3K&X9E2==Rz$s5P2wsgVvcOQ!1wKorYYr=Q@g%%wxs$>l)aK-f_?PHi)%SC2*xE7bMM8igbGmt}E0IJ#&3AN$+Pv*Dh|k zDbFN)l)*zx7LgT;>#rcOuP%VYsOB=`%rTX+V3kL^*A;(s(-YHB9^Lf@>XVKQD~CTe zjKQkEDqZy4u%7lVz&+6$cDA9yFHB@K95cdpTbRq*Z~s>ldEZ1UiqPxSbvC|~9`~ab zvHYQsxc`pQ*qrm>M2`OXEHo#!;9|7gVk)+j)7_t<5^>ohrKn1Jy!(+c7V>U0*_RPN z5^Hw(W1qZ&#+dML4b1Qaij}{Kve5{%5ZyN{Wtt4;nV(%wZqJqulUwwM$w|j$Zc-NM z@9$dCvnH~-E3&k^v$s##Y!)AiK_}Rg2j{qQh%GMO(+JzyC-xMG_w6Z#r^S0}#Il1d zPvXQLAN~9F`1#&);r(Nq!Um^k}}A|7Z@>Ss$wv&*D#0;AtH^NpyES zS|dNb&!4Q1HN*Q9_+>nt;-<&O%1OJIi%USjr#m$Fm?y6+K`AHDF<2N>@|;;Mho7Z)A2BgGas9e!0Rc8F_V z%7bFbC_7(zU3~G40&(jRNcDNeAsgq&k=J|Zm8&qqg4oG%Ku$8`!n%n{wMMZ zFTCpa@W&th<*QKWj~;oA|M|=7ys+ras{<&r&;{!?C@&_O5n7jtrN1? 
zxP9r~o6t;Z+lVelcgHquM4uuzOnw>j_&|i-K|B^S`)4(q>pwA(?67o_p}5w!OyqSF zJyw91#G1CCwJ5K?jmRl~vh(#27k&3;G?x8j(Zf#MnYIzmVR$lv-QC3Jbxnvvb8ms2 zjd&2amNaru87@k|4URNCd>zAB zWyw4s?^ok-37q+{H=Uw9djN}Ctx>*YAq%k?Wx5;3lXlnjf)7sag$Ondfrz8jrilIF7WZ3Sjj%QaKG9$fgVL8Rf&Bk&arp zB+!A3C5f}xoQF%Os}yI`x;s##HjfTwQEjFi%%J+$knYvzmR~cZS(6@p0|HZ?MmKTz zcv#qkNR~1YX;#kzv@r8=k`Tb z_jgX^Nis07G;i|QV0niW(&k8FzMRRcg?Jaz>EgWcH+lDlKpu*Bz+AB{mf=QNUjN0I58%KA+erG zBd7x_IW>U`#3n{tQq}@g@fmWR<3>a#CK5g|0Z(Az!we@VGD?6Tv0p(17PVUchb=mbR~uBn5uI~MtE5_tWfQzlhf=| zyL*?lOi9!WLgaeL8HK2y9+(f2QeP9!cHD|ds}Ud_?Zy_muL&C7sJ@PV+k_q8fo;9x zFd|b^b3Ek2qituUs-NcA4VO+5Yq=RNb%Usi4Ylo3LqBc6gzk6aQ|y-{cccv-TtO#z z@N~!TF>xi5hS~|XnTe&$l9GWw_TVg=FR@-}7jr>Z$69bKNn&xP3%uOD8LRW27oTSD zB?;L!UJ9fII@^a&b3Bd6cmu`)eZq$=HlqPkL_hT5Y+JJ-R*4@s+1HY!%42?9MStuE zhOZ%#SGxmaWOrnvWaw?v4EIloVYwUwWokVoQ@8%$$$wpC< zd*q!UBZPAe4PHbig}`|wH4_tCs&KOPVY> z&Ffllo8t}RK2=n-5>oF>kbftH5(Q#iStD?ggX6?OBr^?9#FHFvLIjaW0zjN4b~3kf z#Y#B)o;$cwP;&5eia9|D-ldX8Dd$l!3D5YhnSK5}BgyPD;_IZ7Pl0(gsr6Y_XNm+g z?+^)S-c$)_Kcw^xVbLE_`XB=}F+22cm(sVvG@_Kg(Bi~_%{S$eQuO||DSamGRU1e* zzmmcgn3m)WJSoFBIWlA2%HbI>j?8lHN&bxKE&0MoL`oa`_lDkHoU3XjTDx@;r2uRMkq5#YUHU=lYdYAq*j?QmArep=_^FA zoNAwCv8y2Q6nhCo&Z>{oNXVwQlq#ep@9;H(+tfU>oo-^!Qdgt9q-U04DwQkAtgKwh z49yi5k)njvA!NWiknjsc^7IuGzd*#puI^E_%>>3J~nWtA5!Siv9i^h0mE}HCKrKR}w z06tOC^=@R%*G*1Ng&X9V&jw@0b<*aN({LIlMM_q-nxQW!$SiG~-s1t$q-1ePY-zx- zjC)D!cvg53z;Uq^lF4E#?FuRLlGwT#Ko*-PbI+FAS8j^Lgzcgt#Y-lNEv02kVoS-` z^v`KH3#dU)SHGRIN)P9b*{my!NMhC7(Wi;|KTvLC%13Tcf z#vtEk3`v>Gt<5HeOy*N!bRN1bZOCiV5R`f$SDM`pO}L~I(B|nHBnf+YPDF}((s18C zAD8dqrCw&qn6)O39yXXr-r;t{GIQq zz~u*-Gry-2^nP!7!rq{&6~Ctv{Kv@?mhMS6ewayiV*T9@d1apDjeOofH0RnhgdgJH zneau=ysi?P<>-j7xIS_(8{pJ`X_Pb4uk7tqR zMktoY7vO`Jjy{53zz4Z(Uic0`TkPzkkl1{(0>*5=LyzE$KcwkyeuR7{b@StW^B<;? 
zjb^>ighZ<(Z~jBZ^$!`>x?K4mGOqt#Gp>K3jmyf1OSF#d%0>r9l7jrd7iO8s`{`tr z=K34lM`X7bKS#>##XA$e)yI=t1M&^H*rMqWv!=dD=1-bn{6T(`tUKuz+0h~62+i%a z_&mcta71UHizd)V$Dyl+zxw76x#g>GCo%PSn?wIUQxANg$LpnAZ0HRB$rNekNqw_V ziEaG>ezQ-GZBJ?j_kV{OJV8o6ftP*cCBL?${%Dwby=X6re!A_W!i@VZWRNuQ&+!xUn?<57AQ|z)sCG4`Nu7uT2ipmW3`6a(B$iN`>o3FxcU{&pBbYe#a@dc*5 z6L4YGGS_Y%SPfqy4!bePE8e|8kreEN>~*elg35 zJPF@ZB|?i+`D7NuO{4Q^LzpR2FNa0!Jzcr`A`kiP0j z&)J4o#QfJ_7s~sFHGglM=zD0$O%tiN#o@^vtl|6O_;948yAcVYxzyWMU5_bhjN-Ok zBX=4UM#OZwS;S8_-j3TKy+hYCQ;m37P0=NH;qf$Q%Sj^owTLGnDx0lTQJoT^@t5OZ zIR@?}F5Zy9FhBU+G<+AN@G(`KFX6Tqa0&+_@3q9G7zP-;QC#HMJ-6Z}j9#EeDQ-)9 z9>wo_!RLl)@OJzsqdGVV;B~ZXD}KPd0k=V48q!liqZ~Egh1;Od=oYBpkz!DH8ESkB zwY`24{s-NMcL|@Pw&A}%z#yq>8-5@)3u6y4Gz+a$7y%385k52v?RZvM;4jkYF^XH$ z{wr`=|MaN-<%=gZ-UP-XfH6cm2@&r3~&g--@v68 zU|RfFJVbl~*d=t`UDz5sv=iTnr&qUBX}!D0_%pf07)<>XF7@uhm2i-{U>BT!dFcla zgHQ9(tX&xH-$`{osm@0)*~R_cU98SepJ8}@`fm)+A2aX9*O>56W#iqMCU=Ps~j^Ps|0RN16H)>PaLi_-1D=B&mZ*h{}!ydC`E3#XcMfwH? z;oG~eHiaJp47fPp&GeML*k+ERHibB=47w#MY4%ffD$ylR;L7wod#hFsRyfvlrj&~? 
znv=HI+S9h8+Yv2)0+(l)>^t;93_bA#ekaFouectU68UDMpT6@XE=U8 >b-KZXC0 z_O0SK3X?bLA%L|Xrc<86JL%{9a8=S^3eYYwC&uN~%# z_v7W`jmaB(kWcpGMJdGAj_r*HQqnbc8>LU;(s*U~bTD{j`)yfvawWP7M&8xv8u&_$ z^kxz0+iwmx9`Nv!EG7SibD$8c}f$M{~;2bjErA27WY+x7;&5UzG?HI1?# Y%#!DeVDeCdGco)1C&MLOC4T|>e^!lCi2wiq delta 21180 zcmeHvd3;nww(vXEo$gL0o#kfh+-#kNEeT2brniJG1Vz*!7{CpNCO{)`LJ|m|fD2K_ zK|~2$r4W!+pCh8eYsYpz!2w4{MP(U8)KSD)_(sQ>ftm3=-y28Yse5mC-(CWl_w@ID z^ZWTnI``H&b?Tf`r>f35RdxUT!_+t4NlCSU@X9+W^h|~DOvS#`Ym);9lFwR()=33R zr5cB$rf&G6Inr>4XZVeCU9(-DIc*K~ewP=74<1XV*V0_pdnd^ad?-{K;2!zh;d*&g z?k*O19dhN*PUcxX4yT9Cq)BtSmUJ$bC+9Z>rswS^Ajz2pv*g>0E%?+c=Hl zJGX5??NU_`&Z)~%Sw>TO=C>{K&-Hb-E$|CjLKIT%?Dn=E+Cv9L)m&FlX^lb)$rWxx zQ`Uvs(2%0r2-S?V5h~GimvJZ1tSEL$2ThW1EH2Kg{l0%ur@w=)phKeIyjPEuSy5?a z^4{G-CJj>vkya_>$+wl8gr^Ui3azlp<6?4POc;<+a1_o2&w~I-;&oF-_G*ND@)S^r?CXK||ULK_$7f z<|wVAFi#lVT0zU$Rr^S>#Vt7+=u%oBI~HWIqjwOCd~fXyWS3Fy8!iUEuKf_=Yt%d4 zw2h|8>#~Q+F3C$y5C!>ONdP*SlP_&8p+#)saguNJIHY>{XZ54tgbdE{xobw!q@`^g zOZ>u}Bb&b-VKEE;()`7U!1tWfY55G{8k%;^*w)L(Hn&{ryJl?5v`d9(6}VZ`NXU_E zSCz_LW0LIG39WGz^jeayUgZjOjkz0WSW#~I;Hn(?5tV@a^!V(wXw_kr>_F$($GE*_ zkMo&TTNQaRs~rWUfoH~@HOU{gl*$Ju7RzVuE#!>Iqb92F-YZK>6G@@Y0@@H+Ke15W zctJ#M>fwmXR;w*MGO-Nplef7iTp}iGh&~nakp}s&1##C;^kIcr_$y(xHX1etNj`K% zuDs_ymF~~366IU3vRE`ZRp6O0uq+yoy{0fwGU-7B*_F(DTt3=bWl{&Rd~Rx?DLfGJ zvNG?lvR z_yW4^y4)&lx`?73nnR*%NlGn=*UHsQCAyX*mtX&DI*bqgiqD~L@dn}!+P-Ub)D;RfPm-OEXOp!$<)gXv5X?WRNIGbhV&(r~zB`tk(w zImqV^*RzaE$mZCY#o+Ry?{R8EP`s4Xf$3p_^Hc#V{RPQGh*Q4)^)JW#Fo`zMe4nqq zbDn>ZzjKb?=bN+0-?q4Gkq=SaVk)(?<3zf+eep6|_9}p#KzIoD2>yVvEQG($B>3P2U&>$Um##6_s>tI=qUUYDah z@n!$3wYB(L(^so7T_|%Hr_P-W4pZm{j;+hHnE%yG4?$?HLW`1uzx_A(fuZWxDf9_K zEA+Orplhzb!`IQa%)iJtr>&#IH?Py@_SV$}(T~hinIYY1Fy?%M?WxAOz z&FykZ;+KR*NV0RUf@rPKlaMoS%g<9i7ezOkug9P3S*DpB=*OT`HqoH48JA1E$3RQe zqReHY(u`@aCdq-akQb=xFzStEn8}Oc6`cD^TgTC{dYk3-+0{8@c!t~Mk;GM@CNRJ& zonccNS-;JUDiYK#(5o$!$EEi;+JYvVSMg6zL<3<$6APNv&pA<=BMV-`_j}^FJD>p;->ssoMgT?4)R!^8}bUOXi-~FPy3CXOWPK; z^VO!S8;x5Lg@FrjQe$;4aN<9F8_bSe2FMUE%4NG

j#7yI`KdO9!)}7O*QsmA6iJ z=&ha(V!Pc?%7_cHU=aJ5KY!*yYWpVxdPSU?jpcYChu!Fj6s+|?1p?o6!w^{R$ks{}RZ9WQR|U zOGSCs%{gp(3(S)bcc-$J%OHzgHXd#UNARcPIieIoWPO8?d5+0$m;f7)hw~GF7~lZ= z>y;Qv2gt^T)7fpY#naw`8X1!qm; z&lak6jBq zZfD~89Op5Q+oP))zZQ7h4T8Z8ZoUEFw2v%-KQDF2>n$ zx%4hetbw~+{WvVx8!iubghFmoyn(wsuKo?&<>l6|uN>{{xEkcT%@?unZ^_rm4f#5# zny`7hT#6s^4Y)cw>qZTR>`u|A>szsIRCTBov~E{}1U!ehUte&rQ!|O@)w89x`LlJ|g(! zS!jp!80_Gd;RFLNJ713qTJ!Zzm3;oz%UGI&c!DeK@R%`ZJ_?5kyL|{W1~aVi9LRh9 zS;3Wmfn~`_*IJ5&DRSoTD`BWS_CNCSGK83B8mC!GgsV9QyQ!H}8tlR}8U3yqEQ(CK5hj3*uN4(_@9cwk;EMtNSZ66LX3gjBP8E+;>* z9W0?6lcE=SC9qV(LN6|l7)~}MyJ>~8_WR_Ij~B~ZA5W2+j+-p#)2TkK$XIC|oY- z%Ozf(CSFhTA|VVrx&vA=LEHsYh?M$S?@ypH#U3eP=XOFd>&hk0c4FJ&iAy>Jllotb3cFOIja^F?qct|xiRG$h;2=z*7V&nfiP`()P?M@O6yDC( zz5taCtAJV+?Am5@^~TPfE10zhF;^{D8*J=e)HQd>98O*357m$-&w6HrI75q7Yg_#t zO9UhT)vB`TkHaNw&wik&O~+G1*y;UXR=cW!*$+V0sQZA1)Aum~+SJxLxyH6Ri`$pB zEpG4X?7{X`5quaApnO(~hA(ew3 z0Gh67!6)@AfTRk*tul`t+XExm{6kQLVTt1@xH9(#Bq~BIQX@om_0wQ87i!@mw`}`M z+~Sq;WG#%;Y+iv#(GbPzS@%S8H&5AEMn>$*rj00e=w+~38Y5PnryE}>Qd79Y*p8Q> z1Y1UY+|XogFJnNb3^pB-q#{WwlB6Ojs>=)JzXC0gw~^96LqD;)5fd4%AVv!(j8)I3 z(w7I4$#OgDg6Nr-0Q@|NnXKFY!6a4?lUSR<^qh#HSPtsl(d!d9F!oji#v$-kFM zw^b}OVomg+`xfX=7Jg59~>i*Y_S1zDDw8tJW8%uuKr|FFfSMPoT{ET({&J? 
zqf;#>CP&v$r_9}D@|xwDY{evEW$%53p8=ii(O z7YWob6B7y4F49eyQc=6fe9}$-Xwt2_2Dj78J}xDd!CAA&3lRKkIkD*0*b^qc#xh)E zv*o8hq3p_P(i$`s!A%fssv$-HyA{?B>|q<39CT;EJd0tH#m*dAP$}DgR>8imhg&Wd z;-UPpAzr=S6^-?PJx-pLOPucWIC=P|cVeT{eI6(GtrOH$LOiFw*#se;>*5~pw;$ov zH-uea)Wo+D;W;ssS4Req@SN^%#Rv8{^**r2^-t*os}Y{*eIuP3-imU1c>0^X$G4&+ zOsN#GVq|NyTlbmttS8Plvxy-cFLKo3WW&bN9W_I$ju_vHWncLt* zaOHB61c`D7m=eWA@+>7~6U3CwSYn}{kEWj)Fa?OddPzRtji|443Am}{zol(KcL#UP zg;Ly%axBQFZV7WC$+}mMBl;BsO+C~Vfg0VC$0OHFf0vD20r`Q_8P8Di(+t+M0>p3% zIAFi=%_LBoGV!Lw(4)r@d905-y3|L7bnyIe^s8e-0yEFEimNc^ggIbVdK=_xDd037 zBUF)lfbIPh)2RyNe7BuFa2Pyr8*8g0kwSTGcMa}Lz1VFF;Z5fix4ogOC~sfL>rI-V zD`RIIq=u{pmM&@3Sm}LGCO^HXf!A;PgC4i|Ev4b}Aymv8+u8kZL5&R^S$a7RJifu` zYY;4k@rW7CiBY5EsIdh4=@ZBR&OV$7@nH+`NbQDHr zEQ@k^95WS4JU@>K@K*!FZ+-}1Zn(!lP?(uZk}xC-fw=Ts-;`E+=2mMg`%yKK+DfhewQVJPHzA`*Ro%WQ~z;)duNn@C3+*Zp~$%9wJ$0%_Pc+5)OZFARq+{$8-K9W|dMe0PR4~xSg{}ylOTU3PkhGiOs2v7dn*x+oKuDTM!WLiWV9Z@R8cpz2ZkD*Ye^2$wklubtw4~MlxL9u9X)rKLUG{6CMm#xY6uS(@9F; z;0gd*!~MWvtwV6TwYMEgjc!Ln9VY!+$$%qoPrdYyMcz@zIMl$~eA-Q4G=-PtI!7Jm zYzrR0J|7I^<$hwtTS%9~pP zgn_0mup2^23haW&qZD~`hEf-(_Qa`+0wr~UDQv0=H?FN`v0&uCVkE@>>RxEd#qcXN zg5z_jSdsL=`qeNdr`JS_RU}=6#XemPg-TLG$xMXO8)cyfJ76%vvkZ7{j#Vwk>1D|Ni#ItEvSf8ub2%h~<+!`!r+=pdD#5}>n> zAB1d{_W)dxj@g3xILM402;TAlw{2|dP(@b$FC{?#Vllhe-OKQs2+BF9TcbP!&0>^`*KK$E6!yzmgAmjLme5+?p+Tf zthjsW2@_LC&2|PBc5Xe)vEs6?0YHE`;W{{@g$v{WM@@Pd^Ch z7ZQ{P+WTrx>xNutLj=TByl4rvndh3eB`h7~Rn|r!iJ?oXhet9B?`0 zh0hFQqc=jDbrqqbVzhu*)rksi?Bqts`bXQTw7!Bz2;-Y5WluZ|RysZrPfJjC`*TVgqA&X9k zX#urTn(K2!w)g8|5Rip*R=fpFiJ8M@J5q0o6Y1O|kjj3y1;(a+pGI<0I%aCOLK3_F z5oGr{phM!@v}E-xnJpNmESW9FjuE7tOQi%=7!s{ZU7Q%UdMobdzky+7qKXKlEPWex zTqLCtVOBs^8kgfFIHI4rY3%N8P@CGFnE2^!aHaLt#ND3L)#>jMjN{uoI>dyN4d*O7 z(mM%pumQ@BZHHPbCJ+;9yJ`nmQ^$q5*Ug$&q%Ne|$`Cz~9mjO16}R{!vu8vh?DCzE znNZF7Q|7mI`y>3u_MKwTFYJUg>J4+Fqxg^W*0PD-|ENG7U0nr1D<~oT`|on+~TmYur-gOml5kjg}7?A9?>T* z)n;cgFtg#0^=DGpreabFM=H(*Mz--WxGa6Zx!bDU7t7tow!|6Cjr3HcpL_hF6y$fw 
z+qt6V5-JYPcgb76R~0%!^#3S%`=^T~Z&O^6q^vp?m0;&vNZ=a8=pDf8-2aZ(HWMz0QXIVIFV4FptaC4=f4qFnBd4 zmOVgRgYPEIz2xG1*~gwDiUetzy)=9@PtWi#g#Mq(R>vgkU znf!=}GD*q4mteK2k!qjmkIE?*tAmK_C*I$2`Zq)n(aDUu&#EpR(KheqXXQ#akiZ}` zi4@E9NbR#1zL~`N8xujN#f^pf%boZon5Zu~P+Mbe=z1r@)awAjoQ>ppX#AA(-4eC_PQcXoyKfIf3urnRV5K*x`Au0J5u>SK|e?Fn*d6c;JRIeLDgQ;{L}zv zh#=a%1*#q*&FcqJ17{1?`z)#AgZ!m|*OK(L2Fe}ZeN(uzsWlSMAvmyb*C))=#c=T$ ze+2gp(u`Y*iRl{L9#`<_Q+Q$3b|Ew)pKdst9s|KOVcUyjtPw*u^%E|5iVC+W7w#j! ziH8w)J?q*>&a#_dCFvV}LN=Sff@ZXl{?4Lh?k2<8o@dEp=7&i$nnUCiuKLXelol{Rviggk-b*j}sc4{1TZKcRPlyJxEINn(n@X&E$^q#!~&PEveu5i2f7_ zO^l>{ye0I3qoT%w(EKjNf2M@SgL!=eemIQTUnl$m@};kn!T9fk*9d;hhJF1S?ngV= zUJjGkNe+|Pm#-sCVkL(WCIy=glOG!35w`V3(rnyh437=m3J;>39#Z=zreQ7k{f_X8 z`xNwZc$0tgKwx{{ChOR+<0L)j7_YfQS94$Jn-O^VRjSue|ISe{oR!>ub{SK?^v*^0MG zrupBXIouq=>TxYcSdlRL0&+6nNm5~G(wJ0hVNqF`I&YF7LWcK){{+kz{Yurq#g`N1jjf$_`GpjQurOa{kLQv zJNK)B;G-X{;O|#RSM3;xgMgP+gfD=tJx$)=t<(&x7LOa$TJNyG{fd+iy=Ne8ac89a zH{|01Vxtp+*a5Dr8soiI{d^l-c9twO86QrMnw&EKK$e<~k4LZOPydzNZT!j*M&##g zum7C<5{&Ozb<2U7`((nM*5IvY$!TNiPQy;aRs-E&*lk!J^!=WE5At2qtXbKPzFZ-ge!cK`qY diff --git a/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/variables/variables.data-00000-of-00001 b/tfx_addons/predictions_to_bigquery/testdata/sample-tfx-output/Trainer/model/6/Format-Serving/variables/variables.data-00000-of-00001 index c16e62ca543e313309f678c31dbed8f1c03cd2ac..530bfdb7d741fac0f1574995f5a61a6c23183d21 100644 GIT binary patch delta 1803 zcmY*XdsvKF7@uxZwu`QmW|}Ud-K|WfRNr^bOieY2B?^ml8P{j2M@6+Up)B1sQYczY zw0Xo9sj2UL6H1Rwu{^AjW!y$uOPg!5JKwQ0&+hr-ocI0x-rxJa=RN1p?Blbg`IW99 z_R9n4nz;k0p1_5AEDNZXzopKk*nrVWD_WB12|M5ra!GN7A6;o8Yc7}m&_4!ZhYf*M zswr&G$sn&~d_^DIJRWvmoC{dhufaQmyCm4s2Kr{tqo8mP=!-o99+=p`+Jntxk!3b0 z&nlu4yts5|ub~unJUUHy@O!B!b{>^B=uVnCdr+)@R{@z%#@-o0XW|RMem)mHDp4SD z+&fYc(F_LXXQQZFQ|PJzPcozE4yi0zLmF08BKnFxeEv)yw$^qbr-}h;ak4cX^du0i zVVl#pE?q}`UMyHW=LS)JI2ahlNOP%=&uPSa(?VtDC86mebFf2VM|;(sC*Pbpj&8Ib 
zLeq%L$kBc(W#e!dthrK(;=Uh27T5Q-Js#*m=Qaz;x{){NZKghCW#_k5S)K)QZxxzz zT7hid9R`*PHBz#MiBC_;fX@gUSS9v=Lf;qY0&qi}!B;_-^szruXO&Wmizh(!!fWK+ zn0fG?suPW}4_BO)RJ zx6>Xfj@~2Rua70|)gEyE_C|1OToW1?TL_8SQDAF*2{miG0+lR@1Cbv%P`YMsE?V(q z5^PY_0CTt9plVq^Y4X~R9vtrkYHnD1|g+;+0KR zgy+)H#ufCkz;c+@y#sz`S*#Z}4KUg*9w-NQ05$(Sm0Ph3)SH$(6Mo+PJgnlT-bl95agZU3g#rsAg}&wXvb&k8dNz!L28=RB3Nls9Ww8w zU5HEe+2EWUd9b!XL(9dGZQpzscJhXiI5{&t{OKh7aAa!|uH}cXa>XZke+H?;iJ+Ui zE!qyeP9QeP?FlW<`Z%$*r(jc{w85AlpQQ&yEU^hvlg2?>KDCWYu%sH#r|d26N~DxD zzvMksM!Yw}xBQ{H1_vjd-mmMroU>S&6+a|kk{IxeaWUr$KVqN3k_>jWzY8eF`oGKb zFrRcnh4l;ny4~C}rB=DeZx)XCR&@qCGomzc0tOh{Q|{U>&Uk@+?kgig#x?Yp`52l> z7c^vIa9g%p%l$mi?sHj!tE8-7gsU7WEe&N7jF{*6EZ^xDHmxx_jPX51Mk3}MxAq|B z8M~SX!Wud8n4i3{1@kKmTCvaYLFQ}2M=SQ=QJ3UiWOO7e#=;i|x|@2>SUPe`ls@nY z`_Zrb@tcwPwYmaUs`+8H>059@)A8p-x?3#$l!L)vUZ0pDcXtnG_X=^9GWiNDF77$4 zU^>{k)TY%~eWcC0r32s2KW4~}!Iwxf@SF6nE8zELP9iSRewa_F+#AU7x>faCn|@cG z@Xo%xb{)pjJomj{Jro6 zqO|Fcz#^{@+?bXN{J9`(sOorF+1n7^*ci?uGS}Da)k@x?`dVJo(_ztpDF09@qgQ17 z1t+wlAYw`A#p^0OHf^GmpKOEgM|K29b?*~)&-HJ+o>WNInI}^9R;k2a6N-pud2B+Q zX{*tQHDhLemN+N7#Jef1wR(8)>^go~x Ia+e$Y15k%>3;+NC delta 1843 zcmaJ<3s6*L7zG!>S(VqiEN>QM5foh%1@>|Od+*+d(5MgrYCG~3rWnNR_F+%7s{IbMRy_op4<1LF zB}Jez`79V*C1EXL&#^u)NjjYT?lUlDtO-gtjWX*lO=AtIlfb$1Hn?-50rW)f1n%cQ zF)Vi(0^0BuH-O^Tr(o&*>2Ua^>FlHPGuffH9oR7^5i{XRgQoKR@HUJ>aN=WzOtvDK zTnbwWugz9ZhC8AhzUqXsA$_rIg8T%}x!%x+v)ROtssV=&-#rVndOBfGR>(8`@>U2V zZ@OqP&cG&$jN!&P#xtK+@to@wUJ5hjF?JdbzjD=RlHD9LJ=(FE=UlJ9ccwYA>_>h< zxhN4!iuo0t2+o zS0fS~d$wSI>=As4`%a3}ONl|ccYF_>Ef6f_I9ItD0{YdKR5gcT?M6e*s9-@}qX^$7aKok?&ftgKy;qNh-m3|7xD!-C8w#;&7n*tx?l7=d?~M$VY=H@)|s zw#4Jvxj3vn52>ioUTRa$5o%^%H$Ko2fET83!1%#`9O|fWa;W7@ntW=u3MVtMq|plssW1eV>I&2f_j8C_X^%4y7<9K~qD%W(h5 zePKl*L;a-8#Nx>p8RaIivQ6I4$jYv@;YXFIqWI)kMvckmSlEOYFlsa~nMO%|@Bk_| zLQ>8$S&xaGJ?+z*Y*9rqkaB(wAtnxHg#*kQ38wj>4c8|ZGi_tJzh_gzWL;)MmJ|Cw zicU^qcIMc2flXFSM#u?7(U$dgCMGPX-`JVrF1q0x^~_riV;!V+{lV=YnEn delta 308 zcmcc4eVu!P0sAj&(=Ttng-$dtVsSj6)I4!*0*n3C*Ey2~8C_X+Ha(s8$S&xaGJ#R;jtf-7+4Yl)nYOWn&hssrtjlc3qV|0C 
zg2_qD&K&di@_rMO5i){U$};&pGdstRh*>X1WyC>ZN|RW4SS~E_HJ>cNqRZlcTG?u{ zBa1G_g@=qkfGSLZDl{g~V`1aq6)@Na6fgq`aQ)x_Ir8gop>vb(0d-Y>|8i Date: Tue, 28 Mar 2023 12:52:55 -0400 Subject: [PATCH 13/22] pred2bq: Add integration test with schema. --- .../integration_test.py | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py index cb446193..022d4f15 100644 --- a/tfx_addons/predictions_to_bigquery/integration_test.py +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -299,6 +299,11 @@ def tearDown(self): if self.generated_bq_table_name is not None: self._expire_table(self.generated_bq_table_name) + def _add_test_label_to_table(self, table): + labels = {'test_method_name': self._testMethodName} + table.labels = labels + self.client.update_table(table, ['labels']) + def _expire_table(self, full_bq_table_name): full_bq_table_name = full_bq_table_name.replace(':', '.') try: @@ -307,7 +312,8 @@ def _expire_table(self, full_bq_table_name): logging.warning('Unable to read table: %s', full_bq_table_name) else: table.expires = _BQ_TABLE_EXPIRATION_DATE - self.client.update_table(table, ['expires']) + table = self.client.update_table(table, ['expires']) + self._add_test_label_to_table(table) def _create_gcs_tempfile(self) -> str: timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') @@ -338,6 +344,12 @@ def _create_upstream_component_map(self, use_gcs=False): transform = _transform_function_component(transform_dir=transform_dir) + statistics_gen = tfx.components.StatisticsGen( + examples=unlabeled_example_gen.outputs['examples'], ) + + schema_gen = tfx.components.SchemaGen( + statistics=statistics_gen.outputs['statistics'], ) + saved_model = _saved_model_component(saved_model_dir) bulk_inferrer = tfx.components.BulkInferrer( @@ -349,6 +361,8 @@ def _create_upstream_component_map(self, use_gcs=False): return { 
'unlabeled_example_gen': unlabeled_example_gen, + 'statistics_gen': statistics_gen, + 'schema_gen': schema_gen, 'transform': transform, 'saved_model': saved_model, 'bulk_inferrer': bulk_inferrer, @@ -396,10 +410,16 @@ def _check_output(self, output_file: str): self.assertStartsWith(self.generated_bq_table_name, self.bq_table_name) @parameterized.named_parameters([ - ('inference_results_only', False), - ('inference_results_transform', True), + ( + 'inference_results_only', + False, + False, + ), + ('inference_results_schema', True, False), + ('inference_results_transform', False, True), + ('inference_results_schema_transform', True, True), ]) - def test_local_pipeline(self, add_transform): + def test_local_pipeline(self, add_schema, add_transform): """Tests component using a local pipeline runner.""" upstream = self._create_upstream_component_map() upstream_components = [ @@ -407,6 +427,14 @@ def test_local_pipeline(self, add_transform): upstream['saved_model'], upstream['bulk_inferrer'], ] + + if add_schema: + upstream_components.append(upstream['statistics_gen']) + upstream_components.append(upstream['schema_gen']) + schema = upstream['schema_gen'].outputs['schema'] + else: + schema = None + if add_transform: transform_graph = upstream['transform'].outputs['transform_graph'] upstream_components.append(upstream['transform']) @@ -414,20 +442,24 @@ def test_local_pipeline(self, add_transform): else: transform_graph = None vocab_label_file = None + component_under_test = component.PredictionsToBigQueryComponent( inference_results=( upstream['bulk_inferrer'].outputs['inference_result']), transform_graph=transform_graph, + schema=schema, bq_table_name=self.bq_table_name, gcs_temp_dir=self.gcs_temp_dir, vocab_label_file=vocab_label_file, ) + output_file = self.create_tempfile() pipeline_dir = self.create_tempdir() metadata_path = self.create_tempfile() metadata_connection_config = ( tfx.orchestration.metadata.sqlite_metadata_connection_config( metadata_path.full_path)) + 
pipeline = self._create_pipeline( component_under_test, upstream_components, @@ -436,6 +468,7 @@ def test_local_pipeline(self, add_transform): metadata_connection_config, ) self._run_local_pipeline(pipeline) + self._check_output(output_file.full_path) @absltest.skip('debugging') From 4fca0a90eb5ea56795154ab04644b0789d71970f Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Wed, 29 Mar 2023 22:38:23 -0400 Subject: [PATCH 14/22] pred2bq: Add Transform component in Vertex AI test. Adds a container component stub to represent the TFX Transform component for integration testing on Vertex AI. --- .../predictions_to_bigquery/executor.py | 1 - .../integration_test.py | 77 +++++++++++++++---- .../penguins-dataset/test/test-tiny.csv | 8 +- tfx_addons/predictions_to_bigquery/utils.py | 4 +- 4 files changed, 70 insertions(+), 20 deletions(-) diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index 54d902b3..7318440a 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -219,7 +219,6 @@ def Do( label_key = exec_properties['vocab_label_file'] labels = _get_labels(tft_output, label_key) logging.info(f'Found the following labels from TFT vocab: {labels}.') - _ = features.pop(label_key, None) else: labels = None logging.info('No TFTransform output given; no labels parsed.') diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py index 022d4f15..9a5d3b5d 100644 --- a/tfx_addons/predictions_to_bigquery/integration_test.py +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -162,6 +162,40 @@ def _transform_function_component( shutil.copytree(transform_dir, transform_graph.uri, dirs_exist_ok=True) +def _create_transform_container_component_class(): + return container_component.create_container_component( + name='TransformContainerComponent', + inputs={}, + outputs={ + 
'transform_graph': TransformGraph, + }, + parameters={ + 'transform_dir': str, + }, + image='google/cloud-sdk:latest', + command=[ + 'sh', + '-exc', + ''' + transform_dir="$0" + transform_graph_uri="$1" + gsutil cp -r $transform_dir/* $transform_graph_uri/ + ''', + placeholders.InputValuePlaceholder('transform_dir'), + placeholders.OutputUriPlaceholder('transform_graph'), + ], + ) + + +def _transform_component(transform_dir: str): + if transform_dir.startswith('gs://'): + transform_class = _create_transform_container_component_class() + transform = transform_class(transform_dir=transform_dir) + else: + transform = _transform_function_component(transform_dir=transform_dir) + return transform + + @tfx.dsl.components.component def _saved_model_function_component( model: tfx.dsl.components.OutputArtifact[Model], @@ -326,6 +360,8 @@ def _create_upstream_component_map(self, use_gcs=False): gcs_test_data_dir = os.path.join(_GCS_TEMP_DIR, _TEST_DATA_DIR.stem) if not _gcs_path_exists(gcs_test_data_dir): # Copy test files to GCS + # NOTE: If local `testdata` files are updated, forcing a copy to the + # GCS mirror directory may be needed. 
_copy_local_dir_to_gcs(str(_TEST_DATA_DIR), _GCS_TEMP_DIR) dataset_dir = os.path.join(gcs_test_data_dir, self.dataset_name) saved_model_dir = os.path.join(gcs_test_data_dir, self.saved_model_path) @@ -337,15 +373,15 @@ def _create_upstream_component_map(self, use_gcs=False): test_split = example_gen_pb2.Input.Split(name='test', pattern=f'test/{self.test_file}') - unlabeled_example_gen = tfx.components.CsvExampleGen( + example_gen = tfx.components.CsvExampleGen( input_base=dataset_dir, input_config=example_gen_pb2.Input( splits=[test_split])).with_id('UnlabeledExampleGen') - transform = _transform_function_component(transform_dir=transform_dir) + transform = _transform_component(transform_dir=transform_dir) statistics_gen = tfx.components.StatisticsGen( - examples=unlabeled_example_gen.outputs['examples'], ) + examples=example_gen.outputs['examples'], ) schema_gen = tfx.components.SchemaGen( statistics=statistics_gen.outputs['statistics'], ) @@ -353,14 +389,14 @@ def _create_upstream_component_map(self, use_gcs=False): saved_model = _saved_model_component(saved_model_dir) bulk_inferrer = tfx.components.BulkInferrer( - examples=unlabeled_example_gen.outputs['examples'], + examples=example_gen.outputs['examples'], model=saved_model.outputs['model'], data_spec=tfx.proto.DataSpec(), model_spec=tfx.proto.ModelSpec(), ) return { - 'unlabeled_example_gen': unlabeled_example_gen, + 'example_gen': example_gen, 'statistics_gen': statistics_gen, 'schema_gen': schema_gen, 'transform': transform, @@ -409,6 +445,7 @@ def _check_output(self, output_file: str): self.generated_bq_table_name = output['generated_bq_table_name'] self.assertStartsWith(self.generated_bq_table_name, self.bq_table_name) + @absltest.skip('debugging') @parameterized.named_parameters([ ( 'inference_results_only', @@ -423,7 +460,7 @@ def test_local_pipeline(self, add_schema, add_transform): """Tests component using a local pipeline runner.""" upstream = self._create_upstream_component_map() upstream_components 
= [ - upstream['unlabeled_example_gen'], + upstream['example_gen'], upstream['saved_model'], upstream['bulk_inferrer'], ] @@ -471,29 +508,43 @@ def test_local_pipeline(self, add_schema, add_transform): self._check_output(output_file.full_path) - @absltest.skip('debugging') + @absltest.skip('long-running test') def test_vertex_pipeline(self): - """Tests component using Vertex AI Pipelines.""" + """Tests component using Vertex AI Pipelines. + + This tests the case where a Transform component is used for the input + schema. + """ upstream = self._create_upstream_component_map(use_gcs=True) + upstream_components = [ + upstream['example_gen'], + upstream['transform'], + upstream['saved_model'], + upstream['bulk_inferrer'], + ] + transform_graph = upstream['transform'].outputs['transform_graph'] + vocab_label_file = 'Species' + component_under_test = component.PredictionsToBigQueryComponent( inference_results=( upstream['bulk_inferrer'].outputs['inference_result']), + transform_graph=transform_graph, bq_table_name=self.bq_table_name, gcs_temp_dir=self.gcs_temp_dir, + vocab_label_file=vocab_label_file, ) + output_file = self._create_gcs_tempfile() pipeline_dir = os.path.join(_GCS_TEMP_DIR, 'pipeline-root') + pipeline = self._create_pipeline( component_under_test, - [ - upstream['unlabeled_example_gen'], - upstream['saved_model'], - upstream['bulk_inferrer'], - ], + upstream_components, output_file, pipeline_dir, ) self._run_vertex_pipeline(pipeline) + self._check_output(output_file) diff --git a/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv b/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv index 5fd8fa83..086441e9 100644 --- a/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv +++ b/tfx_addons/predictions_to_bigquery/testdata/penguins-dataset/test/test-tiny.csv @@ -1,4 +1,4 @@ -studyName,Sample Number,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length 
(mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments -PAL0708,2,Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186,3800,FEMALE,8.94956,-24.69454,No comment -PAL0708,3,Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195,3250,FEMALE,8.36821,-25.33302,No comment -PAL0708,5,Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,11/16/07,36.7,19.3,193,3450,FEMALE,8.76651,-25.32426,No comment +studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments +PAL0708,1,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A1,Yes,11/11/07,39.1,18.7,181,3750,MALE,,,Not enough blood for isotopes. +PAL0809,44,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N75A2,Yes,11/14/08,45.5,17,196,3500,FEMALE,9.36493,-24.66259, +PAL0910,124,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N43A2,Yes,11/22/09,49.9,16.1,213,5400,MALE,8.3639,-26.15531, diff --git a/tfx_addons/predictions_to_bigquery/utils.py b/tfx_addons/predictions_to_bigquery/utils.py index 870f7051..3ab63c29 100644 --- a/tfx_addons/predictions_to_bigquery/utils.py +++ b/tfx_addons/predictions_to_bigquery/utils.py @@ -243,13 +243,13 @@ def _create_annotation_fields( def feature_spec_to_bq_schema(feature_spec: FeatureSpec, - required: bool = True, + required: bool = False, **kwargs: int) -> BigQuerySchema: """Converts a TensorFlow feature spec into a BigQuery schema. Args: feature_spec: TensorFlow feature spec. - required: If True, mark BigQuery fields as required. + required: If True, mark BigQuery fields as required (i.e. not nullable). **kwargs: Additional keyword-arguments to pass to `_create_annotation_fields`. 
From b57d02abf56664542863deb5d8dfb04366ab7c0f Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Thu, 30 Mar 2023 23:15:15 -0400 Subject: [PATCH 15/22] pred2bq: Code cleanup and documentation. --- tfx_addons/predictions_to_bigquery/Dockerfile | 14 ++++ .../predictions_to_bigquery/component.py | 19 +++--- .../predictions_to_bigquery/component_test.py | 4 +- .../integration_test.py | 67 ++++++++++++++----- 4 files changed, 80 insertions(+), 24 deletions(-) diff --git a/tfx_addons/predictions_to_bigquery/Dockerfile b/tfx_addons/predictions_to_bigquery/Dockerfile index 74f1212a..50cfe5b0 100644 --- a/tfx_addons/predictions_to_bigquery/Dockerfile +++ b/tfx_addons/predictions_to_bigquery/Dockerfile @@ -1,3 +1,17 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== ARG PLATFORM=cpu FROM gcr.io/tfx-oss-public/tfx:latest diff --git a/tfx_addons/predictions_to_bigquery/component.py b/tfx_addons/predictions_to_bigquery/component.py index 00467797..1cf3281e 100644 --- a/tfx_addons/predictions_to_bigquery/component.py +++ b/tfx_addons/predictions_to_bigquery/component.py @@ -55,7 +55,11 @@ class PredictionsToBigQueryComponentSpec(types.ComponentSpec): } -class PredictionsToBigQueryComponent(base_component.BaseComponent): +class PredictionsToBigQuery(base_component.BaseComponent): + """Predictions to BigQuery TFX component. 
+ + Exports BulkInferrer inference_results data to a BigQuery table. + """ SPEC_CLASS = PredictionsToBigQueryComponentSpec EXECUTOR_SPEC = executor_spec.BeamExecutorSpec(executor.Executor) @@ -80,13 +84,13 @@ def __init__( inference_results: Inference results channel. bq_table_name: BigQuery table name in either PROJECT:DATASET.TABLE. or DATASET.TABLE formats. - bigquery_export: Outputs channel containing generated BigQuery table name. + bigquery_export: Outputs BigQuery table name containing results. The outputted name may contain a timestamp suffix defined by `table_suffix`. - transform_graph: TFTransform graph channel. + transform_graph: TFTransform output. If specified, and `schema` is not specified, the prediction input schema shall be derived from this channel. - schema: Schema channel. + schema: SchemaGen output. If specified, the prediction input schema shall be derived from this channel. expiration_days: BigQuery table expiration in number of days from @@ -97,13 +101,12 @@ def __init__( See: https://cloud.google.com/bigquery/docs/partitioned-tables table_time_suffix: Time format for table suffix in Linux strftime format. Example: '%Y%m%d - vocab_label_file: Name of the TF transform vocabulary file for the label. + vocab_label_file: Name of the TF Transform vocabulary file for mapping + string labels into integer IDs. If specified, this would be used to + get back string labels from predicted label IDs. 
""" bigquery_export = bigquery_export or types.Channel( type=standard_artifacts.String) - # schema = schema or types.Channel(type=standard_artifacts.Schema) - # transform_graph = (transform_graph or - # types.Channel(type=standard_artifacts.TransformGraph)) spec = PredictionsToBigQueryComponentSpec( inference_results=inference_results, diff --git a/tfx_addons/predictions_to_bigquery/component_test.py b/tfx_addons/predictions_to_bigquery/component_test.py index 2d78e888..dd8d29b1 100644 --- a/tfx_addons/predictions_to_bigquery/component_test.py +++ b/tfx_addons/predictions_to_bigquery/component_test.py @@ -33,11 +33,12 @@ def setUp(self): self._schema = channel_utils.as_channel([standard_artifacts.Schema()]) def testInit(self): - component_instance = component.PredictionsToBigQueryComponent( + component_instance = component.PredictionsToBigQuery( transform_graph=self._transform_graph, inference_results=self._inference_results, schema=self._schema, bq_table_name='gcp_project:bq_database.table', + gcs_temp_dir='gs://bucket/temp-dir', vocab_label_file='vocab_txt', filter_threshold=0.1, table_partitioning=False, @@ -53,6 +54,7 @@ def testInit(self): self.assertCountEqual( { 'bq_table_name', + 'gcs_temp_dir', 'table_expiration_days', 'filter_threshold', 'table_partitioning', diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py index 9a5d3b5d..7b4c7a4e 100644 --- a/tfx_addons/predictions_to_bigquery/integration_test.py +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -12,15 +12,46 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Integration test for the predictions-to-bigquery component. +"""Integration test for PredictionsToBigQuery component. 
Prerequisites: -- 'GOOGLE_CLOUD_PROJECT' environmental variable must be set containing - the GCP project ID to be used for testing. -- 'GCS_TEMP_DIR' environmental variable must be set containing the - Cloud Storage URI to use for handling temporary files as part of the - BigQuery export process. e.g. `gs://path/to/temp/dir`. -- BigQuery API must be enabled on the Cloud project. + +The following environmental variables should be defined. + + GOOGLE_CLOUD_PROJECT: environmental variable must be set containing + the GCP project ID to be used for testing. + + GCS_TEMP_DIR: Cloud Storage URI to use for handling temporary files as part + of the BigQuery export process. e.g. `gs://path/to/temp/dir`. + + GCP_SERVICE_ACCOUNT_EMAIL: Service account address to use for Vertex AI + pipeline runs. The service account should have access to Cloud + Storage and Vertex AI. Local test runs may still work without this variable. + + GCP_COMPONENT_IMAGE: Docker image repository name that would be used for + Vertex AI Pipelines integration testing. The Dockerfile associated with + this component will create a custom TFX image with the component that + should be uploaded to Artifact Registry. + A new Docker image should be uploaded whenever there are any changes + to the non-test module files of this component. + + +The following Google Cloud APIs should be enabled + + BigQuery API: For generating the BigQuery table output of this component. + + Vertex AI API: For running TFX pipeline jobs in Vertex. + + Artifact Registry API: For storing the Docker image to be used in order + to run a TFX pipeline with this component in Vertex AI. + +Vertex AI test: + +The `ComponentIntegrationTest` test class has a test to run the component +in Vertex AI Pipelines. The test is skipped by default, since it can take +several minutes to complete. You can comment out the skip decorator +(i.e. `@absltest.skip(...)`) and add similar decorators to other tests that +you don't want to run.
""" import datetime @@ -71,7 +102,10 @@ def _make_artifact_mapping( class ExecutorBigQueryTest(absltest.TestCase): - """Tests executor pipeline exporting predicitons to a BigQuery table.""" + """Tests executor pipeline exporting predictions to a BigQuery table. + + This test generates a BigQuery table with an expiration date of 1 day. + """ def _get_full_bq_table_name(self, generated_bq_table_name): return f'{self.gcp_project}.{self.bq_dataset}.{generated_bq_table_name}' @@ -103,9 +137,9 @@ def setUp(self): 'schema': (self.test_data_dir / 'Transform/transform_graph/5/metadata'), }) - self.temp_dir = self.create_tempdir() + self.temp_file = self.create_tempfile() self.output_dict = _make_artifact_mapping( - {'bigquery_export': pathlib.Path(self.temp_dir.full_path)}) + {'bigquery_export': pathlib.Path(self.temp_file.full_path)}) self.gcp_project = _GOOGLE_CLOUD_PROJECT self.bq_dataset = 'executor_bigquery_test_dataset' self.bq_table_name = f'{self.gcp_project}:{self.bq_dataset}.predictions' @@ -129,7 +163,6 @@ def tearDown(self): if self.generated_bq_table_name: self._expire_table(self.generated_bq_table_name) - @absltest.skip def test_Do(self): self.executor.Do(self.input_dict, self.output_dict, self.exec_properties) self.assertIsNotNone(self.output_dict['bigquery_export']) @@ -138,6 +171,8 @@ def test_Do(self): self.generated_bq_table_name = ( bigquery_export.get_custom_property('generated_bq_table_name')) # Expected table name format by BigQuery client: project.dataset.table_name + with open(self.temp_file.full_path, encoding='utf-8') as input_file: + self.generated_bq_table_name = input_file.read() self.generated_bq_table_name = (str(self.generated_bq_table_name).replace( ':', '.')) self._assert_bq_table_exists(self.generated_bq_table_name) @@ -303,7 +338,10 @@ def _get_output_component(output_channel, output_file): class ComponentIntegrationTest(parameterized.TestCase): - """Tests component integration with other TFX components/services.""" + """Tests component 
integration with other TFX components/services. + + This test generates a BigQuery table with an expiration date of 1 day. + """ def setUp(self): super().setUp() # Pipeline config @@ -445,7 +483,6 @@ def _check_output(self, output_file: str): self.generated_bq_table_name = output['generated_bq_table_name'] self.assertStartsWith(self.generated_bq_table_name, self.bq_table_name) - @absltest.skip('debugging') @parameterized.named_parameters([ ( 'inference_results_only', @@ -480,7 +517,7 @@ def test_local_pipeline(self, add_schema, add_transform): transform_graph = None vocab_label_file = None - component_under_test = component.PredictionsToBigQueryComponent( + component_under_test = component.PredictionsToBigQuery( inference_results=( upstream['bulk_inferrer'].outputs['inference_result']), transform_graph=transform_graph, @@ -525,7 +562,7 @@ def test_vertex_pipeline(self): transform_graph = upstream['transform'].outputs['transform_graph'] vocab_label_file = 'Species' - component_under_test = component.PredictionsToBigQueryComponent( + component_under_test = component.PredictionsToBigQuery( inference_results=( upstream['bulk_inferrer'].outputs['inference_result']), transform_graph=transform_graph, From a33b4191b41cb4610f2467ffe7216ab6745fa798 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Fri, 31 Mar 2023 00:33:49 -0400 Subject: [PATCH 16/22] pred2bq: Add readme file. 
--- tfx_addons/predictions_to_bigquery/README.md | 94 ++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 tfx_addons/predictions_to_bigquery/README.md diff --git a/tfx_addons/predictions_to_bigquery/README.md b/tfx_addons/predictions_to_bigquery/README.md new file mode 100644 index 00000000..59ac4955 --- /dev/null +++ b/tfx_addons/predictions_to_bigquery/README.md @@ -0,0 +1,94 @@ +# Prediction results to BigQuery component + +[![Python](https://img.shields.io/pypi/pyversions/tfx.svg?style=plastic)](https://github.com/tensorflow/tfx) +[![TensorFlow](https://img.shields.io/badge/TFX-orange)](https://www.tensorflow.org/tfx) + +## Project Description + +This component exports prediction results from BulkInferrer to a BigQuery +table. +The BigQuery table schema can be generated through one of the following sources: +1. From SchemaGen component output +2. From Transform component output +3. From BulkInferrer component output (i.e. prediction results) + +If both SchemaGen and Transform outputs are passed to the component, +the SchemaGen output will take priority. It would be best to use SchemaGen +for generating the BigQuery schema. + +If the Transform output channel is passed to the component, without the +SchemaGen output, the BigQuery schema will be derived from the pre-transform +metadata schema generated by Transform. Note that the metadata schema may +include a label key, which may not be present in the BulkInferrer prediction +results. Therefore, this option may not work for unlabeled data. + +If neither the SchemaGen nor Transform outputs are passed to the component, +the BigQuery schema will be parsed from the BulkInferrer prediction results +themselves, which contain tf.Example protos. + +Prediction string labels from the BulkInferrer output may be derived by passing a 'vocab_label_file' execution parameter to the component. This will only work +if the Transform component output is passed and if the `vocab_label_file` +is present.
+ +## Project Use-Case(s) + +The main use case for this components is to enable export of model prediction +results into a BigQuery for further data analysis. The exported table will +contain the model predictions and their corresponding inputs. If the input +data is labeled, this would allow users to compare labels and corresponding predictions. + +## Project Implementation + +PredictionsToBigQuery component uses Beam to process the prediction results +from BulkInferrer and export it to a BigQuery table. + +The BigQuery table name is passed as a parameter by the user, however the user +can also choose to have the component append a timestamp at the end of the table name. + +The output component is the fully qualified BigQuery table name where the inference results are stored, and this can be accessed through the `bigquery_export` key. The same table name is also stored as a custom property +of the `bigquery_export` artifact. + +### Usage example + +```python + +from tfx import v1 as tfx +import tfx_addons as tfxa + +... + +predictions_to_bigquery = tfxa.predictions_to_bigquery.PredictionsToBigQuery( + schema=schema_gen.outputs['schema'] + transform_graph=transform.outputs['transform_graph'], + bq_table_name='my_bigquery_table', + gcs_temp_dir='gs://bucket/temp-dir', + vocab_label_file='Label', +) +``` + +TFX pipeline examples can be found in `integration_test.py`. + +For a description of the inputs and execution parameters of the component, +refer to the `component.py` file. + +## Project Dependencies + +See `version.py` in the top repo directory for component dependencies. + +## Testing + +Each Python module has a correspondin unit test file ending in `_test.py`. + +An integration test is also available and requires use of a Google Cloud +project. Additional instructions for running the unit test can be found in `integration_test.py`. + +Some tests use Abseil's `absltest` module. 
+Install the package using pip: +```bash +pip install absl-py +``` + +## Acknowledgements + +This code was originally written by Hannes Hapke (Digits Financial Inc.) +on Feb. 6, 2023. From 6df100726ed1be8289de0f2010b589c2f19849a8 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Fri, 31 Mar 2023 09:58:43 -0400 Subject: [PATCH 17/22] pred2bq: Replace abseil tempfile creation. Replaces create_tempfile and create_tempdir calls from abseil's absltest.TestCase and parameterized.TestCase with equivalent methods from the tempfile package. The reason is that the abseil methods require parsing of the FLAGs variable, which may not be executed if absltest.main() is not invoked. This can happen when test filtering is performed, e.g. ``` python -m unittest path.to.test ``` --- .../integration_test.py | 66 +++++++++---------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/tfx_addons/predictions_to_bigquery/integration_test.py b/tfx_addons/predictions_to_bigquery/integration_test.py index 7b4c7a4e..73784bc9 100644 --- a/tfx_addons/predictions_to_bigquery/integration_test.py +++ b/tfx_addons/predictions_to_bigquery/integration_test.py @@ -61,6 +61,7 @@ import pathlib import shutil import subprocess +import tempfile from typing import List import tensorflow as tf @@ -74,7 +75,6 @@ from tfx.dsl.component.experimental import container_component, placeholders from tfx.dsl.components.base import base_node from tfx.proto import example_gen_pb2 -from tfx.types import artifact_utils from tfx.types.standard_artifacts import Model, String, TransformGraph from tfx_addons.predictions_to_bigquery import component, executor @@ -106,9 +106,6 @@ class ExecutorBigQueryTest(absltest.TestCase): This test generates a BigQuery table with an expiration date of 1 day. 
""" - def _get_full_bq_table_name(self, generated_bq_table_name): - return f'{self.gcp_project}.{self.bq_dataset}.{generated_bq_table_name}' - def _assert_bq_table_exists(self, full_bq_table_name): full_bq_table_name = full_bq_table_name.replace(':', '.') try: @@ -137,9 +134,6 @@ def setUp(self): 'schema': (self.test_data_dir / 'Transform/transform_graph/5/metadata'), }) - self.temp_file = self.create_tempfile() - self.output_dict = _make_artifact_mapping( - {'bigquery_export': pathlib.Path(self.temp_file.full_path)}) self.gcp_project = _GOOGLE_CLOUD_PROJECT self.bq_dataset = 'executor_bigquery_test_dataset' self.bq_table_name = f'{self.gcp_project}:{self.bq_dataset}.predictions' @@ -164,18 +158,18 @@ def tearDown(self): self._expire_table(self.generated_bq_table_name) def test_Do(self): - self.executor.Do(self.input_dict, self.output_dict, self.exec_properties) - self.assertIsNotNone(self.output_dict['bigquery_export']) - bigquery_export = artifact_utils.get_single_instance( - self.output_dict['bigquery_export']) - self.generated_bq_table_name = ( - bigquery_export.get_custom_property('generated_bq_table_name')) - # Expected table name format by BigQuery client: project.dataset.table_name - with open(self.temp_file.full_path, encoding='utf-8') as input_file: - self.generated_bq_table_name = input_file.read() - self.generated_bq_table_name = (str(self.generated_bq_table_name).replace( - ':', '.')) - self._assert_bq_table_exists(self.generated_bq_table_name) + with tempfile.NamedTemporaryFile() as output_file: + output_dict = _make_artifact_mapping( + {'bigquery_export': pathlib.Path(output_file.name)}) + + self.executor.Do(self.input_dict, output_dict, self.exec_properties) + + with open(output_file.name, encoding='utf-8') as input_file: + self.generated_bq_table_name = input_file.read() + + self.generated_bq_table_name = (str( + self.generated_bq_table_name).replace(':', '.')) + self._assert_bq_table_exists(self.generated_bq_table_name) def 
_gcs_path_exists(gcs_path: str) -> bool: @@ -527,23 +521,23 @@ def test_local_pipeline(self, add_schema, add_transform): vocab_label_file=vocab_label_file, ) - output_file = self.create_tempfile() - pipeline_dir = self.create_tempdir() - metadata_path = self.create_tempfile() - metadata_connection_config = ( - tfx.orchestration.metadata.sqlite_metadata_connection_config( - metadata_path.full_path)) - - pipeline = self._create_pipeline( - component_under_test, - upstream_components, - output_file.full_path, - pipeline_dir.full_path, - metadata_connection_config, - ) - self._run_local_pipeline(pipeline) - - self._check_output(output_file.full_path) + with tempfile.TemporaryDirectory() as pipeline_dir, \ + tempfile.NamedTemporaryFile() as output_file, \ + tempfile.NamedTemporaryFile() as metadata_path: + metadata_connection_config = ( + tfx.orchestration.metadata.sqlite_metadata_connection_config( + metadata_path.name)) + + pipeline = self._create_pipeline( + component_under_test, + upstream_components, + output_file.name, + pipeline_dir, + metadata_connection_config, + ) + self._run_local_pipeline(pipeline) + + self._check_output(output_file.name) @absltest.skip('long-running test') def test_vertex_pipeline(self): From 685cd609d35e2821696060a3f19b46fbcfaea96f Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Fri, 31 Mar 2023 11:08:13 -0400 Subject: [PATCH 18/22] Add tests to expand code coverage. 
--- tfx_addons/predictions_to_bigquery/README.md | 31 +++++++++++++++++++ .../predictions_to_bigquery/executor.py | 10 ++++-- .../predictions_to_bigquery/executor_test.py | 15 +++++++++ .../predictions_to_bigquery/utils_test.py | 26 +++++++++++++++- 4 files changed, 78 insertions(+), 4 deletions(-) diff --git a/tfx_addons/predictions_to_bigquery/README.md b/tfx_addons/predictions_to_bigquery/README.md index 59ac4955..b0648c81 100644 --- a/tfx_addons/predictions_to_bigquery/README.md +++ b/tfx_addons/predictions_to_bigquery/README.md @@ -88,6 +88,37 @@ Install the package using pip: pip install absl-py ``` +### Test coverage + +Test coverage can be generated using the `coverage package`: +```bash +pip install coverage +``` + +To get test code coverage on the component code, run the following from the +top directory of the tfx-addons repository: + +```bash +coverage run -m unittest discover -s tfx_addons/predictions_to_bigquery -p *_test.py +``` + +Generate a summary report in the terminal: +```bash +coverage report -m + +``` +Generate an HTML report that also details missed lines +```bash +coverage html -d /tmp/htmlcov +``` + +If working on a remote machine, the HTML coverage report can be viewed +by launching a web server +```bash +pushd /tmp/htmlcov +python -m http.server 8000 # or another unused port number +``` + ## Acknowledgements This code was originally written by Hannes Hapke (Digits Financial Inc.) diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index 7318440a..9fd2fd9f 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -111,10 +111,14 @@ def _tensor_to_native_python_value( values = tensor.values.numpy() else: values = tensor.numpy() - if not values: + if not np.any(values): return None - values = np.squeeze(values) # Removes extra dimension, e.g. shape (n, 1). 
- values = values.item() # Converts to native Python type + # Removes any extra dimension, e.g. shape (n, 1). + values = np.squeeze(values) + try: + values = values.item() # Convert to single Python value + except ValueError: + values = list(values) if isinstance(values, list) and isinstance(values[0], bytes): return [v.decode('utf-8') for v in values] if isinstance(values, bytes): diff --git a/tfx_addons/predictions_to_bigquery/executor_test.py b/tfx_addons/predictions_to_bigquery/executor_test.py index d788fc2a..55a9ea6f 100644 --- a/tfx_addons/predictions_to_bigquery/executor_test.py +++ b/tfx_addons/predictions_to_bigquery/executor_test.py @@ -348,6 +348,21 @@ def test_get_additional_bq_parameters(self, expiration_days, } self.assertEqual(expected, output) + def test_check_exec_properties_error_key_not_found(self): + exec_properties = { + 'bq_table_name': None, + 'filter_threshold': 0.1, + 'gcs_temp_dir': 'dir', + } + with self.assertRaises(ValueError): + executor._check_exec_properties(exec_properties) + + def test_tensor_to_native_python_value_bytes_list(self): + tensor = tf.constant([b'1', b'2', b'3']) + expected = ['1', '2', '3'] + output = executor._tensor_to_native_python_value(tensor) + self.assertEqual(expected, output) + if __name__ == '__main__': absltest.main() diff --git a/tfx_addons/predictions_to_bigquery/utils_test.py b/tfx_addons/predictions_to_bigquery/utils_test.py index 70c3ffb9..ae9d7fc1 100644 --- a/tfx_addons/predictions_to_bigquery/utils_test.py +++ b/tfx_addons/predictions_to_bigquery/utils_test.py @@ -85,11 +85,15 @@ def test_get_feature_spec(self, has_schema, has_tft_output, _ = utils.get_feature_spec(None, tft_output, None) mock_raw_feature_spec.assert_called_once() - else: + elif has_prediction_log_path: prediction_log_path = 'path' _ = utils.get_feature_spec(None, None, prediction_log_path) mock_parse_features_from_prediction_results.assert_called_once() + else: + with self.assertRaises(ValueError): + _ = 
utils.get_feature_spec(None, None, None) + @parameterized.named_parameters([ ('no_label_field', False), ('with_label_field', True), @@ -151,6 +155,26 @@ def test_feature_spec_to_bq_schema(self, add_label_field): self.assertEqual(expected, output) + @parameterized.named_parameters([ + ('none', None, None, None), + ('int', None, int, tf.int64), + ]) + def test_get_feature_type(self, feature, type_, expected): + output = utils._get_feature_type(feature=feature, type_=type_) + self.assertEqual(expected, output) + + @parameterized.named_parameters([ + ('unsupported', None, None), + ('boolean', tf.bool, 'BOOLEAN'), + ]) + def test_convert_tensorflow_dtype_to_bq_type(self, tf_dtype, expected): + if tf_dtype is not None: + output = utils._convert_tensorflow_dtype_to_bq_type(tf_dtype) + self.assertEqual(expected, output) + else: + with self.assertRaises(ValueError): + _ = utils._convert_tensorflow_dtype_to_bq_type(tf_dtype) + if __name__ == '__main__': absltest.main() From f68f0d716e49091795210041a3477683afaf246d Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Fri, 31 Mar 2023 22:19:03 -0400 Subject: [PATCH 19/22] Add project team to readme. --- tfx_addons/predictions_to_bigquery/README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tfx_addons/predictions_to_bigquery/README.md b/tfx_addons/predictions_to_bigquery/README.md index b0648c81..94d6b0b9 100644 --- a/tfx_addons/predictions_to_bigquery/README.md +++ b/tfx_addons/predictions_to_bigquery/README.md @@ -119,7 +119,9 @@ pushd /tmp/htmlcov python -m http.server 8000 # or another unused port number ``` -## Acknowledgements - -This code was originally written by Hannes Hapke (Digits Financial Inc.) -on Feb. 6, 2023. +## Project team +- Hannes Hapke (@hanneshapke, Digits Financial Inc.) 
+- Carlos Ezequiel (@cfezequiel, Google) +- Michael Sherman (@michaelwsherman, Google) +- Robert Crowe (@rcrowe-google, Google) +- Gerard Casas Saez (@casassg, Cash App) From 6035c17a5073102a128831a07a7811f35596a54e Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Fri, 31 Mar 2023 22:32:37 -0400 Subject: [PATCH 20/22] Update top-level readme. Mentions the predictions-to-bigquery component in top-level readme. --- README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7636d02b..f9ad0e79 100644 --- a/README.md +++ b/README.md @@ -10,25 +10,25 @@ SIG TFX-Addons is a community-led open source project. As such, the project depe ## Maintainership The maintainers of TensorFlow Addons can be found in the [CODEOWNERS](https://github.com/tensorflow/tfx-addons/blob/main/CODEOWNERS) file of the repo. If you would -like to maintain something, please feel free to submit a PR. We encourage multiple +like to maintain something, please feel free to submit a PR. We encourage multiple owners for all submodules. ## Installation -TFX Addons is available on PyPI for all OS. To install the latest version, +TFX Addons is available on PyPI for all OS. To install the latest version, run the following: ``` pip install tfx-addons ``` -To ensure you have a compatible version of dependencies for any given project, +To ensure you have a compatible version of dependencies for any given project, you can specify the project name as an extra requirement during install: ``` pip install tfx-addons[feast_examplegen,schema_curation] -``` +``` To use TFX Addons: @@ -45,18 +45,19 @@ tfxa.feast_examplegen.FeastExampleGen(...) 
## TFX Addons projects -* [tfxa.feast_examplegen](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/feast_examplegen) +* [tfxa.feast_examplegen](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/feast_examplegen) * [tfxa.feature_selection](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/feature_selection) * [tfxa.firebase_publisher](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/firebase_publisher) * [tfxa.huggingface_pusher](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/huggingface_pusher) -* [tfxa.message_exit_handler](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/message_exit_handler) -* [tfxa.mlmd_client](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/mlmd_client) +* [tfxa.message_exit_handler](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/message_exit_handler) +* [tfxa.mlmd_client](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/mlmd_client) * [tfxa.model_card_generator](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/model_card_generator) -* [tfxa.pandas_transform](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/pandas_transform) +* [tfxa.pandas_transform](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/pandas_transform) * [tfxa.sampling](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/sampling) -* [tfxa.schema_curation](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/schema_curation) +* [tfxa.schema_curation](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/schema_curation) * [tfxa.xgboost_evaluator](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/xgboost_evaluator) - +* [tfxa.predictions_to_bigquery](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/predictions_to_bigquery) + Check out [proposals](https://github.com/tensorflow/tfx-addons/tree/main/proposals) for a list of existing or upcoming projects proposals for TFX 
Addons. From d0919221b688d6ddfb8b8c865ba1678cce6a3138 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Mon, 15 May 2023 13:24:34 -0400 Subject: [PATCH 21/22] Update code based on reviewer comments. - Fix issues in pred2bq readme - Reverted version change in setup.py - Add absl-py test prerequisite in setup.py --- setup.py | 2 +- tfx_addons/predictions_to_bigquery/README.md | 8 +++++--- tfx_addons/version.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 5db798e3..56e98a80 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ def get_long_description(): return fp.read() -TESTS_REQUIRE = ["pytest", "pylint", "pre-commit", "isort", "yapf"] +TESTS_REQUIRE = ["pytest", "pylint", "pre-commit", "isort", "yapf", "absl-py"] PKG_REQUIRES = get_pkg_metadata() EXTRAS_REQUIRE = PKG_REQUIRES.copy() diff --git a/tfx_addons/predictions_to_bigquery/README.md b/tfx_addons/predictions_to_bigquery/README.md index 94d6b0b9..aecef72c 100644 --- a/tfx_addons/predictions_to_bigquery/README.md +++ b/tfx_addons/predictions_to_bigquery/README.md @@ -58,7 +58,8 @@ import tfx_addons as tfxa ... predictions_to_bigquery = tfxa.predictions_to_bigquery.PredictionsToBigQuery( - schema=schema_gen.outputs['schema'] + inference_results=bulk_inferrer.outputs['inference_result'], + schema=schema_gen.outputs['schema'], transform_graph=transform.outputs['transform_graph'], bq_table_name='my_bigquery_table', gcs_temp_dir='gs://bucket/temp-dir', @@ -66,7 +67,8 @@ predictions_to_bigquery = tfxa.predictions_to_bigquery.PredictionsToBigQuery( ) ``` -TFX pipeline examples can be found in `integration_test.py`. +Refer to `integration_test.py` for tests that demonstrate how to use the +component. For a description of the inputs and execution parameters of the component, refer to the `component.py` file. @@ -77,7 +79,7 @@ See `version.py` in the top repo directory for component dependencies.
## Testing -Each Python module has a correspondin unit test file ending in `_test.py`. +Each Python module has a corresponding unit test file ending in `_test.py`. An integration test is also available and requires use of a Google Cloud project. Additional instructions for running the unit test can be found in `integration_test.py`. diff --git a/tfx_addons/version.py b/tfx_addons/version.py index 8c036f61..df85c376 100644 --- a/tfx_addons/version.py +++ b/tfx_addons/version.py @@ -16,7 +16,7 @@ # We follow Semantic Versioning (https://semver.org/) _MAJOR_VERSION = "0" -_MINOR_VERSION = "7" +_MINOR_VERSION = "6" _PATCH_VERSION = "0" # When building releases, we can update this value on the release branch to From a5de9d2a76a87ad753dedf3f1e48704cf7d72236 Mon Sep 17 00:00:00 2001 From: Carlos Ezequiel Date: Thu, 25 May 2023 21:30:03 -0400 Subject: [PATCH 22/22] pred2bq: Update code based on code reviews. --- README.md | 2 +- tfx_addons/predictions_to_bigquery/README.md | 1 - .../predictions_to_bigquery/component.py | 31 +++++++++------ .../predictions_to_bigquery/executor.py | 20 ++++++---- tfx_addons/predictions_to_bigquery/utils.py | 39 ++++++++++++------- tfx_addons/version.py | 2 +- 6 files changed, 59 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index f9ad0e79..402f5e15 100644 --- a/README.md +++ b/README.md @@ -53,10 +53,10 @@ tfxa.feast_examplegen.FeastExampleGen(...) 
* [tfxa.mlmd_client](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/mlmd_client) * [tfxa.model_card_generator](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/model_card_generator) * [tfxa.pandas_transform](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/pandas_transform) +* [tfxa.predictions_to_bigquery](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/predictions_to_bigquery) * [tfxa.sampling](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/sampling) * [tfxa.schema_curation](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/schema_curation) * [tfxa.xgboost_evaluator](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/xgboost_evaluator) -* [tfxa.predictions_to_bigquery](https://github.com/tensorflow/tfx-addons/tree/main/tfx_addons/predictions_to_bigquery) Check out [proposals](https://github.com/tensorflow/tfx-addons/tree/main/proposals) for a list of existing or upcoming projects proposals for TFX Addons. 
diff --git a/tfx_addons/predictions_to_bigquery/README.md b/tfx_addons/predictions_to_bigquery/README.md index aecef72c..c9d5da0d 100644 --- a/tfx_addons/predictions_to_bigquery/README.md +++ b/tfx_addons/predictions_to_bigquery/README.md @@ -1,6 +1,5 @@ # Prediction results to BigQuery component -[![Python](https://img.shields.io/pypi/pyversions/tfx.svg?style=plastic)](https://github.com/tensorflow/tfx) [![TensorFlow](https://img.shields.io/badge/TFX-orange)](https://www.tensorflow.org/tfx) ## Project Description diff --git a/tfx_addons/predictions_to_bigquery/component.py b/tfx_addons/predictions_to_bigquery/component.py index 1cf3281e..15163401 100644 --- a/tfx_addons/predictions_to_bigquery/component.py +++ b/tfx_addons/predictions_to_bigquery/component.py @@ -25,7 +25,7 @@ from tfx_addons.predictions_to_bigquery import executor -_MIN_THRESHOLD = 0.5 +_MIN_THRESHOLD = 0.0 # pylint: disable=missing-class-docstring @@ -81,26 +81,33 @@ def __init__( """Initialize the component. Args: - inference_results: Inference results channel. + inference_results: TFX input channel for inference results. bq_table_name: BigQuery table name in either PROJECT:DATASET.TABLE. or DATASET.TABLE formats. - bigquery_export: Outputs BigQuery table name containing results. - The outputted name may contain a timestamp suffix defined by - `table_suffix`. - transform_graph: TFTransform output. + bigquery_export: TFX output channel containing BigQuery table name + where the results are stored. + The output table name will have the following format: + <bq_table_name>_<timestamp> + where `bq_table_name` is the argument of the same name and timestamp + is a timestamp string having the format given by `table_time_suffix` + argument. + transform_graph: TFX input channel containing TFTransform output + directory. If specified, and `schema` is not specified, the prediction input schema shall be derived from this channel. - schema: SchemaGen output.
+ schema: TFX input channel for the schema, which is primarily + generated by the SchemaGen component. If specified, the prediction input schema shall be derived from this channel. - expiration_days: BigQuery table expiration in number of days from - current time. If not specified, the table does not expire by default. + table_expiration_days: Expiration in number of days from current time of + the output BigQuery table. + If not specified, the table does not expire by default. filter_threshold: Prediction threshold to use to filter prediction scores. - Keep scores that exceed this threshold. - table_partitioning: If True, partition table. + Outputs that are below this threshold are discarded. + table_partitioning: If set to True, partition table. See: https://cloud.google.com/bigquery/docs/partitioned-tables table_time_suffix: Time format for table suffix in Linux strftime format. - Example: '%Y%m%d + Example: '%Y%m%d'. vocab_label_file: Name of the TF Transform vocabulary file for mapping string labels into integer IDs. If specified, this would be used to get back string labels from predicted label IDs. diff --git a/tfx_addons/predictions_to_bigquery/executor.py b/tfx_addons/predictions_to_bigquery/executor.py index 9fd2fd9f..2a971250 100644 --- a/tfx_addons/predictions_to_bigquery/executor.py +++ b/tfx_addons/predictions_to_bigquery/executor.py @@ -40,6 +40,10 @@ 'filter_threshold', 'gcs_temp_dir', ) + +# Regular expression to check for a proper BigQuery table name, i.e. +# [PROJECT:]DATASET.TABLE, +# where specifying GCP PROJECT is optional. _REGEX_BQ_TABLE_NAME = re.compile(r'^[\w-]*:?[\w_]+\.[\w_]+$') @@ -116,7 +120,7 @@ def _tensor_to_native_python_value( # Removes any extra dimension, e.g. shape (n, 1). values = np.squeeze(values) try: - values = values.item() # Convert to single Python value + values = values.item() # Convert to single Python value. 
except ValueError: values = list(values) if isinstance(values, list) and isinstance(values[0], bytes): @@ -200,10 +204,10 @@ def Do( ) -> None: """Do function for predictions_to_bq executor.""" - # Check required keys set in exec_properties + # Check required keys set in exec_properties. _check_exec_properties(exec_properties) - # Get prediction log file path and decoder + # Get prediction log file path and decoder. prediction_log_path = _get_prediction_log_path( input_dict['inference_results']) prediction_log_decoder = beam.coders.ProtoCoder( @@ -211,14 +215,14 @@ def Do( tft_output = _get_tft_output(input_dict.get('transform_graph')) - # get schema features + # Get schema features. features = utils.get_feature_spec( schema=input_dict.get('schema'), tft_output=tft_output, prediction_log_path=prediction_log_path, ) - # get label names from TFTransformOutput object, if applicable + # Get label names from TFTransformOutput object, if applicable. if tft_output is not None and 'vocab_label_file' in exec_properties: label_key = exec_properties['vocab_label_file'] labels = _get_labels(tft_output, label_key) @@ -227,14 +231,14 @@ def Do( labels = None logging.info('No TFTransform output given; no labels parsed.') - # set BigQuery table name and timestamp suffix if specified. + # Set BigQuery table name and timestamp suffix if specified. _check_bq_table_name(exec_properties['bq_table_name']) timestamp = datetime.datetime.now().replace(second=0, microsecond=0) bq_table_name = _add_bq_table_name_suffix( exec_properties['bq_table_name'], timestamp, exec_properties.get('table_time_suffix')) - # generate bigquery schema from tf transform features + # Generate bigquery schema from tf transform features. 
add_label_field = labels is not None bq_schema = utils.feature_spec_to_bq_schema( features, add_label_field=add_label_field) @@ -244,7 +248,7 @@ def Do( exec_properties.get('table_expiration_days'), exec_properties.get('table_partitioning')) - # run the Beam pipeline to write the inference data to bigquery + # Run the Beam pipeline to write the inference data to bigquery. with self._make_beam_pipeline() as pipeline: _ = (pipeline | 'Read Prediction Log' >> beam.io.ReadFromTFRecord( diff --git a/tfx_addons/predictions_to_bigquery/utils.py b/tfx_addons/predictions_to_bigquery/utils.py index 3ab63c29..b0cfe635 100644 --- a/tfx_addons/predictions_to_bigquery/utils.py +++ b/tfx_addons/predictions_to_bigquery/utils.py @@ -133,7 +133,7 @@ def get_feature_spec( for the data schema. Args: - schema: Path to a `_SCHEMA_FILENAME` file. + schema: Artifact containing the URI to a `_SCHEMA_FILENAME` file. tft_output: TensorFlow Transform output path. prediction_log_path: Path to a TFRecord file containing inference results. """ @@ -203,16 +203,20 @@ def _create_annotation_fields( add_label_field: bool = False, add_datetime_field: bool = True, ) -> List[Dict]: - """Creates a list of annotation fields in BigQuery schema formatkjjjj. + """Creates annotation fields in BigQuery schema format. + + This function creates the following fields: score, category_label, and + datetime, where the last two are optional. Args: - label_field_name: The name of the label field. - score_field_name: The name of the score field. - required: Whether the fields are required. - add_datetime_field: Whether to add a datetime field. + required: Whether the field is required or not. + add_label_field: If true, add a field representing the label of the + model prediction input. + add_datetime_field: Whether to add a datetime field representing the + data creation timestamp. Returns: - A list of BigQuery schema fields. + A list of the BigQuery schema fields. 
""" fields = [] @@ -242,16 +246,21 @@ def _create_annotation_fields( return fields -def feature_spec_to_bq_schema(feature_spec: FeatureSpec, - required: bool = False, - **kwargs: int) -> BigQuerySchema: +def feature_spec_to_bq_schema( + feature_spec: FeatureSpec, + required: bool = False, + add_label_field: bool = False, + add_datetime_field: bool = True, +) -> BigQuerySchema: """Converts a TensorFlow feature spec into a BigQuery schema. Args: feature_spec: TensorFlow feature spec. required: If True, mark BigQuery fields as required (i.e. not nullable). - **kwargs: Additional keyword-arguments to pass to - `_create_annotation_fields`. + add_label_field: If true, add a field representing the label of the + model prediction input. + add_datetime_field: Whether to add a datetime field representing the + data creation timestamp. Returns: A `BigQuerySchema` object. @@ -259,5 +268,9 @@ def feature_spec_to_bq_schema(feature_spec: FeatureSpec, bq_schema_fields = _feature_spec_to_bq_schema_fields(feature_spec, required=required) bq_schema_fields.extend( - _create_annotation_fields(required=required, **kwargs)) + _create_annotation_fields( + required=required, + add_label_field=add_label_field, + add_datetime_field=add_datetime_field, + )) return {"fields": bq_schema_fields} diff --git a/tfx_addons/version.py b/tfx_addons/version.py index df85c376..8c036f61 100644 --- a/tfx_addons/version.py +++ b/tfx_addons/version.py @@ -16,7 +16,7 @@ # We follow Semantic Versioning (https://semver.org/) _MAJOR_VERSION = "0" -_MINOR_VERSION = "6" +_MINOR_VERSION = "7" _PATCH_VERSION = "0" # When building releases, we can update this value on the release branch to