-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
EES-5017 Add data set GET query endpoint functionality
- Loading branch information
Showing
30 changed files
with
2,153 additions
and
5 deletions.
There are no files selected for viewing
10 changes: 10 additions & 0 deletions
10
...ion.ExploreEducationStatistics.Common/Validators/ErrorDetails/NotFoundItemsErrorDetail.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
using System.Collections.Generic; | ||
|
||
namespace GovUk.Education.ExploreEducationStatistics.Common.Validators.ErrorDetails; | ||
|
||
/// <summary>
/// Error detail carrying the collection of items that could not be found.
/// </summary>
/// <typeparam name="T">Type of the individual items.</typeparam>
/// <param name="Items">Items that could not be found.</param>
public record NotFoundItemsErrorDetail<T>(
    IEnumerable<T> Items
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 8 additions & 0 deletions
8
src/GovUk.Education.ExploreEducationStatistics.Public.Data.Api/Model/IdPublicIdPair.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Model; | ||
|
||
/// <summary>
/// Pairs an integer <see cref="Id"/> with a string <see cref="PublicId"/>.
/// Both values must be supplied when the record is created.
/// </summary>
public record IdPublicIdPair
{
    /// <summary>
    /// The integer id half of the pair.
    /// </summary>
    public required int Id { get; init; }

    /// <summary>
    /// The public id half of the pair.
    /// </summary>
    public required string PublicId { get; init; }
}
5 changes: 5 additions & 0 deletions
5
src/GovUk.Education.ExploreEducationStatistics.Public.Data.Api/Model/Sort.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
using GovUk.Education.ExploreEducationStatistics.Common.Model; | ||
|
||
namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Model; | ||
|
||
/// <summary>
/// A single sort instruction: which field to order by and in which direction.
/// </summary>
/// <param name="Field">The field to sort on.</param>
/// <param name="Direction">The direction to sort in.</param>
public record Sort(
    string Field,
    SortDirection Direction
);
39 changes: 39 additions & 0 deletions
39
...xploreEducationStatistics.Public.Data.Api/Repository/Interfaces/IParquetDataRepository.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
using GovUk.Education.ExploreEducationStatistics.Common.Model.Data; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model; | ||
using InterpolatedSql; | ||
|
||
namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Repository.Interfaces; | ||
|
||
/// <summary>
/// Read operations over the data belonging to a <see cref="DataSetVersion"/>.
/// </summary>
public interface IParquetDataRepository
{
    /// <summary>
    /// Counts the data rows of a data set version that satisfy a condition.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="where">The SQL condition used to restrict the rows that are counted.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The number of rows.</returns>
    Task<long> CountRows(
        DataSetVersion dataSetVersion,
        IInterpolatedSql where,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists a page of data rows of a data set version that satisfy a condition.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="columns">The columns to include for each row.</param>
    /// <param name="where">The SQL condition used to restrict the rows that are listed.</param>
    /// <param name="sorts">Optional sorts to apply to the rows.</param>
    /// <param name="page">The page of results to list. Defaults to 1.</param>
    /// <param name="pageSize">The number of rows per page. Defaults to 1000.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The rows, each one represented as a dictionary of values.</returns>
    Task<IEnumerable<IDictionary<string, object?>>> ListRows(
        DataSetVersion dataSetVersion,
        IEnumerable<string> columns,
        IInterpolatedSql where,
        IEnumerable<Sort>? sorts = null,
        int page = 1,
        int pageSize = 1000,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists the columns of a data set version's data.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The set of columns.</returns>
    Task<ISet<string>> ListColumns(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists the geographic levels of a data set version's locations.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The set of geographic levels.</returns>
    Task<ISet<GeographicLevel>> ListLocationLevels(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists the filter columns of a data set version.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The set of filter columns.</returns>
    Task<ISet<string>> ListFilterColumns(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists the indicator columns of a data set version.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The set of indicator columns.</returns>
    Task<ISet<string>> ListIndicatorColumns(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default);
}
23 changes: 23 additions & 0 deletions
23
...ucationStatistics.Public.Data.Api/Repository/Interfaces/IParquetFilterOptionRepository.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model.Parquet; | ||
|
||
namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Repository.Interfaces; | ||
|
||
/// <summary>
/// Read operations over the filter options belonging to a <see cref="DataSetVersion"/>.
/// </summary>
/// <remarks>
/// NOTE(review): no implementation is visible alongside this interface, so the member
/// documentation describes the signatures only - confirm details against the implementing class.
/// </remarks>
public interface IParquetFilterOptionRepository
{
    /// <summary>
    /// Lists filter options of a data set version, selected by integer ids.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="ids">The ids used to select filter options (presumably their row ids - TODO confirm).</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<ParquetFilterOption>> List(
        DataSetVersion dataSetVersion,
        IEnumerable<int> ids,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists filter options of a data set version, selected by public ids.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="publicIds">The public ids used to select filter options.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<ParquetFilterOption>> List(
        DataSetVersion dataSetVersion,
        IEnumerable<string> publicIds,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists id / public id pairs for a data set version, selected by integer ids.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="ids">The ids to list pairs for.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<IdPublicIdPair>> ListPublicIds(
        DataSetVersion dataSetVersion,
        IEnumerable<int> ids,
        CancellationToken cancellationToken = default);
}
24 changes: 24 additions & 0 deletions
24
...ationStatistics.Public.Data.Api/Repository/Interfaces/IParquetLocationOptionRepository.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Requests; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model.Parquet; | ||
|
||
namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Repository.Interfaces; | ||
|
||
/// <summary>
/// Read operations over the location options belonging to a <see cref="DataSetVersion"/>.
/// </summary>
/// <remarks>
/// NOTE(review): no implementation is visible alongside this interface, so the member
/// documentation describes the signatures only - confirm details against the implementing class.
/// </remarks>
public interface IParquetLocationOptionRepository
{
    /// <summary>
    /// Lists location options of a data set version, selected by integer ids.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="ids">The ids used to select location options (presumably their row ids - TODO confirm).</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<ParquetLocationOption>> List(
        DataSetVersion dataSetVersion,
        IEnumerable<int> ids,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists location options of a data set version, selected by query locations.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="locations">The query locations used to select location options.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<ParquetLocationOption>> List(
        DataSetVersion dataSetVersion,
        IEnumerable<DataSetQueryLocation> locations,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists id / public id pairs for a data set version, selected by integer ids.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="ids">The ids to list pairs for.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<IdPublicIdPair>> ListPublicIds(
        DataSetVersion dataSetVersion,
        IEnumerable<int> ids,
        CancellationToken cancellationToken = default);
}
18 changes: 18 additions & 0 deletions
18
...EducationStatistics.Public.Data.Api/Repository/Interfaces/IParquetTimePeriodRepository.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Requests; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model.Parquet; | ||
|
||
namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Repository.Interfaces; | ||
|
||
/// <summary>
/// Read operations over the time periods belonging to a <see cref="DataSetVersion"/>.
/// </summary>
/// <remarks>
/// NOTE(review): no implementation is visible alongside this interface, so the member
/// documentation describes the signatures only - confirm details against the implementing class.
/// </remarks>
public interface IParquetTimePeriodRepository
{
    /// <summary>
    /// Lists time periods of a data set version, selected by integer ids.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="ids">The ids used to select time periods (presumably their row ids - TODO confirm).</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<ParquetTimePeriod>> List(
        DataSetVersion dataSetVersion,
        IEnumerable<int> ids,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists time periods of a data set version, selected by query time periods.
    /// </summary>
    /// <param name="dataSetVersion">The data set version to query.</param>
    /// <param name="timePeriods">The query time periods used to select time periods.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IEnumerable<ParquetTimePeriod>> List(
        DataSetVersion dataSetVersion,
        IEnumerable<DataSetQueryTimePeriod> timePeriods,
        CancellationToken cancellationToken = default);
}
161 changes: 161 additions & 0 deletions
161
....Education.ExploreEducationStatistics.Public.Data.Api/Repository/ParquetDataRepository.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
using GovUk.Education.ExploreEducationStatistics.Common.Extensions; | ||
using GovUk.Education.ExploreEducationStatistics.Common.Model.Data; | ||
using GovUk.Education.ExploreEducationStatistics.Common.Utils; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Repository.Interfaces; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model.DuckDb; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model.Parquet; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model.Parquet.Tables; | ||
using GovUk.Education.ExploreEducationStatistics.Public.Data.Services.Interfaces; | ||
using InterpolatedSql; | ||
using InterpolatedSql.Dapper; | ||
using StackExchange.Profiling; | ||
using DataTable = GovUk.Education.ExploreEducationStatistics.Public.Data.Model.Parquet.Tables.DataTable; | ||
|
||
namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Api.Repository; | ||
|
||
/// <summary>
/// Runs SQL queries over the Parquet files of a data set version, using the file
/// paths supplied by the path resolver and commands built from the DuckDB connection.
/// </summary>
/// <param name="duckDbConnection">Connection used to build and execute each SQL command.</param>
/// <param name="parquetPathResolver">Resolves the Parquet file paths for a data set version.</param>
public class ParquetDataRepository(
    IDuckDbConnection duckDbConnection,
    IParquetPathResolver parquetPathResolver)
    : IParquetDataRepository
{
    // Name of the CTE holding the paginated row ids in ListRows.
    private const string DataIdsAlias = "data_ids";

    /// <summary>
    /// Counts the rows in the data file of a data set version.
    /// </summary>
    /// <param name="dataSetVersion">The data set version whose data file is queried.</param>
    /// <param name="where">Condition for the WHERE clause. No WHERE clause is added when this is empty.</param>
    /// <param name="cancellationToken">Token used to cancel the query.</param>
    /// <returns>The result of the <c>count(*)</c> query.</returns>
    public async Task<long> CountRows(
        DataSetVersion dataSetVersion,
        IInterpolatedSql where,
        CancellationToken cancellationToken = default)
    {
        using var _ = MiniProfiler.Current
            .Step($"{nameof(ParquetDataRepository)}.{nameof(CountRows)}");

        var command = duckDbConnection.SqlBuilder(
            $"""
            SELECT count(*)
            FROM '{parquetPathResolver.DataPath(dataSetVersion):raw}'
            """
        );

        command.AppendIf(!where.IsEmpty(), $"WHERE {where}");

        return await command.QuerySingleAsync<long>(cancellationToken: cancellationToken);
    }

    /// <summary>
    /// Lists one page of rows from the data file of a data set version.
    /// </summary>
    /// <param name="dataSetVersion">The data set version whose data file is queried.</param>
    /// <param name="columns">The data table columns to select for each row.</param>
    /// <param name="where">Condition for the WHERE clause. No WHERE clause is added when this is empty.</param>
    /// <param name="sorts">
    /// Optional orderings. Each one contributes a <c>field DIRECTION</c> term to the ORDER BY clause;
    /// no ORDER BY clause is added when this is null or empty.
    /// </param>
    /// <param name="page">The 1-based page number; the row offset is <c>(page - 1) * pageSize</c>.</param>
    /// <param name="pageSize">The maximum number of rows to return (used as the LIMIT).</param>
    /// <param name="cancellationToken">Token used to cancel the query.</param>
    /// <returns>The query result rows, each cast to a dictionary of values.</returns>
    public async Task<IEnumerable<IDictionary<string, object?>>> ListRows(
        DataSetVersion dataSetVersion,
        IEnumerable<string> columns,
        IInterpolatedSql where,
        IEnumerable<Sort>? sorts = null,
        int page = 1,
        int pageSize = 1000,
        CancellationToken cancellationToken = default)
    {
        using var timing = MiniProfiler.Current
            .Step($"{nameof(ParquetDataRepository)}.{nameof(ListRows)}");

        var dataPath = parquetPathResolver.DataPath(dataSetVersion);

        var whereFragment = new DuckDbSqlBuilder()
            .AppendIf(!where.IsEmpty(), $"WHERE {where}");

        // The direction becomes a SQL keyword, so upper-case it with the invariant culture
        // rather than the current thread culture.
        // NOTE(review): the sort field is spliced into the SQL text as-is; how AppendRange
        // treats these strings is not visible here - confirm callers restrict Field to known columns.
        var orderings = (sorts ?? [])
            .Select(s => $"{s.Field} {s.Direction.ToString().ToUpperInvariant()}")
            .ToList();

        var orderByFragment = new DuckDbSqlBuilder()
            .AppendIf(orderings.Count != 0, $"ORDER BY")
            .AppendRange(orderings, joinString: ",\n");

        // Computed as a long so that large page / page size combinations cannot
        // silently wrap around in 32-bit arithmetic and produce a bogus offset.
        var pageOffset = ((long)page - 1) * pageSize;

        // We essentially split this query into two sub-queries:
        //
        // 1. The main query which is offset paginated and gathers the row ids
        // 2. Another query to fetch the rows using the ids from the main query (i.e. a 'deferred' join)
        //
        // This 'deferred join' technique is more efficient than a single query and helps to reduce
        // the performance penalty of using offset pagination having to scan through many rows.
        var command = duckDbConnection.SqlBuilder(
            $"""
            WITH {DataIdsAlias:raw} AS (
                SELECT {DataTable.Ref().Id:raw}
                FROM '{dataPath:raw}' AS {DataTable.TableName:raw}
                {whereFragment}
                {orderByFragment}
                LIMIT {pageSize}
                OFFSET {pageOffset}
            )
            SELECT {columns.Select(DataTable.Ref().Col).JoinToString(",\n"):raw}
            FROM '{dataPath:raw}' AS {DataTable.TableName:raw}
            JOIN {DataIdsAlias:raw} ON {DataIdsAlias:raw}.id = {DataTable.Ref().Id:raw}
            {orderByFragment}
            """
        );

        return (await command.QueryAsync(cancellationToken: cancellationToken))
            .Cast<IDictionary<string, object?>>();
    }

    /// <summary>
    /// Lists the column names of the data file of a data set version,
    /// obtained by running a DESCRIBE over a select of the file.
    /// </summary>
    /// <param name="dataSetVersion">The data set version whose data file is described.</param>
    /// <param name="cancellationToken">Token used to cancel the query.</param>
    /// <returns>The distinct set of column names.</returns>
    public async Task<ISet<string>> ListColumns(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default)
    {
        var command = duckDbConnection.SqlBuilder(
            $"DESCRIBE SELECT * FROM '{parquetPathResolver.DataPath(dataSetVersion):raw}' LIMIT 1");

        var columns = await command.QueryAsync<ParquetColumn>(cancellationToken: cancellationToken);

        return columns
            .Select(col => col.ColumnName)
            .ToHashSet();
    }

    /// <summary>
    /// Lists the distinct levels found in the locations file of a data set version.
    /// </summary>
    /// <param name="dataSetVersion">The data set version whose locations file is queried.</param>
    /// <param name="cancellationToken">Token used to cancel the query.</param>
    /// <returns>The levels, each converted from its string value to a <see cref="GeographicLevel"/>.</returns>
    public async Task<ISet<GeographicLevel>> ListLocationLevels(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default)
    {
        var command = duckDbConnection.SqlBuilder(
            $"""
            SELECT DISTINCT {LocationsTable.Cols.Level:raw}
            FROM '{parquetPathResolver.LocationsPath(dataSetVersion):raw}'
            """);

        var levels = await command.QueryAsync<string>(cancellationToken: cancellationToken);

        return levels
            .Select(EnumUtil.GetFromEnumValue<GeographicLevel>)
            .ToHashSet();
    }

    /// <summary>
    /// Lists the distinct column names found in the filters file of a data set version.
    /// </summary>
    /// <param name="dataSetVersion">The data set version whose filters file is queried.</param>
    /// <param name="cancellationToken">Token used to cancel the query.</param>
    /// <returns>The set of filter column names.</returns>
    public async Task<ISet<string>> ListFilterColumns(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default)
    {
        var command = duckDbConnection.SqlBuilder(
            $"""
            SELECT DISTINCT {FiltersTable.Cols.ColumnName:raw}
            FROM '{parquetPathResolver.FiltersPath(dataSetVersion):raw}'
            """);

        var cols = await command
            .QueryAsync<string>(cancellationToken: cancellationToken);

        return cols.ToHashSet();
    }

    /// <summary>
    /// Lists the distinct ids found in the indicators file of a data set version.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this selects the indicators table's id column, so indicator ids are
    /// presumably also the indicator column names in the data file - confirm.
    /// </remarks>
    /// <param name="dataSetVersion">The data set version whose indicators file is queried.</param>
    /// <param name="cancellationToken">Token used to cancel the query.</param>
    /// <returns>The set of indicator ids.</returns>
    public async Task<ISet<string>> ListIndicatorColumns(
        DataSetVersion dataSetVersion,
        CancellationToken cancellationToken = default)
    {
        var command = duckDbConnection.SqlBuilder(
            $"""
            SELECT DISTINCT {IndicatorsTable.Cols.Id:raw}
            FROM '{parquetPathResolver.IndicatorsPath(dataSetVersion):raw}'
            """);

        var indicators = await command.QueryAsync<string>(cancellationToken: cancellationToken);

        return indicators.ToHashSet();
    }
}
Oops, something went wrong.