For live archive, the current strategy for unaligned audio and video
content is to leave the video stream untouched and adjust the audio
stream, either by inserting silence at its beginning or by trimming its
beginning. If there are discontinuities in the audio stream, we
re-encode with resampling so the gaps are filled with silence.
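
For a rough illustration of the alignment math (the numbers below are
made up, not taken from any real asset):

    long videoStartTime = 20_000_000;  // video start, in video time-scale units
    long videoTimeScale = 10_000_000;  // e.g. 100-ns units
    long audioStartTime = 132_300;     // audio start, in audio time-scale units
    long audioTimeScale = 44_100;      // e.g. 44.1 kHz

    long videoStartInAudioScale = videoStartTime * audioTimeScale / videoTimeScale; // 88,200
    long gapInMs = Math.Abs(audioStartTime - videoStartInAudioScale) * 1000 / audioTimeScale; // 1,000 ms

    // Here audioStartTime > videoStartInAudioScale, so the audio begins one
    // second late and we insert 1,000 ms of silence at the head; in the
    // opposite case we would trim the same amount from the audio instead.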

This commit adds the detection logic and silence insertion. The trim /
re-encode path is checked in but not tested yet.

Also, we need to rewrite the tfdt box on the audio stream, since ffmpeg
zero-bases it.
pohhsu committed Aug 16, 2023
1 parent 9ba9570 commit e26f620
Showing 3 changed files with 149 additions and 27 deletions.
20 changes: 18 additions & 2 deletions transform/BasePackager.cs
@@ -7,6 +7,16 @@

namespace AMSMigrate.Transform
{
public record TranscodeAudioInfo
{
public long VideoStartTime { get; set; } = 0;
public long VideoTimeScale { get; set; } = 0;
public long VideoStartTimeInAudioTimeScale { get; set; } = 0;
public long AudioStartTime { get; set; } = 0;
public long AudioTimeScale { get; set; } = 0;
public bool AudioStreamHasDiscontinuities { get; set; } = false;
};

abstract class BasePackager : IPackager
{
public const string MEDIA_FILE = ".mp4";
@@ -38,6 +48,8 @@ abstract class BasePackager : IPackager

public bool TranscodeAudio { get; protected set; }

public TranscodeAudioInfo TranscodeAudioInfoData { get; protected set; } = new();

public IDictionary<string, IList<Track>> FileToTrackMap => _fileToTrackMap;

public BasePackager(AssetDetails assetDetails, TransMuxer transMuxer, ILogger logger)
@@ -209,7 +221,7 @@ private async Task DownloadAsync(string workingDirectory, string file, IList<Tra
{
var filePath = Path.Combine(workingDirectory, file);
var track = tracks[0];
// TranscodeAudio = true;

if (TranscodeAudio && track.Type == StreamType.Audio)
{
filePath = Path.Combine(tempDirectory, file);
@@ -219,7 +231,11 @@ private async Task DownloadAsync(string workingDirectory, string file, IList<Tra
await source.DownloadAsync(filePath, cancellationToken);
if (TranscodeAudio && track.Type == StreamType.Audio)
{
await Task.Run(() => _transMuxer.TranscodeAudioAsync(filePath, Path.Combine(workingDirectory, Path.GetFileName(filePath)), cancellationToken));
await Task.Run(() => _transMuxer.TranscodeAudioAsync(
filePath,
Path.Combine(workingDirectory, Path.GetFileName(filePath)),
TranscodeAudioInfoData,
cancellationToken));
}
}
else
58 changes: 54 additions & 4 deletions transform/ShakaPackager.cs
@@ -1,6 +1,7 @@
using AMSMigrate.Contracts;
using Microsoft.Extensions.Logging;
using System.ComponentModel;
using System.IO;
using System.Text.RegularExpressions;

namespace AMSMigrate.Transform
@@ -37,11 +38,60 @@ public ShakaPackager(MigratorOptions options, AssetDetails assetDetails, TransMu
_logger.LogDebug("Transmuxing FMP4 asset with multiple tracks in a single file into regular MP4 file.");
TransmuxedSmooth = true;
}
else if (assetDetails.ClientManifest != null && assetDetails.ClientManifest.HasDiscontinuities(_logger))
else if (manifest.Format == "vod-fmp4")
{
// mux to a single file.
Inputs.Clear();
Inputs.Add($"{baseName}.mp4");
if (assetDetails.ClientManifest != null)
{
var clientManifest = assetDetails.ClientManifest!;

MediaStream? audioStream = null;
MediaStream? videoStream = null;
// TODO: can there be multiple video streams? assume no for now.
foreach (var stream in clientManifest.Streams)
{
if (stream.Type == StreamType.Video)
{
videoStream = stream;
break;
}
}

// TODO: can there be multiple audio streams? assume no for now.
foreach (var stream in clientManifest.Streams)
{
if (stream.Type == StreamType.Audio)
{
audioStream = stream;
break;
}
}

if (audioStream != null && videoStream != null)
{
TranscodeAudioInfoData.AudioStartTime = audioStream.GetStartTimeStamp();
TranscodeAudioInfoData.AudioTimeScale = audioStream.TimeScale;
TranscodeAudioInfoData.AudioStreamHasDiscontinuities = audioStream.HasDiscontinuities();

TranscodeAudioInfoData.VideoStartTime = videoStream.GetStartTimeStamp();
TranscodeAudioInfoData.VideoTimeScale = videoStream.TimeScale;
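// Convert the video start time into the audio time scale so the two start times can be compared directly.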
TranscodeAudioInfoData.VideoStartTimeInAudioTimeScale = TranscodeAudioInfoData.VideoStartTime * TranscodeAudioInfoData.AudioTimeScale / TranscodeAudioInfoData.VideoTimeScale;

_logger.LogDebug("Audio start time: {time}, audio time scale: {timeScale}, audio discontinuity: {flag}", TranscodeAudioInfoData.AudioStartTime,
TranscodeAudioInfoData.AudioTimeScale, TranscodeAudioInfoData.AudioStreamHasDiscontinuities);
_logger.LogDebug("Video start time: {time}, video time scale: {timeScale}", TranscodeAudioInfoData.VideoStartTime, TranscodeAudioInfoData.VideoTimeScale);
_logger.LogDebug("video start time in audio time scale: {time}", TranscodeAudioInfoData.VideoStartTimeInAudioTimeScale);

// Treat the tracks as aligned when their start times are within 0.1 seconds (expressed in the audio time scale) and the audio has no discontinuities.
if (Math.Abs(TranscodeAudioInfoData.AudioStartTime - TranscodeAudioInfoData.VideoStartTimeInAudioTimeScale) <= 0.1 * TranscodeAudioInfoData.AudioTimeScale
&& !TranscodeAudioInfoData.AudioStreamHasDiscontinuities)
{
TranscodeAudio = true;
}
else
{
_logger.LogDebug("video / audio track start times are not within 0.1 sec or the audio stream has discontinuities, transcode required");
TranscodeAudio = true;
}
}
}
}

UsePipeForInput = false;
98 changes: 77 additions & 21 deletions transform/TransMuxer.cs
@@ -2,7 +2,10 @@
using AMSMigrate.Fmp4;
using Azure.ResourceManager.Media.Models;
using FFMpegCore;
using FFMpegCore.Arguments;
using FFMpegCore.Enums;
using FFMpegCore.Pipes;
using Microsoft.Extensions.Azure;
using Microsoft.Extensions.Logging;
using System.Text;

@@ -75,33 +78,86 @@ public void TransmuxSmooth(Stream source, Stream destination, uint trackId)
}
}
}
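// Wraps ffmpeg's adelay audio filter: the delay is given in milliseconds and all=1 applies it to every channel.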
public class AudioDelayFilterArgument : IAudioFilterArgument
{
private readonly long _delay = 0;

public AudioDelayFilterArgument(long delay)
{
_delay = delay;
}
public string Key { get; } = "adelay";
public string Value => $"{_delay}:all=1";
}

public async Task TranscodeAudioAsync(string source, string destination, CancellationToken cancellationToken)
public class AudioResample : IAudioFilterArgument
{
var processor = FFMpegArguments
public string Key { get; } = "aresample";
public string Value => $"async=1";
}

public async Task TranscodeAudioAsync(string source, string destination, TranscodeAudioInfo transcodeAudioInfo, CancellationToken cancellationToken)
{
if (transcodeAudioInfo.AudioStartTime > transcodeAudioInfo.VideoStartTimeInAudioTimeScale)
{
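// The audio starts after the video: prepend silence equal to the gap, converted to milliseconds for adelay.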
long delayInMs = (transcodeAudioInfo.AudioStartTime - transcodeAudioInfo.VideoStartTimeInAudioTimeScale) * 1000 / transcodeAudioInfo.AudioTimeScale;
var processor = FFMpegArguments
.FromFileInput(source)
//.WithGlobalOptions(options => options.WithVerbosityLevel(FFMpegCore.Arguments.VerbosityLevel.Verbose))
.OutputToFile(destination, overwrite: false, options =>
{
// TODO: add silence insertion, trim, resample + transcode
// current just do cmaf copy
bool addSilence = false;
if (addSilence)
.OutputToFile(destination, overwrite: true, options =>
options
.WithAudioFilters(
audioFilterOptions =>
{
options
.CopyChannel()
.ForceFormat("mp4")
.WithCustomArgument("-movflags cmaf");
audioFilterOptions.Arguments.Add(new AudioDelayFilterArgument(delayInMs));
if (transcodeAudioInfo.AudioStreamHasDiscontinuities)
{
audioFilterOptions.Arguments.Add(new AudioResample());
}
}
else
{
options
.CopyChannel()
.ForceFormat("mp4")
.WithCustomArgument("-movflags cmaf");
}
});
await RunAsync(processor, cancellationToken);
)
.WithAudioCodec(AudioCodec.Aac)
.ForceFormat("mp4")
.WithCustomArgument("-movflags cmaf"));

await RunAsync(processor, cancellationToken);

}
else if (transcodeAudioInfo.AudioStartTime <= transcodeAudioInfo.VideoStartTimeInAudioTimeScale)
{
if (transcodeAudioInfo.AudioStreamHasDiscontinuities)
{
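// The audio starts before the video and has gaps: trim the lead-in and re-encode with aresample so the gaps are filled with silence.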
long trimInMs = (transcodeAudioInfo.VideoStartTimeInAudioTimeScale - transcodeAudioInfo.AudioStartTime) * 1000 / transcodeAudioInfo.AudioTimeScale;
var processor = FFMpegArguments
.FromFileInput(source, false, opt => opt.Seek(TimeSpan.FromMilliseconds(trimInMs)))
//.WithGlobalOptions(options => options.WithVerbosityLevel(FFMpegCore.Arguments.VerbosityLevel.Verbose))
.OutputToFile(destination, overwrite: true, options =>
options
.WithAudioFilters(
audioFilterOptions =>
{
audioFilterOptions.Arguments.Add(new AudioResample());
}
)
.WithAudioCodec(AudioCodec.Aac)
.ForceFormat("mp4")
.WithCustomArgument("-movflags cmaf"));
await RunAsync(processor, cancellationToken);
}
else
{
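// The audio starts before the video but is continuous: trim the lead-in and copy the stream without re-encoding.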
double trim = (transcodeAudioInfo.VideoStartTimeInAudioTimeScale - transcodeAudioInfo.AudioStartTime) * 1000.0 / transcodeAudioInfo.AudioTimeScale;
var processor = FFMpegArguments
.FromFileInput(source, false, opt => opt.Seek(TimeSpan.FromMilliseconds(trim)))
//.WithGlobalOptions(options => options.WithVerbosityLevel(FFMpegCore.Arguments.VerbosityLevel.Verbose))
.OutputToFile(destination, overwrite: false, options =>
options
.CopyChannel()
.ForceFormat("mp4")
.WithCustomArgument("-movflags cmaf"));
await RunAsync(processor, cancellationToken);
}
}
// TODO: rewrite tfdt box from 0 based to video start time in audio time scale.
}
}
