Skip to content

Commit

Permalink
Add the initial libyuv support
Browse files Browse the repository at this point in the history
The main benefit of libyuv, apart from shortening the video pipeline, is a quite noticeable decrease in latency and CPU usage due to the various assembler/SIMD optimizations of the library. However, there is a drawback for macOS systems: libyuv cannot be downloaded as a compiled library and can only be built from source, which means we should either include a cropped source code of the library (~10K LoC) in the app or raise the complexity of the macOS dev and run toolchains. The main target system, Linux, and Windows will use the compiled lib from their package managers, while macOS will use the lib included as shortened source code.

Building the app with the no_libyuv tag will force it to use libyuv from the provided source files.
  • Loading branch information
sergystepanov committed Oct 15, 2023
1 parent 072b674 commit b1b3371
Show file tree
Hide file tree
Showing 73 changed files with 12,010 additions and 1,536 deletions.
29 changes: 15 additions & 14 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ------------------------------------------------------------
# Build workflow (Linux x64, macOS x64, Windows x64)
# Build and test workflow (Linux x64, macOS x64, Windows x64)
# ------------------------------------------------------------

name: build
Expand All @@ -20,7 +20,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest, macos-latest, windows-latest ]
step: [ build, check ]
step: [ build, test ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
Expand All @@ -33,7 +33,7 @@ jobs:
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt-get -qq update
sudo apt-get -qq install -y make pkg-config libvpx-dev libx264-dev libopus-dev libsdl2-dev libgl1-mesa-glx
sudo apt-get -qq install -y make pkg-config libvpx-dev libx264-dev libopus-dev libsdl2-dev libyuv-dev libgl1-mesa-glx
- name: Get MacOS dev libraries and tools
if: matrix.os == 'macos-latest'
Expand All @@ -55,9 +55,10 @@ jobs:
mingw-w64-x86_64-opus
mingw-w64-x86_64-x264-git
mingw-w64-x86_64-SDL2
mingw-w64-x86_64-libyuv
- name: Get Windows OpenGL drivers
if: matrix.step == 'check' && matrix.os == 'windows-latest'
if: matrix.step == 'test' && matrix.os == 'windows-latest'
shell: msys2 {0}
run: |
wget -q https://github.com/pal1000/mesa-dist-win/releases/download/20.2.1/mesa3d-20.2.1-release-mingw.7z
Expand All @@ -81,28 +82,28 @@ jobs:
run: |
make build
- name: Verify core rendering (windows-latest)
if: matrix.step == 'check' && matrix.os == 'windows-latest' && always()
- name: Test (windows-latest)
if: matrix.step == 'test' && matrix.os == 'windows-latest' && always()
shell: msys2 {0}
env:
MESA_GL_VERSION_OVERRIDE: 3.3COMPAT
run: |
GL_CTX=-autoGlContext make verify-cores
GL_CTX=-autoGlContext make test verify-cores
- name: Verify core rendering (ubuntu-latest)
if: matrix.step == 'check' && matrix.os == 'ubuntu-latest' && always()
- name: Test (ubuntu-latest)
if: matrix.step == 'test' && matrix.os == 'ubuntu-latest' && always()
env:
MESA_GL_VERSION_OVERRIDE: 3.3COMPAT
run: |
GL_CTX=-autoGlContext xvfb-run --auto-servernum make verify-cores
GL_CTX=-autoGlContext xvfb-run --auto-servernum make test verify-cores
- name: Verify core rendering (macos-latest)
if: matrix.step == 'check' && matrix.os == 'macos-latest' && always()
- name: Test (macos-latest)
if: matrix.step == 'test' && matrix.os == 'macos-latest' && always()
run: |
make verify-cores
make test verify-cores
- uses: actions/upload-artifact@v3
if: matrix.step == 'check' && always()
if: matrix.step == 'test' && always()
with:
name: emulator-test-frames
path: _rendered/*.png
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/cd/cloudretro.io/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@ worker:
domain: cloudretro.io

emulator:
threads: 4
libretro:
logLevel: 1
cores:
list:
mame:
options:
"fbneo-cpu-speed-adjust": "200%"
"fbneo-diagnostic-input": "Hold Start"
nes:
scale: 2
pcsx:
altRepo: true
snes:
scale: 2
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ RUN apt-get -q update && apt-get -q install --no-install-recommends -y \
libopus-dev \
libsdl2-dev \
libvpx-dev \
libyuv-dev \
libx264-dev \
pkg-config \
&& rm -rf /var/lib/apt/lists/*
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ CGO_CFLAGS='-g -O3 -funroll-loops'
CGO_LDFLAGS='-g -O3'
GO_TAGS=static

.PHONY: clean test

fmt:
@goimports -w cmd pkg tests
@gofmt -s -w cmd pkg tests
Expand All @@ -32,6 +34,9 @@ build.worker:

build: build.coordinator build.worker

test:
go test -v ./pkg/...

verify-cores:
go test -run TestAll ./pkg/worker/room -v -renderFrames $(GL_CTX) -outputPath "../../../_rendered"

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ a better sense of performance.

```
# Ubuntu / Windows (WSL2)
apt-get install -y make gcc pkg-config libvpx-dev libx264-dev libopus-dev libsdl2-dev
apt-get install -y make gcc pkg-config libvpx-dev libx264-dev libopus-dev libsdl2-dev libyuv-dev
# MacOS
brew install pkg-config libvpx x264 opus sdl2
# Windows (MSYS2)
pacman -Sy --noconfirm --needed git make mingw-w64-x86_64-{gcc,pkgconf,dlfcn,libvpx,opus,x264-git,SDL2}
pacman -Sy --noconfirm --needed git make mingw-w64-x86_64-{gcc,pkgconf,dlfcn,libvpx,opus,x264-git,SDL2,libyuv}
```

The coordinator and workers need to run simultaneously because the workers connect to the coordinator.
Expand Down
16 changes: 3 additions & 13 deletions pkg/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,9 @@ worker:
tag:

emulator:
# set output viewport scale factor
scale: 1

# set the total number of threads for the image processing
# (experimental)
threads: 4
# (removed)
threads: 0

aspectRatio:
# enable aspect ratio changing
Expand Down Expand Up @@ -163,6 +160,7 @@ emulator:
# - altRepo (bool) prioritize secondary repo as the download source
# - lib (string)
# - roms ([]string)
# - scale (float) scales the output video frames by this factor.
# - folder (string)
# By default emulator selection is based on the folder named as cores
# in the list (i.e. nes, snes) but if you specify folder param,
Expand Down Expand Up @@ -244,8 +242,6 @@ encoder:
video:
# h264, vpx (VP8)
codec: h264
# concurrent execution units (0 - disabled)
concurrency: 0
# see: https://trac.ffmpeg.org/wiki/Encode/H.264
h264:
# Constant Rate Factor (CRF) 0-51 (default: 23)
Expand Down Expand Up @@ -273,12 +269,6 @@ encoder:
# one additional FFMPEG concat demux file
recording:
enabled: false
# image compression level:
# 0 - default compression
# -1 - no compression
# -2 - best speed
# -3 - best compression
compressLevel: 0
# name contains the name of the recording dir (or zip)
# format:
# %date:go_time_format% -- refer: https://go.dev/src/time/format.go
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/emulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
)

type Emulator struct {
Scale int
Threads int
AspectRatio struct {
Keep bool
Expand Down Expand Up @@ -54,6 +53,7 @@ type LibretroCoreConfig struct {
Lib string
Options map[string]string
Roms []string
Scale float64
UsesLibCo bool
VFR bool
Width int
Expand Down
9 changes: 4 additions & 5 deletions pkg/config/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,10 @@ type Server struct {
}

type Recording struct {
Enabled bool
CompressLevel int
Name string
Folder string
Zip bool
Enabled bool
Name string
Folder string
Zip bool
}

func (s *Server) WithFlags() {
Expand Down
5 changes: 2 additions & 3 deletions pkg/config/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ type Audio struct {
}

type Video struct {
Codec string
Concurrency int
H264 struct {
Codec string
H264 struct {
Crf uint8
LogLevel int32
Preset string
Expand Down
56 changes: 56 additions & 0 deletions pkg/encoder/color/bgra/bgra.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package bgra

import (
"image"
"image/color"
)

// BGRA is an in-memory image that reuses the pixel storage of image.RGBA
// (Pix, Stride, Rect) through embedding, while its own ColorModel, At and Set
// methods work with BGRAColor values instead of color.RGBA.
type BGRA struct {
	image.RGBA
}

// BGRAModel is the color.Model for BGRAColor values.
//
// A color that already is a BGRAColor is returned unchanged. Any other color
// is reduced to 8 bits per channel and stored so that the RGBA method of the
// result reports the same red, green and blue as the source color.
var BGRAModel = color.ModelFunc(func(c color.Color) color.Color {
	if _, ok := c.(BGRAColor); ok {
		return c
	}
	r, g, b, a := c.RGBA()
	// BGRAColor.RGBA reads red from field B and blue from field R, so the two
	// channels have to be stored swapped here to survive the round trip.
	return BGRAColor{R: uint8(b >> 8), G: uint8(g >> 8), B: uint8(r >> 8), A: uint8(a >> 8)}
})

// BGRAColor represents a BGRA color.
//
// The fields follow the byte order of a pixel rather than their names:
// R holds the first byte (blue), G the second (green), B the third (red)
// and A the fourth (alpha).
type BGRAColor struct {
	R, G, B, A uint8
}

// RGBA implements color.Color. Every 8-bit channel is widened to 16 bits by
// replicating its byte. Red is taken from field B and blue from field R.
// The stored alpha is ignored: the color is always reported as fully opaque.
func (c BGRAColor) RGBA() (r, g, b, a uint32) {
	r = uint32(c.B)
	r |= r << 8
	g = uint32(c.G)
	g |= g << 8
	b = uint32(c.R)
	b |= b << 8
	a = 0xffff // c.A is deliberately not used
	return r, g, b, a
}

// NewBGRA returns a new BGRA image with the given bounds. Its pixel buffer is
// the one allocated by image.NewRGBA for the same rectangle.
func NewBGRA(r image.Rectangle) *BGRA {
	img := &BGRA{}
	img.RGBA = *image.NewRGBA(r)
	return img
}

// ColorModel returns BGRAModel, the color model used by the image.
func (p *BGRA) ColorModel() color.Model {
	return BGRAModel
}
// At returns the pixel at (x, y) as a BGRAColor built from the four bytes of
// that pixel in storage order.
//
// Like image.RGBA.At, a point outside of the image bounds yields the zero
// color instead of a panic or the bytes of an unrelated pixel.
func (p *BGRA) At(x, y int) color.Color {
	if !(image.Point{X: x, Y: y}.In(p.Rect)) {
		return BGRAColor{}
	}
	i := p.PixOffset(x, y)
	s := p.Pix[i : i+4 : i+4]
	return BGRAColor{R: s[0], G: s[1], B: s[2], A: s[3]}
}

// Set converts c with BGRAModel and stores the R, G and B fields of the result
// in the first three bytes of the pixel at (x, y). The fourth (alpha) byte is
// always written as 255.
//
// Like image.RGBA.Set, a point outside of the image bounds is ignored instead
// of causing a panic or overwriting an unrelated pixel.
func (p *BGRA) Set(x, y int, c color.Color) {
	if !(image.Point{X: x, Y: y}.In(p.Rect)) {
		return
	}
	i := p.PixOffset(x, y)
	c1 := BGRAModel.Convert(c).(BGRAColor)
	s := p.Pix[i : i+4 : i+4]
	s[0] = c1.R
	s[1] = c1.G
	s[2] = c1.B
	s[3] = 255 // the image is always opaque
}
62 changes: 62 additions & 0 deletions pkg/encoder/color/rgb565/rgb565.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package rgb565

import (
"encoding/binary"
"image"
"image/color"
"math"
)

// RGB565 is an in-memory image whose At method returns RGB565 values.
type RGB565 struct {
	// Pix holds the image's pixels, as RGB565 values in little-endian format
	// (At and Set access them with binary.LittleEndian). The pixel at
	// (x, y) starts at Pix[(y-p.Rect.Min.Y)*p.Stride + (x-p.Rect.Min.X)*2].
	Pix []uint8
	// Stride is the Pix stride (in bytes) between vertically adjacent pixels.
	Stride int
	// Rect is the image's bounds.
	Rect image.Rectangle
}

// Model is the model for RGB565 colors.
//
// A Color is returned unchanged. Any other color is quantized from the 16-bit
// channels reported by its RGBA method by keeping the 5, 6 and 5 most
// significant bits of red, green and blue. Alpha is discarded.
var Model = color.ModelFunc(func(c color.Color) color.Color {
	if _, ok := c.(Color); ok {
		return c
	}
	r, g, b, _ := c.RGBA()
	// The channels are 16 bits wide: the top 5 bits of red already sit at
	// bits 15..11, the top 6 bits of green are moved down to bits 10..5 and
	// the top 5 bits of blue to bits 4..0.
	return Color(uint16(r&rMask | (g>>5)&gMask | (b>>11)&bMask))
})

// Bit masks of the red, green and blue channels inside an RGB565 value.
const (
	rMask = 0b1111100000000000
	gMask = 0b0000011111100000
	bMask = 0b0000000000011111
)

// Color represents an RGB565 color.
type Color uint16

// RGBA implements color.Color. Each channel is rescaled to 8 bits with
// rounding and then widened to the full 16-bit range, so the brightest
// channel value maps to 0xffff. The color is always fully opaque.
func (c Color) RGBA() (r, g, b, a uint32) {
	return widen(c&rMask>>11, 31), widen(c&gMask>>5, 63), widen(c&bMask, 31), 0xffff
}

// widen rescales v from [0, limit] to the nearest 8-bit value and replicates
// that byte into both halves of a 16-bit channel.
func widen(v Color, limit float64) uint32 {
	v8 := uint32(math.Round(float64(v) * 255.0 / limit))
	return v8<<8 | v8
}

// NewRGB565 returns a new RGB565 image with the given bounds. The pixel buffer
// holds two bytes per pixel and the stride is exactly two bytes per column.
func NewRGB565(r image.Rectangle) *RGB565 {
	w, h := r.Dx(), r.Dy()
	return &RGB565{
		Pix:    make([]uint8, 2*(w*h)),
		Stride: 2 * w,
		Rect:   r,
	}
}

// Bounds returns the rectangle covered by the image.
func (p *RGB565) Bounds() image.Rectangle {
	return p.Rect
}
// ColorModel returns Model, the RGB565 color model.
func (p *RGB565) ColorModel() color.Model {
	return Model
}
// PixOffset returns the index of the first byte in Pix of the pixel at (x, y):
// whole rows are skipped by Stride and every column takes two bytes.
func (p *RGB565) PixOffset(x, y int) int {
	row := (y - p.Rect.Min.Y) * p.Stride
	col := (x - p.Rect.Min.X) * 2
	return col + row
}

// At returns the pixel at (x, y) as a Color, decoded from the two
// little-endian bytes stored at its offset in Pix.
//
// Like the image types of the standard library, a point outside of the image
// bounds yields the zero color instead of a panic or the bytes of an
// unrelated pixel.
func (p *RGB565) At(x, y int) color.Color {
	if !(image.Point{X: x, Y: y}.In(p.Rect)) {
		return Color(0)
	}
	i := p.PixOffset(x, y)
	return Color(binary.LittleEndian.Uint16(p.Pix[i : i+2]))
}

// Set converts c with Model and stores the result as two little-endian bytes
// at the offset of the pixel (x, y) in Pix.
//
// Like the image types of the standard library, a point outside of the image
// bounds is ignored instead of causing a panic or overwriting an unrelated
// pixel.
func (p *RGB565) Set(x, y int, c color.Color) {
	if !(image.Point{X: x, Y: y}.In(p.Rect)) {
		return
	}
	i := p.PixOffset(x, y)
	binary.LittleEndian.PutUint16(p.Pix[i:i+2], uint16(Model.Convert(c).(Color)))
}
24 changes: 24 additions & 0 deletions pkg/encoder/color/rgba/rgba.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package rgba

import (
"image"
"image/color"
)

// ToRGBA copies img into a newly allocated *image.RGBA that has the same size
// as img and bounds starting at (0, 0). Every pixel is converted with
// color.RGBAModel. When flipped is set, the rows are written in reverse order,
// i.e. the result is mirrored vertically.
func ToRGBA(img image.Image, flipped bool) *image.RGBA {
	bounds := img.Bounds()
	sw, sh := bounds.Dx(), bounds.Dy()
	dst := image.NewRGBA(image.Rect(0, 0, sw, sh))
	for y := 0; y < sh; y++ {
		yy := y
		if flipped {
			// Rows are indexed 0..sh-1, so the mirror of row y is sh-1-y
			// (sh-y would drop the first source row and leave row 0 empty).
			yy = sh - 1 - y
		}
		for x := 0; x < sw; x++ {
			// The source bounds do not have to start at the origin.
			px := img.At(bounds.Min.X+x, bounds.Min.Y+y)
			dst.SetRGBA(x, yy, color.RGBAModel.Convert(px).(color.RGBA))
		}
	}
	return dst
}
Loading

0 comments on commit b1b3371

Please sign in to comment.