[packages/libyuv] - added simd patch (avoid AVX in SSSE3-targeted code and SSE4.1 in SSE2-targeted code); rel 2
qboosh
qboosh at pld-linux.org
Tue Jun 29 21:49:20 CEST 2021
commit 5c416020b057c71bca88c15efb598f59f13cc658
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date: Tue Jun 29 21:52:09 2021 +0200
- added simd patch (avoid AVX in SSSE3-targeted code and SSE4.1 in SSE2-targeted code); rel 2
libyuv-simd.patch | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
libyuv.spec | 4 ++-
2 files changed, 90 insertions(+), 1 deletion(-)
---
diff --git a/libyuv.spec b/libyuv.spec
index 72df0b5..bfc12e5 100644
--- a/libyuv.spec
+++ b/libyuv.spec
@@ -9,7 +9,7 @@ Name: libyuv
%define yuv_ver 1788
%define gitref d19f69d9df7a54eae9cfae0b650921f675d9f01a
%define snap 20210611
-%define rel 1
+%define rel 2
Version: 0.%{yuv_ver}
Release: 0.%{snap}.%{rel}
License: BSD
@@ -20,6 +20,7 @@ Source0: %{name}-%{snap}.tar.gz
# Source0-md5: 9783a72e05ef548dd6f6ff7c1775e744
Source1: %{name}.pc
Patch0: shared-lib.patch
+Patch1: %{name}-simd.patch
URL: https://chromium.googlesource.com/libyuv/libyuv
BuildRequires: cmake >= 2.8
%{?with_tests:BuildRequires: gtest-devel}
@@ -71,6 +72,7 @@ Statyczna biblioteka libyuv.
%prep
%setup -q -c
%patch0 -p1
+%patch1 -p1

%build
mkdir -p build
diff --git a/libyuv-simd.patch b/libyuv-simd.patch
new file mode 100644
index 0000000..43fbdc7
--- /dev/null
+++ b/libyuv-simd.patch
@@ -0,0 +1,87 @@
+Fix ScaleRowUp2_Linear_SSSE3 to use SSSE3 SIMD instead of AVX
+(following upstream change in ScaleUVRowUp2_Linear_SSSE3)
+
+Fix ScaleUVRowUp2_{Linear,Bilinear}_16_SSE2 to use SSE2 SIMD instead of SSE4.1
+(based on https://stackoverflow.com/questions/11024652/simulating-packusdw-functionality-with-sse2)
+
+--- libyuv-0.1788/source/scale_gcc.cc.orig 2021-06-28 18:31:31.000000000 +0200
++++ libyuv-0.1788/source/scale_gcc.cc 2021-06-29 20:14:04.119069725 +0200
+@@ -1283,8 +1283,8 @@ void ScaleRowUp2_Linear_SSSE3(const uint
+ "paddw %%xmm4,%%xmm2 \n" // 3*near+far+2 (hi)
+ "psrlw $2,%%xmm0 \n" // 3/4*near+1/4*far (lo)
+ "psrlw $2,%%xmm2 \n" // 3/4*near+1/4*far (hi)
+- "vpackuswb %%xmm2,%%xmm0,%%xmm0 \n"
+- "vmovdqu %%xmm0,(%1) \n"
++ "packuswb %%xmm2,%%xmm0 \n"
++ "movdqu %%xmm0,(%1) \n"
+
+ "lea 0x8(%0),%0 \n"
+ "lea 0x10(%1),%1 \n" // 8 sample to 16 sample
+@@ -2699,9 +2699,14 @@ void ScaleUVRowUp2_Linear_16_SSE2(const
+ "paddd %%xmm2,%%xmm0 \n" // 3*near+far+2 (lo)
+ "paddd %%xmm3,%%xmm1 \n" // 3*near+far+2 (hi)
+
+- "psrld $2,%%xmm0 \n" // 3/4*near+1/4*far (lo)
+- "psrld $2,%%xmm1 \n" // 3/4*near+1/4*far (hi)
+- "packusdw %%xmm1,%%xmm0 \n"
++// "psrld $2,%%xmm0 \n" // 3/4*near+1/4*far (lo)
++// "psrld $2,%%xmm1 \n" // 3/4*near+1/4*far (hi)
++// "packusdw %%xmm1,%%xmm0 \n" // SSE4.1 - use SSE2 replacement
++ "pslld $14,%%xmm0 \n" // 16-2
++ "pslld $14,%%xmm1 \n" // 16-2
++ "psrad $16,%%xmm0 \n"
++ "psrad $16,%%xmm1 \n"
++ "packssdw %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+
+ "lea 0x8(%0),%0 \n"
+@@ -2766,14 +2771,14 @@ void ScaleUVRowUp2_Bilinear_16_SSE2(cons
+ "paddd %%xmm6,%%xmm5 \n" // 3*near+far+8 (2, lo)
+ "paddd %%xmm0,%%xmm4 \n" // 9*near+3*far (1, lo)
+ "paddd %%xmm5,%%xmm4 \n" // 9 3 3 1 + 8 (1, lo)
+- "psrld $4,%%xmm4 \n" // ^ div by 16 (1, lo)
++// "psrld $4,%%xmm4 \n" // ^ div by 16 (1, lo)
+
+ "movdqa %%xmm2,%%xmm5 \n"
+ "paddd %%xmm2,%%xmm5 \n" // 6*near+2*far (2, lo)
+ "paddd %%xmm6,%%xmm0 \n" // 3*near+far+8 (1, lo)
+ "paddd %%xmm2,%%xmm5 \n" // 9*near+3*far (2, lo)
+ "paddd %%xmm0,%%xmm5 \n" // 9 3 3 1 + 8 (2, lo)
+- "psrld $4,%%xmm5 \n" // ^ div by 16 (2, lo)
++// "psrld $4,%%xmm5 \n" // ^ div by 16 (2, lo)
+
+ "movdqa %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+@@ -2781,18 +2786,28 @@ void ScaleUVRowUp2_Bilinear_16_SSE2(cons
+ "paddd %%xmm6,%%xmm2 \n" // 3*near+far+8 (2, hi)
+ "paddd %%xmm1,%%xmm0 \n" // 9*near+3*far (1, hi)
+ "paddd %%xmm2,%%xmm0 \n" // 9 3 3 1 + 8 (1, hi)
+- "psrld $4,%%xmm0 \n" // ^ div by 16 (1, hi)
++// "psrld $4,%%xmm0 \n" // ^ div by 16 (1, hi)
+
+ "movdqa %%xmm3,%%xmm2 \n"
+ "paddd %%xmm3,%%xmm2 \n" // 6*near+2*far (2, hi)
+ "paddd %%xmm6,%%xmm1 \n" // 3*near+far+8 (1, hi)
+ "paddd %%xmm3,%%xmm2 \n" // 9*near+3*far (2, hi)
+ "paddd %%xmm1,%%xmm2 \n" // 9 3 3 1 + 8 (2, hi)
+- "psrld $4,%%xmm2 \n" // ^ div by 16 (2, hi)
++// "psrld $4,%%xmm2 \n" // ^ div by 16 (2, hi)
+
+- "packusdw %%xmm0,%%xmm4 \n"
++// "packusdw %%xmm0,%%xmm4 \n" // SSE4.1
++ "pslld $12,%%xmm4 \n" // 16-4
++ "pslld $12,%%xmm0 \n" // 16-4
++ "psrad $16,%%xmm4 \n"
++ "psrad $16,%%xmm0 \n"
++ "packssdw %%xmm0,%%xmm4 \n"
+ "movdqu %%xmm4,(%1) \n" // store above
+- "packusdw %%xmm2,%%xmm5 \n"
++// "packusdw %%xmm2,%%xmm5 \n" // SSE4.1
++ "pslld $12,%%xmm5 \n" // 16-4
++ "pslld $12,%%xmm2 \n" // 16-4
++ "psrad $16,%%xmm5 \n"
++ "psrad $16,%%xmm2 \n"
++ "packssdw %%xmm2,%%xmm5 \n"
+ "movdqu %%xmm5,(%1,%4,2) \n" // store below
+
+ "lea 0x8(%0),%0 \n"
================================================================
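Side note (not part of the commit): the SSE2 replacement for packusdw used above relies on the values fitting in an unsigned 16-bit lane after the final division, which holds for the scaler's 3*near+far+2 and 9/3/3/1+8 sums. The dropped psrld is folded into the left-shift amount, which is why the patch shifts by 14 (16-2) and 12 (16-4). A minimal standalone sketch with SSE2 intrinsics, using hypothetical helper names, just to illustrate the trick from the StackOverflow answer cited in the patch header:

/* Sketch only, not from the commit: SSE2 stand-in for the SSE4.1
 * packusdw used by the original code.  Valid when every 32-bit lane,
 * after the division, fits in an unsigned 16-bit value. */
#include <emmintrin.h>   /* SSE2 intrinsics */
#include <stdint.h>
#include <stdio.h>

/* Divide each 32-bit lane by 4 and pack to 16-bit lanes, SSE2 only.
 * pslld $14 + psrad $16 performs the >>2 and sign-extends the 16-bit
 * result, so packssdw (signed saturation) stores the same bit pattern
 * that psrld $2 + packusdw (unsigned saturation) would have produced. */
static __m128i pack_div4_sse2(__m128i lo, __m128i hi) {
  lo = _mm_srai_epi32(_mm_slli_epi32(lo, 14), 16);  /* pslld $14; psrad $16 */
  hi = _mm_srai_epi32(_mm_slli_epi32(hi, 14), 16);
  return _mm_packs_epi32(lo, hi);                   /* packssdw */
}

int main(void) {
  /* Sample 3*near+far+2 sums; 262142 is the largest possible value for
   * 16-bit input, and 262142/4 = 65535 still fits in uint16_t. */
  __m128i lo = _mm_setr_epi32(2, 40002, 131072, 262142);
  __m128i hi = _mm_setr_epi32(6, 65538, 200000, 3);

  uint16_t out[8];
  _mm_storeu_si128((__m128i *)out, pack_div4_sse2(lo, hi));

  for (int i = 0; i < 8; i++)
    printf("%u ", (unsigned)out[i]);  /* each input divided by 4 */
  printf("\n");                       /* 0 10000 32768 65535 1 16384 50000 0 */
  return 0;
}

The same reasoning applies to the bilinear path, where the left shift of 12 absorbs the former psrld $4 (division by 16).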
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/libyuv.git/commitdiff/5c416020b057c71bca88c15efb598f59f13cc658