[packages/ogdf] - new; patched to actually allow sse3 runtime detection

qboosh qboosh at pld-linux.org
Sat Aug 31 13:52:30 CEST 2024

commit dc37419e2dfb3ffbeae6da88517296fdb3679eab
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date:   Sat Aug 31 13:03:49 2024 +0200

    - new; patched to actually allow sse3 runtime detection

 ogdf-no-native.patch |  11 ++
 ogdf-sse.patch       | 394 +++++++++++++++++++++++++++++++++++++++++++++++++++
 ogdf.spec            | 116 +++++++++++++++
 3 files changed, 521 insertions(+)
diff --git a/ogdf.spec b/ogdf.spec
new file mode 100644
index 0000000..6e62855
--- /dev/null
+++ b/ogdf.spec
@@ -0,0 +1,116 @@
+# TODO: build against system COIN-OR libraries (CoinUtils/Clp/Osi/OsiClp) instead of the bundled libCOIN
+# Conditional build:
+%bcond_without	apidocs		# API documentation
+%bcond_without	static_libs	# static libraries
+Summary:	Open Graph Drawing Framework / Open Graph algorithms and Data structures Framework
+Summary(pl.UTF-8):	Otwarty szkielet algorytmów i struktur dla grafów
+Name:		ogdf
+Version:	2023.09
+Release:	1
+License:	GPL v2 or GPL v3 with limited linking exceptions
+Group:		Libraries
+#Source0Download: https://github.com/ogdf/ogdf/releases
+Source0:	https://github.com/ogdf/ogdf/archive/elderberry-202309/%{name}-elderberry-202309.tar.gz
+# Source0-md5:	139100ac0ace53ec9369ed5a375e25cd
+Patch0:		%{name}-no-native.patch
+Patch1:		%{name}-sse.patch
+URL:		https://ogdf.uos.de/
+BuildRequires:	CGAL-devel
+BuildRequires:	cmake >= 3.8
+%{?with_apidocs:BuildRequires:	doxygen}
+BuildRequires:	libgomp-devel
+BuildRequires:	libstdc++-devel >= 6:7
+BuildRequires:	rpm-build >= 4.6
+BuildRequires:	rpmbuild(macros) >= 1.605
+BuildRoot:	%{tmpdir}/%{name}-%{version}-root-%(id -u -n)
+OGDF is a self-contained C++ library for graph algorithms, in
+particular for (but not restricted to) automatic graph drawing. It
+offers sophisticated algorithms and data structures to use within your
+own applications or scientific projects.
+%description -l pl.UTF-8
+OGDF to samodzielna biblioteka C++ dla algorytmów grafowych, w
+szczególności (ale nie tylko) do automatycznego rysowania grafów.
+Oferuje wymyślne algorytmy i struktury danych do używania w
+aplikacjach lub projektach naukowych.
+%package devel
+Summary:	Header files for OGDF library
+Summary(pl.UTF-8):	Pliki nagłówkowe biblioteki OGDF
+Group:		Development/Libraries
+Requires:	%{name} = %{version}-%{release}
+%description devel
+Header files for OGDF library.
+%description devel -l pl.UTF-8
+Pliki nagłówkowe biblioteki OGDF.
+%package apidocs
+Summary:	API documentation for OGDF library
+Summary(pl.UTF-8):	Dokumentacja API biblioteki OGDF
+Group:		Documentation
+BuildArch:	noarch
+%description apidocs
+API documentation for OGDF library.
+%description apidocs -l pl.UTF-8
+Dokumentacja API biblioteki OGDF.
+%setup -q -n %{name}-elderberry-202309
+%patch0 -p1
+%patch1 -p1
+install -d build
+cd build
+%cmake .. \
+%if %{with apidocs}
+cd ../doc
+doxygen ogdf-doxygen.cfg
+%{__make} -C build install \
+install -d $RPM_BUILD_ROOT%{_examplesdir}
+%{__mv} $RPM_BUILD_ROOT%{_docdir}/libogdf/examples $RPM_BUILD_ROOT%{_examplesdir}/%{name}-%{version}
+%post	-p /sbin/ldconfig
+%postun	-p /sbin/ldconfig
+%doc LICENSE.txt README.md
+%attr(755,root,root) %{_libdir}/libCOIN.so
+%attr(755,root,root) %{_libdir}/libOGDF.so
+%files devel
+# FIXME: switch to system COIN-OR instead of the bundled copy
+%if %{with apidocs}
+%files apidocs
+%doc doc/html/{search,*.{css,html,js,png}}
diff --git a/ogdf-no-native.patch b/ogdf-no-native.patch
new file mode 100644
index 0000000..835d751
--- /dev/null
+++ b/ogdf-no-native.patch
@@ -0,0 +1,11 @@
+--- ogdf-elderberry-202309/cmake/compiler-specifics.cmake.orig	2023-09-14 15:14:23.000000000 +0200
++++ ogdf-elderberry-202309/cmake/compiler-specifics.cmake	2024-08-29 06:25:21.576284732 +0200
+@@ -15,7 +15,7 @@ endif()
+ # use native arch (ie, activate things like SSE)
+   # cannot use add_definitions() here because it does not work with check-sse3.cmake
+-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+ endif()
+ # set default warning flags for OGDF and tests
diff --git a/ogdf-sse.patch b/ogdf-sse.patch
new file mode 100644
index 0000000..44d4083
--- /dev/null
+++ b/ogdf-sse.patch
@@ -0,0 +1,394 @@
+--- ogdf-elderberry-202309/src/ogdf/energybased/SpringEmbedderFRExact.cpp.orig	2023-09-14 15:14:23.000000000 +0200
++++ ogdf-elderberry-202309/src/ogdf/energybased/SpringEmbedderFRExact.cpp	2024-08-29 21:41:39.955107959 +0200
+@@ -37,8 +37,6 @@
+ #	include <omp.h>
+ #endif
+-#include <ogdf/basic/internal/intrinsics.h>
+ namespace ogdf {
+ SpringEmbedderFRExact::ArrayGraph::ArrayGraph(GraphAttributes& ga)
+@@ -385,180 +383,4 @@ void SpringEmbedderFRExact::mainStep(Arr
+ 	System::alignedMemoryFree(disp_y);
+ }
+-void SpringEmbedderFRExact::mainStep_sse3(ArrayGraph& C) {
+-	const int n = C.numberOfNodes();
+-#	ifdef _OPENMP
+-	const int work = 256;
+-	const int nThreadsRep = min(omp_get_max_threads(), 1 + n * n / work);
+-	const int nThreadsPrev = min(omp_get_max_threads(), 1 + n / work);
+-#	endif
+-	const double k = m_idealEdgeLength;
+-	const double kSquare = k * k;
+-	const double c_rep = 0.052 * kSquare; // 0.2 = factor for repulsive forces as suggested by Warshal
+-	const double minDist = 10e-6; //100*DBL_EPSILON;
+-	const double minDistSquare = minDist * minDist;
+-	double* disp_x = (double*)System::alignedMemoryAlloc16(n * sizeof(double));
+-	double* disp_y = (double*)System::alignedMemoryAlloc16(n * sizeof(double));
+-	__m128d mm_kSquare = _mm_set1_pd(kSquare);
+-#	endif
+-	__m128d mm_minDist = _mm_set1_pd(minDist);
+-	__m128d mm_minDistSquare = _mm_set1_pd(minDistSquare);
+-	__m128d mm_c_rep = _mm_set1_pd(c_rep);
+-#	pragma omp parallel num_threads(nThreadsRep)
+-	{
+-		double tx = m_txNull;
+-		double ty = m_tyNull;
+-		int cF = 1;
+-		for (int i = 1; i <= m_iterations; i++) {
+-			// repulsive forces
+-#	pragma omp for
+-			for (int v = 0; v < n; ++v) {
+-				__m128d mm_disp_xv = _mm_setzero_pd();
+-				__m128d mm_disp_yv = _mm_setzero_pd();
+-				__m128d mm_xv = _mm_set1_pd(C.m_x[v]);
+-				__m128d mm_yv = _mm_set1_pd(C.m_y[v]);
+-				auto compute_pd = [&](int u) {
+-					__m128d mm_delta_x = _mm_sub_pd(mm_xv, _mm_load_pd(&C.m_x[u]));
+-					__m128d mm_delta_y = _mm_sub_pd(mm_yv, _mm_load_pd(&C.m_y[u]));
+-					__m128d mm_xSquare = _mm_mul_pd(mm_delta_x, mm_delta_x);
+-					__m128d mm_ySquare = _mm_mul_pd(mm_delta_y, mm_delta_y);
+-					__m128d mm_distSquare =
+-							_mm_max_pd(mm_minDistSquare, _mm_add_pd(mm_xSquare, mm_ySquare));
+-					__m128d mm_t =
+-							_mm_div_pd(_mm_load_pd(&C.m_nodeWeight[u]), mm_distSquare);
+-#	else
+-							_mm_div_pd(mm_kSquare, mm_distSquare);
+-#	endif
+-					mm_disp_xv = _mm_add_pd(mm_disp_xv, _mm_mul_pd(mm_delta_x, mm_t));
+-					mm_disp_yv = _mm_add_pd(mm_disp_yv, _mm_mul_pd(mm_delta_y, mm_t));
+-				};
+-				auto compute_sd = [&](int u) {
+-					__m128d mm_delta_x = _mm_sub_sd(mm_xv, _mm_load_sd(&C.m_x[u]));
+-					__m128d mm_delta_y = _mm_sub_sd(mm_yv, _mm_load_sd(&C.m_y[u]));
+-					__m128d mm_xSquare = _mm_mul_sd(mm_delta_x, mm_delta_x);
+-					__m128d mm_ySquare = _mm_mul_sd(mm_delta_y, mm_delta_y);
+-					__m128d mm_distSquare =
+-							_mm_max_sd(mm_minDistSquare, _mm_add_sd(mm_xSquare, mm_ySquare));
+-					__m128d mm_t =
+-							_mm_div_sd(_mm_load_sd(&C.m_nodeWeight[u]), mm_distSquare);
+-#	else
+-							_mm_div_sd(mm_kSquare, mm_distSquare);
+-#	endif
+-					mm_disp_xv = _mm_add_sd(mm_disp_xv, _mm_mul_sd(mm_delta_x, mm_t));
+-					mm_disp_yv = _mm_add_sd(mm_disp_yv, _mm_mul_sd(mm_delta_y, mm_t));
+-				};
+-				int u;
+-				for (u = 0; u + 1 < v; u += 2) {
+-					compute_pd(u);
+-				}
+-				int uStart = u + 2;
+-				if (u == v) {
+-					++u;
+-				}
+-				if (u < n) {
+-					compute_sd(u);
+-				}
+-				// TODO do we need u+1 here?
+-				//      GCC's leak sanitizer reports a heap buffer overflow when using just u.
+-				for (u = uStart; u + 1 < n; u += 2) {
+-					compute_pd(u);
+-				}
+-				if (u < n) {
+-					compute_sd(u);
+-				}
+-				mm_disp_xv = _mm_hadd_pd(mm_disp_xv, mm_disp_xv);
+-				mm_disp_yv = _mm_hadd_pd(mm_disp_yv, mm_disp_yv);
+-				_mm_store_sd(&disp_x[v], _mm_mul_sd(mm_disp_xv, mm_c_rep));
+-				_mm_store_sd(&disp_y[v], _mm_mul_sd(mm_disp_yv, mm_c_rep));
+-			}
+-			// attractive forces
+-#	pragma omp single
+-			for (int e = 0; e < C.numberOfEdges(); ++e) {
+-				int v = C.m_src[e];
+-				int u = C.m_tgt[e];
+-				double delta_x = C.m_x[v] - C.m_x[u];
+-				double delta_y = C.m_y[v] - C.m_y[u];
+-				double dist = max(minDist, sqrt(delta_x * delta_x + delta_y * delta_y));
+-				disp_x[v] -= delta_x * dist / k;
+-				disp_y[v] -= delta_y * dist / k;
+-				disp_x[u] += delta_x * dist / k;
+-				disp_y[u] += delta_y * dist / k;
+-			}
+-			// limit the maximum displacement to the temperature (m_tx,m_ty)
+-			__m128d mm_tx = _mm_set1_pd(tx);
+-			__m128d mm_ty = _mm_set1_pd(ty);
+-#	pragma omp for nowait
+-			for (int v = 0; v < n - 1; v += 2) {
+-				__m128d mm_disp_xv = _mm_load_pd(&disp_x[v]);
+-				__m128d mm_disp_yv = _mm_load_pd(&disp_y[v]);
+-				__m128d mm_dist = _mm_max_pd(mm_minDist,
+-						_mm_sqrt_pd(_mm_add_pd(_mm_mul_pd(mm_disp_xv, mm_disp_xv),
+-								_mm_mul_pd(mm_disp_yv, mm_disp_yv))));
+-				_mm_store_pd(&C.m_x[v],
+-						_mm_add_pd(_mm_load_pd(&C.m_x[v]),
+-								_mm_mul_pd(_mm_div_pd(mm_disp_xv, mm_dist),
+-										_mm_min_pd(mm_dist, mm_tx))));
+-				_mm_store_pd(&C.m_y[v],
+-						_mm_add_pd(_mm_load_pd(&C.m_y[v]),
+-								_mm_mul_pd(_mm_div_pd(mm_disp_yv, mm_dist),
+-										_mm_min_pd(mm_dist, mm_ty))));
+-			}
+-#	pragma omp single nowait
+-			{
+-				if (n % 2) {
+-					int v = n - 1;
+-					double dist = max(minDist, sqrt(disp_x[v] * disp_x[v] + disp_y[v] * disp_y[v]));
+-					C.m_x[v] += disp_x[v] / dist * min(dist, tx);
+-					C.m_y[v] += disp_y[v] / dist * min(dist, ty);
+-				}
+-			}
+-			cool(tx, ty, cF);
+-#	pragma omp barrier
+-		}
+-	}
+-	System::alignedMemoryFree(disp_x);
+-	System::alignedMemoryFree(disp_y);
+-	mainStep(C);
+ }
+--- ogdf-elderberry-202309/src/ogdf/energybased/SpringEmbedderFRExactSSE3.cpp.orig	1970-01-01 01:00:00.000000000 +0100
++++ ogdf-elderberry-202309/src/ogdf/energybased/SpringEmbedderFRExactSSE3.cpp	2024-08-29 21:41:42.018430114 +0200
+@@ -0,0 +1,189 @@
++#include <ogdf/basic/simple_graph_alg.h>
++#include <ogdf/energybased/SpringEmbedderFRExact.h>
++#include <ogdf/packing/TileToRowsCCPacker.h>
++#ifdef _OPENMP
++#	include <omp.h>
++#include <ogdf/basic/internal/intrinsics.h>
++namespace ogdf {
++void SpringEmbedderFRExact::mainStep_sse3(ArrayGraph& C) {
++	const int n = C.numberOfNodes();
++#	ifdef _OPENMP
++	const int work = 256;
++	const int nThreadsRep = min(omp_get_max_threads(), 1 + n * n / work);
++	const int nThreadsPrev = min(omp_get_max_threads(), 1 + n / work);
++#	endif
++	const double k = m_idealEdgeLength;
++	const double kSquare = k * k;
++	const double c_rep = 0.052 * kSquare; // 0.2 = factor for repulsive forces as suggested by Warshal
++	const double minDist = 10e-6; //100*DBL_EPSILON;
++	const double minDistSquare = minDist * minDist;
++	double* disp_x = (double*)System::alignedMemoryAlloc16(n * sizeof(double));
++	double* disp_y = (double*)System::alignedMemoryAlloc16(n * sizeof(double));
++	__m128d mm_kSquare = _mm_set1_pd(kSquare);
++#	endif
++	__m128d mm_minDist = _mm_set1_pd(minDist);
++	__m128d mm_minDistSquare = _mm_set1_pd(minDistSquare);
++	__m128d mm_c_rep = _mm_set1_pd(c_rep);
++#	pragma omp parallel num_threads(nThreadsRep)
++	{
++		double tx = m_txNull;
++		double ty = m_tyNull;
++		int cF = 1;
++		for (int i = 1; i <= m_iterations; i++) {
++			// repulsive forces
++#	pragma omp for
++			for (int v = 0; v < n; ++v) {
++				__m128d mm_disp_xv = _mm_setzero_pd();
++				__m128d mm_disp_yv = _mm_setzero_pd();
++				__m128d mm_xv = _mm_set1_pd(C.m_x[v]);
++				__m128d mm_yv = _mm_set1_pd(C.m_y[v]);
++				auto compute_pd = [&](int u) {
++					__m128d mm_delta_x = _mm_sub_pd(mm_xv, _mm_load_pd(&C.m_x[u]));
++					__m128d mm_delta_y = _mm_sub_pd(mm_yv, _mm_load_pd(&C.m_y[u]));
++					__m128d mm_xSquare = _mm_mul_pd(mm_delta_x, mm_delta_x);
++					__m128d mm_ySquare = _mm_mul_pd(mm_delta_y, mm_delta_y);
++					__m128d mm_distSquare =
++							_mm_max_pd(mm_minDistSquare, _mm_add_pd(mm_xSquare, mm_ySquare));
++					__m128d mm_t =
++							_mm_div_pd(_mm_load_pd(&C.m_nodeWeight[u]), mm_distSquare);
++#	else
++							_mm_div_pd(mm_kSquare, mm_distSquare);
++#	endif
++					mm_disp_xv = _mm_add_pd(mm_disp_xv, _mm_mul_pd(mm_delta_x, mm_t));
++					mm_disp_yv = _mm_add_pd(mm_disp_yv, _mm_mul_pd(mm_delta_y, mm_t));
++				};
++				auto compute_sd = [&](int u) {
++					__m128d mm_delta_x = _mm_sub_sd(mm_xv, _mm_load_sd(&C.m_x[u]));
++					__m128d mm_delta_y = _mm_sub_sd(mm_yv, _mm_load_sd(&C.m_y[u]));
++					__m128d mm_xSquare = _mm_mul_sd(mm_delta_x, mm_delta_x);
++					__m128d mm_ySquare = _mm_mul_sd(mm_delta_y, mm_delta_y);
++					__m128d mm_distSquare =
++							_mm_max_sd(mm_minDistSquare, _mm_add_sd(mm_xSquare, mm_ySquare));
++					__m128d mm_t =
++							_mm_div_sd(_mm_load_sd(&C.m_nodeWeight[u]), mm_distSquare);
++#	else
++							_mm_div_sd(mm_kSquare, mm_distSquare);
++#	endif
++					mm_disp_xv = _mm_add_sd(mm_disp_xv, _mm_mul_sd(mm_delta_x, mm_t));
++					mm_disp_yv = _mm_add_sd(mm_disp_yv, _mm_mul_sd(mm_delta_y, mm_t));
++				};
++				int u;
++				for (u = 0; u + 1 < v; u += 2) {
++					compute_pd(u);
++				}
++				int uStart = u + 2;
++				if (u == v) {
++					++u;
++				}
++				if (u < n) {
++					compute_sd(u);
++				}
++				// TODO do we need u+1 here?
++				//      GCC's leak sanitizer reports a heap buffer overflow when using just u.
++				for (u = uStart; u + 1 < n; u += 2) {
++					compute_pd(u);
++				}
++				if (u < n) {
++					compute_sd(u);
++				}
++				mm_disp_xv = _mm_hadd_pd(mm_disp_xv, mm_disp_xv);
++				mm_disp_yv = _mm_hadd_pd(mm_disp_yv, mm_disp_yv);
++				_mm_store_sd(&disp_x[v], _mm_mul_sd(mm_disp_xv, mm_c_rep));
++				_mm_store_sd(&disp_y[v], _mm_mul_sd(mm_disp_yv, mm_c_rep));
++			}
++			// attractive forces
++#	pragma omp single
++			for (int e = 0; e < C.numberOfEdges(); ++e) {
++				int v = C.m_src[e];
++				int u = C.m_tgt[e];
++				double delta_x = C.m_x[v] - C.m_x[u];
++				double delta_y = C.m_y[v] - C.m_y[u];
++				double dist = max(minDist, sqrt(delta_x * delta_x + delta_y * delta_y));
++				disp_x[v] -= delta_x * dist / k;
++				disp_y[v] -= delta_y * dist / k;
++				disp_x[u] += delta_x * dist / k;
++				disp_y[u] += delta_y * dist / k;
++			}
++			// limit the maximum displacement to the temperature (m_tx,m_ty)
++			__m128d mm_tx = _mm_set1_pd(tx);
++			__m128d mm_ty = _mm_set1_pd(ty);
++#	pragma omp for nowait
++			for (int v = 0; v < n - 1; v += 2) {
++				__m128d mm_disp_xv = _mm_load_pd(&disp_x[v]);
++				__m128d mm_disp_yv = _mm_load_pd(&disp_y[v]);
++				__m128d mm_dist = _mm_max_pd(mm_minDist,
++						_mm_sqrt_pd(_mm_add_pd(_mm_mul_pd(mm_disp_xv, mm_disp_xv),
++								_mm_mul_pd(mm_disp_yv, mm_disp_yv))));
++				_mm_store_pd(&C.m_x[v],
++						_mm_add_pd(_mm_load_pd(&C.m_x[v]),
++								_mm_mul_pd(_mm_div_pd(mm_disp_xv, mm_dist),
++										_mm_min_pd(mm_dist, mm_tx))));
++				_mm_store_pd(&C.m_y[v],
++						_mm_add_pd(_mm_load_pd(&C.m_y[v]),
++								_mm_mul_pd(_mm_div_pd(mm_disp_yv, mm_dist),
++										_mm_min_pd(mm_dist, mm_ty))));
++			}
++#	pragma omp single nowait
++			{
++				if (n % 2) {
++					int v = n - 1;
++					double dist = max(minDist, sqrt(disp_x[v] * disp_x[v] + disp_y[v] * disp_y[v]));
++					C.m_x[v] += disp_x[v] / dist * min(dist, tx);
++					C.m_y[v] += disp_y[v] / dist * min(dist, ty);
++				}
++			}
++			cool(tx, ty, cF);
++#	pragma omp barrier
++		}
++	}
++	System::alignedMemoryFree(disp_x);
++	System::alignedMemoryFree(disp_y);
++	mainStep(C);
+--- ogdf-elderberry-202309/cmake/ogdf.cmake.orig	2023-09-14 15:14:23.000000000 +0200
++++ ogdf-elderberry-202309/cmake/ogdf.cmake	2024-08-29 21:49:24.562590964 +0200
+@@ -170,6 +170,7 @@ if(has_sse3_intrin)
+   set(OGDF_SSE3_EXTENSIONS <intrin.h>)
+ elseif(has_sse3_pmmintrin)
+   set(OGDF_SSE3_EXTENSIONS <pmmintrin.h>)
++  set_source_file_properties( "src/ogdf/energybased/SpringEmbedderFRExactSSE3.cpp" PROPERTIES COMPILE_FLAGS "-msse3" )
+ else()
+   message(STATUS "SSE3 could not be activated")
+ endif()

---- gitweb:


More information about the pld-cvs-commit mailing list