12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564 |
- From 1c54b13cb29463af514a107c42946abd97b5ca41 Mon Sep 17 00:00:00 2001
- From: Alexander Alekhin <alexander.alekhin@intel.com>
- Date: Wed, 7 Sep 2016 18:02:36 +0300
- Subject: [PATCH] cmake: support multiple CPU targets
- Backported from: https://github.com/opencv/opencv/commit/e16227b53cabab1caa4b7aba8ff59a630528348f
- Signed-off-by: Samuel Martin <s.martin49@gmail.com>
- ---
- CMakeLists.txt | 50 +-
- cmake/OpenCVCompilerOptimizations.cmake | 651 +++++++++++++++++++++
- cmake/OpenCVCompilerOptions.cmake | 161 +----
- cmake/OpenCVGenHeaders.cmake | 4 +
- cmake/OpenCVModule.cmake | 3 +
- cmake/OpenCVPCHSupport.cmake | 5 +-
- cmake/OpenCVUtils.cmake | 50 +-
- cmake/checks/cpu_avx.cpp | 9 +
- cmake/checks/cpu_avx2.cpp | 10 +
- cmake/checks/cpu_avx512.cpp | 10 +
- cmake/checks/cpu_fp16.cpp | 33 ++
- cmake/checks/cpu_popcnt.cpp | 8 +
- cmake/checks/cpu_sse.cpp | 2 +
- cmake/checks/cpu_sse2.cpp | 2 +
- cmake/checks/cpu_sse3.cpp | 7 +
- cmake/checks/cpu_sse41.cpp | 6 +
- cmake/checks/cpu_sse42.cpp | 5 +
- cmake/checks/cpu_ssse3.cpp | 7 +
- cmake/checks/fp16.cpp | 33 --
- cmake/templates/cv_cpu_config.h.in | 5 +
- cmake/templates/cvconfig.h.in | 13 +
- .../core/include/opencv2/core/cv_cpu_dispatch.h | 166 ++++++
- modules/core/include/opencv2/core/cv_cpu_helper.h | 133 +++++
- modules/core/include/opencv2/core/cvdef.h | 145 +----
- modules/core/include/opencv2/core/fast_math.hpp | 60 +-
- modules/core/src/system.cpp | 301 ++++++++--
- modules/highgui/CMakeLists.txt | 2 +-
- modules/imgproc/src/imgwarp.cpp | 4 +-
- modules/objdetect/src/haar.cpp | 5 +-
- 29 files changed, 1472 insertions(+), 418 deletions(-)
- create mode 100644 cmake/OpenCVCompilerOptimizations.cmake
- create mode 100644 cmake/checks/cpu_avx.cpp
- create mode 100644 cmake/checks/cpu_avx2.cpp
- create mode 100644 cmake/checks/cpu_avx512.cpp
- create mode 100644 cmake/checks/cpu_fp16.cpp
- create mode 100644 cmake/checks/cpu_popcnt.cpp
- create mode 100644 cmake/checks/cpu_sse.cpp
- create mode 100644 cmake/checks/cpu_sse2.cpp
- create mode 100644 cmake/checks/cpu_sse3.cpp
- create mode 100644 cmake/checks/cpu_sse41.cpp
- create mode 100644 cmake/checks/cpu_sse42.cpp
- create mode 100644 cmake/checks/cpu_ssse3.cpp
- delete mode 100644 cmake/checks/fp16.cpp
- create mode 100644 cmake/templates/cv_cpu_config.h.in
- create mode 100644 modules/core/include/opencv2/core/cv_cpu_dispatch.h
- create mode 100644 modules/core/include/opencv2/core/cv_cpu_helper.h
- diff --git a/CMakeLists.txt b/CMakeLists.txt
- index cc45f6f..9c9971e 100644
- --- a/CMakeLists.txt
- +++ b/CMakeLists.txt
- @@ -85,6 +85,10 @@ if(POLICY CMP0042)
- cmake_policy(SET CMP0042 NEW)
- endif()
-
- +if(POLICY CMP0051)
- + cmake_policy(SET CMP0051 NEW)
- +endif()
- +
- include(cmake/OpenCVUtils.cmake)
-
- # must go before the project command
- @@ -274,16 +278,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov"
- OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
- OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
- OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- -OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
- OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
- OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
- OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
- @@ -292,6 +286,9 @@ OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with n
- OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF )
- OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
- OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
- +OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON )
- +OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF )
- +
-
- OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )
-
- @@ -492,6 +489,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL
- set(CMAKE_BUILD_TYPE Release)
- endif()
-
- +# --- Python Support ---
- +include(cmake/OpenCVDetectPython.cmake)
- +
- include(cmake/OpenCVCompilerOptions.cmake)
-
-
- @@ -569,9 +569,6 @@ else()
- unset(DOXYGEN_FOUND CACHE)
- endif()
-
- -# --- Python Support ---
- -include(cmake/OpenCVDetectPython.cmake)
- -
- # --- Java Support ---
- include(cmake/OpenCVDetectApacheAnt.cmake)
- if(ANDROID)
- @@ -860,6 +857,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio")
- status(" Configuration:" ${CMAKE_BUILD_TYPE})
- endif()
-
- +
- +# ========================= CPU code generation mode =========================
- +status("") # prints the outcome of the CPU baseline/dispatch selection in the configuration summary
- +status(" CPU/HW features:")
- +status(" Baseline:" "${CPU_BASELINE_FINAL}")
- +if(NOT CPU_BASELINE STREQUAL CPU_BASELINE_FINAL) # show the requested list only when it differs from the final one
- + status(" requested:" "${CPU_BASELINE}")
- +endif()
- +if(CPU_BASELINE_REQUIRE)
- + status(" required:" "${CPU_BASELINE_REQUIRE}")
- +endif()
- +if(CPU_BASELINE_DISABLE)
- + status(" disabled:" "${CPU_BASELINE_DISABLE}")
- +endif()
- +if(CPU_DISPATCH_FINAL OR CPU_DISPATCH) # dispatch section is skipped when nothing was requested or enabled
- + status(" Dispatched code generation:" "${CPU_DISPATCH_FINAL}")
- + if(NOT CPU_DISPATCH STREQUAL CPU_DISPATCH_FINAL)
- + status(" requested:" "${CPU_DISPATCH}")
- + endif()
- + if(CPU_DISPATCH_REQUIRE)
- + status(" required:" "${CPU_DISPATCH_REQUIRE}")
- + endif()
- + foreach(OPT ${CPU_DISPATCH_FINAL}) # one line per dispatched optimization
- + status(" ${OPT} (${CPU_${OPT}_USAGE_COUNT} files):" "+ ${CPU_DISPATCH_${OPT}_INCLUDED}")
- + endforeach()
- +endif()
- +
- # ========================== C/C++ options ==========================
- if(CMAKE_CXX_COMPILER_VERSION)
- set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})")
- diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake
- new file mode 100644
- index 0000000..b849f02
- --- /dev/null
- +++ b/cmake/OpenCVCompilerOptimizations.cmake
- @@ -0,0 +1,651 @@
- +# x86/x86-64 arch:
- +# SSE / SSE2 (always available on 64-bit CPUs)
- +# SSE3 / SSSE3
- +# SSE4_1 / SSE4_2 / POPCNT
- +# AVX / AVX2 / AVX512
- +# FMA3
- +
- +# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
- +# CPU_{opt}_IMPLIES=<list>
- +# CPU_{opt}_FORCE=<list> - subset of "implies" list
- +# CPU_{opt}_FLAGS_ON=""
- +# CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum
- +
- +# Input variables:
- +# CPU_BASELINE=<list> - preferred list of baseline optimizations
- +# CPU_DISPATCH=<list> - preferred list of dispatched optimizations
- +
- +# Advanced input variables:
- +# CPU_BASELINE_REQUIRE=<list> - list of required baseline optimizations
- +# CPU_DISPATCH_REQUIRE=<list> - list of required dispatched optimizations
- +# CPU_BASELINE_DISABLE=<list> - list of disabled baseline optimizations
- +
- +# Output variables:
- +# CPU_BASELINE_FINAL=<list> - final list of enabled compiler optimizations
- +# CPU_DISPATCH_FINAL=<list> - final list of dispatched optimizations
- +#
- +# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (_opt_avx2.cpp)
- +
- +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512
- +list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) # names used by the ARM branch below; FP16 appears in both lists
- +list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) # drops the repeated FP16 entry
- +
- +ocv_update(CPU_VFPV3_FEATURE_ALIAS "") # VFPV3 gets an empty CV_CPU_* feature alias
- +
- +
- +set(HELP_CPU_BASELINE "Specify list of enabled baseline CPU optimizations") # help strings reused for every cache entry of the same name
- +set(HELP_CPU_BASELINE_REQUIRE "Specify list of required baseline CPU optimizations")
- +set(HELP_CPU_BASELINE_DISABLE "Specify list of forbidden baseline CPU optimizations")
- +set(HELP_CPU_DISPATCH "Specify list of dispatched CPU optimizations")
- +set(HELP_CPU_DISPATCH_REQUIRE "Specify list of required dispatched CPU optimizations")
- +
- +foreach(var CPU_BASELINE CPU_BASELINE_REQUIRE CPU_BASELINE_DISABLE CPU_DISPATCH CPU_DISPATCH_REQUIRE) # accept comma-separated input
- + if(DEFINED ${var})
- + string(REPLACE "," ";" _list "${${var}}") # "A,B" -> "A;B" (a CMake list)
- + set(${var} "${_list}" CACHE STRING "${HELP_${var}}" FORCE) # write the normalized value back to the cache
- + endif()
- +endforeach()
- +
- +# process legacy flags: map an old ENABLE_<opt> option onto CPU_BASELINE_REQUIRE (when ON) or CPU_BASELINE_DISABLE (when OFF)
- +macro(ocv_optimization_process_obsolete_option legacy_flag OPT legacy_warn) # legacy_flag: old option name, OPT: new optimization name, legacy_warn: print deprecation notice
- + if(DEFINED ${legacy_flag}) # nothing happens unless the legacy option was actually set
- + if(${legacy_warn})
- + message(STATUS "WARNING: Option ${legacy_flag}='${${legacy_flag}}' is deprecated and should not be used anymore")
- + message(STATUS " Behaviour of this option is not backward compatible")
- + message(STATUS " Refer to 'CPU_BASELINE'/'CPU_DISPATCH' CMake options documentation")
- + endif()
- + if(${legacy_flag}) # legacy option ON -> optimization becomes required
- + if(NOT ";${CPU_BASELINE_REQUIRE};" MATCHES ";${OPT};") # avoid adding the same entry twice
- + set(CPU_BASELINE_REQUIRE "${CPU_BASELINE_REQUIRE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_REQUIRE}" FORCE)
- + endif()
- + else() # legacy option OFF -> optimization becomes disabled
- + if(NOT ";${CPU_BASELINE_DISABLE};" MATCHES ";${OPT};")
- + set(CPU_BASELINE_DISABLE "${CPU_BASELINE_DISABLE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_DISABLE}" FORCE)
- + endif()
- + endif()
- + endif()
- +endmacro()
- +ocv_optimization_process_obsolete_option(ENABLE_SSE SSE ON) # x86 legacy options: handled with the deprecation notice
- +ocv_optimization_process_obsolete_option(ENABLE_SSE2 SSE2 ON)
- +ocv_optimization_process_obsolete_option(ENABLE_SSE3 SSE3 ON)
- +ocv_optimization_process_obsolete_option(ENABLE_SSSE3 SSSE3 ON)
- +ocv_optimization_process_obsolete_option(ENABLE_SSE41 SSE4_1 ON)
- +ocv_optimization_process_obsolete_option(ENABLE_SSE42 SSE4_2 ON)
- +ocv_optimization_process_obsolete_option(ENABLE_POPCNT POPCNT ON)
- +ocv_optimization_process_obsolete_option(ENABLE_AVX AVX ON)
- +ocv_optimization_process_obsolete_option(ENABLE_AVX2 AVX2 ON)
- +ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
- +
- +ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF) # these two are translated silently (no notice)
- +ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
- +
- +
- +macro(ocv_is_optimization_in_list resultvar check_opt) # sets ${resultvar} to 1 if check_opt is in ARGN or reachable from it through CPU_<opt>_IMPLIES, else 0
- + set(__checked "") # optimizations already expanded
- + set(__queue ${ARGN}) # optimizations still to examine
- + set(${resultvar} 0)
- + while(__queue AND NOT ${resultvar}) # expand the implication graph one level per iteration
- + list(REMOVE_DUPLICATES __queue)
- + set(__queue_current ${__queue})
- + set(__queue "")
- + foreach(OPT ${__queue_current})
- + if("x${OPT}" STREQUAL "x${check_opt}") # "x" prefix keeps if() from treating the operands as variable names
- + set(${resultvar} 1)
- + break()
- + elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand each optimization only once
- + list(APPEND __queue ${CPU_${OPT}_IMPLIES})
- + endif()
- + list(APPEND __checked ${OPT})
- + endforeach()
- + endwhile()
- +endmacro()
- +
- +macro(ocv_is_optimization_in_force_list resultvar check_opt) # same search as ocv_is_optimization_in_list, but following CPU_<opt>_FORCE edges
- + set(__checked "") # optimizations already expanded
- + set(__queue ${ARGN}) # optimizations still to examine
- + set(${resultvar} 0)
- + while(__queue AND NOT ${resultvar}) # expand the force graph one level per iteration
- + list(REMOVE_DUPLICATES __queue)
- + set(__queue_current ${__queue})
- + set(__queue "")
- + foreach(OPT ${__queue_current})
- + if(OPT STREQUAL "${check_opt}") # NOTE(review): the sibling macro compares "x${OPT}" with "x${check_opt}"; the two forms look equivalent here - confirm
- + set(${resultvar} 1)
- + break()
- + elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand each optimization only once
- + list(APPEND __queue ${CPU_${OPT}_FORCE})
- + endif()
- + list(APPEND __checked ${OPT})
- + endforeach()
- + endwhile()
- +endmacro()
- +
- +macro(ocv_append_optimization_flag var OPT) # appends CPU_<OPT>_FLAGS_ON to the flag string held in ${var}
- + if(CPU_${OPT}_FLAGS_CONFLICT) # first strip flags matching the conflict regex, when one is registered
- + string(REGEX REPLACE " ${CPU_${OPT}_FLAGS_CONFLICT}" "" ${var} " ${${var}}") # NOTE(review): with an "a|b" regex the leading space binds to the first alternative only - confirm intended
- + string(REGEX REPLACE "^ +" "" ${var} "${${var}}") # trim the leading blanks introduced above
- + endif()
- + set(${var} "${${var}} ${CPU_${OPT}_FLAGS_ON}")
- +endmacro()
- +
- +# Support GCC -march=native or Intel Compiler -xHost flags
- +if(";${CPU_BASELINE};" MATCHES ";NATIVE;" OR ";${CPU_BASELINE};" MATCHES ";HOST;") # NATIVE/HOST: detect the baseline and also add the native compiler flag
- + set(CPU_BASELINE_DETECT ON)
- + set(_add_native_flag ON)
- +elseif(";${CPU_BASELINE};" MATCHES ";DETECT;") # DETECT: detect the baseline from the flags already in effect
- + set(CPU_BASELINE_DETECT ON)
- +elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") # the user already passed a native/host flag
- + if(DEFINED CPU_BASELINE)
- + message(STATUS "CPU: Detected '-march=native' or '-xHost' compiler flag. Force CPU_BASELINE=DETECT.")
- + endif()
- + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") # NOTE(review): no FORCE, so an existing cache entry presumably keeps its value despite the message - confirm
- + set(CPU_BASELINE_DETECT ON)
- +endif()
- +
- +if(X86 OR X86_64) # x86 family: known optimizations, probe sources, implication graph, per-compiler flags, defaults
- + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512")
- +
- + ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") # source files compiled to probe each optimization
- + ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp")
- + ocv_update(CPU_SSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse3.cpp")
- + ocv_update(CPU_SSSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_ssse3.cpp")
- + ocv_update(CPU_SSE4_1_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse41.cpp")
- + ocv_update(CPU_SSE4_2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse42.cpp")
- + ocv_update(CPU_POPCNT_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_popcnt.cpp")
- + ocv_update(CPU_AVX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx.cpp")
- + ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp")
- + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp")
- + ocv_update(CPU_AVX512_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp")
- +
- + if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) # default implication graph; skipped when OPENCV_CPU_OPT_IMPLIES_IGNORE is set
- + ocv_update(CPU_AVX512_IMPLIES "AVX2")
- + ocv_update(CPU_AVX512_FORCE "") # Don't force other optimizations
- + ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16")
- + ocv_update(CPU_FMA3_IMPLIES "AVX2")
- + ocv_update(CPU_FMA3_FORCE "") # Don't force other optimizations
- + ocv_update(CPU_FP16_IMPLIES "AVX")
- + ocv_update(CPU_FP16_FORCE "") # Don't force other optimizations
- + ocv_update(CPU_AVX_IMPLIES "SSE4_2")
- + ocv_update(CPU_SSE4_2_IMPLIES "SSE4_1;POPCNT")
- + ocv_update(CPU_POPCNT_IMPLIES "SSE4_1")
- + ocv_update(CPU_POPCNT_FORCE "") # Don't force other optimizations
- + ocv_update(CPU_SSE4_1_IMPLIES "SSE3;SSSE3")
- + ocv_update(CPU_SSSE3_IMPLIES "SSE3")
- + ocv_update(CPU_SSE3_IMPLIES "SSE2")
- + ocv_update(CPU_SSE2_IMPLIES "SSE")
- + endif()
- +
- + if(CV_ICC) # Intel compiler: flags are registered through the helper macro below
- + macro(ocv_intel_compiler_optimization_option name unix_flags msvc_flags) # records flag name, enable flag and conflict regex for <name>
- + ocv_update(CPU_${name}_FLAGS_NAME "${name}")
- + if(MSVC) # pick the MSVC-style or unix-style spelling
- + set(enable_flags "${msvc_flags}")
- + set(flags_conflict "/arch:[^ ]+")
- + else()
- + set(enable_flags "${unix_flags}")
- + set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+")
- + endif()
- + ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}")
- + if(flags_conflict)
- + ocv_update(CPU_${name}_FLAGS_CONFLICT "${flags_conflict}") # consumed by ocv_append_optimization_flag to strip older flags
- + endif()
- + endmacro()
- + ocv_intel_compiler_optimization_option(AVX2 "-march=core-avx2" "/arch:CORE-AVX2")
- + ocv_intel_compiler_optimization_option(FP16 "-mavx" "/arch:AVX")
- + ocv_intel_compiler_optimization_option(AVX "-mavx" "/arch:AVX")
- + ocv_intel_compiler_optimization_option(FMA3 "" "")
- + ocv_intel_compiler_optimization_option(POPCNT "" "")
- + ocv_intel_compiler_optimization_option(SSE4_2 "-msse4.2" "/arch:SSE4.2")
- + ocv_intel_compiler_optimization_option(SSE4_1 "-msse4.1" "/arch:SSE4.1")
- + ocv_intel_compiler_optimization_option(SSE3 "-msse3" "/arch:SSE3")
- + ocv_intel_compiler_optimization_option(SSSE3 "-mssse3" "/arch:SSSE3")
- + ocv_intel_compiler_optimization_option(SSE2 "-msse2" "/arch:SSE2")
- + if(NOT X86_64) # x64 compiler doesn't support /arch:sse
- + ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE")
- + endif()
- + #ocv_intel_compiler_optimization_option(AVX512 "-march=core-avx512")
- + elseif(CMAKE_COMPILER_IS_GNUCXX) # GCC-style -m<feature> flags
- + ocv_update(CPU_AVX2_FLAGS_ON "-mavx2")
- + ocv_update(CPU_FP16_FLAGS_ON "-mf16c")
- + ocv_update(CPU_AVX_FLAGS_ON "-mavx")
- + ocv_update(CPU_FMA3_FLAGS_ON "-mfma")
- + ocv_update(CPU_POPCNT_FLAGS_ON "-mpopcnt")
- + ocv_update(CPU_SSE4_2_FLAGS_ON "-msse4.2")
- + ocv_update(CPU_SSE4_1_FLAGS_ON "-msse4.1")
- + ocv_update(CPU_SSE3_FLAGS_ON "-msse3")
- + ocv_update(CPU_SSSE3_FLAGS_ON "-mssse3")
- + ocv_update(CPU_SSE2_FLAGS_ON "-msse2")
- + ocv_update(CPU_SSE_FLAGS_ON "-msse")
- + if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") # AVX512 flags are registered only for compiler version >= 5.0
- + ocv_update(CPU_AVX512_FLAGS_ON "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi")
- + endif()
- + elseif(MSVC) # MSVC /arch: flags
- + ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2")
- + ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX")
- + if(NOT MSVC64)
- + # 64-bit MSVC compiler uses SSE/SSE2 by default
- + ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE")
- + ocv_update(CPU_SSE_SUPPORTED ON)
- + ocv_update(CPU_SSE2_FLAGS_ON "/arch:SSE2")
- + ocv_update(CPU_SSE2_SUPPORTED ON)
- + else() # 64-bit: marked supported without registering any flag
- + ocv_update(CPU_SSE_SUPPORTED ON)
- + ocv_update(CPU_SSE2_SUPPORTED ON)
- + endif()
- + # Other instruction sets are supported by default since MSVC 2008 at least
- + else()
- + message(WARNING "TODO: Unsupported compiler")
- + endif()
- +
- + if(NOT DEFINED CPU_DISPATCH) # default dispatch list when the user gave none
- + set(CPU_DISPATCH "SSE4_1;AVX;FP16;AVX2" CACHE STRING "${HELP_CPU_DISPATCH}")
- + endif()
- +
- + if(NOT DEFINED CPU_BASELINE) # default baseline when the user gave none: SSSE3 on x86-64, SSE2 otherwise
- + if(X86_64)
- + set(CPU_BASELINE "SSSE3" CACHE STRING "${HELP_CPU_BASELINE}")
- + else()
- + set(CPU_BASELINE "SSE2" CACHE STRING "${HELP_CPU_BASELINE}")
- + endif()
- + endif()
- +
- +elseif(ARM OR AARCH64) # ARM family
- + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp")
- + if(NOT AARCH64) # ARM (non-AArch64): features are selected with -mfpu= flags, baseline defaults to DETECT
- + ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16")
- + ocv_update(CPU_NEON_FLAGS_ON "-mfpu=neon")
- + ocv_update(CPU_VFPV3_FLAGS_ON "-mfpu=vfpv3")
- + ocv_update(CPU_FP16_FLAGS_ON "-mfpu=neon-fp16")
- + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
- + else() # AArch64: NEON needs no flag and is the default baseline
- + ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16")
- + ocv_update(CPU_NEON_FLAGS_ON "")
- + set(CPU_BASELINE "NEON" CACHE STRING "${HELP_CPU_BASELINE}")
- + endif()
- +endif()
- +
- +# Helper values for cmake-gui
- +set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") # no FORCE: presumably only takes effect when no cache entry exists yet - confirm
- +set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}")
- +set_property(CACHE CPU_BASELINE PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS}) # selectable values offered for the cache entry
- +set_property(CACHE CPU_DISPATCH PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS})
- +
- +set(CPU_BASELINE_FLAGS "") # accumulated compiler flags for the baseline
- +
- +set(CPU_BASELINE_FINAL "") # output lists, filled by the code below
- +set(CPU_DISPATCH_FINAL "")
- +
- +macro(ocv_check_compiler_optimization OPT) # decides CPU_<OPT>_SUPPORTED for one optimization
- + if(NOT DEFINED CPU_${OPT}_SUPPORTED) # a value that is already defined is left alone
- + if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE) # something to probe: a non-empty flag or a test source
- + set(_varname "") # will hold the NAME of the variable that receives the probe result
- + if(CPU_${OPT}_TEST_FILE)
- + set(__available 0)
- + if(CPU_BASELINE_DETECT) # detect mode: first try the test source with the current baseline flags only
- + set(_varname "HAVE_CPU_${OPT}_SUPPORT")
- + ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
- + if(${_varname}) # check passed without the extra flag -> recorded as part of the baseline
- + list(APPEND CPU_BASELINE_FINAL ${OPT})
- + set(__available 1)
- + endif()
- + endif()
- + if(NOT __available) # otherwise probe with this optimization's own flag(s)
- + if(NOT "x${CPU_${OPT}_FLAGS_NAME}" STREQUAL "x") # a flag name is registered: use it to build the result variable name
- + set(_varname "HAVE_CPU_${CPU_${OPT}_FLAGS_NAME}")
- + set(_compile_flags "${CPU_BASELINE_FLAGS}")
- + ocv_append_optimization_flag(_compile_flags ${OPT})
- + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
- + elseif(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") # plain flag: presumably ocv_check_flag_support picks the result name into _varname - confirm
- + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "" "${CPU_${OPT}_TEST_FILE}")
- + else() # no flag at all: compile the test source with the baseline flags
- + set(_varname "HAVE_CPU_${OPT}_SUPPORT")
- + set(_compile_flags "${CPU_BASELINE_FLAGS}")
- + ocv_append_optimization_flag(_compile_flags ${OPT})
- + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
- + endif()
- + endif()
- + else() # no test source: only the flag is checked
- + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "")
- + endif()
- + if(_varname AND ${_varname}) # a result variable was chosen and it holds a true value
- + set(CPU_${OPT}_SUPPORTED ON)
- + elseif(NOT CPU_${OPT}_SUPPORTED)
- + message(STATUS "${OPT} is not supported by C++ compiler")
- + endif()
- + else() # nothing to probe (no flag, no test source): treated as supported
- + set(CPU_${OPT}_SUPPORTED ON)
- + endif()
- + endif()
- +endmacro()
- +
- +foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) # per-optimization defaults
- + set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE) # reset the usage counter on every configure run
- + if(NOT DEFINED CPU_${OPT}_FORCE) # FORCE list defaults to the IMPLIES list unless set explicitly
- + set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}")
- + endif()
- +endforeach()
- +
- +if(_add_native_flag) # CPU_BASELINE contains NATIVE/HOST: add the compiler's "build for this host" flag to the baseline flags
- + set(_varname "HAVE_CPU_NATIVE_SUPPORT") # name of the variable that receives the check result
- + ocv_check_compiler_flag(CXX "-march=native" "${_varname}" "")
- + if(${_varname}) # test the check RESULT; if(_varname) tested the non-empty name and was always true
- + set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} -march=native")
- + else() # -march=native rejected: fall back to the Intel-style host flag
- + set(_varname "HAVE_CPU_HOST_SUPPORT")
- + if(MSVC)
- + set(_flag "/QxHost")
- + else()
- + set(_flag "-xHost")
- + endif()
- + ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "")
- + if(${_varname}) # same fix as above: dereference the result variable
- + set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} ${_flag}") # was ${flag}, which is never set, so nothing was appended
- + endif()
- + endif()
- +endif()
- +
- +foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) # sort every known optimization into CPU_BASELINE_FINAL / CPU_DISPATCH_FINAL
- + set(__is_disabled 0)
- + foreach(OPT2 ${CPU_BASELINE_DISABLE}) # OPT is disabled if it equals, or transitively implies, a disabled entry
- + ocv_is_optimization_in_list(__is_disabled ${OPT2} ${OPT})
- + if(__is_disabled)
- + break()
- + endif()
- + endforeach()
- + if(__is_disabled)
- + set(__is_from_baseline 0)
- + else() # baseline if a required/requested entry equals OPT or implies it
- + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE})
- + if(NOT __is_from_baseline)
- + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE})
- + endif()
- + endif()
- + ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH_REQUIRE}) # same test against the dispatch lists
- + if(NOT __is_from_dispatch)
- + ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH})
- + endif()
- + if(__is_from_dispatch OR __is_from_baseline OR CPU_BASELINE_DETECT) # probe the compiler only for optimizations that may be used
- + ocv_check_compiler_optimization(${OPT})
- + endif()
- + if(CPU_BASELINE_DETECT AND NOT __is_from_baseline AND NOT __is_disabled) # detect mode: baseline membership comes from what the probe appended
- + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_FINAL})
- + endif()
- + if(CPU_${OPT}_SUPPORTED)
- + if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline) # dispatched only when listed explicitly and not already baseline
- + list(APPEND CPU_DISPATCH_FINAL ${OPT})
- + elseif(__is_from_baseline AND NOT CPU_BASELINE_DETECT) # in detect mode ocv_check_compiler_optimization fills CPU_BASELINE_FINAL itself
- + list(APPEND CPU_BASELINE_FINAL ${OPT})
- + ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT})
- + endif()
- + endif()
- +endforeach()
- +
- +foreach(OPT ${CPU_BASELINE_REQUIRE}) # a required baseline entry missing from the final baseline is a configuration error
- + if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
- + message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})")
- + endif()
- +endforeach()
- +
- +foreach(OPT ${CPU_BASELINE}) # optional baseline entries that were dropped are only reported
- + if(OPT STREQUAL "DETECT" OR OPT STREQUAL "HOST" OR OPT STREQUAL "NATIVE")
- + # nothing
- + elseif(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
- + message(STATUS "Optimization ${OPT} is not available, skipped")
- + endif()
- +endforeach()
- +
- +foreach(OPT ${CPU_DISPATCH_REQUIRE}) # a required dispatch entry must end up in CPU_DISPATCH_FINAL; anything else is an error
- + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};")
- + # OK
- + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
- + message(SEND_ERROR "Dispatched optimization ${OPT} is in baseline list (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
- + else()
- + message(SEND_ERROR "Required dispatch optimization is not supported: ${OPT} (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
- + endif()
- +endforeach()
- +
- +foreach(OPT ${CPU_DISPATCH}) # optional dispatch entries: accepted if dispatched or already baseline, otherwise reported
- + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};")
- + # OK
- + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
- + # OK
- + else()
- + message(STATUS "Dispatch optimization ${OPT} is not available, skipped")
- + endif()
- +endforeach()
- +
- +#message(STATUS "CPU_BASELINE_FINAL=${CPU_BASELINE_FINAL}")
- +#message(STATUS "CPU_DISPATCH_FINAL=${CPU_DISPATCH_FINAL}")
- +
- +#if(CPU_DISPATCH_FINAL AND NOT PYTHON_DEFAULT_EXECUTABLE)
- +# message(FATAL_ERROR "Python is required for CPU dispatched optimization support")
- +#endif()
- +
- +macro(ocv_compiler_optimization_options) # verify and apply the baseline flags, then precompute flags/definitions for each dispatch mode
- + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${CPU_BASELINE_FLAGS}")
- + if(NOT __flags STREQUAL CACHED_CPU_BASELINE_FLAGS) # flags differ from the cached ones: drop the old probe result
- + set(CACHED_CPU_BASELINE_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
- + ocv_clear_vars(HAVE_CPU_BASELINE_FLAGS)
- + endif()
- + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_BASELINE_FLAGS)
- + if(NOT HAVE_CPU_BASELINE_FLAGS)
- + message(FATAL_ERROR "Compiler doesn't support baseline optimization flags: ${CPU_BASELINE_FLAGS}")
- + endif()
- + add_extra_compiler_option_force("${CPU_BASELINE_FLAGS}") # already verified as a whole just above
- +
- + foreach(OPT ${CPU_DISPATCH_FINAL}) # one set of flags, definitions and option lists per dispatch mode
- + set(__dispatch_flags "")
- + set(__dispatch_definitions "")
- + set(__dispatch_opts "")
- + set(__dispatch_opts_force "")
- + foreach(OPT2 ${CPU_KNOWN_OPTIMIZATIONS})
- + if(NOT CPU_${OPT2}_SUPPORTED)
- + #continue()
- + else()
- + ocv_is_optimization_in_list(__is_from_baseline ${OPT2} ${CPU_BASELINE_FINAL})
- + if(NOT __is_from_baseline) # baseline optimizations are not repeated in the dispatch lists
- + ocv_is_optimization_in_list(__is_active ${OPT2} ${OPT})
- + if(__is_active)
- + ocv_append_optimization_flag(__dispatch_flags ${OPT2})
- + list(APPEND __dispatch_definitions "CV_CPU_COMPILE_${OPT2}=1")
- + list(APPEND __dispatch_opts "${OPT2}")
- + endif()
- + ocv_is_optimization_in_force_list(__is_force ${OPT2} ${OPT})
- + if(__is_force)
- + list(APPEND __dispatch_opts_force "${OPT2}")
- + endif()
- + endif()
- + endif()
- + endforeach()
- + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${__dispatch_flags}")
- + if(NOT __flags STREQUAL CACHED_CPU_DISPATCH_${OPT}_FLAGS) # same cache invalidation as for the baseline flags
- + set(CACHED_CPU_DISPATCH_${OPT}_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
- + ocv_clear_vars(HAVE_CPU_DISPATCH_FLAGS_${OPT})
- + endif()
- + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_DISPATCH_FLAGS_${OPT})
- + if(NOT HAVE_CPU_DISPATCH_FLAGS_${OPT})
- + message(FATAL_ERROR "Compiler doesn't support optimization flags for ${OPT} dispatch mode: ${__dispatch_flags}")
- + endif()
- + set(CPU_DISPATCH_FLAGS_${OPT} "${__dispatch_flags}") # read back by ocv_compiler_optimization_process_sources
- + set(CPU_DISPATCH_DEFINITIONS_${OPT} "${__dispatch_definitions}")
- + set(CPU_DISPATCH_${OPT}_INCLUDED "${__dispatch_opts}")
- + set(CPU_DISPATCH_${OPT}_FORCED "${__dispatch_opts_force}")
- + endforeach()
- +
- + if(ENABLE_POWERPC)
- + add_extra_compiler_option("-mcpu=G3 -mtune=G5")
- + endif()
- + if(ARM)
- + add_extra_compiler_option("-mfp16-format=ieee")
- + endif(ARM)
- + if(ENABLE_NEON)
- + add_extra_compiler_option("-mfpu=neon")
- + endif()
- + if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
- + add_extra_compiler_option("-mfpu=vfpv3")
- + endif()
- +endmacro()
- +
- +macro(ocv_compiler_optimization_options_finalize) # floating-point mode options that depend on the flags chosen earlier
- + if(CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64))
- + if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4)
- + if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)")
- + add_extra_compiler_option(-mfpmath=sse) # important: keep floating-point behaviour in line with x64 compilers
- + else()
- + add_extra_compiler_option(-mfpmath=387)
- + endif()
- + endif()
- + endif()
- +
- + if(MSVC)
- + # Generate intrinsic functions (/Oi is now added unconditionally for MSVC)
- + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
- +
- + if((X86 OR X86_64) AND CMAKE_SIZEOF_VOID_P EQUAL 4 AND ";${CPU_BASELINE_FINAL};" MATCHES ";SSE;") # NOTE(review): the code removed from OpenCVCompilerOptions.cmake keyed this on ENABLE_SSE2; here SSE alone is enough - confirm
- + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # important: keep floating-point behaviour in line with x64 compilers
- + endif()
- + endif(MSVC)
- +endmacro()
- +
- +macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME TARGET_BASE_NAME) # filter *.opt_*.cpp sources by optimization and attach per-dispatch-mode compile settings; rewrites ${SOURCES_VAR_NAME}
- + set(__result "")
- + set(__result_libs "")
- + foreach(OPT ${CPU_DISPATCH_FINAL})
- + set(__result_${OPT} "")
- + endforeach()
- + foreach(fname ${${SOURCES_VAR_NAME}})
- + string(TOLOWER "${fname}" fname_LOWER)
- + if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$") # optimization-specific source; everything else is passed through unchanged
- + if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
- + message(STATUS "Excluding from source files list: ${fname}")
- + #continue()
- + else()
- + set(__opt_found 0)
- + foreach(OPT ${CPU_BASELINE_FINAL}) # file suffix names a baseline optimization: keep it as an ordinary source
- + string(TOLOWER "${OPT}" OPT_LOWER)
- + if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$")
- +#message("${fname} BASELINE-${OPT}")
- + set(__opt_found 1)
- + list(APPEND __result "${fname}")
- + break()
- + endif()
- + endforeach()
- + foreach(OPT ${CPU_DISPATCH_FINAL}) # otherwise assign it to the first dispatch mode whose FORCED list matches the suffix
- + foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED})
- + string(TOLOWER "${OPT2}" OPT2_LOWER)
- + if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$")
- + list(APPEND __result_${OPT} "${fname}")
- + math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1")
- + set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE)
- +#message("${fname} ${OPT}")
- +#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
- +#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
- +#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
- + set(__opt_found 1)
- + break()
- + endif()
- + endforeach()
- + if(__opt_found) # also true when the baseline loop above already claimed the file
- + set(__opt_found 1)
- + break()
- + endif()
- + endforeach()
- + if(NOT __opt_found)
- + message(STATUS "Excluding from source files list: ${fname}")
- + endif()
- + endif()
- + else()
- + list(APPEND __result "${fname}")
- + endif()
- + endforeach()
- +
- + foreach(OPT ${CPU_DISPATCH_FINAL}) # apply the dispatch-mode definitions and flags to the files collected above
- + if(__result_${OPT})
- +#message("${OPT}: ${__result_${OPT}}")
- + if(CMAKE_GENERATOR MATCHES "^Visual")
- + # extra flags are added before common flags, so switching between optimizations doesn't work correctly
- + # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required)
- + add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}})
- + ocv_append_dependant_targets(${TARGET_BASE_NAME} ${TARGET_BASE_NAME}_${OPT})
- + set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
- + set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
- + #list(APPEND __result_libs ${TARGET_BASE_NAME}_${OPT})
- + list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>")
- + else()
- + foreach(fname ${__result_${OPT}})
- + set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
- + set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
- + endforeach()
- + list(APPEND __result ${__result_${OPT}})
- + endif()
- + endif()
- + endforeach()
- + set(${SOURCES_VAR_NAME} "${__result}")
- + list(APPEND ${LIBS_VAR_NAME} ${__result_libs}) # __result_libs is never filled (its only append is commented out), so nothing is added
- +endmacro()
- +
- +macro(ocv_compiler_optimization_fill_cpu_config) # build the OPENCV_CPU_*_CONFIGMAKE texts and refresh cv_cpu_helper.h in the source tree
- + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "")
- + foreach(OPT ${CPU_BASELINE_FINAL})
- + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
- +#define CV_CPU_COMPILE_${OPT} 1
- +#define CV_CPU_BASELINE_COMPILE_${OPT} 1
- +")
- + endforeach()
- +
- + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
- +#define CV_CPU_BASELINE_FEATURES 0 \\")
- + foreach(OPT ${CPU_BASELINE_FINAL})
- + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
- + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
- + , CV_CPU_${OPT} \\")
- + endif()
- + endforeach()
- + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}\n")
- +
- + set(__dispatch_modes "")
- + foreach(OPT ${CPU_DISPATCH_FINAL})
- + list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT}) # NOTE(review): reads CPU_DISPATCH_<OPT>_FORCE, while ocv_compiler_optimization_options sets CPU_DISPATCH_<OPT>_FORCED - confirm which is intended
- + endforeach()
- + list(REMOVE_DUPLICATES __dispatch_modes)
- + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "")
- + foreach(OPT ${__dispatch_modes})
- + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
- +#define CV_CPU_DISPATCH_COMPILE_${OPT} 1")
- + endforeach()
- +
- + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n")
- + foreach(OPT ${CPU_ALL_OPTIMIZATIONS})
- + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
- + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT}
- +# define CV_CPU_HAS_SUPPORT_${OPT} 1
- +# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT}
- +# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT}))
- +# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_${OPT} 0
- +# define CV_CPU_CALL_${OPT}(...)
- +#endif
- +")
- + endif()
- + endforeach()
- +
- + set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
- + if(EXISTS "${__file}")
- + file(READ "${__file}" __content)
- + endif()
- + if(EXISTS "${__file}" AND __content STREQUAL OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE) # compare only content read just above, never a leftover __content from the caller's scope
- + #message(STATUS "${__file} contains same content")
- + else()
- + file(WRITE "${__file}" "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}") # rewritten only when the generated text differs
- + message(WARNING "${__file} is updated")
- + endif()
- +endmacro()
- +
- +if(NOT CV_DISABLE_OPTIMIZATION AND NOT CV_ICC) # default only: ocv_update keeps a value that is already defined
- + ocv_update(CV_ENABLE_UNROLLED 1)
- +else()
- + ocv_update(CV_ENABLE_UNROLLED 0)
- +endif()
- diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
- index 5bb0479..0eb68b6 100644
- --- a/cmake/OpenCVCompilerOptions.cmake
- +++ b/cmake/OpenCVCompilerOptions.cmake
- @@ -31,24 +31,21 @@ endif()
- if(MINGW OR (X86 AND UNIX AND NOT APPLE))
- # mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead
- if(CMAKE_COMPILER_IS_GNUCXX)
- - foreach(flags CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
- - string(REPLACE "-O3" "-O2" ${flags} "${${flags}}")
- - endforeach()
- - endif()
- -
- - if(CMAKE_COMPILER_IS_GNUCC)
- - foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
- + foreach(flags
- + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG
- + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
- string(REPLACE "-O3" "-O2" ${flags} "${${flags}}")
- endforeach()
- endif()
- endif()
-
- if(MSVC)
- - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
- - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT}")
- + string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS)
- + string(STRIP "${CMAKE_CXX_FLAGS_INIT}" CMAKE_CXX_FLAGS_INIT)
- if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT)
- # override cmake default exception handling option
- - string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
- + string(REPLACE "/EHsc" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
- + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE)
- endif()
- endif()
- @@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "")
- set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "")
-
- macro(add_extra_compiler_option option)
- - if(CMAKE_BUILD_TYPE)
- - set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
- - endif()
- ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}")
- if(${_varname})
- set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}")
- @@ -77,6 +71,12 @@ macro(add_extra_compiler_option option)
- endif()
- endmacro()
-
- +macro(add_extra_compiler_option_force option) # append option to both extra flag sets without probing compiler support
- + set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}")
- + set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} ${option}")
- +endmacro()
- +
- +
- # Gets environment variable and puts its value to the corresponding preprocessor definition
- # Useful for WINRT that has no access to environment variables
- macro(add_env_definitions option)
- @@ -102,7 +102,11 @@ if(MINGW)
- endif()
-
- if(CV_ICC AND NOT ENABLE_FAST_MATH)
- - add_extra_compiler_option("-fp-model precise")
- + if(MSVC)
- + add_extra_compiler_option("/fp:precise")
- + else()
- + add_extra_compiler_option("-fp-model precise")
- + endif()
- endif()
-
- if(CMAKE_COMPILER_IS_GNUCXX)
- @@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
- endif()
-
- # We need pthread's
- - if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX))
- + if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) # TODO
- add_extra_compiler_option(-pthread)
- endif()
-
- @@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
- if(ENABLE_FAST_MATH)
- add_extra_compiler_option(-ffast-math)
- endif()
- - if(ENABLE_POWERPC)
- - add_extra_compiler_option("-mcpu=G3 -mtune=G5")
- - endif()
- - if(ENABLE_SSE)
- - add_extra_compiler_option(-msse)
- - endif()
- - if(ENABLE_SSE2)
- - add_extra_compiler_option(-msse2)
- - elseif(X86 OR X86_64)
- - add_extra_compiler_option(-mno-sse2)
- - endif()
- - if(ARM)
- - add_extra_compiler_option("-mfp16-format=ieee")
- - endif(ARM)
- - if(ENABLE_NEON)
- - add_extra_compiler_option("-mfpu=neon")
- - endif()
- - if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
- - add_extra_compiler_option("-mfpu=vfpv3")
- - endif()
- -
- - # SSE3 and further should be disabled under MingW because it generates compiler errors
- - if(NOT MINGW)
- - if(ENABLE_AVX)
- - add_extra_compiler_option(-mavx)
- - elseif(X86 OR X86_64)
- - add_extra_compiler_option(-mno-avx)
- - endif()
- - if(ENABLE_AVX2)
- - add_extra_compiler_option(-mavx2)
- -
- - if(ENABLE_FMA3)
- - add_extra_compiler_option(-mfma)
- - endif()
- - endif()
- -
- - # GCC depresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalence for all SSEx instructions when needed.
- - if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx")
- - if(ENABLE_SSE3)
- - add_extra_compiler_option(-msse3)
- - elseif(X86 OR X86_64)
- - add_extra_compiler_option(-mno-sse3)
- - endif()
- -
- - if(ENABLE_SSSE3)
- - add_extra_compiler_option(-mssse3)
- - elseif(X86 OR X86_64)
- - add_extra_compiler_option(-mno-ssse3)
- - endif()
- -
- - if(ENABLE_SSE41)
- - add_extra_compiler_option(-msse4.1)
- - elseif(X86 OR X86_64)
- - add_extra_compiler_option(-mno-sse4.1)
- - endif()
- -
- - if(ENABLE_SSE42)
- - add_extra_compiler_option(-msse4.2)
- - elseif(X86 OR X86_64)
- - add_extra_compiler_option(-mno-sse4.2)
- - endif()
- -
- - if(ENABLE_POPCNT)
- - add_extra_compiler_option(-mpopcnt)
- - endif()
- - endif()
- - endif(NOT MINGW)
- -
- - if(X86 OR X86_64)
- - if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4)
- - if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)")
- - add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers
- - else()
- - add_extra_compiler_option(-mfpmath=387)
- - endif()
- - endif()
- - endif()
-
- # Profiling?
- if(ENABLE_PROFILING)
- @@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
- string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}")
- string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}")
- endforeach()
- - elseif(NOT APPLE AND NOT ANDROID)
- + elseif(NOT ((IOS OR ANDROID) AND NOT BUILD_SHARED_LIBS))
- # Remove unreferenced functions: function level linking
- add_extra_compiler_option(-ffunction-sections)
- endif()
- @@ -296,41 +223,6 @@ if(MSVC)
- set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi")
- endif()
-
- - if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800)
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2")
- - endif()
- - if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX")
- - endif()
- -
- - if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1")
- - endif()
- -
- - if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3")
- - endif()
- -
- - if(NOT MSVC64)
- - # 64-bit MSVC compiler uses SSE/SSE2 by default
- - if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2")
- - endif()
- - if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE")
- - endif()
- - endif()
- -
- - if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX OR ENABLE_AVX2)
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
- - endif()
- -
- - if(X86 OR X86_64)
- - if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2)
- - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
- - endif()
- - endif()
- -
- if(OPENCV_WARNINGS_ARE_ERRORS)
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX")
- endif()
- @@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID)
- set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}")
- endif()
-
- +include(cmake/OpenCVCompilerOptimizations.cmake)
- +
- +if(COMMAND ocv_compiler_optimization_options)
- + ocv_compiler_optimization_options()
- +endif()
- +
- +if(COMMAND ocv_compiler_optimization_options_finalize)
- + ocv_compiler_optimization_options_finalize()
- +endif()
- +
- # Add user supplied extra options (optimization, etc...)
- # ==========================================================
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options")
- @@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
- add_extra_compiler_option(-fvisibility-inlines-hidden)
- endif()
-
- +# TODO !!!!!
- if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
- if(ARM AND ENABLE_NEON)
- set(FP16_OPTION "-mfpu=neon-fp16")
- @@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
- endif()
- try_compile(__VALID_FP16
- "${OpenCV_BINARY_DIR}"
- - "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
- + "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp"
- COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
- OUTPUT_VARIABLE TRY_OUT
- )
- diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake
- index 2988979..477b910 100644
- --- a/cmake/OpenCVGenHeaders.cmake
- +++ b/cmake/OpenCVGenHeaders.cmake
- @@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO
- configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h")
- install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev)
-
- +# platform-specific config file
- +ocv_compiler_optimization_fill_cpu_config()
- +configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cv_cpu_config.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cv_cpu_config.h")
- +
- # ----------------------------------------------------------------------------
- # opencv_modules.hpp based on actual modules list
- # ----------------------------------------------------------------------------
- diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
- index 742a287..3e98bf5 100644
- --- a/cmake/OpenCVModule.cmake
- +++ b/cmake/OpenCVModule.cmake
- @@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD
- unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE)
- unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE)
- unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE)
- + unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE)
- endforeach()
-
- # clean modules info which needs to be recalculated
- @@ -641,6 +642,8 @@ macro(ocv_set_module_sources)
- # use full paths for module to be independent from the module location
- ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)
-
- + ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
- +
- set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
- set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
- endmacro()
- diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake
- index 90437cb..45968e7 100644
- --- a/cmake/OpenCVPCHSupport.cmake
- +++ b/cmake/OpenCVPCHSupport.cmake
- @@ -326,7 +326,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
-
- get_target_property(_sources ${_targetName} SOURCES)
- foreach(src ${_sources})
- - if(NOT "${src}" MATCHES "\\.mm$")
- + if(NOT "${src}" MATCHES "\\.mm$"
- + AND NOT "${src}" MATCHES "\\.h$" AND NOT "${src}" MATCHES "\\.hpp$" # header files
- + AND NOT "${src}" MATCHES "^\$" # CMake generator expressions
- + )
- get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
- if(NOT oldProps)
- set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"")
- diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
- index cdf257d..8a5ee28 100644
- --- a/cmake/OpenCVUtils.cmake
- +++ b/cmake/OpenCVUtils.cmake
- @@ -37,7 +37,11 @@ endmacro()
-
- macro(ocv_update VAR)
- if(NOT DEFINED ${VAR})
- - set(${VAR} ${ARGN})
- + if("x${ARGN}" STREQUAL "x")
- + set(${VAR} "")
- + else()
- + set(${VAR} ${ARGN})
- + endif()
- else()
- #ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}")
- endif()
- @@ -151,8 +155,15 @@ function(ocv_append_target_property target prop)
- endif()
- endfunction()
-
- +function(ocv_append_dependant_targets target) # record the targets in ARGN as dependants of <target> in a cache list
- + #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})")
- + _ocv_fix_target(target)
- + set(OPENCV_DEPENDANT_TARGETS_${target} "${OPENCV_DEPENDANT_TARGETS_${target}};${ARGN}" CACHE INTERNAL "" FORCE) # the first call leaves a leading empty element; the unquoted foreach() readers drop it
- +endfunction()
- +
- # adds include directories in such way that directories from the OpenCV source tree go first
- function(ocv_target_include_directories target)
- + #ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})")
- _ocv_fix_target(target)
- set(__params "")
- if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND
- @@ -173,6 +184,11 @@ function(ocv_target_include_directories target)
- else()
- if(TARGET ${target})
- target_include_directories(${target} PRIVATE ${__params})
- + if(OPENCV_DEPENDANT_TARGETS_${target})
- + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
- + target_include_directories(${t} PRIVATE ${__params})
- + endforeach()
- + endif()
- else()
- set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}")
- set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "")
- @@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX
- )
-
- MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
- + set(_fname "${ARGN}")
- if(NOT DEFINED ${RESULT})
- - if("_${LANG}_" MATCHES "_CXX_")
- + if(_fname)
- + # nothing
- + elseif("_${LANG}_" MATCHES "_CXX_")
- set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx")
- if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ")
- FILE(WRITE "${_fname}" "int main() { return 0; }\n")
- @@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
- unset(_fname)
- endif()
- if(_fname)
- - MESSAGE(STATUS "Performing Test ${RESULT}")
- + if(NOT "x${ARGN}" STREQUAL "x")
- + file(RELATIVE_PATH __msg "${CMAKE_SOURCE_DIR}" "${ARGN}")
- + set(__msg " (check file: ${__msg})")
- + else()
- + set(__msg "")
- + endif()
- + MESSAGE(STATUS "Performing Test ${RESULT}${__msg}")
- TRY_COMPILE(${RESULT}
- "${CMAKE_BINARY_DIR}"
- "${_fname}"
- @@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
- endif()
- ENDMACRO()
-
- -macro(ocv_check_flag_support lang flag varname)
- +macro(ocv_check_flag_support lang flag varname base_options)
- + if(CMAKE_BUILD_TYPE)
- + set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
- + endif()
- +
- if("_${lang}_" MATCHES "_CXX_")
- set(_lang CXX)
- elseif("_${lang}_" MATCHES "_C_")
- @@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname)
- string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}")
- string(REGEX REPLACE " -|-|=| |\\." "_" ${varname} "${${varname}}")
-
- - ocv_check_compiler_flag("${_lang}" "${ARGN} ${flag}" ${${varname}})
- + ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN})
- endmacro()
-
- # turns off warnings
- @@ -327,7 +356,7 @@ macro(ocv_warnings_disable)
- string(REPLACE "${warning}" "" ${var} "${${var}}")
- string(REPLACE "-W" "-Wno-" warning "${warning}")
- endif()
- - ocv_check_flag_support(${var} "${warning}" _varname)
- + ocv_check_flag_support(${var} "${warning}" _varname "")
- if(${_varname})
- set(${var} "${${var}} ${warning}")
- endif()
- @@ -342,7 +371,7 @@ macro(ocv_warnings_disable)
- else()
- string(REPLACE "-wd" "-Qwd" warning "${warning}")
- endif()
- - ocv_check_flag_support(${var} "${warning}" _varname)
- + ocv_check_flag_support(${var} "${warning}" _varname "")
- if(${_varname})
- set(${var} "${${var}} ${warning}")
- endif()
- @@ -357,7 +386,7 @@ macro(ocv_warnings_disable)
- endmacro()
-
- macro(add_apple_compiler_options the_module)
- - ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS)
- + ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS "")
- if(HAVE_OBJC_EXCEPTIONS)
- foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
- if("${source}" MATCHES "\\.mm$")
- @@ -892,6 +921,11 @@ function(_ocv_append_target_includes target)
- if (TARGET ${target}_object)
- target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
- endif()
- + if(OPENCV_DEPENDANT_TARGETS_${target})
- + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
- + target_include_directories(${t} PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
- + endforeach()
- + endif()
- unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE)
- endif()
- endfunction()
- diff --git a/cmake/checks/cpu_avx.cpp b/cmake/checks/cpu_avx.cpp
- new file mode 100644
- index 0000000..05536f4
- --- /dev/null
- +++ b/cmake/checks/cpu_avx.cpp
- @@ -0,0 +1,9 @@
- +#if !defined __AVX__ // MSVC supports this flag since MSVS 2013
- +#error "__AVX__ define is missing"
- +#endif
- +#include <immintrin.h>
- +void test() // never called: the check only needs this to compile
- +{
- + __m256 a = _mm256_set1_ps(0.0f); // uses a 256-bit AVX type and intrinsic
- +}
- +int main() { return 0; }
- diff --git a/cmake/checks/cpu_avx2.cpp b/cmake/checks/cpu_avx2.cpp
- new file mode 100644
- index 0000000..3ab1143
- --- /dev/null
- +++ b/cmake/checks/cpu_avx2.cpp
- @@ -0,0 +1,10 @@
- +#if !defined __AVX2__ // MSVC supports this flag since MSVS 2013
- +#error "__AVX2__ define is missing"
- +#endif
- +#include <immintrin.h>
- +void test() // never called: the check only needs this to compile
- +{
- + int data[8] = {0,0,0,0, 0,0,0,0};
- + __m256i a = _mm256_loadu_si256((const __m256i *)data); // unaligned 256-bit integer load
- +}
- +int main() { return 0; }
- diff --git a/cmake/checks/cpu_avx512.cpp b/cmake/checks/cpu_avx512.cpp
- new file mode 100644
- index 0000000..d0898ab
- --- /dev/null
- +++ b/cmake/checks/cpu_avx512.cpp
- @@ -0,0 +1,10 @@
- +#if defined __AVX512__ || defined __AVX512F__ // either compiler macro enables the check
- +#include <immintrin.h>
- +void test() // never called: the check only needs this to compile
- +{
- + __m512i zmm = _mm512_setzero_si512();
- +}
- +#else
- +#error "AVX512 is not supported"
- +#endif
- +int main() { return 0; }
- diff --git a/cmake/checks/cpu_fp16.cpp b/cmake/checks/cpu_fp16.cpp
- new file mode 100644
- index 0000000..6951f1c
- --- /dev/null
- +++ b/cmake/checks/cpu_fp16.cpp
- @@ -0,0 +1,33 @@
- +#include <stdio.h>
- +
- +#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__)
- +#include <immintrin.h>
- +int test() // x86 path: float -> half conversion through _mm_cvtps_ph
- +{
- + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- + short dst[8];
- + __m128 v_src = _mm_loadu_ps(src); // unaligned load: src has no 16-byte alignment guarantee
- + __m128i v_dst = _mm_cvtps_ph(v_src, 0);
- + _mm_storel_epi64((__m128i*)dst, v_dst);
- + return (int)dst[0];
- +}
- +#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
- +#include "arm_neon.h"
- +int test() // ARM path: float -> half conversion through vcvt_f16_f32
- +{
- + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- + short dst[8];
- + float32x4_t v_src = vld1q_f32(src); // NEON load instead of dereferencing a const-cast vector pointer
- + float16x4_t v_dst = vcvt_f16_f32(v_src);
- + *(float16x4_t*)dst = v_dst;
- + return (int)dst[0];
- +}
- +#else
- +#error "FP16 is not supported"
- +#endif
- +
- +int main()
- +{
- + printf("%d\n", test()); // calls test() so the conversion code is referenced from main
- + return 0;
- +}
- diff --git a/cmake/checks/cpu_popcnt.cpp b/cmake/checks/cpu_popcnt.cpp
- new file mode 100644
- index 0000000..f55c9f3
- --- /dev/null
- +++ b/cmake/checks/cpu_popcnt.cpp
- @@ -0,0 +1,8 @@
- +#include <nmmintrin.h>
- +#ifndef _MSC_VER
- +#include <popcntintrin.h>
- +#endif
- +int main() {
- + int i = _mm_popcnt_u32(1); // 32-bit form: _mm_popcnt_u64 is only available on 64-bit targets and would fail this check on 32-bit builds
- + return 0;
- +}
- diff --git a/cmake/checks/cpu_sse.cpp b/cmake/checks/cpu_sse.cpp
- new file mode 100644
- index 0000000..c6269ac
- --- /dev/null
- +++ b/cmake/checks/cpu_sse.cpp
- @@ -0,0 +1,2 @@
- +#include <xmmintrin.h> // the check passes when this intrinsics header compiles
- +int main() { return 0; }
- diff --git a/cmake/checks/cpu_sse2.cpp b/cmake/checks/cpu_sse2.cpp
- new file mode 100644
- index 0000000..68a69f8
- --- /dev/null
- +++ b/cmake/checks/cpu_sse2.cpp
- @@ -0,0 +1,2 @@
- +#include <emmintrin.h> // the check passes when this intrinsics header compiles
- +int main() { return 0; }
- diff --git a/cmake/checks/cpu_sse3.cpp b/cmake/checks/cpu_sse3.cpp
- new file mode 100644
- index 0000000..98ce219
- --- /dev/null
- +++ b/cmake/checks/cpu_sse3.cpp
- @@ -0,0 +1,7 @@
- +#include <pmmintrin.h>
- +int main() {
- + __m128 u, v;
- + u = _mm_set1_ps(0.0f);
- + v = _mm_moveldup_ps(u); // SSE3 intrinsic: the check fails to compile if it is unavailable
- + return 0;
- +}
- diff --git a/cmake/checks/cpu_sse41.cpp b/cmake/checks/cpu_sse41.cpp
- new file mode 100644
- index 0000000..ddd835b
- --- /dev/null
- +++ b/cmake/checks/cpu_sse41.cpp
- @@ -0,0 +1,6 @@
- +#include <smmintrin.h>
- +int main() {
- + __m128i a = _mm_setzero_si128(), b = _mm_setzero_si128();
- + __m128i c = _mm_packus_epi32(a, b); // the intrinsic this check depends on
- + return 0;
- +}
- diff --git a/cmake/checks/cpu_sse42.cpp b/cmake/checks/cpu_sse42.cpp
- new file mode 100644
- index 0000000..56f5665
- --- /dev/null
- +++ b/cmake/checks/cpu_sse42.cpp
- @@ -0,0 +1,5 @@
- +#include <nmmintrin.h>
- +int main() {
- + unsigned int i = _mm_crc32_u8(1, 2); // CRC32 is an SSE4.2 instruction; POPCNT is a separate feature with its own check and its 64-bit intrinsic fails on 32-bit targets
- + return 0;
- +}
- diff --git a/cmake/checks/cpu_ssse3.cpp b/cmake/checks/cpu_ssse3.cpp
- new file mode 100644
- index 0000000..e583199
- --- /dev/null
- +++ b/cmake/checks/cpu_ssse3.cpp
- @@ -0,0 +1,7 @@
- +#include <tmmintrin.h>
- +const double v = 0;
- +int main() {
- + __m128i a = _mm_setzero_si128();
- + __m128i b = _mm_abs_epi32(a);
- + return 0;
- +}
- diff --git a/cmake/checks/fp16.cpp b/cmake/checks/fp16.cpp
- deleted file mode 100644
- index c77c844..0000000
- --- a/cmake/checks/fp16.cpp
- +++ /dev/null
- @@ -1,33 +0,0 @@
- -#include <stdio.h>
- -
- -#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)
- -#include <immintrin.h>
- -int test()
- -{
- - const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- - short dst[8];
- - __m128 v_src = _mm_load_ps(src);
- - __m128i v_dst = _mm_cvtps_ph(v_src, 0);
- - _mm_storel_epi64((__m128i*)dst, v_dst);
- - return (int)dst[0];
- -}
- -#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
- -#include "arm_neon.h"
- -int test()
- -{
- - const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- - short dst[8];
- - float32x4_t v_src = *(float32x4_t*)src;
- - float16x4_t v_dst = vcvt_f16_f32(v_src);
- - *(float16x4_t*)dst = v_dst;
- - return (int)dst[0];
- -}
- -#else
- -#error "FP16 is not supported"
- -#endif
- -
- -int main()
- -{
- - printf("%d\n", test());
- - return 0;
- -}
- diff --git a/cmake/templates/cv_cpu_config.h.in b/cmake/templates/cv_cpu_config.h.in
- new file mode 100644
- index 0000000..27b2731
- --- /dev/null
- +++ b/cmake/templates/cv_cpu_config.h.in
- @@ -0,0 +1,5 @@
- +// OpenCV CPU baseline features
- +@OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE@
- +
- +// OpenCV supported CPU dispatched features
- +@OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE@
- diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in
- index 05add9e..658d12c 100644
- --- a/cmake/templates/cvconfig.h.in
- +++ b/cmake/templates/cvconfig.h.in
- @@ -1,6 +1,15 @@
- +#ifndef OPENCV_CVCONFIG_H_INCLUDED
- +#define OPENCV_CVCONFIG_H_INCLUDED
- +
- /* OpenCV compiled as static or dynamic libs */
- #cmakedefine BUILD_SHARED_LIBS
-
- +/* OpenCV intrinsics optimized code */
- +#cmakedefine CV_ENABLE_INTRINSICS
- +
- +/* OpenCV additional optimized code */
- +#cmakedefine CV_DISABLE_OPTIMIZATION
- +
- /* Compile for 'real' NVIDIA GPU architectures */
- #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
-
- @@ -206,3 +215,7 @@
-
- /* OpenVX */
- #cmakedefine HAVE_OPENVX
- +
- +
- +
- +#endif // OPENCV_CVCONFIG_H_INCLUDED
- diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
- new file mode 100644
- index 0000000..9a8537f
- --- /dev/null
- +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
- @@ -0,0 +1,166 @@
- +// This file is part of OpenCV project.
- +// It is subject to the license terms in the LICENSE file found in the top-level directory
- +// of this distribution and at http://opencv.org/license.html.
- +
- +#if defined __OPENCV_BUILD \
- +
- +#include "cv_cpu_config.h"
- +#include "cv_cpu_helper.h"
- +
- +#if defined CV_ENABLE_INTRINSICS \
- + && !defined CV_DISABLE_OPTIMIZATION \
- + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
- +
- +#ifdef CV_CPU_COMPILE_SSE2
- +# include <emmintrin.h>
- +# define CV_MMX 1
- +# define CV_SSE 1
- +# define CV_SSE2 1
- +#endif
- +#ifdef CV_CPU_COMPILE_SSE3
- +# include <pmmintrin.h>
- +# define CV_SSE3 1
- +#endif
- +#ifdef CV_CPU_COMPILE_SSSE3
- +# include <tmmintrin.h>
- +# define CV_SSSE3 1
- +#endif
- +#ifdef CV_CPU_COMPILE_SSE4_1
- +# include <smmintrin.h>
- +# define CV_SSE4_1 1
- +#endif
- +#ifdef CV_CPU_COMPILE_SSE4_2
- +# include <nmmintrin.h>
- +# define CV_SSE4_2 1
- +#endif
- +#ifdef CV_CPU_COMPILE_POPCNT
- +# ifdef _MSC_VER
- +# include <nmmintrin.h>
- +# if defined(_M_X64)
- +# define CV_POPCNT_U64 _mm_popcnt_u64
- +# endif
- +# define CV_POPCNT_U32 _mm_popcnt_u32
- +# else
- +# include <popcntintrin.h>
- +# if defined(__x86_64__)
- +# define CV_POPCNT_U64 __builtin_popcountll
- +# endif
- +# define CV_POPCNT_U32 __builtin_popcount
- +# endif
- +# define CV_POPCNT 1
- +#endif
- +#ifdef CV_CPU_COMPILE_AVX
- +# include <immintrin.h>
- +# define CV_AVX 1
- +#endif
- +#ifdef CV_CPU_COMPILE_AVX2
- +# include <immintrin.h>
- +# define CV_AVX2 1
- +#endif
- +#ifdef CV_CPU_COMPILE_FMA3
- +# define CV_FMA3 1
- +#endif
- +
- +#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
- +# include <Intrin.h>
- +# include <arm_neon.h>
- +# define CV_NEON 1
- +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
- +# include <arm_neon.h>
- +# define CV_NEON 1
- +#endif
- +
- +#if defined(__ARM_NEON__) || defined(__aarch64__)
- +# include <arm_neon.h>
- +#endif
- +
- +#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
- +
- +#endif // __OPENCV_BUILD
- +
- +
- +
- +#if !defined __OPENCV_BUILD // Compatibility code
- +
- +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
- +# include <emmintrin.h>
- +# define CV_MMX 1
- +# define CV_SSE 1
- +# define CV_SSE2 1
- +#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM)
- +# include <Intrin.h>
- +# include <arm_neon.h>
- +# define CV_NEON 1
- +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
- +# include <arm_neon.h>
- +# define CV_NEON 1
- +#endif
- +
- +#endif // !__OPENCV_BUILD (Compatibility code)
- +
- +
- +
- +#ifndef CV_MMX
- +# define CV_MMX 0
- +#endif
- +#ifndef CV_SSE
- +# define CV_SSE 0
- +#endif
- +#ifndef CV_SSE2
- +# define CV_SSE2 0
- +#endif
- +#ifndef CV_SSE3
- +# define CV_SSE3 0
- +#endif
- +#ifndef CV_SSSE3
- +# define CV_SSSE3 0
- +#endif
- +#ifndef CV_SSE4_1
- +# define CV_SSE4_1 0
- +#endif
- +#ifndef CV_SSE4_2
- +# define CV_SSE4_2 0
- +#endif
- +#ifndef CV_POPCNT
- +# define CV_POPCNT 0
- +#endif
- +#ifndef CV_AVX
- +# define CV_AVX 0
- +#endif
- +#ifndef CV_AVX2
- +# define CV_AVX2 0
- +#endif
- +#ifndef CV_FMA3
- +# define CV_FMA3 0
- +#endif
- +#ifndef CV_AVX_512F
- +# define CV_AVX_512F 0
- +#endif
- +#ifndef CV_AVX_512BW
- +# define CV_AVX_512BW 0
- +#endif
- +#ifndef CV_AVX_512CD
- +# define CV_AVX_512CD 0
- +#endif
- +#ifndef CV_AVX_512DQ
- +# define CV_AVX_512DQ 0
- +#endif
- +#ifndef CV_AVX_512ER
- +# define CV_AVX_512ER 0
- +#endif
- +#ifndef CV_AVX_512IFMA512
- +# define CV_AVX_512IFMA512 0
- +#endif
- +#ifndef CV_AVX_512PF
- +# define CV_AVX_512PF 0
- +#endif
- +#ifndef CV_AVX_512VBMI
- +# define CV_AVX_512VBMI 0
- +#endif
- +#ifndef CV_AVX_512VL
- +# define CV_AVX_512VL 0
- +#endif
- +
- +#ifndef CV_NEON
- +# define CV_NEON 0
- +#endif
- diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h
- new file mode 100644
- index 0000000..cb755d6
- --- /dev/null
- +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h
- @@ -0,0 +1,133 @@
- +// AUTOGENERATED, DO NOT EDIT
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
- +# define CV_CPU_HAS_SUPPORT_SSE 1
- +# define CV_CPU_CALL_SSE(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
- +# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
- +# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_SSE 0
- +# define CV_CPU_CALL_SSE(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
- +# define CV_CPU_HAS_SUPPORT_SSE2 1
- +# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
- +# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
- +# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_SSE2 0
- +# define CV_CPU_CALL_SSE2(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
- +# define CV_CPU_HAS_SUPPORT_SSE3 1
- +# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
- +# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
- +# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_SSE3 0
- +# define CV_CPU_CALL_SSE3(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
- +# define CV_CPU_HAS_SUPPORT_SSSE3 1
- +# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
- +# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
- +# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_SSSE3 0
- +# define CV_CPU_CALL_SSSE3(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
- +# define CV_CPU_HAS_SUPPORT_SSE4_1 1
- +# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
- +# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
- +# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_SSE4_1 0
- +# define CV_CPU_CALL_SSE4_1(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
- +# define CV_CPU_HAS_SUPPORT_SSE4_2 1
- +# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
- +# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
- +# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_SSE4_2 0
- +# define CV_CPU_CALL_SSE4_2(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
- +# define CV_CPU_HAS_SUPPORT_POPCNT 1
- +# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
- +# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
- +# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_POPCNT 0
- +# define CV_CPU_CALL_POPCNT(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
- +# define CV_CPU_HAS_SUPPORT_AVX 1
- +# define CV_CPU_CALL_AVX(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
- +# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
- +# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_AVX 0
- +# define CV_CPU_CALL_AVX(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
- +# define CV_CPU_HAS_SUPPORT_FP16 1
- +# define CV_CPU_CALL_FP16(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
- +# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
- +# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_FP16 0
- +# define CV_CPU_CALL_FP16(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
- +# define CV_CPU_HAS_SUPPORT_AVX2 1
- +# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
- +# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
- +# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_AVX2 0
- +# define CV_CPU_CALL_AVX2(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
- +# define CV_CPU_HAS_SUPPORT_FMA3 1
- +# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
- +# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
- +# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_FMA3 0
- +# define CV_CPU_CALL_FMA3(...)
- +#endif
- +
- +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
- +# define CV_CPU_HAS_SUPPORT_NEON 1
- +# define CV_CPU_CALL_NEON(...) return __VA_ARGS__
- +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
- +# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
- +# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
- +#else
- +# define CV_CPU_HAS_SUPPORT_NEON 0
- +# define CV_CPU_CALL_NEON(...)
- +#endif
- diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
- index 699b166..0a46e02 100644
- --- a/modules/core/include/opencv2/core/cvdef.h
- +++ b/modules/core/include/opencv2/core/cvdef.h
- @@ -48,6 +48,10 @@
- //! @addtogroup core_utils
- //! @{
-
- +#ifdef __OPENCV_BUILD
- +#include "cvconfig.h"
- +#endif
- +
- #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
- # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
- #endif
- @@ -59,10 +63,6 @@
- #undef abs
- #undef Complex
-
- -#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
- -# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
- -#endif
- -
- #include <limits.h>
- #include "opencv2/core/hal/interface.h"
-
- @@ -88,7 +88,7 @@
- # endif
- #endif
-
- -#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
- +#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
- # define CV_ENABLE_UNROLLED 0
- #else
- # define CV_ENABLE_UNROLLED 1
- @@ -161,142 +161,9 @@ enum CpuFeatures {
- CPU_NEON = 100
- };
-
- -// do not include SSE/AVX/NEON headers for NVCC compiler
- -#ifndef __CUDACC__
- -
- -#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
- -# include <emmintrin.h>
- -# define CV_MMX 1
- -# define CV_SSE 1
- -# define CV_SSE2 1
- -# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
- -# include <pmmintrin.h>
- -# define CV_SSE3 1
- -# endif
- -# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
- -# include <tmmintrin.h>
- -# define CV_SSSE3 1
- -# endif
- -# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
- -# include <smmintrin.h>
- -# define CV_SSE4_1 1
- -# endif
- -# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
- -# include <nmmintrin.h>
- -# define CV_SSE4_2 1
- -# endif
- -# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
- -# ifdef _MSC_VER
- -# include <nmmintrin.h>
- -# else
- -# include <popcntintrin.h>
- -# endif
- -# define CV_POPCNT 1
- -# endif
- -# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
- -// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
- -// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
- -# include <immintrin.h>
- -# define CV_AVX 1
- -# if defined(_XCR_XFEATURE_ENABLED_MASK)
- -# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
- -# else
- -# define __xgetbv() 0
- -# endif
- -# endif
- -# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
- -# include <immintrin.h>
- -# define CV_AVX2 1
- -# if defined __FMA__
- -# define CV_FMA3 1
- -# endif
- -# endif
- -#endif
- -
- -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
- -# include <Intrin.h>
- -# include <arm_neon.h>
- -# define CV_NEON 1
- -# define CPU_HAS_NEON_FEATURE (true)
- -#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
- -# include <arm_neon.h>
- -# define CV_NEON 1
- -#endif
- -
- -#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
- -# define CV_VFP 1
- -#endif
- -
- -#endif // __CUDACC__
- -
- -#ifndef CV_POPCNT
- -#define CV_POPCNT 0
- -#endif
- -#ifndef CV_MMX
- -# define CV_MMX 0
- -#endif
- -#ifndef CV_SSE
- -# define CV_SSE 0
- -#endif
- -#ifndef CV_SSE2
- -# define CV_SSE2 0
- -#endif
- -#ifndef CV_SSE3
- -# define CV_SSE3 0
- -#endif
- -#ifndef CV_SSSE3
- -# define CV_SSSE3 0
- -#endif
- -#ifndef CV_SSE4_1
- -# define CV_SSE4_1 0
- -#endif
- -#ifndef CV_SSE4_2
- -# define CV_SSE4_2 0
- -#endif
- -#ifndef CV_AVX
- -# define CV_AVX 0
- -#endif
- -#ifndef CV_AVX2
- -# define CV_AVX2 0
- -#endif
- -#ifndef CV_FMA3
- -# define CV_FMA3 0
- -#endif
- -#ifndef CV_AVX_512F
- -# define CV_AVX_512F 0
- -#endif
- -#ifndef CV_AVX_512BW
- -# define CV_AVX_512BW 0
- -#endif
- -#ifndef CV_AVX_512CD
- -# define CV_AVX_512CD 0
- -#endif
- -#ifndef CV_AVX_512DQ
- -# define CV_AVX_512DQ 0
- -#endif
- -#ifndef CV_AVX_512ER
- -# define CV_AVX_512ER 0
- -#endif
- -#ifndef CV_AVX_512IFMA512
- -# define CV_AVX_512IFMA512 0
- -#endif
- -#ifndef CV_AVX_512PF
- -# define CV_AVX_512PF 0
- -#endif
- -#ifndef CV_AVX_512VBMI
- -# define CV_AVX_512VBMI 0
- -#endif
- -#ifndef CV_AVX_512VL
- -# define CV_AVX_512VL 0
- -#endif
-
- -#ifndef CV_NEON
- -# define CV_NEON 0
- -#endif
- +#include "cv_cpu_dispatch.h"
-
- -#ifndef CV_VFP
- -# define CV_VFP 0
- -#endif
-
- /* fundamental constants */
- #define CV_PI 3.1415926535897932384626433832795
- diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp
- index c76936a..31c1062 100644
- --- a/modules/core/include/opencv2/core/fast_math.hpp
- +++ b/modules/core/include/opencv2/core/fast_math.hpp
- @@ -47,6 +47,12 @@
-
- #include "opencv2/core/cvdef.h"
-
- +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
- + && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- +#include <emmintrin.h>
- +#endif
- +
- +
- //! @addtogroup core_utils
- //! @{
-
- @@ -66,7 +72,7 @@
- # include "tegra_round.hpp"
- #endif
-
- -#if CV_VFP
- +#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
- // 1. general scheme
- #define ARM_ROUND(_value, _asm_string) \
- int res; \
- @@ -82,7 +88,7 @@
- #endif
- // 3. version for float
- #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
- -#endif // CV_VFP
- +#endif
-
- /** @brief Rounds floating-point number to the nearest integer
-
- @@ -93,7 +99,7 @@ CV_INLINE int
- cvRound( double value )
- {
- #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
- - && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
- __m128d t = _mm_set_sd( value );
- return _mm_cvtsd_si32(t);
- #elif defined _MSC_VER && defined _M_IX86
- @@ -108,7 +114,7 @@ cvRound( double value )
- defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
- TEGRA_ROUND_DBL(value);
- #elif defined CV_ICC || defined __GNUC__
- -# if CV_VFP
- +# if defined ARM_ROUND_DBL
- ARM_ROUND_DBL(value);
- # else
- return (int)lrint(value);
- @@ -130,18 +136,8 @@ cvRound( double value )
- */
- CV_INLINE int cvFloor( double value )
- {
- -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- - __m128d t = _mm_set_sd( value );
- - int i = _mm_cvtsd_si32(t);
- - return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
- -#elif defined __GNUC__
- int i = (int)value;
- return i - (i > value);
- -#else
- - int i = cvRound(value);
- - float diff = (float)(value - i);
- - return i - (diff < 0);
- -#endif
- }
-
- /** @brief Rounds floating-point number to the nearest integer not smaller than the original.
- @@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value )
- */
- CV_INLINE int cvCeil( double value )
- {
- -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
- - __m128d t = _mm_set_sd( value );
- - int i = _mm_cvtsd_si32(t);
- - return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
- -#elif defined __GNUC__
- int i = (int)value;
- return i + (i < value);
- -#else
- - int i = cvRound(value);
- - float diff = (float)(i - value);
- - return i + (diff < 0);
- -#endif
- }
-
- /** @brief Determines if the argument is Not A Number.
- @@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value )
- /** @overload */
- CV_INLINE int cvRound(float value)
- {
- -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
- - defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
- + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
- __m128 t = _mm_set_ss( value );
- return _mm_cvtss_si32(t);
- #elif defined _MSC_VER && defined _M_IX86
- @@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value)
- defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
- TEGRA_ROUND_FLT(value);
- #elif defined CV_ICC || defined __GNUC__
- -# if CV_VFP
- +# if defined ARM_ROUND_FLT
- ARM_ROUND_FLT(value);
- # else
- return (int)lrintf(value);
- @@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value )
- /** @overload */
- CV_INLINE int cvFloor( float value )
- {
- -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- - __m128 t = _mm_set_ss( value );
- - int i = _mm_cvtss_si32(t);
- - return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
- -#elif defined __GNUC__
- int i = (int)value;
- return i - (i > value);
- -#else
- - int i = cvRound(value);
- - float diff = (float)(value - i);
- - return i - (diff < 0);
- -#endif
- }
-
- /** @overload */
- @@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value )
- /** @overload */
- CV_INLINE int cvCeil( float value )
- {
- -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
- - __m128 t = _mm_set_ss( value );
- - int i = _mm_cvtss_si32(t);
- - return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
- -#elif defined __GNUC__
- int i = (int)value;
- return i + (i < value);
- -#else
- - int i = cvRound(value);
- - float diff = (float)(i - value);
- - return i + (diff < 0);
- -#endif
- }
-
- /** @overload */
- diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
- index 3c8f39d..a983838 100644
- --- a/modules/core/src/system.cpp
- +++ b/modules/core/src/system.cpp
- @@ -237,24 +237,81 @@ void Exception::formatMessage()
- msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str());
- }
-
- +static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };
- +
- +static const char* getHWFeatureName(int id)
- +{
- + return (id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL;
- +}
- +static const char* getHWFeatureNameSafe(int id)
- +{
- + const char* name = getHWFeatureName(id);
- + return name ? name : "Unknown feature";
- +}
- +
- struct HWFeatures
- {
- enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
-
- - HWFeatures(void)
- + HWFeatures(bool run_initialize = false)
- {
- - memset( have, 0, sizeof(have) );
- - x86_family = 0;
- + memset( have, 0, sizeof(have[0]) * MAX_FEATURE );
- + if (run_initialize)
- + initialize();
- }
-
- - static HWFeatures initialize(void)
- + static void initializeNames()
- {
- - HWFeatures f;
- + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
- + {
- + g_hwFeatureNames[i] = 0;
- + }
- + g_hwFeatureNames[CPU_MMX] = "MMX";
- + g_hwFeatureNames[CPU_SSE] = "SSE";
- + g_hwFeatureNames[CPU_SSE2] = "SSE2";
- + g_hwFeatureNames[CPU_SSE3] = "SSE3";
- + g_hwFeatureNames[CPU_SSSE3] = "SSSE3";
- + g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1";
- + g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2";
- + g_hwFeatureNames[CPU_POPCNT] = "POPCNT";
- + g_hwFeatureNames[CPU_FP16] = "FP16";
- + g_hwFeatureNames[CPU_AVX] = "AVX";
- + g_hwFeatureNames[CPU_AVX2] = "AVX2";
- + g_hwFeatureNames[CPU_FMA3] = "FMA3";
- +
- + g_hwFeatureNames[CPU_AVX_512F] = "AVX512F";
- + g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW";
- + g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD";
- + g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ";
- + g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER";
- + g_hwFeatureNames[CPU_AVX_512IFMA512] = "AVX512IFMA";
- + g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF";
- + g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI";
- + g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
- +
- + g_hwFeatureNames[CPU_NEON] = "NEON";
- + }
- +
- + void initialize(void)
- + {
- +#ifndef WINRT
- + if (getenv("OPENCV_DUMP_CONFIG"))
- + {
- + fprintf(stderr, "\nOpenCV build configuration is:\n%s\n",
- + cv::getBuildInformation().c_str());
- + }
- +#endif
- +
- + initializeNames();
- +
- int cpuid_data[4] = { 0, 0, 0, 0 };
- + int cpuid_data_ex[4] = { 0, 0, 0, 0 };
-
- #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
- + #define OPENCV_HAVE_X86_CPUID 1
- __cpuid(cpuid_data, 1);
- #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
- + #define OPENCV_HAVE_X86_CPUID 1
- #ifdef __x86_64__
- asm __volatile__
- (
- @@ -278,33 +335,36 @@ struct HWFeatures
- #endif
- #endif
-
- - f.x86_family = (cpuid_data[0] >> 8) & 15;
- - if( f.x86_family >= 6 )
- + #ifdef OPENCV_HAVE_X86_CPUID
- + int x86_family = (cpuid_data[0] >> 8) & 15;
- + if( x86_family >= 6 )
- {
- - f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
- - f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
- - f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
- - f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
- - f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
- - f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
- - f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
- - f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
- - f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
- - f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
- - f.have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
- + have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0;
- + have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
- + have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
- + have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
- + have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
- + have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
- + have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
- + have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
- + have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
- + have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0;
- + have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
-
- // make the second call to the cpuid command in order to get
- // information about extended features like AVX2
- #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
- - __cpuidex(cpuid_data, 7, 0);
- + #define OPENCV_HAVE_X86_CPUID_EX 1
- + __cpuidex(cpuid_data_ex, 7, 0);
- #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
- + #define OPENCV_HAVE_X86_CPUID_EX 1
- #ifdef __x86_64__
- asm __volatile__
- (
- "movl $7, %%eax\n\t"
- "movl $0, %%ecx\n\t"
- "cpuid\n\t"
- - :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
- + :[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3])
- :
- : "cc"
- );
- @@ -317,29 +377,76 @@ struct HWFeatures
- "cpuid\n\t"
- "movl %%ebx, %0\n\t"
- "popl %%ebx\n\t"
- - : "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
- + : "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2])
- :
- : "cc"
- );
- #endif
- #endif
- - f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
- -
- - f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
- - f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
- - f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
- - f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
- - f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
- - f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
- - f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
- - f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
- - f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
- +
- + #ifdef OPENCV_HAVE_X86_CPUID_EX
- + have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0;
- +
- + have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0;
- + have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0;
- + have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0;
- + have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0;
- + have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0;
- + have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0;
- + have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0;
- + have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0;
- + have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0;
- + #else
- + CV_UNUSED(cpuid_data_ex);
- + #endif
- +
- + bool have_AVX_OS_support = true;
- + bool have_AVX512_OS_support = true;
- + if (!(cpuid_data[2] & (1<<27)))
- + have_AVX_OS_support = false; // OS uses XSAVE_XRSTORE and CPU support AVX
- + else
- + {
- + int xcr0 = 0;
- + #ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h
- + xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
- + #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
- + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
- + #endif
- + if ((xcr0 & 0x6) != 0x6)
- + have_AVX_OS_support = false; // YMM registers
- + if ((xcr0 & 0xe6) != 0xe6)
- + have_AVX512_OS_support = false; // ZMM registers
- + }
- +
- + if (!have_AVX_OS_support)
- + {
- + have[CV_CPU_AVX] = false;
- + have[CV_CPU_FP16] = false;
- + have[CV_CPU_AVX2] = false;
- + have[CV_CPU_FMA3] = false;
- + }
- + if (!have_AVX_OS_support || !have_AVX512_OS_support)
- + {
- + have[CV_CPU_AVX_512F] = false;
- + have[CV_CPU_AVX_512BW] = false;
- + have[CV_CPU_AVX_512CD] = false;
- + have[CV_CPU_AVX_512DQ] = false;
- + have[CV_CPU_AVX_512ER] = false;
- + have[CV_CPU_AVX_512IFMA512] = false;
- + have[CV_CPU_AVX_512PF] = false;
- + have[CV_CPU_AVX_512VBMI] = false;
- + have[CV_CPU_AVX_512VL] = false;
- + }
- }
- + #else
- + CV_UNUSED(cpuid_data);
- + CV_UNUSED(cpuid_data_ex);
- + #endif // OPENCV_HAVE_X86_CPUID
-
- #if defined ANDROID || defined __linux__
- #ifdef __aarch64__
- - f.have[CV_CPU_NEON] = true;
- - f.have[CV_CPU_FP16] = true;
- + have[CV_CPU_NEON] = true;
- + have[CV_CPU_FP16] = true;
- #elif defined __arm__
- int cpufile = open("/proc/self/auxv", O_RDONLY);
-
- @@ -352,8 +459,8 @@ struct HWFeatures
- {
- if (auxv.a_type == AT_HWCAP)
- {
- - f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
- - f.have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
- + have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
- + have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
- break;
- }
- }
- @@ -363,21 +470,133 @@ struct HWFeatures
- #endif
- #elif (defined __clang__ || defined __APPLE__)
- #if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
- - f.have[CV_CPU_NEON] = true;
- + have[CV_CPU_NEON] = true;
- #endif
- #if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__))
- - f.have[CV_CPU_FP16] = true;
- + have[CV_CPU_FP16] = true;
- #endif
- #endif
-
- - return f;
- + int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
- + if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
- + {
- + fprintf(stderr, "\n"
- + "******************************************************************\n"
- + "* FATAL ERROR: *\n"
- + "* This OpenCV build doesn't support current CPU/HW configuration *\n"
- + "* *\n"
- + "* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n"
- + "******************************************************************\n");
- + fprintf(stderr, "\nRequired baseline features:\n");
- + checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true);
- + CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup.");
- + }
- +
- + readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]));
- + }
- +
- + bool checkFeatures(const int* features, int count, bool dump = false)
- + {
- + bool result = true;
- + for (int i = 0; i < count; i++)
- + {
- + int feature = features[i];
- + if (feature)
- + {
- + if (have[feature])
- + {
- + if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature));
- + }
- + else
- + {
- + result = false;
- + if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature));
- + }
- + }
- + }
- + return result;
- + }
- +
- + static inline bool isSymbolSeparator(char c)
- + {
- + return c == ',' || c == ';' || c == '-';
- + }
- +
- + void readSettings(const int* baseline_features, int baseline_count)
- + {
- + bool dump = true;
- + const char* disabled_features =
- +#ifndef WINRT
- + getenv("OPENCV_CPU_DISABLE");
- +#else
- + NULL;
- +#endif
- + if (disabled_features && disabled_features[0] != 0)
- + {
- + const char* start = disabled_features;
- + for (;;)
- + {
- + while (start[0] != 0 && isSymbolSeparator(start[0]))
- + {
- + start++;
- + }
- + if (start[0] == 0)
- + break;
- + const char* end = start;
- + while (end[0] != 0 && !isSymbolSeparator(end[0]))
- + {
- + end++;
- + }
- + if (end == start)
- + continue;
- + cv::String feature(start, end);
- + start = end;
- +
- + CV_Assert(feature.size() > 0);
- +
- + bool found = false;
- + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
- + {
- + if (!g_hwFeatureNames[i]) continue;
- + size_t len = strlen(g_hwFeatureNames[i]);
- + if (len != feature.size()) continue;
- + if (feature.compare(g_hwFeatureNames[i]) == 0)
- + {
- + bool isBaseline = false;
- + for (int k = 0; k < baseline_count; k++)
- + {
- + if (baseline_features[k] == i)
- + {
- + isBaseline = true;
- + break;
- + }
- + }
- + if (isBaseline)
- + {
- + if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i));
- + }
- + if (!have[i])
- + {
- + if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i));
- + }
- + have[i] = false;
- +
- + found = true;
- + break;
- + }
- + }
- + if (!found)
- + {
- + if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str());
- + }
- + }
- + }
- }
-
- - int x86_family;
- bool have[MAX_FEATURE+1];
- };
-
- -static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
- +static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false);
- static HWFeatures* currentFeatures = &featuresEnabled;
-
- bool checkHardwareSupport(int feature)
- diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
- index eb56177..6d9c650 100644
- --- a/modules/highgui/CMakeLists.txt
- +++ b/modules/highgui/CMakeLists.txt
- @@ -65,7 +65,7 @@ elseif(HAVE_QT)
-
- list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES})
- list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES})
- - ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag)
- + ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag "")
- if(${_have_flag})
- set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations)
- endif()
- diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
- index 0fa5202..dcf2e44 100644
- --- a/modules/imgproc/src/imgwarp.cpp
- +++ b/modules/imgproc/src/imgwarp.cpp
- @@ -1649,7 +1649,7 @@ struct VResizeLanczos4
- {
- CastOp castOp;
- VecOp vecOp;
- - int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
- + int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
- #if CV_ENABLE_UNROLLED
- for( ; x <= width - 4; x += 4 )
- {
- @@ -1657,7 +1657,7 @@ struct VResizeLanczos4
- const WT* S = src[0];
- WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
-
- - for( k = 1; k < 8; k++ )
- + for( int k = 1; k < 8; k++ )
- {
- b = beta[k]; S = src[k];
- s0 += S[x]*b; s1 += S[x+1]*b;
- diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp
- index 51843fa..bb37ee9 100644
- --- a/modules/objdetect/src/haar.cpp
- +++ b/modules/objdetect/src/haar.cpp
- @@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
- CvPoint pt, double& stage_sum, int start_stage )
- {
- #ifdef CV_HAAR_USE_AVX
- - bool haveAVX = false;
- - if(cv::checkHardwareSupport(CV_CPU_AVX))
- - if(__xgetbv()&0x6)// Check if the OS will save the YMM registers
- - haveAVX = true;
- + bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX);
- #else
- # ifdef CV_HAAR_USE_SSE
- bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
- --
- 2.7.4
|