2
1

0001-cmake-support-multiple-CPU-targets.patch 96 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564
  1. From 1c54b13cb29463af514a107c42946abd97b5ca41 Mon Sep 17 00:00:00 2001
  2. From: Alexander Alekhin <alexander.alekhin@intel.com>
  3. Date: Wed, 7 Sep 2016 18:02:36 +0300
  4. Subject: [PATCH] cmake: support multiple CPU targets
  5. Backported from: https://github.com/opencv/opencv/commit/e16227b53cabab1caa4b7aba8ff59a630528348f
  6. Signed-off-by: Samuel Martin <s.martin49@gmail.com>
  7. ---
  8. CMakeLists.txt | 50 +-
  9. cmake/OpenCVCompilerOptimizations.cmake | 651 +++++++++++++++++++++
  10. cmake/OpenCVCompilerOptions.cmake | 161 +----
  11. cmake/OpenCVGenHeaders.cmake | 4 +
  12. cmake/OpenCVModule.cmake | 3 +
  13. cmake/OpenCVPCHSupport.cmake | 5 +-
  14. cmake/OpenCVUtils.cmake | 50 +-
  15. cmake/checks/cpu_avx.cpp | 9 +
  16. cmake/checks/cpu_avx2.cpp | 10 +
  17. cmake/checks/cpu_avx512.cpp | 10 +
  18. cmake/checks/cpu_fp16.cpp | 33 ++
  19. cmake/checks/cpu_popcnt.cpp | 8 +
  20. cmake/checks/cpu_sse.cpp | 2 +
  21. cmake/checks/cpu_sse2.cpp | 2 +
  22. cmake/checks/cpu_sse3.cpp | 7 +
  23. cmake/checks/cpu_sse41.cpp | 6 +
  24. cmake/checks/cpu_sse42.cpp | 5 +
  25. cmake/checks/cpu_ssse3.cpp | 7 +
  26. cmake/checks/fp16.cpp | 33 --
  27. cmake/templates/cv_cpu_config.h.in | 5 +
  28. cmake/templates/cvconfig.h.in | 13 +
  29. .../core/include/opencv2/core/cv_cpu_dispatch.h | 166 ++++++
  30. modules/core/include/opencv2/core/cv_cpu_helper.h | 133 +++++
  31. modules/core/include/opencv2/core/cvdef.h | 145 +----
  32. modules/core/include/opencv2/core/fast_math.hpp | 60 +-
  33. modules/core/src/system.cpp | 301 ++++++++--
  34. modules/highgui/CMakeLists.txt | 2 +-
  35. modules/imgproc/src/imgwarp.cpp | 4 +-
  36. modules/objdetect/src/haar.cpp | 5 +-
  37. 29 files changed, 1472 insertions(+), 418 deletions(-)
  38. create mode 100644 cmake/OpenCVCompilerOptimizations.cmake
  39. create mode 100644 cmake/checks/cpu_avx.cpp
  40. create mode 100644 cmake/checks/cpu_avx2.cpp
  41. create mode 100644 cmake/checks/cpu_avx512.cpp
  42. create mode 100644 cmake/checks/cpu_fp16.cpp
  43. create mode 100644 cmake/checks/cpu_popcnt.cpp
  44. create mode 100644 cmake/checks/cpu_sse.cpp
  45. create mode 100644 cmake/checks/cpu_sse2.cpp
  46. create mode 100644 cmake/checks/cpu_sse3.cpp
  47. create mode 100644 cmake/checks/cpu_sse41.cpp
  48. create mode 100644 cmake/checks/cpu_sse42.cpp
  49. create mode 100644 cmake/checks/cpu_ssse3.cpp
  50. delete mode 100644 cmake/checks/fp16.cpp
  51. create mode 100644 cmake/templates/cv_cpu_config.h.in
  52. create mode 100644 modules/core/include/opencv2/core/cv_cpu_dispatch.h
  53. create mode 100644 modules/core/include/opencv2/core/cv_cpu_helper.h
  54. diff --git a/CMakeLists.txt b/CMakeLists.txt
  55. index cc45f6f..9c9971e 100644
  56. --- a/CMakeLists.txt
  57. +++ b/CMakeLists.txt
  58. @@ -85,6 +85,10 @@ if(POLICY CMP0042)
  59. cmake_policy(SET CMP0042 NEW)
  60. endif()
  61. +if(POLICY CMP0051)
  62. + cmake_policy(SET CMP0051 NEW)
  63. +endif()
  64. +
  65. include(cmake/OpenCVUtils.cmake)
  66. # must go before the project command
  67. @@ -274,16 +278,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov"
  68. OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
  69. OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
  70. OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
  71. -OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  72. -OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  73. -OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
  74. -OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  75. -OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
  76. -OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  77. -OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  78. -OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  79. -OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  80. -OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
  81. OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
  82. OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
  83. OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
  84. @@ -292,6 +286,9 @@ OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with n
  85. OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF )
  86. OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
  87. OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
  88. +OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON )
  89. +OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF )
  90. +
  91. OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )
  92. @@ -492,6 +489,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL
  93. set(CMAKE_BUILD_TYPE Release)
  94. endif()
  95. +# --- Python Support ---
  96. +include(cmake/OpenCVDetectPython.cmake)
  97. +
  98. include(cmake/OpenCVCompilerOptions.cmake)
  99. @@ -569,9 +569,6 @@ else()
  100. unset(DOXYGEN_FOUND CACHE)
  101. endif()
  102. -# --- Python Support ---
  103. -include(cmake/OpenCVDetectPython.cmake)
  104. -
  105. # --- Java Support ---
  106. include(cmake/OpenCVDetectApacheAnt.cmake)
  107. if(ANDROID)
  108. @@ -860,6 +857,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio")
  109. status(" Configuration:" ${CMAKE_BUILD_TYPE})
  110. endif()
  111. +
  112. +# ========================= CPU code generation mode =========================
  113. +status("")
  114. +status(" CPU/HW features:")
  115. +status(" Baseline:" "${CPU_BASELINE_FINAL}")
  116. +if(NOT CPU_BASELINE STREQUAL CPU_BASELINE_FINAL)
  117. + status(" requested:" "${CPU_BASELINE}")
  118. +endif()
  119. +if(CPU_BASELINE_REQUIRE)
  120. + status(" required:" "${CPU_BASELINE_REQUIRE}")
  121. +endif()
  122. +if(CPU_BASELINE_DISABLE)
  123. + status(" disabled:" "${CPU_BASELINE_DISABLE}")
  124. +endif()
  125. +if(CPU_DISPATCH_FINAL OR CPU_DISPATCH)
  126. + status(" Dispatched code generation:" "${CPU_DISPATCH_FINAL}")
  127. + if(NOT CPU_DISPATCH STREQUAL CPU_DISPATCH_FINAL)
  128. + status(" requested:" "${CPU_DISPATCH}")
  129. + endif()
  130. + if(CPU_DISPATCH_REQUIRE)
  131. + status(" required:" "${CPU_DISPATCH_REQUIRE}")
  132. + endif()
  133. + foreach(OPT ${CPU_DISPATCH_FINAL})
  134. + status(" ${OPT} (${CPU_${OPT}_USAGE_COUNT} files):" "+ ${CPU_DISPATCH_${OPT}_INCLUDED}")
  135. + endforeach()
  136. +endif()
  137. +
  138. # ========================== C/C++ options ==========================
  139. if(CMAKE_CXX_COMPILER_VERSION)
  140. set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})")
  141. diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake
  142. new file mode 100644
  143. index 0000000..b849f02
  144. --- /dev/null
  145. +++ b/cmake/OpenCVCompilerOptimizations.cmake
  146. @@ -0,0 +1,651 @@
  147. +# x86/x86-64 arch:
  148. +# SSE / SSE2 (always available on 64-bit CPUs)
  149. +# SSE3 / SSSE3
  150. +# SSE4_1 / SSE4_2 / POPCNT
  151. +# AVX / AVX2 / AVX512
  152. +# FMA3
  153. +
  154. +# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
  155. +# CPU_{opt}_IMPLIES=<list>
  156. +# CPU_{opt}_FORCE=<list> - subset of "implies" list
  157. +# CPU_{opt}_FLAGS_ON=""
  158. +# CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum
  159. +
  160. +# Input variables:
  161. +# CPU_BASELINE=<list> - preferred list of baseline optimizations
  162. +# CPU_DISPATCH=<list> - preferred list of dispatched optimizations
  163. +
  164. +# Advanced input variables:
  165. +# CPU_BASELINE_REQUIRE=<list> - list of required baseline optimizations
  166. +# CPU_DISPATCH_REQUIRE=<list> - list of required dispatched optimizations
  167. +# CPU_BASELINE_DISABLE=<list> - list of disabled baseline optimizations
  168. +
  169. +# Output variables:
  170. +# CPU_BASELINE_FINAL=<list> - final list of enabled compiler optimizations
  171. +# CPU_DISPATCH_FINAL=<list> - final list of dispatched optimizations
  172. +#
  173. +# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (_opt_avx2.cpp)
  174. +
  175. +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512
  176. +list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
  177. +list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
  178. +
  179. +ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
  180. +
  181. +
  182. +set(HELP_CPU_BASELINE "Specify list of enabled baseline CPU optimizations")
  183. +set(HELP_CPU_BASELINE_REQUIRE "Specify list of required baseline CPU optimizations")
  184. +set(HELP_CPU_BASELINE_DISABLE "Specify list of forbidden baseline CPU optimizations")
  185. +set(HELP_CPU_DISPATCH "Specify list of dispatched CPU optimizations")
  186. +set(HELP_CPU_DISPATCH_REQUIRE "Specify list of required dispatched CPU optimizations")
  187. +
  188. +foreach(var CPU_BASELINE CPU_BASELINE_REQUIRE CPU_BASELINE_DISABLE CPU_DISPATCH CPU_DISPATCH_REQUIRE) # normalize every user-facing option list
  189. + if(DEFINED ${var}) # untouched options stay undefined so later defaults can still apply
  190. + string(REPLACE "," ";" _list "${${var}}") # accept comma-separated input by turning it into a CMake ;-list
  191. + set(${var} "${_list}" CACHE STRING "${HELP_${var}}" FORCE) # write the normalized list back to the cache with its help text
  192. + endif()
  193. +endforeach()
  194. +
  195. +# process legacy flags: a defined ENABLE_<opt> switch is folded into CPU_BASELINE_REQUIRE (truthy) or CPU_BASELINE_DISABLE (falsy)
  196. +macro(ocv_optimization_process_obsolete_option legacy_flag OPT legacy_warn) # legacy_flag: old option name, OPT: optimization id, legacy_warn: ON/OFF to print the deprecation notice
  197. + if(DEFINED ${legacy_flag}) # do nothing unless the legacy option was actually set
  198. + if(${legacy_warn})
  199. + message(STATUS "WARNING: Option ${legacy_flag}='${${legacy_flag}}' is deprecated and should not be used anymore")
  200. + message(STATUS " Behaviour of this option is not backward compatible")
  201. + message(STATUS " Refer to 'CPU_BASELINE'/'CPU_DISPATCH' CMake options documentation")
  202. + endif()
  203. + if(${legacy_flag}) # legacy option enabled -> require OPT in the baseline
  204. + if(NOT ";${CPU_BASELINE_REQUIRE};" MATCHES ";${OPT};") # surrounding ';' makes this a whole-element match, avoiding duplicates
  205. + set(CPU_BASELINE_REQUIRE "${CPU_BASELINE_REQUIRE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_REQUIRE}" FORCE)
  206. + endif()
  207. + else() # legacy option explicitly disabled -> forbid OPT in the baseline
  208. + if(NOT ";${CPU_BASELINE_DISABLE};" MATCHES ";${OPT};")
  209. + set(CPU_BASELINE_DISABLE "${CPU_BASELINE_DISABLE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_DISABLE}" FORCE)
  210. + endif()
  211. + endif()
  212. + endif()
  213. +endmacro()
  214. +ocv_optimization_process_obsolete_option(ENABLE_SSE SSE ON)
  215. +ocv_optimization_process_obsolete_option(ENABLE_SSE2 SSE2 ON)
  216. +ocv_optimization_process_obsolete_option(ENABLE_SSE3 SSE3 ON)
  217. +ocv_optimization_process_obsolete_option(ENABLE_SSSE3 SSSE3 ON)
  218. +ocv_optimization_process_obsolete_option(ENABLE_SSE41 SSE4_1 ON)
  219. +ocv_optimization_process_obsolete_option(ENABLE_SSE42 SSE4_2 ON)
  220. +ocv_optimization_process_obsolete_option(ENABLE_POPCNT POPCNT ON)
  221. +ocv_optimization_process_obsolete_option(ENABLE_AVX AVX ON)
  222. +ocv_optimization_process_obsolete_option(ENABLE_AVX2 AVX2 ON)
  223. +ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
  224. +
  225. +ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
  226. +ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
  227. +
  228. +
  229. +macro(ocv_is_optimization_in_list resultvar check_opt) # sets resultvar to 1 if check_opt is in ARGN or reachable from it through CPU_<opt>_IMPLIES, else 0
  230. + set(__checked "") # optimizations already expanded
  231. + set(__queue ${ARGN}) # optimizations still to examine, seeded with the caller's list
  232. + set(${resultvar} 0)
  233. + while(__queue AND NOT ${resultvar}) # one pass per level of implied optimizations; stops as soon as a match is found
  234. + list(REMOVE_DUPLICATES __queue)
  235. + set(__queue_current ${__queue})
  236. + set(__queue "") # refilled below with the next level
  237. + foreach(OPT ${__queue_current})
  238. + if("x${OPT}" STREQUAL "x${check_opt}") # "x" prefix keeps if() from treating either operand as a variable name
  239. + set(${resultvar} 1)
  240. + break() # leaves the foreach; the while condition then ends the search
  241. + elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand each optimization only once
  242. + list(APPEND __queue ${CPU_${OPT}_IMPLIES})
  243. + endif()
  244. + list(APPEND __checked ${OPT})
  245. + endforeach()
  246. + endwhile()
  247. +endmacro()
  248. +
  249. +macro(ocv_is_optimization_in_force_list resultvar check_opt) # sets resultvar to 1 if check_opt is in ARGN or reachable from it through CPU_<opt>_FORCE, else 0
  250. + set(__checked "") # optimizations already expanded
  251. + set(__queue ${ARGN}) # optimizations still to examine, seeded with the caller's list
  252. + set(${resultvar} 0)
  253. + while(__queue AND NOT ${resultvar}) # one pass per level of forced optimizations; stops as soon as a match is found
  254. + list(REMOVE_DUPLICATES __queue)
  255. + set(__queue_current ${__queue})
  256. + set(__queue "") # refilled below with the next level
  257. + foreach(OPT ${__queue_current})
  258. + if("x${OPT}" STREQUAL "x${check_opt}") # same guarded comparison as ocv_is_optimization_in_list: the "x" prefix prevents implicit variable dereference by if()
  259. + set(${resultvar} 1)
  260. + break() # leaves the foreach; the while condition then ends the search
  261. + elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand each optimization only once
  262. + list(APPEND __queue ${CPU_${OPT}_FORCE})
  263. + endif()
  264. + list(APPEND __checked ${OPT})
  265. + endforeach()
  266. + endwhile()
  267. +endmacro()
  268. +
  269. +macro(ocv_append_optimization_flag var OPT) # appends CPU_<OPT>_FLAGS_ON to the flag string named by var
  270. + if(CPU_${OPT}_FLAGS_CONFLICT) # optional regex of flags to drop before adding the new one
  271. + string(REGEX REPLACE " ${CPU_${OPT}_FLAGS_CONFLICT}" "" ${var} " ${${var}}") # a space is prepended to the input so the space-prefixed pattern can also match the first flag
  272. + string(REGEX REPLACE "^ +" "" ${var} "${${var}}") # strip the leading spaces left over from the previous step
  273. + endif()
  274. + set(${var} "${${var}} ${CPU_${OPT}_FLAGS_ON}")
  275. +endmacro()
  276. +
  277. +# Support GCC -march=native or Intel Compiler -xHost flags; decides whether the baseline is auto-detected (CPU_BASELINE_DETECT)
  278. +if(";${CPU_BASELINE};" MATCHES ";NATIVE;" OR ";${CPU_BASELINE};" MATCHES ";HOST;") # NATIVE/HOST: detect and also request the native compiler flag
  279. + set(CPU_BASELINE_DETECT ON)
  280. + set(_add_native_flag ON)
  281. +elseif(";${CPU_BASELINE};" MATCHES ";DETECT;") # DETECT: detect only, no extra compiler flag
  282. + set(CPU_BASELINE_DETECT ON)
  283. +elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") # the user already passed a native flag in CMAKE_CXX_FLAGS
  284. + if(DEFINED CPU_BASELINE)
  285. + message(STATUS "CPU: Detected '-march=native' or '-xHost' compiler flag. Force CPU_BASELINE=DETECT.")
  286. + endif()
  287. + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") # NOTE(review): no FORCE here, so an existing cache entry is presumably kept despite the message above - confirm
  288. + set(CPU_BASELINE_DETECT ON)
  289. +endif()
  290. +
  291. +if(X86 OR X86_64) # x86 family: known optimizations, probe sources, implication graph and per-compiler flags
  292. + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512")
  293. +
  294. + ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp")
  295. + ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp")
  296. + ocv_update(CPU_SSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse3.cpp")
  297. + ocv_update(CPU_SSSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_ssse3.cpp")
  298. + ocv_update(CPU_SSE4_1_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse41.cpp")
  299. + ocv_update(CPU_SSE4_2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse42.cpp")
  300. + ocv_update(CPU_POPCNT_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_popcnt.cpp")
  301. + ocv_update(CPU_AVX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx.cpp")
  302. + ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp")
  303. + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp")
  304. + ocv_update(CPU_AVX512_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp")
  305. +
  306. + if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) # implication graph; an empty *_FORCE keeps the FORCE list from defaulting to the IMPLIES list later in this file
  307. + ocv_update(CPU_AVX512_IMPLIES "AVX2")
  308. + ocv_update(CPU_AVX512_FORCE "") # Don't force other optimizations
  309. + ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16")
  310. + ocv_update(CPU_FMA3_IMPLIES "AVX2")
  311. + ocv_update(CPU_FMA3_FORCE "") # Don't force other optimizations
  312. + ocv_update(CPU_FP16_IMPLIES "AVX")
  313. + ocv_update(CPU_FP16_FORCE "") # Don't force other optimizations
  314. + ocv_update(CPU_AVX_IMPLIES "SSE4_2")
  315. + ocv_update(CPU_SSE4_2_IMPLIES "SSE4_1;POPCNT")
  316. + ocv_update(CPU_POPCNT_IMPLIES "SSE4_1")
  317. + ocv_update(CPU_POPCNT_FORCE "") # Don't force other optimizations
  318. + ocv_update(CPU_SSE4_1_IMPLIES "SSE3;SSSE3")
  319. + ocv_update(CPU_SSSE3_IMPLIES "SSE3")
  320. + ocv_update(CPU_SSE3_IMPLIES "SSE2")
  321. + ocv_update(CPU_SSE2_IMPLIES "SSE")
  322. + endif()
  323. +
  324. + if(CV_ICC) # Intel compiler: one flag set per optimization, different for MSVC-style and Unix-style drivers
  325. + macro(ocv_intel_compiler_optimization_option name unix_flags msvc_flags) # registers FLAGS_NAME, FLAGS_ON and FLAGS_CONFLICT for one optimization
  326. + ocv_update(CPU_${name}_FLAGS_NAME "${name}")
  327. + if(MSVC)
  328. + set(enable_flags "${msvc_flags}")
  329. + set(flags_conflict "/arch:[^ ]+") # regex of previously added flags that ocv_append_optimization_flag removes
  330. + else()
  331. + set(enable_flags "${unix_flags}")
  332. + set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+") # regex of previously added flags that ocv_append_optimization_flag removes
  333. + endif()
  334. + ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}")
  335. + if(flags_conflict)
  336. + ocv_update(CPU_${name}_FLAGS_CONFLICT "${flags_conflict}")
  337. + endif()
  338. + endmacro()
  339. + ocv_intel_compiler_optimization_option(AVX2 "-march=core-avx2" "/arch:CORE-AVX2")
  340. + ocv_intel_compiler_optimization_option(FP16 "-mavx" "/arch:AVX")
  341. + ocv_intel_compiler_optimization_option(AVX "-mavx" "/arch:AVX")
  342. + ocv_intel_compiler_optimization_option(FMA3 "" "")
  343. + ocv_intel_compiler_optimization_option(POPCNT "" "")
  344. + ocv_intel_compiler_optimization_option(SSE4_2 "-msse4.2" "/arch:SSE4.2")
  345. + ocv_intel_compiler_optimization_option(SSE4_1 "-msse4.1" "/arch:SSE4.1")
  346. + ocv_intel_compiler_optimization_option(SSE3 "-msse3" "/arch:SSE3")
  347. + ocv_intel_compiler_optimization_option(SSSE3 "-mssse3" "/arch:SSSE3")
  348. + ocv_intel_compiler_optimization_option(SSE2 "-msse2" "/arch:SSE2")
  349. + if(NOT X86_64) # x64 compiler doesn't support /arch:sse
  350. + ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE")
  351. + endif()
  352. + #ocv_intel_compiler_optimization_option(AVX512 "-march=core-avx512")
  353. + elseif(CMAKE_COMPILER_IS_GNUCXX) # GCC-style drivers: one -m<feature> flag per optimization
  354. + ocv_update(CPU_AVX2_FLAGS_ON "-mavx2")
  355. + ocv_update(CPU_FP16_FLAGS_ON "-mf16c")
  356. + ocv_update(CPU_AVX_FLAGS_ON "-mavx")
  357. + ocv_update(CPU_FMA3_FLAGS_ON "-mfma")
  358. + ocv_update(CPU_POPCNT_FLAGS_ON "-mpopcnt")
  359. + ocv_update(CPU_SSE4_2_FLAGS_ON "-msse4.2")
  360. + ocv_update(CPU_SSE4_1_FLAGS_ON "-msse4.1")
  361. + ocv_update(CPU_SSE3_FLAGS_ON "-msse3")
  362. + ocv_update(CPU_SSSE3_FLAGS_ON "-mssse3")
  363. + ocv_update(CPU_SSE2_FLAGS_ON "-msse2")
  364. + ocv_update(CPU_SSE_FLAGS_ON "-msse")
  365. + if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") # AVX512 flags are only registered for compiler version 5.0 and newer
  366. + ocv_update(CPU_AVX512_FLAGS_ON "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi")
  367. + endif()
  368. + elseif(MSVC) # MSVC: only /arch: switches are registered; SSE/SSE2 are marked supported without probing
  369. + ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2")
  370. + ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX")
  371. + if(NOT MSVC64)
  372. + # 64-bit MSVC compiler uses SSE/SSE2 by default
  373. + ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE")
  374. + ocv_update(CPU_SSE_SUPPORTED ON)
  375. + ocv_update(CPU_SSE2_FLAGS_ON "/arch:SSE2")
  376. + ocv_update(CPU_SSE2_SUPPORTED ON)
  377. + else()
  378. + ocv_update(CPU_SSE_SUPPORTED ON)
  379. + ocv_update(CPU_SSE2_SUPPORTED ON)
  380. + endif()
  381. + # Other instruction sets are supported by default since MSVC 2008 at least
  382. + else()
  383. + message(WARNING "TODO: Unsupported compiler")
  384. + endif()
  385. +
  386. + if(NOT DEFINED CPU_DISPATCH) # default dispatch set when the user gave none
  387. + set(CPU_DISPATCH "SSE4_1;AVX;FP16;AVX2" CACHE STRING "${HELP_CPU_DISPATCH}")
  388. + endif()
  389. +
  390. + if(NOT DEFINED CPU_BASELINE) # default baseline when the user gave none: SSSE3 on 64-bit, SSE2 on 32-bit
  391. + if(X86_64)
  392. + set(CPU_BASELINE "SSSE3" CACHE STRING "${HELP_CPU_BASELINE}")
  393. + else()
  394. + set(CPU_BASELINE "SSE2" CACHE STRING "${HELP_CPU_BASELINE}")
  395. + endif()
  396. + endif()
  397. +
  398. +elseif(ARM OR AARCH64) # ARM family: VFPV3/NEON/FP16 on 32-bit, NEON/FP16 on AArch64
  399. + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp")
  400. + if(NOT AARCH64) # 32-bit ARM needs explicit -mfpu= flags and defaults to DETECT
  401. + ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16")
  402. + ocv_update(CPU_NEON_FLAGS_ON "-mfpu=neon")
  403. + ocv_update(CPU_VFPV3_FLAGS_ON "-mfpu=vfpv3")
  404. + ocv_update(CPU_FP16_FLAGS_ON "-mfpu=neon-fp16")
  405. + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
  406. + else() # AArch64: NEON needs no flag and is the default baseline
  407. + ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16")
  408. + ocv_update(CPU_NEON_FLAGS_ON "")
  409. + set(CPU_BASELINE "NEON" CACHE STRING "${HELP_CPU_BASELINE}")
  410. + endif()
  411. +endif()
  412. +
  413. +# Helper values for cmake-gui
  414. +set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
  415. +set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}")
  416. +set_property(CACHE CPU_BASELINE PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS})
  417. +set_property(CACHE CPU_DISPATCH PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS})
  418. +
  419. +set(CPU_BASELINE_FLAGS "")
  420. +
  421. +set(CPU_BASELINE_FINAL "")
  422. +set(CPU_DISPATCH_FINAL "")
  423. +
  424. +macro(ocv_check_compiler_optimization OPT) # decides CPU_<OPT>_SUPPORTED unless it is already defined
  425. + if(NOT DEFINED CPU_${OPT}_SUPPORTED) # an earlier explicit ON/OFF is left untouched
  426. + if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE) # there is something to probe: a non-empty flag or a test source
  427. + set(_varname "") # will hold the NAME of the variable that receives the probe result
  428. + if(CPU_${OPT}_TEST_FILE)
  429. + set(__available 0)
  430. + if(CPU_BASELINE_DETECT) # detect mode: first try the test source with the baseline flags only
  431. + set(_varname "HAVE_CPU_${OPT}_SUPPORT")
  432. + ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
  433. + if(${_varname}) # presumably set by the helper when the probe compiles - confirm against ocv_check_compiler_flag
  434. + list(APPEND CPU_BASELINE_FINAL ${OPT}) # available without extra flags, so it joins the final baseline
  435. + set(__available 1)
  436. + endif()
  437. + endif()
  438. + if(NOT __available) # otherwise probe again with the optimization's own flag
  439. + if(NOT "x${CPU_${OPT}_FLAGS_NAME}" STREQUAL "x") # explicit result-variable name registered for this optimization
  440. + set(_varname "HAVE_CPU_${CPU_${OPT}_FLAGS_NAME}")
  441. + set(_compile_flags "${CPU_BASELINE_FLAGS}")
  442. + ocv_append_optimization_flag(_compile_flags ${OPT})
  443. + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
  444. + elseif(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") # flag only: the helper receives _varname as an output argument
  445. + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "" "${CPU_${OPT}_TEST_FILE}")
  446. + else() # no flag at all: compile the test source with the baseline flags
  447. + set(_varname "HAVE_CPU_${OPT}_SUPPORT")
  448. + set(_compile_flags "${CPU_BASELINE_FLAGS}")
  449. + ocv_append_optimization_flag(_compile_flags ${OPT})
  450. + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
  451. + endif()
  452. + endif()
  453. + else() # no test source: only check the flag itself
  454. + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "")
  455. + endif()
  456. + if(_varname AND ${_varname}) # a result variable was named and it holds a true value
  457. + set(CPU_${OPT}_SUPPORTED ON)
  458. + elseif(NOT CPU_${OPT}_SUPPORTED)
  459. + message(STATUS "${OPT} is not supported by C++ compiler")
  460. + endif()
  461. + else() # nothing to probe (no flag, no test source): treated as supported
  462. + set(CPU_${OPT}_SUPPORTED ON)
  463. + endif()
  464. + endif()
  465. +endmacro()
  466. +
  467. +foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) # per-optimization bookkeeping defaults
  468. + set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE) # reset on every configure; incremented in ocv_compiler_optimization_process_sources
  469. + if(NOT DEFINED CPU_${OPT}_FORCE)
  470. + set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}") # when not set explicitly, the force list defaults to the implies list
  471. + endif()
  472. +endforeach()
  473. +
  474. +if(_add_native_flag) # add a "tune for the build host" flag to the baseline; _add_native_flag is set outside this chunk (presumably for the HOST/NATIVE baseline values - TODO confirm)
  475. + set(_varname "HAVE_CPU_NATIVE_SUPPORT") # NAME of the variable that receives the check result
  476. + ocv_check_compiler_flag(CXX "-march=native" "${_varname}" "")
  477. + if(${_varname}) # test the check RESULT; a bare if(_varname) only tests the non-empty name and is always true
  478. + set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} -march=native")
  479. + else() # -march=native rejected: fall back to the xHost spelling
  480. + set(_varname "HAVE_CPU_HOST_SUPPORT")
  481. + if(MSVC)
  482. + set(_flag "/QxHost")
  483. + else()
  484. + set(_flag "-xHost")
  485. + endif()
  486. + ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "")
  487. + if(${_varname}) # again: dereference the result variable, not its name
  488. + set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} ${_flag}") # append the flag that was actually checked (_flag, not the unset "flag")
  489. + endif()
  490. + endif()
  491. +endif()
  492. +
  493. +foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) # classify every known optimization: baseline, dispatch, or unused
  494. + set(__is_disabled 0)
  495. + foreach(OPT2 ${CPU_BASELINE_DISABLE}) # is OPT affected by any entry of CPU_BASELINE_DISABLE?
  496. + ocv_is_optimization_in_list(__is_disabled ${OPT2} ${OPT}) # helper defined outside this chunk; presumably also follows implied optimizations - TODO confirm
  497. + if(__is_disabled)
  498. + break()
  499. + endif()
  500. + endforeach()
  501. + if(__is_disabled)
  502. + set(__is_from_baseline 0) # a disabled optimization is never requested for the baseline
  503. + else()
  504. + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE})
  505. + if(NOT __is_from_baseline)
  506. + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE})
  507. + endif()
  508. + endif()
  509. + ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH_REQUIRE})
  510. + if(NOT __is_from_dispatch)
  511. + ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH})
  512. + endif()
  513. + if(__is_from_dispatch OR __is_from_baseline OR CPU_BASELINE_DETECT) # probe the compiler only for optimizations that may be used
  514. + ocv_check_compiler_optimization(${OPT})
  515. + endif()
  516. + if(CPU_BASELINE_DETECT AND NOT __is_from_baseline AND NOT __is_disabled) # detection may have just added OPT to CPU_BASELINE_FINAL
  517. + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_FINAL})
  518. + endif()
  519. + if(CPU_${OPT}_SUPPORTED)
  520. + if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline) # explicitly listed in CPU_DISPATCH and not already covered by the baseline
  521. + list(APPEND CPU_DISPATCH_FINAL ${OPT})
  522. + elseif(__is_from_baseline AND NOT CPU_BASELINE_DETECT) # in detect mode CPU_BASELINE_FINAL is filled by ocv_check_compiler_optimization instead
  523. + list(APPEND CPU_BASELINE_FINAL ${OPT})
  524. + ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT})
  525. + endif()
  526. + endif()
  527. +endforeach()
  528. +
  529. +foreach(OPT ${CPU_BASELINE_REQUIRE}) # every required baseline optimization must have been accepted, otherwise configuration errors out
  530. + if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
  531. + message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})")
  532. + endif()
  533. +endforeach()
  534. +
  535. +foreach(OPT ${CPU_BASELINE}) # optional baseline entries: a missing one is only reported
  536. + if(OPT STREQUAL "DETECT" OR OPT STREQUAL "HOST" OR OPT STREQUAL "NATIVE") # special keywords, not optimization names
  537. + # nothing
  538. + elseif(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
  539. + message(STATUS "Optimization ${OPT} is not available, skipped")
  540. + endif()
  541. +endforeach()
  542. +
  543. +foreach(OPT ${CPU_DISPATCH_REQUIRE}) # required dispatch optimizations must end up in CPU_DISPATCH_FINAL
  544. + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};")
  545. + # OK
  546. + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") # already in the baseline, so it cannot be dispatched
  547. + message(SEND_ERROR "Dispatched optimization ${OPT} is in baseline list (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
  548. + else()
  549. + message(SEND_ERROR "Required dispatch optimization is not supported: ${OPT} (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
  550. + endif()
  551. +endforeach()
  552. +
  553. +foreach(OPT ${CPU_DISPATCH}) # optional dispatch entries: being in either final list is fine, otherwise only report
  554. + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};")
  555. + # OK
  556. + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
  557. + # OK
  558. + else()
  559. + message(STATUS "Dispatch optimization ${OPT} is not available, skipped")
  560. + endif()
  561. +endforeach()
  562. +
  563. +#message(STATUS "CPU_BASELINE_FINAL=${CPU_BASELINE_FINAL}")
  564. +#message(STATUS "CPU_DISPATCH_FINAL=${CPU_DISPATCH_FINAL}")
  565. +
  566. +#if(CPU_DISPATCH_FINAL AND NOT PYTHON_DEFAULT_EXECUTABLE)
  567. +# message(FATAL_ERROR "Python is required for CPU dispatched optimization support")
  568. +#endif()
  569. +
  570. +macro(ocv_compiler_optimization_options) # Validate and apply the baseline flags, then compute flags/definitions for every dispatched optimization
  571. + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${CPU_BASELINE_FLAGS}")
  572. + if(NOT __flags STREQUAL CACHED_CPU_BASELINE_FLAGS) # flags changed since the last configure: drop the stored check result so it is re-run
  573. + set(CACHED_CPU_BASELINE_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
  574. + ocv_clear_vars(HAVE_CPU_BASELINE_FLAGS)
  575. + endif()
  576. + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_BASELINE_FLAGS)
  577. + if(NOT HAVE_CPU_BASELINE_FLAGS)
  578. + message(FATAL_ERROR "Compiler doesn't support baseline optimization flags: ${CPU_BASELINE_FLAGS}")
  579. + endif()
  580. + add_extra_compiler_option_force("${CPU_BASELINE_FLAGS}") # appended without a per-flag check; the combined flags were verified just above
  581. +
  582. + foreach(OPT ${CPU_DISPATCH_FINAL}) # one flag/definition set per dispatch mode
  583. + set(__dispatch_flags "")
  584. + set(__dispatch_definitions "")
  585. + set(__dispatch_opts "")
  586. + set(__dispatch_opts_force "")
  587. + foreach(OPT2 ${CPU_KNOWN_OPTIMIZATIONS})
  588. + if(NOT CPU_${OPT2}_SUPPORTED) # unsupported optimizations are skipped (empty branch stands in for continue())
  589. + #continue()
  590. + else()
  591. + ocv_is_optimization_in_list(__is_from_baseline ${OPT2} ${CPU_BASELINE_FINAL})
  592. + if(NOT __is_from_baseline) # baseline optimizations are already enabled globally
  593. + ocv_is_optimization_in_list(__is_active ${OPT2} ${OPT})
  594. + if(__is_active)
  595. + ocv_append_optimization_flag(__dispatch_flags ${OPT2})
  596. + list(APPEND __dispatch_definitions "CV_CPU_COMPILE_${OPT2}=1")
  597. + list(APPEND __dispatch_opts "${OPT2}")
  598. + endif()
  599. + ocv_is_optimization_in_force_list(__is_force ${OPT2} ${OPT}) # helper defined outside this chunk - semantics not visible here
  600. + if(__is_force)
  601. + list(APPEND __dispatch_opts_force "${OPT2}")
  602. + endif()
  603. + endif()
  604. + endif()
  605. + endforeach()
  606. + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${__dispatch_flags}")
  607. + if(NOT __flags STREQUAL CACHED_CPU_DISPATCH_${OPT}_FLAGS) # same invalidation scheme as for the baseline flags
  608. + set(CACHED_CPU_DISPATCH_${OPT}_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
  609. + ocv_clear_vars(HAVE_CPU_DISPATCH_FLAGS_${OPT})
  610. + endif()
  611. + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_DISPATCH_FLAGS_${OPT})
  612. + if(NOT HAVE_CPU_DISPATCH_FLAGS_${OPT})
  613. + message(FATAL_ERROR "Compiler doesn't support optimization flags for ${OPT} dispatch mode: ${__dispatch_flags}")
  614. + endif()
  615. + set(CPU_DISPATCH_FLAGS_${OPT} "${__dispatch_flags}") # results consumed by ocv_compiler_optimization_process_sources
  616. + set(CPU_DISPATCH_DEFINITIONS_${OPT} "${__dispatch_definitions}")
  617. + set(CPU_DISPATCH_${OPT}_INCLUDED "${__dispatch_opts}")
  618. + set(CPU_DISPATCH_${OPT}_FORCED "${__dispatch_opts_force}")
  619. + endforeach()
  620. +
  621. + if(ENABLE_POWERPC) # ENABLE_*-driven options below; the same lines are removed from OpenCVCompilerOptions.cmake by this patch
  622. + add_extra_compiler_option("-mcpu=G3 -mtune=G5")
  623. + endif()
  624. + if(ARM)
  625. + add_extra_compiler_option("-mfp16-format=ieee")
  626. + endif(ARM)
  627. + if(ENABLE_NEON)
  628. + add_extra_compiler_option("-mfpu=neon")
  629. + endif()
  630. + if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
  631. + add_extra_compiler_option("-mfpu=vfpv3")
  632. + endif()
  633. +endmacro()
  634. +
  635. +macro(ocv_compiler_optimization_options_finalize) # Floating-point / intrinsic options that depend on the flags selected so far
  636. + if(CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64))
  637. + if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) # 32-bit non-Apple builds only
  638. + if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") # SSE2 or AVX flag already among the extra flags
  639. + add_extra_compiler_option(-mfpmath=sse) # !! important - be on the same wave with x64 compilers
  640. + else()
  641. + add_extra_compiler_option(-mfpmath=387)
  642. + endif()
  643. + endif()
  644. + endif()
  645. +
  646. + if(MSVC)
  647. + # Generate Intrinsic Functions
  648. + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
  649. +
  650. + if((X86 OR X86_64) AND CMAKE_SIZEOF_VOID_P EQUAL 4 AND ";${CPU_BASELINE_FINAL};" MATCHES ";SSE;") # 32-bit build with SSE in the final baseline
  651. + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
  652. + endif()
  653. + endif(MSVC)
  654. +endmacro()
  655. +
  656. +macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME TARGET_BASE_NAME) # Filter "*.opt_*.cpp" sources by enabled optimizations and attach per-dispatch-mode flags; rewrites the list named by SOURCES_VAR_NAME
  657. + set(__result "") # sources built with the common flags
  658. + set(__result_libs "") # stays empty here: the only append to it is commented out below
  659. + foreach(OPT ${CPU_DISPATCH_FINAL})
  660. + set(__result_${OPT} "") # sources built with the flags of dispatch mode OPT
  661. + endforeach()
  662. + foreach(fname ${${SOURCES_VAR_NAME}})
  663. + string(TOLOWER "${fname}" fname_LOWER)
  664. + if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$") # optimization-specific source, e.g. ending in ".opt_<name>.cpp"
  665. + if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
  666. + message(STATUS "Excluding from source files list: ${fname}")
  667. + #continue()
  668. + else()
  669. + set(__opt_found 0)
  670. + foreach(OPT ${CPU_BASELINE_FINAL}) # file suffix names a baseline optimization: build it with the common flags
  671. + string(TOLOWER "${OPT}" OPT_LOWER)
  672. + if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$")
  673. +#message("${fname} BASELINE-${OPT}")
  674. + set(__opt_found 1)
  675. + list(APPEND __result "${fname}")
  676. + break()
  677. + endif()
  678. + endforeach()
  679. + foreach(OPT ${CPU_DISPATCH_FINAL}) # otherwise look for a dispatch mode whose forced list matches the suffix
  680. + foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED})
  681. + string(TOLOWER "${OPT2}" OPT2_LOWER)
  682. + if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$")
  683. + list(APPEND __result_${OPT} "${fname}")
  684. + math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1") # count files compiled for this dispatch mode
  685. + set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE) # persist the counter in the cache
  686. +#message("${fname} ${OPT}")
  687. +#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
  688. +#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
  689. +#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
  690. + set(__opt_found 1)
  691. + break()
  692. + endif()
  693. + endforeach()
  694. + if(__opt_found) # stop at the first match (also true when the baseline loop above already matched)
  695. + set(__opt_found 1)
  696. + break()
  697. + endif()
  698. + endforeach()
  699. + if(NOT __opt_found) # suffix matches no enabled optimization: the file is dropped from the build
  700. + message(STATUS "Excluding from source files list: ${fname}")
  701. + endif()
  702. + endif()
  703. + else() # ordinary source file: kept as is
  704. + list(APPEND __result "${fname}")
  705. + endif()
  706. + endforeach()
  707. +
  708. + foreach(OPT ${CPU_DISPATCH_FINAL})
  709. + if(__result_${OPT})
  710. +#message("${OPT}: ${__result_${OPT}}")
  711. + if(CMAKE_GENERATOR MATCHES "^Visual")
  712. + # extra flags are added before common flags, so switching between optimizations doesn't work correctly
  713. + # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required)
  714. + add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}}) # separate OBJECT library per dispatch mode
  715. + ocv_append_dependant_targets(${TARGET_BASE_NAME} ${TARGET_BASE_NAME}_${OPT}) # registered so include directories of the main target are applied to it as well
  716. + set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
  717. + set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
  718. + #list(APPEND __result_libs ${TARGET_BASE_NAME}_${OPT})
  719. + list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>") # objects are pulled into the main target via a generator expression
  720. + else() # other generators: per-source-file properties
  721. + foreach(fname ${__result_${OPT}})
  722. + set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
  723. + set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
  724. + endforeach()
  725. + list(APPEND __result ${__result_${OPT}})
  726. + endif()
  727. + endif()
  728. + endforeach()
  729. + set(${SOURCES_VAR_NAME} "${__result}") # overwrite the caller's source list with the filtered one
  730. + list(APPEND ${LIBS_VAR_NAME} ${__result_libs})
  731. +endmacro()
  732. +
  733. +macro(ocv_compiler_optimization_fill_cpu_config) # Build the *_CONFIGMAKE text variables (preprocessor definitions) and refresh cv_cpu_helper.h in the source tree
  734. + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "")
  735. + foreach(OPT ${CPU_BASELINE_FINAL})
  736. + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
  737. +#define CV_CPU_COMPILE_${OPT} 1
  738. +#define CV_CPU_BASELINE_COMPILE_${OPT} 1
  739. +")
  740. + endforeach()
  741. +
  742. + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
  743. +#define CV_CPU_BASELINE_FEATURES 0 \\")
  744. + foreach(OPT ${CPU_BASELINE_FINAL})
  745. + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") # skipped only when FEATURE_ALIAS is defined and empty
  746. + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
  747. + , CV_CPU_${OPT} \\")
  748. + endif()
  749. + endforeach()
  750. + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}\n") # terminate the backslash-continued macro
  751. +
  752. + set(__dispatch_modes "")
  753. + foreach(OPT ${CPU_DISPATCH_FINAL})
  754. + list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT}) # NOTE(review): reads CPU_DISPATCH_<OPT>_FORCE, while ocv_compiler_optimization_options sets CPU_DISPATCH_<OPT>_FORCED - confirm which name is intended
  755. + endforeach()
  756. + list(REMOVE_DUPLICATES __dispatch_modes)
  757. + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "")
  758. + foreach(OPT ${__dispatch_modes})
  759. + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
  760. +#define CV_CPU_DISPATCH_COMPILE_${OPT} 1")
  761. + endforeach()
  762. +
  763. + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n")
  764. + foreach(OPT ${CPU_ALL_OPTIMIZATIONS}) # CPU_ALL_OPTIMIZATIONS is defined outside this chunk
  765. + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
  766. + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
  767. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT}
  768. +# define CV_CPU_HAS_SUPPORT_${OPT} 1
  769. +# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__
  770. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT}
  771. +# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT}))
  772. +# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__
  773. +#else
  774. +# define CV_CPU_HAS_SUPPORT_${OPT} 0
  775. +# define CV_CPU_CALL_${OPT}(...)
  776. +#endif
  777. +")
  778. + endif()
  779. + endforeach()
  780. +
  781. + set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h") # generated header lives in the SOURCE tree
  782. + if(EXISTS "${__file}")
  783. + file(READ "${__file}" __content)
  784. + endif()
  785. + if(__content STREQUAL OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE) # NOTE(review): __content is only assigned when the file exists; otherwise it keeps any earlier value
  786. + #message(STATUS "${__file} contains same content")
  787. + else() # rewrite only on change, so an unchanged header keeps its timestamp
  788. + file(WRITE "${__file}" "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}")
  789. + message(WARNING "${__file} is updated")
  790. + endif()
  791. +endmacro()
  792. +
  793. +if(CV_DISABLE_OPTIMIZATION OR CV_ICC) # default for CV_ENABLE_UNROLLED: 0 when optimizations are disabled or CV_ICC is set, 1 otherwise
  794. + ocv_update(CV_ENABLE_UNROLLED 0) # ocv_update only assigns when the variable is not defined yet
  795. +else()
  796. + ocv_update(CV_ENABLE_UNROLLED 1)
  797. +endif()
  798. diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
  799. index 5bb0479..0eb68b6 100644
  800. --- a/cmake/OpenCVCompilerOptions.cmake
  801. +++ b/cmake/OpenCVCompilerOptions.cmake
  802. @@ -31,24 +31,21 @@ endif()
  803. if(MINGW OR (X86 AND UNIX AND NOT APPLE))
  804. # mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead
  805. if(CMAKE_COMPILER_IS_GNUCXX)
  806. - foreach(flags CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
  807. - string(REPLACE "-O3" "-O2" ${flags} "${${flags}}")
  808. - endforeach()
  809. - endif()
  810. -
  811. - if(CMAKE_COMPILER_IS_GNUCC)
  812. - foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
  813. + foreach(flags
  814. + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG
  815. + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
  816. string(REPLACE "-O3" "-O2" ${flags} "${${flags}}")
  817. endforeach()
  818. endif()
  819. endif()
  820. if(MSVC)
  821. - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  822. - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT}")
  823. + string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS)
  824. + string(STRIP "${CMAKE_CXX_FLAGS_INIT}" CMAKE_CXX_FLAGS_INIT)
  825. if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT)
  826. # override cmake default exception handling option
  827. - string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  828. + string(REPLACE "/EHsc" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  829. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa")
  830. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE)
  831. endif()
  832. endif()
  833. @@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "")
  834. set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "")
  835. macro(add_extra_compiler_option option)
  836. - if(CMAKE_BUILD_TYPE)
  837. - set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
  838. - endif()
  839. ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}")
  840. if(${_varname})
  841. set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}")
  842. @@ -77,6 +71,12 @@ macro(add_extra_compiler_option option)
  843. endif()
  844. endmacro()
  845. +macro(add_extra_compiler_option_force option) # Append 'option' to the extra C++ and C flags unconditionally (no ocv_check_flag_support probe, unlike add_extra_compiler_option)
  846. + set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}")
  847. + set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} ${option}")
  848. +endmacro()
  849. +
  850. +
  851. # Gets environment variable and puts its value to the corresponding preprocessor definition
  852. # Useful for WINRT that has no access to environment variables
  853. macro(add_env_definitions option)
  854. @@ -102,7 +102,11 @@ if(MINGW)
  855. endif()
  856. if(CV_ICC AND NOT ENABLE_FAST_MATH)
  857. - add_extra_compiler_option("-fp-model precise")
  858. + if(MSVC)
  859. + add_extra_compiler_option("/fp:precise")
  860. + else()
  861. + add_extra_compiler_option("-fp-model precise")
  862. + endif()
  863. endif()
  864. if(CMAKE_COMPILER_IS_GNUCXX)
  865. @@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
  866. endif()
  867. # We need pthread's
  868. - if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX))
  869. + if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) # TODO
  870. add_extra_compiler_option(-pthread)
  871. endif()
  872. @@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
  873. if(ENABLE_FAST_MATH)
  874. add_extra_compiler_option(-ffast-math)
  875. endif()
  876. - if(ENABLE_POWERPC)
  877. - add_extra_compiler_option("-mcpu=G3 -mtune=G5")
  878. - endif()
  879. - if(ENABLE_SSE)
  880. - add_extra_compiler_option(-msse)
  881. - endif()
  882. - if(ENABLE_SSE2)
  883. - add_extra_compiler_option(-msse2)
  884. - elseif(X86 OR X86_64)
  885. - add_extra_compiler_option(-mno-sse2)
  886. - endif()
  887. - if(ARM)
  888. - add_extra_compiler_option("-mfp16-format=ieee")
  889. - endif(ARM)
  890. - if(ENABLE_NEON)
  891. - add_extra_compiler_option("-mfpu=neon")
  892. - endif()
  893. - if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
  894. - add_extra_compiler_option("-mfpu=vfpv3")
  895. - endif()
  896. -
  897. - # SSE3 and further should be disabled under MingW because it generates compiler errors
  898. - if(NOT MINGW)
  899. - if(ENABLE_AVX)
  900. - add_extra_compiler_option(-mavx)
  901. - elseif(X86 OR X86_64)
  902. - add_extra_compiler_option(-mno-avx)
  903. - endif()
  904. - if(ENABLE_AVX2)
  905. - add_extra_compiler_option(-mavx2)
  906. -
  907. - if(ENABLE_FMA3)
  908. - add_extra_compiler_option(-mfma)
  909. - endif()
  910. - endif()
  911. -
  912. - # GCC depresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalence for all SSEx instructions when needed.
  913. - if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx")
  914. - if(ENABLE_SSE3)
  915. - add_extra_compiler_option(-msse3)
  916. - elseif(X86 OR X86_64)
  917. - add_extra_compiler_option(-mno-sse3)
  918. - endif()
  919. -
  920. - if(ENABLE_SSSE3)
  921. - add_extra_compiler_option(-mssse3)
  922. - elseif(X86 OR X86_64)
  923. - add_extra_compiler_option(-mno-ssse3)
  924. - endif()
  925. -
  926. - if(ENABLE_SSE41)
  927. - add_extra_compiler_option(-msse4.1)
  928. - elseif(X86 OR X86_64)
  929. - add_extra_compiler_option(-mno-sse4.1)
  930. - endif()
  931. -
  932. - if(ENABLE_SSE42)
  933. - add_extra_compiler_option(-msse4.2)
  934. - elseif(X86 OR X86_64)
  935. - add_extra_compiler_option(-mno-sse4.2)
  936. - endif()
  937. -
  938. - if(ENABLE_POPCNT)
  939. - add_extra_compiler_option(-mpopcnt)
  940. - endif()
  941. - endif()
  942. - endif(NOT MINGW)
  943. -
  944. - if(X86 OR X86_64)
  945. - if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4)
  946. - if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)")
  947. - add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers
  948. - else()
  949. - add_extra_compiler_option(-mfpmath=387)
  950. - endif()
  951. - endif()
  952. - endif()
  953. # Profiling?
  954. if(ENABLE_PROFILING)
  955. @@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
  956. string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}")
  957. string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}")
  958. endforeach()
  959. - elseif(NOT APPLE AND NOT ANDROID)
  960. + elseif(NOT ((IOS OR ANDROID) AND NOT BUILD_SHARED_LIBS))
  961. # Remove unreferenced functions: function level linking
  962. add_extra_compiler_option(-ffunction-sections)
  963. endif()
  964. @@ -296,41 +223,6 @@ if(MSVC)
  965. set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi")
  966. endif()
  967. - if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800)
  968. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2")
  969. - endif()
  970. - if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
  971. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX")
  972. - endif()
  973. -
  974. - if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
  975. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1")
  976. - endif()
  977. -
  978. - if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
  979. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3")
  980. - endif()
  981. -
  982. - if(NOT MSVC64)
  983. - # 64-bit MSVC compiler uses SSE/SSE2 by default
  984. - if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
  985. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2")
  986. - endif()
  987. - if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
  988. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE")
  989. - endif()
  990. - endif()
  991. -
  992. - if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX OR ENABLE_AVX2)
  993. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
  994. - endif()
  995. -
  996. - if(X86 OR X86_64)
  997. - if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2)
  998. - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
  999. - endif()
  1000. - endif()
  1001. -
  1002. if(OPENCV_WARNINGS_ARE_ERRORS)
  1003. set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX")
  1004. endif()
  1005. @@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID)
  1006. set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}")
  1007. endif()
  1008. +include(cmake/OpenCVCompilerOptimizations.cmake)
  1009. +
  1010. +if(COMMAND ocv_compiler_optimization_options)
  1011. + ocv_compiler_optimization_options()
  1012. +endif()
  1013. +
  1014. +if(COMMAND ocv_compiler_optimization_options_finalize)
  1015. + ocv_compiler_optimization_options_finalize()
  1016. +endif()
  1017. +
  1018. # Add user supplied extra options (optimization, etc...)
  1019. # ==========================================================
  1020. set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options")
  1021. @@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
  1022. add_extra_compiler_option(-fvisibility-inlines-hidden)
  1023. endif()
  1024. +# TODO !!!!!
  1025. if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
  1026. if(ARM AND ENABLE_NEON)
  1027. set(FP16_OPTION "-mfpu=neon-fp16")
  1028. @@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
  1029. endif()
  1030. try_compile(__VALID_FP16
  1031. "${OpenCV_BINARY_DIR}"
  1032. - "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
  1033. + "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp"
  1034. COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
  1035. OUTPUT_VARIABLE TRY_OUT
  1036. )
  1037. diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake
  1038. index 2988979..477b910 100644
  1039. --- a/cmake/OpenCVGenHeaders.cmake
  1040. +++ b/cmake/OpenCVGenHeaders.cmake
  1041. @@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO
  1042. configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h")
  1043. install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev)
  1044. +# platform-specific config file
  1045. +ocv_compiler_optimization_fill_cpu_config()
  1046. +configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cv_cpu_config.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cv_cpu_config.h")
  1047. +
  1048. # ----------------------------------------------------------------------------
  1049. # opencv_modules.hpp based on actual modules list
  1050. # ----------------------------------------------------------------------------
  1051. diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
  1052. index 742a287..3e98bf5 100644
  1053. --- a/cmake/OpenCVModule.cmake
  1054. +++ b/cmake/OpenCVModule.cmake
  1055. @@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD
  1056. unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE)
  1057. unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE)
  1058. unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE)
  1059. + unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE)
  1060. endforeach()
  1061. # clean modules info which needs to be recalculated
  1062. @@ -641,6 +642,8 @@ macro(ocv_set_module_sources)
  1063. # use full paths for module to be independent from the module location
  1064. ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)
  1065. + ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
  1066. +
  1067. set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
  1068. set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
  1069. endmacro()
  1070. diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake
  1071. index 90437cb..45968e7 100644
  1072. --- a/cmake/OpenCVPCHSupport.cmake
  1073. +++ b/cmake/OpenCVPCHSupport.cmake
  1074. @@ -326,7 +326,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
  1075. get_target_property(_sources ${_targetName} SOURCES)
  1076. foreach(src ${_sources})
  1077. - if(NOT "${src}" MATCHES "\\.mm$")
  1078. + if(NOT "${src}" MATCHES "\\.mm$"
  1079. + AND NOT "${src}" MATCHES "\\.h$" AND NOT "${src}" MATCHES "\\.hpp$" # header files
  1080. + AND NOT "${src}" MATCHES "^\$" # CMake generator expressions
  1081. + )
  1082. get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
  1083. if(NOT oldProps)
  1084. set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"")
  1085. diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
  1086. index cdf257d..8a5ee28 100644
  1087. --- a/cmake/OpenCVUtils.cmake
  1088. +++ b/cmake/OpenCVUtils.cmake
  1089. @@ -37,7 +37,11 @@ endmacro()
  1090. macro(ocv_update VAR)
  1091. if(NOT DEFINED ${VAR})
  1092. - set(${VAR} ${ARGN})
  1093. + if("x${ARGN}" STREQUAL "x")
  1094. + set(${VAR} "")
  1095. + else()
  1096. + set(${VAR} ${ARGN})
  1097. + endif()
  1098. else()
  1099. #ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}")
  1100. endif()
  1101. @@ -151,8 +155,15 @@ function(ocv_append_target_property target prop)
  1102. endif()
  1103. endfunction()
  1104. +function(ocv_append_dependant_targets target) # Record extra targets (ARGN) as dependants of 'target' in the cache variable OPENCV_DEPENDANT_TARGETS_<target>
  1105. + #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})")
  1106. + _ocv_fix_target(target) # helper defined outside this hunk; presumably normalizes the target name - TODO confirm
  1107. + set(OPENCV_DEPENDANT_TARGETS_${target} "${OPENCV_DEPENDANT_TARGETS_${target}};${ARGN}" CACHE INTERNAL "" FORCE) # appended as a ;-separated list; stored in the cache because set() inside a function is otherwise local
  1108. +endfunction()
  1109. +
  1110. # adds include directories in such way that directories from the OpenCV source tree go first
  1111. function(ocv_target_include_directories target)
  1112. + #ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})")
  1113. _ocv_fix_target(target)
  1114. set(__params "")
  1115. if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND
  1116. @@ -173,6 +184,11 @@ function(ocv_target_include_directories target)
  1117. else()
  1118. if(TARGET ${target})
  1119. target_include_directories(${target} PRIVATE ${__params})
  1120. + if(OPENCV_DEPENDANT_TARGETS_${target})
  1121. + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
  1122. + target_include_directories(${t} PRIVATE ${__params})
  1123. + endforeach()
  1124. + endif()
  1125. else()
  1126. set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}")
  1127. set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "")
  1128. @@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX
  1129. )
  1130. MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
  1131. + set(_fname "${ARGN}")
  1132. if(NOT DEFINED ${RESULT})
  1133. - if("_${LANG}_" MATCHES "_CXX_")
  1134. + if(_fname)
  1135. + # nothing
  1136. + elseif("_${LANG}_" MATCHES "_CXX_")
  1137. set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx")
  1138. if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ")
  1139. FILE(WRITE "${_fname}" "int main() { return 0; }\n")
  1140. @@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
  1141. unset(_fname)
  1142. endif()
  1143. if(_fname)
  1144. - MESSAGE(STATUS "Performing Test ${RESULT}")
  1145. + if(NOT "x${ARGN}" STREQUAL "x")
  1146. + file(RELATIVE_PATH __msg "${CMAKE_SOURCE_DIR}" "${ARGN}")
  1147. + set(__msg " (check file: ${__msg})")
  1148. + else()
  1149. + set(__msg "")
  1150. + endif()
  1151. + MESSAGE(STATUS "Performing Test ${RESULT}${__msg}")
  1152. TRY_COMPILE(${RESULT}
  1153. "${CMAKE_BINARY_DIR}"
  1154. "${_fname}"
  1155. @@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
  1156. endif()
  1157. ENDMACRO()
  1158. -macro(ocv_check_flag_support lang flag varname)
  1159. +macro(ocv_check_flag_support lang flag varname base_options)
  1160. + if(CMAKE_BUILD_TYPE)
  1161. + set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
  1162. + endif()
  1163. +
  1164. if("_${lang}_" MATCHES "_CXX_")
  1165. set(_lang CXX)
  1166. elseif("_${lang}_" MATCHES "_C_")
  1167. @@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname)
  1168. string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}")
  1169. string(REGEX REPLACE " -|-|=| |\\." "_" ${varname} "${${varname}}")
  1170. - ocv_check_compiler_flag("${_lang}" "${ARGN} ${flag}" ${${varname}})
  1171. + ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN})
  1172. endmacro()
  1173. # turns off warnings
  1174. @@ -327,7 +356,7 @@ macro(ocv_warnings_disable)
  1175. string(REPLACE "${warning}" "" ${var} "${${var}}")
  1176. string(REPLACE "-W" "-Wno-" warning "${warning}")
  1177. endif()
  1178. - ocv_check_flag_support(${var} "${warning}" _varname)
  1179. + ocv_check_flag_support(${var} "${warning}" _varname "")
  1180. if(${_varname})
  1181. set(${var} "${${var}} ${warning}")
  1182. endif()
  1183. @@ -342,7 +371,7 @@ macro(ocv_warnings_disable)
  1184. else()
  1185. string(REPLACE "-wd" "-Qwd" warning "${warning}")
  1186. endif()
  1187. - ocv_check_flag_support(${var} "${warning}" _varname)
  1188. + ocv_check_flag_support(${var} "${warning}" _varname "")
  1189. if(${_varname})
  1190. set(${var} "${${var}} ${warning}")
  1191. endif()
  1192. @@ -357,7 +386,7 @@ macro(ocv_warnings_disable)
  1193. endmacro()
  1194. macro(add_apple_compiler_options the_module)
  1195. - ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS)
  1196. + ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS "")
  1197. if(HAVE_OBJC_EXCEPTIONS)
  1198. foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
  1199. if("${source}" MATCHES "\\.mm$")
  1200. @@ -892,6 +921,11 @@ function(_ocv_append_target_includes target)
  1201. if (TARGET ${target}_object)
  1202. target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
  1203. endif()
  1204. + if(OPENCV_DEPENDANT_TARGETS_${target})
  1205. + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
  1206. + target_include_directories(${t} PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
  1207. + endforeach()
  1208. + endif()
  1209. unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE)
  1210. endif()
  1211. endfunction()
  1212. diff --git a/cmake/checks/cpu_avx.cpp b/cmake/checks/cpu_avx.cpp
  1213. new file mode 100644
  1214. index 0000000..05536f4
  1215. --- /dev/null
  1216. +++ b/cmake/checks/cpu_avx.cpp
  1217. @@ -0,0 +1,9 @@
  1218. +#if !defined __AVX__ // MSVC supports this flag since MSVS 2013
  1219. +#error "__AVX__ define is missing"
  1220. +#endif
  1221. +#include <immintrin.h>
  1222. +void test()
  1223. +{
  1224. + __m256 a = _mm256_set1_ps(0.0f);
  1225. +}
  1226. +int main() { return 0; }
  1227. diff --git a/cmake/checks/cpu_avx2.cpp b/cmake/checks/cpu_avx2.cpp
  1228. new file mode 100644
  1229. index 0000000..3ab1143
  1230. --- /dev/null
  1231. +++ b/cmake/checks/cpu_avx2.cpp
  1232. @@ -0,0 +1,10 @@
  1233. +#if !defined __AVX2__ // MSVC supports this flag since MSVS 2013
  1234. +#error "__AVX2__ define is missing"
  1235. +#endif
  1236. +#include <immintrin.h>
  1237. +void test()
  1238. +{
  1239. + int data[8] = {0,0,0,0, 0,0,0,0};
  1240. + __m256i a = _mm256_loadu_si256((const __m256i *)data);
  1241. +}
  1242. +int main() { return 0; }
  1243. diff --git a/cmake/checks/cpu_avx512.cpp b/cmake/checks/cpu_avx512.cpp
  1244. new file mode 100644
  1245. index 0000000..d0898ab
  1246. --- /dev/null
  1247. +++ b/cmake/checks/cpu_avx512.cpp
  1248. @@ -0,0 +1,10 @@
  1249. +#if defined __AVX512__ || defined __AVX512F__
  1250. +#include <immintrin.h>
  1251. +void test()
  1252. +{
  1253. + __m512i zmm = _mm512_setzero_si512();
  1254. +}
  1255. +#else
  1256. +#error "AVX512 is not supported"
  1257. +#endif
  1258. +int main() { return 0; }
  1259. diff --git a/cmake/checks/cpu_fp16.cpp b/cmake/checks/cpu_fp16.cpp
  1260. new file mode 100644
  1261. index 0000000..6951f1c
  1262. --- /dev/null
  1263. +++ b/cmake/checks/cpu_fp16.cpp
  1264. @@ -0,0 +1,33 @@
  1265. +#include <stdio.h>
  1266. +
  1267. +#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__)
  1268. +#include <immintrin.h>
  1269. +int test()
  1270. +{
  1271. + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
  1272. + short dst[8];
  1273. + __m128 v_src = _mm_load_ps(src);
  1274. + __m128i v_dst = _mm_cvtps_ph(v_src, 0);
  1275. + _mm_storel_epi64((__m128i*)dst, v_dst);
  1276. + return (int)dst[0];
  1277. +}
  1278. +#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
  1279. +#include "arm_neon.h"
  1280. +int test()
  1281. +{
  1282. + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
  1283. + short dst[8];
  1284. + float32x4_t v_src = *(float32x4_t*)src;
  1285. + float16x4_t v_dst = vcvt_f16_f32(v_src);
  1286. + *(float16x4_t*)dst = v_dst;
  1287. + return (int)dst[0];
  1288. +}
  1289. +#else
  1290. +#error "FP16 is not supported"
  1291. +#endif
  1292. +
  1293. +int main()
  1294. +{
  1295. + printf("%d\n", test());
  1296. + return 0;
  1297. +}
  1298. diff --git a/cmake/checks/cpu_popcnt.cpp b/cmake/checks/cpu_popcnt.cpp
  1299. new file mode 100644
  1300. index 0000000..f55c9f3
  1301. --- /dev/null
  1302. +++ b/cmake/checks/cpu_popcnt.cpp
  1303. @@ -0,0 +1,8 @@
  1304. +#include <nmmintrin.h>
  1305. +#ifndef _MSC_VER
  1306. +#include <popcntintrin.h>
  1307. +#endif
  1308. +int main() {
  1309. + int i = _mm_popcnt_u32(1); // 32-bit form: the _u64 variant exists only on x86_64 and would fail this probe on 32-bit x86
  1310. + return 0;
  1311. +}
  1312. diff --git a/cmake/checks/cpu_sse.cpp b/cmake/checks/cpu_sse.cpp
  1313. new file mode 100644
  1314. index 0000000..c6269ac
  1315. --- /dev/null
  1316. +++ b/cmake/checks/cpu_sse.cpp
  1317. @@ -0,0 +1,2 @@
  1318. +#include <xmmintrin.h>
  1319. +int main() { return 0; }
  1320. diff --git a/cmake/checks/cpu_sse2.cpp b/cmake/checks/cpu_sse2.cpp
  1321. new file mode 100644
  1322. index 0000000..68a69f8
  1323. --- /dev/null
  1324. +++ b/cmake/checks/cpu_sse2.cpp
  1325. @@ -0,0 +1,2 @@
  1326. +#include <emmintrin.h>
  1327. +int main() { return 0; }
  1328. diff --git a/cmake/checks/cpu_sse3.cpp b/cmake/checks/cpu_sse3.cpp
  1329. new file mode 100644
  1330. index 0000000..98ce219
  1331. --- /dev/null
  1332. +++ b/cmake/checks/cpu_sse3.cpp
  1333. @@ -0,0 +1,7 @@
  1334. +#include <pmmintrin.h>
  1335. +int main() {
  1336. + __m128 u, v;
  1337. + u = _mm_set1_ps(0.0f);
  1338. + v = _mm_moveldup_ps(u); // SSE3
  1339. + return 0;
  1340. +}
  1341. diff --git a/cmake/checks/cpu_sse41.cpp b/cmake/checks/cpu_sse41.cpp
  1342. new file mode 100644
  1343. index 0000000..ddd835b
  1344. --- /dev/null
  1345. +++ b/cmake/checks/cpu_sse41.cpp
  1346. @@ -0,0 +1,6 @@
  1347. +#include <smmintrin.h>
  1348. +int main() {
  1349. + __m128i a = _mm_setzero_si128(), b = _mm_setzero_si128();
  1350. + __m128i c = _mm_packus_epi32(a, b);
  1351. + return 0;
  1352. +}
  1353. diff --git a/cmake/checks/cpu_sse42.cpp b/cmake/checks/cpu_sse42.cpp
  1354. new file mode 100644
  1355. index 0000000..56f5665
  1356. --- /dev/null
  1357. +++ b/cmake/checks/cpu_sse42.cpp
  1358. @@ -0,0 +1,5 @@
  1359. +#include <nmmintrin.h>
  1360. +int main() {
  1361. + unsigned int res = _mm_crc32_u8(1, 2); // CRC32 is an SSE4.2 instruction; POPCNT is probed separately by cpu_popcnt.cpp
  1362. + return 0;
  1363. +}
  1364. diff --git a/cmake/checks/cpu_ssse3.cpp b/cmake/checks/cpu_ssse3.cpp
  1365. new file mode 100644
  1366. index 0000000..e583199
  1367. --- /dev/null
  1368. +++ b/cmake/checks/cpu_ssse3.cpp
  1369. @@ -0,0 +1,7 @@
  1370. +#include <tmmintrin.h>
  1371. +const double v = 0;
  1372. +int main() {
  1373. + __m128i a = _mm_setzero_si128();
  1374. + __m128i b = _mm_abs_epi32(a);
  1375. + return 0;
  1376. +}
  1377. diff --git a/cmake/checks/fp16.cpp b/cmake/checks/fp16.cpp
  1378. deleted file mode 100644
  1379. index c77c844..0000000
  1380. --- a/cmake/checks/fp16.cpp
  1381. +++ /dev/null
  1382. @@ -1,33 +0,0 @@
  1383. -#include <stdio.h>
  1384. -
  1385. -#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)
  1386. -#include <immintrin.h>
  1387. -int test()
  1388. -{
  1389. - const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
  1390. - short dst[8];
  1391. - __m128 v_src = _mm_load_ps(src);
  1392. - __m128i v_dst = _mm_cvtps_ph(v_src, 0);
  1393. - _mm_storel_epi64((__m128i*)dst, v_dst);
  1394. - return (int)dst[0];
  1395. -}
  1396. -#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
  1397. -#include "arm_neon.h"
  1398. -int test()
  1399. -{
  1400. - const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
  1401. - short dst[8];
  1402. - float32x4_t v_src = *(float32x4_t*)src;
  1403. - float16x4_t v_dst = vcvt_f16_f32(v_src);
  1404. - *(float16x4_t*)dst = v_dst;
  1405. - return (int)dst[0];
  1406. -}
  1407. -#else
  1408. -#error "FP16 is not supported"
  1409. -#endif
  1410. -
  1411. -int main()
  1412. -{
  1413. - printf("%d\n", test());
  1414. - return 0;
  1415. -}
  1416. diff --git a/cmake/templates/cv_cpu_config.h.in b/cmake/templates/cv_cpu_config.h.in
  1417. new file mode 100644
  1418. index 0000000..27b2731
  1419. --- /dev/null
  1420. +++ b/cmake/templates/cv_cpu_config.h.in
  1421. @@ -0,0 +1,5 @@
  1422. +// OpenCV CPU baseline features
  1423. +@OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE@
  1424. +
  1425. +// OpenCV supported CPU dispatched features
  1426. +@OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE@
  1427. diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in
  1428. index 05add9e..658d12c 100644
  1429. --- a/cmake/templates/cvconfig.h.in
  1430. +++ b/cmake/templates/cvconfig.h.in
  1431. @@ -1,6 +1,15 @@
  1432. +#ifndef OPENCV_CVCONFIG_H_INCLUDED
  1433. +#define OPENCV_CVCONFIG_H_INCLUDED
  1434. +
  1435. /* OpenCV compiled as static or dynamic libs */
  1436. #cmakedefine BUILD_SHARED_LIBS
  1437. +/* OpenCV intrinsics optimized code */
  1438. +#cmakedefine CV_ENABLE_INTRINSICS
  1439. +
  1440. +/* OpenCV additional optimized code */
  1441. +#cmakedefine CV_DISABLE_OPTIMIZATION
  1442. +
  1443. /* Compile for 'real' NVIDIA GPU architectures */
  1444. #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
  1445. @@ -206,3 +215,7 @@
  1446. /* OpenVX */
  1447. #cmakedefine HAVE_OPENVX
  1448. +
  1449. +
  1450. +
  1451. +#endif // OPENCV_CVCONFIG_H_INCLUDED
  1452. diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
  1453. new file mode 100644
  1454. index 0000000..9a8537f
  1455. --- /dev/null
  1456. +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
  1457. @@ -0,0 +1,166 @@
  1458. +// This file is part of OpenCV project.
  1459. +// It is subject to the license terms in the LICENSE file found in the top-level directory
  1460. +// of this distribution and at http://opencv.org/license.html.
  1461. +
  1462. +#if defined __OPENCV_BUILD \
  1463. +
  1464. +#include "cv_cpu_config.h"
  1465. +#include "cv_cpu_helper.h"
  1466. +
  1467. +#if defined CV_ENABLE_INTRINSICS \
  1468. + && !defined CV_DISABLE_OPTIMIZATION \
  1469. + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
  1470. +
  1471. +#ifdef CV_CPU_COMPILE_SSE2
  1472. +# include <emmintrin.h>
  1473. +# define CV_MMX 1
  1474. +# define CV_SSE 1
  1475. +# define CV_SSE2 1
  1476. +#endif
  1477. +#ifdef CV_CPU_COMPILE_SSE3
  1478. +# include <pmmintrin.h>
  1479. +# define CV_SSE3 1
  1480. +#endif
  1481. +#ifdef CV_CPU_COMPILE_SSSE3
  1482. +# include <tmmintrin.h>
  1483. +# define CV_SSSE3 1
  1484. +#endif
  1485. +#ifdef CV_CPU_COMPILE_SSE4_1
  1486. +# include <smmintrin.h>
  1487. +# define CV_SSE4_1 1
  1488. +#endif
  1489. +#ifdef CV_CPU_COMPILE_SSE4_2
  1490. +# include <nmmintrin.h>
  1491. +# define CV_SSE4_2 1
  1492. +#endif
  1493. +#ifdef CV_CPU_COMPILE_POPCNT
  1494. +# ifdef _MSC_VER
  1495. +# include <nmmintrin.h>
  1496. +# if defined(_M_X64)
  1497. +# define CV_POPCNT_U64 _mm_popcnt_u64
  1498. +# endif
  1499. +# define CV_POPCNT_U32 _mm_popcnt_u32
  1500. +# else
  1501. +# include <popcntintrin.h>
  1502. +# if defined(__x86_64__)
  1503. +# define CV_POPCNT_U64 __builtin_popcountll
  1504. +# endif
  1505. +# define CV_POPCNT_U32 __builtin_popcount
  1506. +# endif
  1507. +# define CV_POPCNT 1
  1508. +#endif
  1509. +#ifdef CV_CPU_COMPILE_AVX
  1510. +# include <immintrin.h>
  1511. +# define CV_AVX 1
  1512. +#endif
  1513. +#ifdef CV_CPU_COMPILE_AVX2
  1514. +# include <immintrin.h>
  1515. +# define CV_AVX2 1
  1516. +#endif
  1517. +#ifdef CV_CPU_COMPILE_FMA3
  1518. +# define CV_FMA3 1
  1519. +#endif
  1520. +
  1521. +#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
  1522. +# include <Intrin.h>
  1523. +# include <arm_neon.h>
  1524. +# define CV_NEON 1
  1525. +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
  1526. +# include <arm_neon.h>
  1527. +# define CV_NEON 1
  1528. +#endif
  1529. +
  1530. +#if defined(__ARM_NEON__) || defined(__aarch64__)
  1531. +# include <arm_neon.h>
  1532. +#endif
  1533. +
  1534. +#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
  1535. +
  1536. +#endif // __OPENCV_BUILD
  1537. +
  1538. +
  1539. +
  1540. +#if !defined __OPENCV_BUILD // Compatibility code
  1541. +
  1542. +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
  1543. +# include <emmintrin.h>
  1544. +# define CV_MMX 1
  1545. +# define CV_SSE 1
  1546. +# define CV_SSE2 1
  1547. +#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM)
  1548. +# include <Intrin.h>
  1549. +# include <arm_neon.h>
  1550. +# define CV_NEON 1
  1551. +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
  1552. +# include <arm_neon.h>
  1553. +# define CV_NEON 1
  1554. +#endif
  1555. +
  1556. +#endif // !__OPENCV_BUILD (Compatibility code)
  1557. +
  1558. +
  1559. +
  1560. +#ifndef CV_MMX
  1561. +# define CV_MMX 0
  1562. +#endif
  1563. +#ifndef CV_SSE
  1564. +# define CV_SSE 0
  1565. +#endif
  1566. +#ifndef CV_SSE2
  1567. +# define CV_SSE2 0
  1568. +#endif
  1569. +#ifndef CV_SSE3
  1570. +# define CV_SSE3 0
  1571. +#endif
  1572. +#ifndef CV_SSSE3
  1573. +# define CV_SSSE3 0
  1574. +#endif
  1575. +#ifndef CV_SSE4_1
  1576. +# define CV_SSE4_1 0
  1577. +#endif
  1578. +#ifndef CV_SSE4_2
  1579. +# define CV_SSE4_2 0
  1580. +#endif
  1581. +#ifndef CV_POPCNT
  1582. +# define CV_POPCNT 0
  1583. +#endif
  1584. +#ifndef CV_AVX
  1585. +# define CV_AVX 0
  1586. +#endif
  1587. +#ifndef CV_AVX2
  1588. +# define CV_AVX2 0
  1589. +#endif
  1590. +#ifndef CV_FMA3
  1591. +# define CV_FMA3 0
  1592. +#endif
  1593. +#ifndef CV_AVX_512F
  1594. +# define CV_AVX_512F 0
  1595. +#endif
  1596. +#ifndef CV_AVX_512BW
  1597. +# define CV_AVX_512BW 0
  1598. +#endif
  1599. +#ifndef CV_AVX_512CD
  1600. +# define CV_AVX_512CD 0
  1601. +#endif
  1602. +#ifndef CV_AVX_512DQ
  1603. +# define CV_AVX_512DQ 0
  1604. +#endif
  1605. +#ifndef CV_AVX_512ER
  1606. +# define CV_AVX_512ER 0
  1607. +#endif
  1608. +#ifndef CV_AVX_512IFMA512
  1609. +# define CV_AVX_512IFMA512 0
  1610. +#endif
  1611. +#ifndef CV_AVX_512PF
  1612. +# define CV_AVX_512PF 0
  1613. +#endif
  1614. +#ifndef CV_AVX_512VBMI
  1615. +# define CV_AVX_512VBMI 0
  1616. +#endif
  1617. +#ifndef CV_AVX_512VL
  1618. +# define CV_AVX_512VL 0
  1619. +#endif
  1620. +
  1621. +#ifndef CV_NEON
  1622. +# define CV_NEON 0
  1623. +#endif
  1624. diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h
  1625. new file mode 100644
  1626. index 0000000..cb755d6
  1627. --- /dev/null
  1628. +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h
  1629. @@ -0,0 +1,133 @@
  1630. +// AUTOGENERATED, DO NOT EDIT
  1631. +
  1632. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
  1633. +# define CV_CPU_HAS_SUPPORT_SSE 1
  1634. +# define CV_CPU_CALL_SSE(...) return __VA_ARGS__
  1635. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
  1636. +# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
  1637. +# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
  1638. +#else
  1639. +# define CV_CPU_HAS_SUPPORT_SSE 0
  1640. +# define CV_CPU_CALL_SSE(...)
  1641. +#endif
  1642. +
  1643. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
  1644. +# define CV_CPU_HAS_SUPPORT_SSE2 1
  1645. +# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__
  1646. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
  1647. +# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
  1648. +# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
  1649. +#else
  1650. +# define CV_CPU_HAS_SUPPORT_SSE2 0
  1651. +# define CV_CPU_CALL_SSE2(...)
  1652. +#endif
  1653. +
  1654. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
  1655. +# define CV_CPU_HAS_SUPPORT_SSE3 1
  1656. +# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__
  1657. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
  1658. +# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
  1659. +# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
  1660. +#else
  1661. +# define CV_CPU_HAS_SUPPORT_SSE3 0
  1662. +# define CV_CPU_CALL_SSE3(...)
  1663. +#endif
  1664. +
  1665. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
  1666. +# define CV_CPU_HAS_SUPPORT_SSSE3 1
  1667. +# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__
  1668. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
  1669. +# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
  1670. +# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
  1671. +#else
  1672. +# define CV_CPU_HAS_SUPPORT_SSSE3 0
  1673. +# define CV_CPU_CALL_SSSE3(...)
  1674. +#endif
  1675. +
  1676. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
  1677. +# define CV_CPU_HAS_SUPPORT_SSE4_1 1
  1678. +# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__
  1679. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
  1680. +# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
  1681. +# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
  1682. +#else
  1683. +# define CV_CPU_HAS_SUPPORT_SSE4_1 0
  1684. +# define CV_CPU_CALL_SSE4_1(...)
  1685. +#endif
  1686. +
  1687. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
  1688. +# define CV_CPU_HAS_SUPPORT_SSE4_2 1
  1689. +# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__
  1690. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
  1691. +# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
  1692. +# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
  1693. +#else
  1694. +# define CV_CPU_HAS_SUPPORT_SSE4_2 0
  1695. +# define CV_CPU_CALL_SSE4_2(...)
  1696. +#endif
  1697. +
  1698. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
  1699. +# define CV_CPU_HAS_SUPPORT_POPCNT 1
  1700. +# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__
  1701. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
  1702. +# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
  1703. +# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
  1704. +#else
  1705. +# define CV_CPU_HAS_SUPPORT_POPCNT 0
  1706. +# define CV_CPU_CALL_POPCNT(...)
  1707. +#endif
  1708. +
  1709. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
  1710. +# define CV_CPU_HAS_SUPPORT_AVX 1
  1711. +# define CV_CPU_CALL_AVX(...) return __VA_ARGS__
  1712. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
  1713. +# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
  1714. +# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
  1715. +#else
  1716. +# define CV_CPU_HAS_SUPPORT_AVX 0
  1717. +# define CV_CPU_CALL_AVX(...)
  1718. +#endif
  1719. +
  1720. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
  1721. +# define CV_CPU_HAS_SUPPORT_FP16 1
  1722. +# define CV_CPU_CALL_FP16(...) return __VA_ARGS__
  1723. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
  1724. +# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
  1725. +# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
  1726. +#else
  1727. +# define CV_CPU_HAS_SUPPORT_FP16 0
  1728. +# define CV_CPU_CALL_FP16(...)
  1729. +#endif
  1730. +
  1731. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
  1732. +# define CV_CPU_HAS_SUPPORT_AVX2 1
  1733. +# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__
  1734. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
  1735. +# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
  1736. +# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
  1737. +#else
  1738. +# define CV_CPU_HAS_SUPPORT_AVX2 0
  1739. +# define CV_CPU_CALL_AVX2(...)
  1740. +#endif
  1741. +
  1742. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
  1743. +# define CV_CPU_HAS_SUPPORT_FMA3 1
  1744. +# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__
  1745. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
  1746. +# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
  1747. +# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
  1748. +#else
  1749. +# define CV_CPU_HAS_SUPPORT_FMA3 0
  1750. +# define CV_CPU_CALL_FMA3(...)
  1751. +#endif
  1752. +
  1753. +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
  1754. +# define CV_CPU_HAS_SUPPORT_NEON 1
  1755. +# define CV_CPU_CALL_NEON(...) return __VA_ARGS__
  1756. +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
  1757. +# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
  1758. +# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
  1759. +#else
  1760. +# define CV_CPU_HAS_SUPPORT_NEON 0
  1761. +# define CV_CPU_CALL_NEON(...)
  1762. +#endif
  1763. diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
  1764. index 699b166..0a46e02 100644
  1765. --- a/modules/core/include/opencv2/core/cvdef.h
  1766. +++ b/modules/core/include/opencv2/core/cvdef.h
  1767. @@ -48,6 +48,10 @@
  1768. //! @addtogroup core_utils
  1769. //! @{
  1770. +#ifdef __OPENCV_BUILD
  1771. +#include "cvconfig.h"
  1772. +#endif
  1773. +
  1774. #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
  1775. # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
  1776. #endif
  1777. @@ -59,10 +63,6 @@
  1778. #undef abs
  1779. #undef Complex
  1780. -#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
  1781. -# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
  1782. -#endif
  1783. -
  1784. #include <limits.h>
  1785. #include "opencv2/core/hal/interface.h"
  1786. @@ -88,7 +88,7 @@
  1787. # endif
  1788. #endif
  1789. -#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
  1790. +#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
  1791. # define CV_ENABLE_UNROLLED 0
  1792. #else
  1793. # define CV_ENABLE_UNROLLED 1
  1794. @@ -161,142 +161,9 @@ enum CpuFeatures {
  1795. CPU_NEON = 100
  1796. };
  1797. -// do not include SSE/AVX/NEON headers for NVCC compiler
  1798. -#ifndef __CUDACC__
  1799. -
  1800. -#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
  1801. -# include <emmintrin.h>
  1802. -# define CV_MMX 1
  1803. -# define CV_SSE 1
  1804. -# define CV_SSE2 1
  1805. -# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
  1806. -# include <pmmintrin.h>
  1807. -# define CV_SSE3 1
  1808. -# endif
  1809. -# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
  1810. -# include <tmmintrin.h>
  1811. -# define CV_SSSE3 1
  1812. -# endif
  1813. -# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
  1814. -# include <smmintrin.h>
  1815. -# define CV_SSE4_1 1
  1816. -# endif
  1817. -# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
  1818. -# include <nmmintrin.h>
  1819. -# define CV_SSE4_2 1
  1820. -# endif
  1821. -# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
  1822. -# ifdef _MSC_VER
  1823. -# include <nmmintrin.h>
  1824. -# else
  1825. -# include <popcntintrin.h>
  1826. -# endif
  1827. -# define CV_POPCNT 1
  1828. -# endif
  1829. -# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
  1830. -// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
  1831. -// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
  1832. -# include <immintrin.h>
  1833. -# define CV_AVX 1
  1834. -# if defined(_XCR_XFEATURE_ENABLED_MASK)
  1835. -# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
  1836. -# else
  1837. -# define __xgetbv() 0
  1838. -# endif
  1839. -# endif
  1840. -# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
  1841. -# include <immintrin.h>
  1842. -# define CV_AVX2 1
  1843. -# if defined __FMA__
  1844. -# define CV_FMA3 1
  1845. -# endif
  1846. -# endif
  1847. -#endif
  1848. -
  1849. -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
  1850. -# include <Intrin.h>
  1851. -# include <arm_neon.h>
  1852. -# define CV_NEON 1
  1853. -# define CPU_HAS_NEON_FEATURE (true)
  1854. -#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
  1855. -# include <arm_neon.h>
  1856. -# define CV_NEON 1
  1857. -#endif
  1858. -
  1859. -#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
  1860. -# define CV_VFP 1
  1861. -#endif
  1862. -
  1863. -#endif // __CUDACC__
  1864. -
  1865. -#ifndef CV_POPCNT
  1866. -#define CV_POPCNT 0
  1867. -#endif
  1868. -#ifndef CV_MMX
  1869. -# define CV_MMX 0
  1870. -#endif
  1871. -#ifndef CV_SSE
  1872. -# define CV_SSE 0
  1873. -#endif
  1874. -#ifndef CV_SSE2
  1875. -# define CV_SSE2 0
  1876. -#endif
  1877. -#ifndef CV_SSE3
  1878. -# define CV_SSE3 0
  1879. -#endif
  1880. -#ifndef CV_SSSE3
  1881. -# define CV_SSSE3 0
  1882. -#endif
  1883. -#ifndef CV_SSE4_1
  1884. -# define CV_SSE4_1 0
  1885. -#endif
  1886. -#ifndef CV_SSE4_2
  1887. -# define CV_SSE4_2 0
  1888. -#endif
  1889. -#ifndef CV_AVX
  1890. -# define CV_AVX 0
  1891. -#endif
  1892. -#ifndef CV_AVX2
  1893. -# define CV_AVX2 0
  1894. -#endif
  1895. -#ifndef CV_FMA3
  1896. -# define CV_FMA3 0
  1897. -#endif
  1898. -#ifndef CV_AVX_512F
  1899. -# define CV_AVX_512F 0
  1900. -#endif
  1901. -#ifndef CV_AVX_512BW
  1902. -# define CV_AVX_512BW 0
  1903. -#endif
  1904. -#ifndef CV_AVX_512CD
  1905. -# define CV_AVX_512CD 0
  1906. -#endif
  1907. -#ifndef CV_AVX_512DQ
  1908. -# define CV_AVX_512DQ 0
  1909. -#endif
  1910. -#ifndef CV_AVX_512ER
  1911. -# define CV_AVX_512ER 0
  1912. -#endif
  1913. -#ifndef CV_AVX_512IFMA512
  1914. -# define CV_AVX_512IFMA512 0
  1915. -#endif
  1916. -#ifndef CV_AVX_512PF
  1917. -# define CV_AVX_512PF 0
  1918. -#endif
  1919. -#ifndef CV_AVX_512VBMI
  1920. -# define CV_AVX_512VBMI 0
  1921. -#endif
  1922. -#ifndef CV_AVX_512VL
  1923. -# define CV_AVX_512VL 0
  1924. -#endif
  1925. -#ifndef CV_NEON
  1926. -# define CV_NEON 0
  1927. -#endif
  1928. +#include "cv_cpu_dispatch.h"
  1929. -#ifndef CV_VFP
  1930. -# define CV_VFP 0
  1931. -#endif
  1932. /* fundamental constants */
  1933. #define CV_PI 3.1415926535897932384626433832795
  1934. diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp
  1935. index c76936a..31c1062 100644
  1936. --- a/modules/core/include/opencv2/core/fast_math.hpp
  1937. +++ b/modules/core/include/opencv2/core/fast_math.hpp
  1938. @@ -47,6 +47,12 @@
  1939. #include "opencv2/core/cvdef.h"
  1940. +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
  1941. + && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
  1942. +#include <emmintrin.h>
  1943. +#endif
  1944. +
  1945. +
  1946. //! @addtogroup core_utils
  1947. //! @{
  1948. @@ -66,7 +72,7 @@
  1949. # include "tegra_round.hpp"
  1950. #endif
  1951. -#if CV_VFP
  1952. +#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
  1953. // 1. general scheme
  1954. #define ARM_ROUND(_value, _asm_string) \
  1955. int res; \
  1956. @@ -82,7 +88,7 @@
  1957. #endif
  1958. // 3. version for float
  1959. #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
  1960. -#endif // CV_VFP
  1961. +#endif
  1962. /** @brief Rounds floating-point number to the nearest integer
  1963. @@ -93,7 +99,7 @@ CV_INLINE int
  1964. cvRound( double value )
  1965. {
  1966. #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
  1967. - && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
  1968. + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
  1969. __m128d t = _mm_set_sd( value );
  1970. return _mm_cvtsd_si32(t);
  1971. #elif defined _MSC_VER && defined _M_IX86
  1972. @@ -108,7 +114,7 @@ cvRound( double value )
  1973. defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
  1974. TEGRA_ROUND_DBL(value);
  1975. #elif defined CV_ICC || defined __GNUC__
  1976. -# if CV_VFP
  1977. +# if defined ARM_ROUND_DBL
  1978. ARM_ROUND_DBL(value);
  1979. # else
  1980. return (int)lrint(value);
  1981. @@ -130,18 +136,8 @@ cvRound( double value )
  1982. */
  1983. CV_INLINE int cvFloor( double value )
  1984. {
  1985. -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
  1986. - __m128d t = _mm_set_sd( value );
  1987. - int i = _mm_cvtsd_si32(t);
  1988. - return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
  1989. -#elif defined __GNUC__
  1990. int i = (int)value;
  1991. return i - (i > value);
  1992. -#else
  1993. - int i = cvRound(value);
  1994. - float diff = (float)(value - i);
  1995. - return i - (diff < 0);
  1996. -#endif
  1997. }
  1998. /** @brief Rounds floating-point number to the nearest integer not smaller than the original.
  1999. @@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value )
  2000. */
  2001. CV_INLINE int cvCeil( double value )
  2002. {
  2003. -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
  2004. - __m128d t = _mm_set_sd( value );
  2005. - int i = _mm_cvtsd_si32(t);
  2006. - return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
  2007. -#elif defined __GNUC__
  2008. int i = (int)value;
  2009. return i + (i < value);
  2010. -#else
  2011. - int i = cvRound(value);
  2012. - float diff = (float)(i - value);
  2013. - return i + (diff < 0);
  2014. -#endif
  2015. }
  2016. /** @brief Determines if the argument is Not A Number.
  2017. @@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value )
  2018. /** @overload */
  2019. CV_INLINE int cvRound(float value)
  2020. {
  2021. -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
  2022. - defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
  2023. +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
  2024. + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
  2025. __m128 t = _mm_set_ss( value );
  2026. return _mm_cvtss_si32(t);
  2027. #elif defined _MSC_VER && defined _M_IX86
  2028. @@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value)
  2029. defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
  2030. TEGRA_ROUND_FLT(value);
  2031. #elif defined CV_ICC || defined __GNUC__
  2032. -# if CV_VFP
  2033. +# if defined ARM_ROUND_FLT
  2034. ARM_ROUND_FLT(value);
  2035. # else
  2036. return (int)lrintf(value);
  2037. @@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value )
  2038. /** @overload */
  2039. CV_INLINE int cvFloor( float value )
  2040. {
  2041. -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
  2042. - __m128 t = _mm_set_ss( value );
  2043. - int i = _mm_cvtss_si32(t);
  2044. - return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
  2045. -#elif defined __GNUC__
  2046. int i = (int)value;
  2047. return i - (i > value);
  2048. -#else
  2049. - int i = cvRound(value);
  2050. - float diff = (float)(value - i);
  2051. - return i - (diff < 0);
  2052. -#endif
  2053. }
  2054. /** @overload */
  2055. @@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value )
  2056. /** @overload */
  2057. CV_INLINE int cvCeil( float value )
  2058. {
  2059. -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
  2060. - __m128 t = _mm_set_ss( value );
  2061. - int i = _mm_cvtss_si32(t);
  2062. - return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
  2063. -#elif defined __GNUC__
  2064. int i = (int)value;
  2065. return i + (i < value);
  2066. -#else
  2067. - int i = cvRound(value);
  2068. - float diff = (float)(i - value);
  2069. - return i + (diff < 0);
  2070. -#endif
  2071. }
  2072. /** @overload */
  2073. diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
  2074. index 3c8f39d..a983838 100644
  2075. --- a/modules/core/src/system.cpp
  2076. +++ b/modules/core/src/system.cpp
  2077. @@ -237,24 +237,81 @@ void Exception::formatMessage()
  2078. msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str());
  2079. }
  2080. +static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };
  2081. +
  2082. +static const char* getHWFeatureName(int id)
  2083. +{
  2084. + return (id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL;
  2085. +}
  2086. +static const char* getHWFeatureNameSafe(int id)
  2087. +{
  2088. + const char* name = getHWFeatureName(id);
  2089. + return name ? name : "Unknown feature";
  2090. +}
  2091. +
  2092. struct HWFeatures
  2093. {
  2094. enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
  2095. - HWFeatures(void)
  2096. + HWFeatures(bool run_initialize = false)
  2097. {
  2098. - memset( have, 0, sizeof(have) );
  2099. - x86_family = 0;
  2100. + memset( have, 0, sizeof(have[0]) * MAX_FEATURE );
  2101. + if (run_initialize)
  2102. + initialize();
  2103. }
  2104. - static HWFeatures initialize(void)
  2105. + static void initializeNames()
  2106. {
  2107. - HWFeatures f;
  2108. + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
  2109. + {
  2110. + g_hwFeatureNames[i] = 0;
  2111. + }
  2112. + g_hwFeatureNames[CPU_MMX] = "MMX";
  2113. + g_hwFeatureNames[CPU_SSE] = "SSE";
  2114. + g_hwFeatureNames[CPU_SSE2] = "SSE2";
  2115. + g_hwFeatureNames[CPU_SSE3] = "SSE3";
  2116. + g_hwFeatureNames[CPU_SSSE3] = "SSSE3";
  2117. + g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1";
  2118. + g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2";
  2119. + g_hwFeatureNames[CPU_POPCNT] = "POPCNT";
  2120. + g_hwFeatureNames[CPU_FP16] = "FP16";
  2121. + g_hwFeatureNames[CPU_AVX] = "AVX";
  2122. + g_hwFeatureNames[CPU_AVX2] = "AVX2";
  2123. + g_hwFeatureNames[CPU_FMA3] = "FMA3";
  2124. +
  2125. + g_hwFeatureNames[CPU_AVX_512F] = "AVX512F";
  2126. + g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW";
  2127. + g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD";
  2128. + g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ";
  2129. + g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER";
  2130. + g_hwFeatureNames[CPU_AVX_512IFMA512] = "AVX512IFMA";
  2131. + g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF";
  2132. + g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI";
  2133. + g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
  2134. +
  2135. + g_hwFeatureNames[CPU_NEON] = "NEON";
  2136. + }
  2137. +
  2138. + void initialize(void)
  2139. + {
  2140. +#ifndef WINRT
  2141. + if (getenv("OPENCV_DUMP_CONFIG"))
  2142. + {
  2143. + fprintf(stderr, "\nOpenCV build configuration is:\n%s\n",
  2144. + cv::getBuildInformation().c_str());
  2145. + }
  2146. +#endif
  2147. +
  2148. + initializeNames();
  2149. +
  2150. int cpuid_data[4] = { 0, 0, 0, 0 };
  2151. + int cpuid_data_ex[4] = { 0, 0, 0, 0 };
  2152. #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
  2153. + #define OPENCV_HAVE_X86_CPUID 1
  2154. __cpuid(cpuid_data, 1);
  2155. #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
  2156. + #define OPENCV_HAVE_X86_CPUID 1
  2157. #ifdef __x86_64__
  2158. asm __volatile__
  2159. (
  2160. @@ -278,33 +335,36 @@ struct HWFeatures
  2161. #endif
  2162. #endif
  2163. - f.x86_family = (cpuid_data[0] >> 8) & 15;
  2164. - if( f.x86_family >= 6 )
  2165. + #ifdef OPENCV_HAVE_X86_CPUID
  2166. + int x86_family = (cpuid_data[0] >> 8) & 15;
  2167. + if( x86_family >= 6 )
  2168. {
  2169. - f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
  2170. - f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
  2171. - f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
  2172. - f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
  2173. - f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
  2174. - f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
  2175. - f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
  2176. - f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
  2177. - f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
  2178. - f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
  2179. - f.have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
  2180. + have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0;
  2181. + have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
  2182. + have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
  2183. + have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
  2184. + have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
  2185. + have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
  2186. + have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
  2187. + have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
  2188. + have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
  2189. + have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0;
  2190. + have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
  2191. // make the second call to the cpuid command in order to get
  2192. // information about extended features like AVX2
  2193. #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
  2194. - __cpuidex(cpuid_data, 7, 0);
  2195. + #define OPENCV_HAVE_X86_CPUID_EX 1
  2196. + __cpuidex(cpuid_data_ex, 7, 0);
  2197. #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
  2198. + #define OPENCV_HAVE_X86_CPUID_EX 1
  2199. #ifdef __x86_64__
  2200. asm __volatile__
  2201. (
  2202. "movl $7, %%eax\n\t"
  2203. "movl $0, %%ecx\n\t"
  2204. "cpuid\n\t"
  2205. - :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
  2206. + :[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3])
  2207. :
  2208. : "cc"
  2209. );
  2210. @@ -317,29 +377,76 @@ struct HWFeatures
  2211. "cpuid\n\t"
  2212. "movl %%ebx, %0\n\t"
  2213. "popl %%ebx\n\t"
  2214. - : "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
  2215. + : "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2])
  2216. :
  2217. : "cc"
  2218. );
  2219. #endif
  2220. #endif
  2221. - f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
  2222. -
  2223. - f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
  2224. - f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
  2225. - f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
  2226. - f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
  2227. - f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
  2228. - f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
  2229. - f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
  2230. - f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
  2231. - f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
  2232. +
  2233. + #ifdef OPENCV_HAVE_X86_CPUID_EX
  2234. + have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0;
  2235. +
  2236. + have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0;
  2237. + have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0;
  2238. + have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0;
  2239. + have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0;
  2240. + have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0;
  2241. + have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0;
  2242. + have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0;
  2243. + have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0;
  2244. + have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0;
  2245. + #else
  2246. + CV_UNUSED(cpuid_data_ex);
  2247. + #endif
  2248. +
  2249. + bool have_AVX_OS_support = true;
  2250. + bool have_AVX512_OS_support = true;
  2251. + if (!(cpuid_data[2] & (1<<27)))
  2252. + have_AVX_OS_support = false; // OSXSAVE bit is clear: OS has not enabled XSAVE/XRSTOR, so AVX state is not preserved
  2253. + else
  2254. + {
  2255. + int xcr0 = 0;
  2256. + #ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h
  2257. + xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  2258. + #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
  2259. + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
  2260. + #endif
  2261. + if ((xcr0 & 0x6) != 0x6)
  2262. + have_AVX_OS_support = false; // YMM registers
  2263. + if ((xcr0 & 0xe6) != 0xe6)
  2264. + have_AVX512_OS_support = false; // ZMM registers
  2265. + }
  2266. +
  2267. + if (!have_AVX_OS_support)
  2268. + {
  2269. + have[CV_CPU_AVX] = false;
  2270. + have[CV_CPU_FP16] = false;
  2271. + have[CV_CPU_AVX2] = false;
  2272. + have[CV_CPU_FMA3] = false;
  2273. + }
  2274. + if (!have_AVX_OS_support || !have_AVX512_OS_support)
  2275. + {
  2276. + have[CV_CPU_AVX_512F] = false;
  2277. + have[CV_CPU_AVX_512BW] = false;
  2278. + have[CV_CPU_AVX_512CD] = false;
  2279. + have[CV_CPU_AVX_512DQ] = false;
  2280. + have[CV_CPU_AVX_512ER] = false;
  2281. + have[CV_CPU_AVX_512IFMA512] = false;
  2282. + have[CV_CPU_AVX_512PF] = false;
  2283. + have[CV_CPU_AVX_512VBMI] = false;
  2284. + have[CV_CPU_AVX_512VL] = false;
  2285. + }
  2286. }
  2287. + #else
  2288. + CV_UNUSED(cpuid_data);
  2289. + CV_UNUSED(cpuid_data_ex);
  2290. + #endif // OPENCV_HAVE_X86_CPUID
  2291. #if defined ANDROID || defined __linux__
  2292. #ifdef __aarch64__
  2293. - f.have[CV_CPU_NEON] = true;
  2294. - f.have[CV_CPU_FP16] = true;
  2295. + have[CV_CPU_NEON] = true;
  2296. + have[CV_CPU_FP16] = true;
  2297. #elif defined __arm__
  2298. int cpufile = open("/proc/self/auxv", O_RDONLY);
  2299. @@ -352,8 +459,8 @@ struct HWFeatures
  2300. {
  2301. if (auxv.a_type == AT_HWCAP)
  2302. {
  2303. - f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
  2304. - f.have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
  2305. + have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
  2306. + have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
  2307. break;
  2308. }
  2309. }
  2310. @@ -363,21 +470,133 @@ struct HWFeatures
  2311. #endif
  2312. #elif (defined __clang__ || defined __APPLE__)
  2313. #if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
  2314. - f.have[CV_CPU_NEON] = true;
  2315. + have[CV_CPU_NEON] = true;
  2316. #endif
  2317. #if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__))
  2318. - f.have[CV_CPU_FP16] = true;
  2319. + have[CV_CPU_FP16] = true;
  2320. #endif
  2321. #endif
  2322. - return f;
  2323. + int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
  2324. + if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
  2325. + {
  2326. + fprintf(stderr, "\n"
  2327. + "******************************************************************\n"
  2328. + "* FATAL ERROR: *\n"
  2329. + "* This OpenCV build doesn't support current CPU/HW configuration *\n"
  2330. + "* *\n"
  2331. + "* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n"
  2332. + "******************************************************************\n");
  2333. + fprintf(stderr, "\nRequired baseline features:\n");
  2334. + checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true);
  2335. + CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup.");
  2336. + }
  2337. +
  2338. + readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]));
  2339. + }
  2340. +
  2341. + bool checkFeatures(const int* features, int count, bool dump = false)
  2342. + {
  2343. + bool result = true;
  2344. + for (int i = 0; i < count; i++)
  2345. + {
  2346. + int feature = features[i];
  2347. + if (feature)
  2348. + {
  2349. + if (have[feature])
  2350. + {
  2351. + if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature));
  2352. + }
  2353. + else
  2354. + {
  2355. + result = false;
  2356. + if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature));
  2357. + }
  2358. + }
  2359. + }
  2360. + return result;
  2361. + }
  2362. +
  2363. + static inline bool isSymbolSeparator(char c)
  2364. + {
  2365. + return c == ',' || c == ';' || c == '-';
  2366. + }
  2367. +
  2368. + void readSettings(const int* baseline_features, int baseline_count)
  2369. + {
  2370. + bool dump = true;
  2371. + const char* disabled_features =
  2372. +#ifndef WINRT
  2373. + getenv("OPENCV_CPU_DISABLE");
  2374. +#else
  2375. + NULL;
  2376. +#endif
  2377. + if (disabled_features && disabled_features[0] != 0)
  2378. + {
  2379. + const char* start = disabled_features;
  2380. + for (;;)
  2381. + {
  2382. + while (start[0] != 0 && isSymbolSeparator(start[0]))
  2383. + {
  2384. + start++;
  2385. + }
  2386. + if (start[0] == 0)
  2387. + break;
  2388. + const char* end = start;
  2389. + while (end[0] != 0 && !isSymbolSeparator(end[0]))
  2390. + {
  2391. + end++;
  2392. + }
  2393. + if (end == start)
  2394. + continue;
  2395. + cv::String feature(start, end);
  2396. + start = end;
  2397. +
  2398. + CV_Assert(feature.size() > 0);
  2399. +
  2400. + bool found = false;
  2401. + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
  2402. + {
  2403. + if (!g_hwFeatureNames[i]) continue;
  2404. + size_t len = strlen(g_hwFeatureNames[i]);
  2405. + if (len != feature.size()) continue;
  2406. + if (feature.compare(g_hwFeatureNames[i]) == 0)
  2407. + {
  2408. + bool isBaseline = false;
  2409. + for (int k = 0; k < baseline_count; k++)
  2410. + {
  2411. + if (baseline_features[k] == i)
  2412. + {
  2413. + isBaseline = true;
  2414. + break;
  2415. + }
  2416. + }
  2417. + if (isBaseline)
  2418. + {
  2419. + if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i));
  2420. + }
  2421. + if (!have[i])
  2422. + {
  2423. + if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i));
  2424. + }
  2425. + have[i] = false;
  2426. +
  2427. + found = true;
  2428. + break;
  2429. + }
  2430. + }
  2431. + if (!found)
  2432. + {
  2433. + if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str());
  2434. + }
  2435. + }
  2436. + }
  2437. }
  2438. - int x86_family;
  2439. bool have[MAX_FEATURE+1];
  2440. };
  2441. -static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
  2442. +static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false);
  2443. static HWFeatures* currentFeatures = &featuresEnabled;
  2444. bool checkHardwareSupport(int feature)
  2445. diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
  2446. index eb56177..6d9c650 100644
  2447. --- a/modules/highgui/CMakeLists.txt
  2448. +++ b/modules/highgui/CMakeLists.txt
  2449. @@ -65,7 +65,7 @@ elseif(HAVE_QT)
  2450. list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES})
  2451. list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES})
  2452. - ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag)
  2453. + ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag "")
  2454. if(${_have_flag})
  2455. set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations)
  2456. endif()
  2457. diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
  2458. index 0fa5202..dcf2e44 100644
  2459. --- a/modules/imgproc/src/imgwarp.cpp
  2460. +++ b/modules/imgproc/src/imgwarp.cpp
  2461. @@ -1649,7 +1649,7 @@ struct VResizeLanczos4
  2462. {
  2463. CastOp castOp;
  2464. VecOp vecOp;
  2465. - int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
  2466. + int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
  2467. #if CV_ENABLE_UNROLLED
  2468. for( ; x <= width - 4; x += 4 )
  2469. {
  2470. @@ -1657,7 +1657,7 @@ struct VResizeLanczos4
  2471. const WT* S = src[0];
  2472. WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
  2473. - for( k = 1; k < 8; k++ )
  2474. + for( int k = 1; k < 8; k++ )
  2475. {
  2476. b = beta[k]; S = src[k];
  2477. s0 += S[x]*b; s1 += S[x+1]*b;
  2478. diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp
  2479. index 51843fa..bb37ee9 100644
  2480. --- a/modules/objdetect/src/haar.cpp
  2481. +++ b/modules/objdetect/src/haar.cpp
  2482. @@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
  2483. CvPoint pt, double& stage_sum, int start_stage )
  2484. {
  2485. #ifdef CV_HAAR_USE_AVX
  2486. - bool haveAVX = false;
  2487. - if(cv::checkHardwareSupport(CV_CPU_AVX))
  2488. - if(__xgetbv()&0x6)// Check if the OS will save the YMM registers
  2489. - haveAVX = true;
  2490. + bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX);
  2491. #else
  2492. # ifdef CV_HAAR_USE_SSE
  2493. bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
  2494. --
  2495. 2.7.4