You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

406 lines
13KB

  1. // -*- C++ -*-
  2. // Copyright (C) 2007-2020 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the terms
  6. // of the GNU General Public License as published by the Free Software
  7. // Foundation; either version 3, or (at your option) any later
  8. // version.
  9. // This library is distributed in the hope that it will be useful, but
  10. // WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. // General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /** @file parallel/find.h
  21. * @brief Parallel implementation base for std::find(), std::equal()
  22. * and related functions.
  23. * This file is a GNU parallel extension to the Standard C++ Library.
  24. */
  25. // Written by Felix Putze and Johannes Singler.
  26. #ifndef _GLIBCXX_PARALLEL_FIND_H
  27. #define _GLIBCXX_PARALLEL_FIND_H 1
  28. #include <bits/stl_algobase.h>
  29. #include <parallel/features.h>
  30. #include <parallel/parallel.h>
  31. #include <parallel/compatibility.h>
  32. #include <parallel/equally_split.h>
  33. namespace __gnu_parallel
  34. {
  35. /**
  36. * @brief Parallel std::find, switch for different algorithms.
  37. * @param __begin1 Begin iterator of first sequence.
  38. * @param __end1 End iterator of first sequence.
  39. * @param __begin2 Begin iterator of second sequence. Must have same
  40. * length as first sequence.
  41. * @param __pred Find predicate.
  42. * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
  43. * @return Place of finding in both sequences.
  44. */
  45. template<typename _RAIter1,
  46. typename _RAIter2,
  47. typename _Pred,
  48. typename _Selector>
  49. inline std::pair<_RAIter1, _RAIter2>
  50. __find_template(_RAIter1 __begin1, _RAIter1 __end1,
  51. _RAIter2 __begin2, _Pred __pred, _Selector __selector)
  52. {
  53. switch (_Settings::get().find_algorithm)
  54. {
  55. case GROWING_BLOCKS:
  56. return __find_template(__begin1, __end1, __begin2, __pred,
  57. __selector, growing_blocks_tag());
  58. case CONSTANT_SIZE_BLOCKS:
  59. return __find_template(__begin1, __end1, __begin2, __pred,
  60. __selector, constant_size_blocks_tag());
  61. case EQUAL_SPLIT:
  62. return __find_template(__begin1, __end1, __begin2, __pred,
  63. __selector, equal_split_tag());
  64. default:
  65. _GLIBCXX_PARALLEL_ASSERT(false);
  66. return std::make_pair(__begin1, __begin2);
  67. }
  68. }
  69. #if _GLIBCXX_FIND_EQUAL_SPLIT
  70. /**
  71. * @brief Parallel std::find, equal splitting variant.
  72. * @param __begin1 Begin iterator of first sequence.
  73. * @param __end1 End iterator of first sequence.
  74. * @param __begin2 Begin iterator of second sequence. Second __sequence
  75. * must have same length as first sequence.
  76. * @param __pred Find predicate.
  77. * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
  78. * @return Place of finding in both sequences.
  79. */
  80. template<typename _RAIter1,
  81. typename _RAIter2,
  82. typename _Pred,
  83. typename _Selector>
  84. std::pair<_RAIter1, _RAIter2>
  85. __find_template(_RAIter1 __begin1, _RAIter1 __end1,
  86. _RAIter2 __begin2, _Pred __pred,
  87. _Selector __selector, equal_split_tag)
  88. {
  89. _GLIBCXX_CALL(__end1 - __begin1)
  90. typedef std::iterator_traits<_RAIter1> _TraitsType;
  91. typedef typename _TraitsType::difference_type _DifferenceType;
  92. typedef typename _TraitsType::value_type _ValueType;
  93. _DifferenceType __length = __end1 - __begin1;
  94. _DifferenceType __result = __length;
  95. _DifferenceType* __borders;
  96. omp_lock_t __result_lock;
  97. omp_init_lock(&__result_lock);
  98. _ThreadIndex __num_threads = __get_max_threads();
  99. # pragma omp parallel num_threads(__num_threads)
  100. {
  101. # pragma omp single
  102. {
  103. __num_threads = omp_get_num_threads();
  104. __borders = new _DifferenceType[__num_threads + 1];
  105. __equally_split(__length, __num_threads, __borders);
  106. } //single
  107. _ThreadIndex __iam = omp_get_thread_num();
  108. _DifferenceType __start = __borders[__iam],
  109. __stop = __borders[__iam + 1];
  110. _RAIter1 __i1 = __begin1 + __start;
  111. _RAIter2 __i2 = __begin2 + __start;
  112. for (_DifferenceType __pos = __start; __pos < __stop; ++__pos)
  113. {
  114. # pragma omp flush(__result)
  115. // Result has been set to something lower.
  116. if (__result < __pos)
  117. break;
  118. if (__selector(__i1, __i2, __pred))
  119. {
  120. omp_set_lock(&__result_lock);
  121. if (__pos < __result)
  122. __result = __pos;
  123. omp_unset_lock(&__result_lock);
  124. break;
  125. }
  126. ++__i1;
  127. ++__i2;
  128. }
  129. } //parallel
  130. omp_destroy_lock(&__result_lock);
  131. delete[] __borders;
  132. return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
  133. __begin2 + __result);
  134. }
  135. #endif
  136. #if _GLIBCXX_FIND_GROWING_BLOCKS
  137. /**
  138. * @brief Parallel std::find, growing block size variant.
  139. * @param __begin1 Begin iterator of first sequence.
  140. * @param __end1 End iterator of first sequence.
  141. * @param __begin2 Begin iterator of second sequence. Second __sequence
  142. * must have same length as first sequence.
  143. * @param __pred Find predicate.
  144. * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
  145. * @return Place of finding in both sequences.
  146. * @see __gnu_parallel::_Settings::find_sequential_search_size
  147. * @see __gnu_parallel::_Settings::find_scale_factor
  148. *
  149. * There are two main differences between the growing blocks and
  150. * the constant-size blocks variants.
  151. * 1. For GB, the block size grows; for CSB, the block size is fixed.
  152. * 2. For GB, the blocks are allocated dynamically;
  153. * for CSB, the blocks are allocated in a predetermined manner,
  154. * namely spacial round-robin.
  155. */
  156. template<typename _RAIter1,
  157. typename _RAIter2,
  158. typename _Pred,
  159. typename _Selector>
  160. std::pair<_RAIter1, _RAIter2>
  161. __find_template(_RAIter1 __begin1, _RAIter1 __end1,
  162. _RAIter2 __begin2, _Pred __pred, _Selector __selector,
  163. growing_blocks_tag)
  164. {
  165. _GLIBCXX_CALL(__end1 - __begin1)
  166. typedef std::iterator_traits<_RAIter1> _TraitsType;
  167. typedef typename _TraitsType::difference_type _DifferenceType;
  168. typedef typename _TraitsType::value_type _ValueType;
  169. const _Settings& __s = _Settings::get();
  170. _DifferenceType __length = __end1 - __begin1;
  171. _DifferenceType
  172. __sequential_search_size = std::min<_DifferenceType>
  173. (__length, __s.find_sequential_search_size);
  174. // Try it sequentially first.
  175. std::pair<_RAIter1, _RAIter2>
  176. __find_seq_result = __selector._M_sequential_algorithm
  177. (__begin1, __begin1 + __sequential_search_size,
  178. __begin2, __pred);
  179. if (__find_seq_result.first != (__begin1 + __sequential_search_size))
  180. return __find_seq_result;
  181. // Index of beginning of next free block (after sequential find).
  182. _DifferenceType __next_block_start = __sequential_search_size;
  183. _DifferenceType __result = __length;
  184. omp_lock_t __result_lock;
  185. omp_init_lock(&__result_lock);
  186. const float __scale_factor = __s.find_scale_factor;
  187. _ThreadIndex __num_threads = __get_max_threads();
  188. # pragma omp parallel shared(__result) num_threads(__num_threads)
  189. {
  190. # pragma omp single
  191. __num_threads = omp_get_num_threads();
  192. // Not within first __k elements -> start parallel.
  193. _ThreadIndex __iam = omp_get_thread_num();
  194. _DifferenceType __block_size =
  195. std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
  196. _DifferenceType __start = __fetch_and_add<_DifferenceType>
  197. (&__next_block_start, __block_size);
  198. // Get new block, update pointer to next block.
  199. _DifferenceType __stop =
  200. std::min<_DifferenceType>(__length, __start + __block_size);
  201. std::pair<_RAIter1, _RAIter2> __local_result;
  202. while (__start < __length)
  203. {
  204. # pragma omp flush(__result)
  205. // Get new value of result.
  206. if (__result < __start)
  207. {
  208. // No chance to find first element.
  209. break;
  210. }
  211. __local_result = __selector._M_sequential_algorithm
  212. (__begin1 + __start, __begin1 + __stop,
  213. __begin2 + __start, __pred);
  214. if (__local_result.first != (__begin1 + __stop))
  215. {
  216. omp_set_lock(&__result_lock);
  217. if ((__local_result.first - __begin1) < __result)
  218. {
  219. __result = __local_result.first - __begin1;
  220. // Result cannot be in future blocks, stop algorithm.
  221. __fetch_and_add<_DifferenceType>(&__next_block_start,
  222. __length);
  223. }
  224. omp_unset_lock(&__result_lock);
  225. }
  226. _DifferenceType __block_size =
  227. std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
  228. // Get new block, update pointer to next block.
  229. __start = __fetch_and_add<_DifferenceType>(&__next_block_start,
  230. __block_size);
  231. __stop =
  232. std::min<_DifferenceType>(__length, __start + __block_size);
  233. }
  234. } //parallel
  235. omp_destroy_lock(&__result_lock);
  236. // Return iterator on found element.
  237. return
  238. std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
  239. __begin2 + __result);
  240. }
  241. #endif
  242. #if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
  243. /**
  244. * @brief Parallel std::find, constant block size variant.
  245. * @param __begin1 Begin iterator of first sequence.
  246. * @param __end1 End iterator of first sequence.
  247. * @param __begin2 Begin iterator of second sequence. Second __sequence
  248. * must have same length as first sequence.
  249. * @param __pred Find predicate.
  250. * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
  251. * @return Place of finding in both sequences.
  252. * @see __gnu_parallel::_Settings::find_sequential_search_size
  253. * @see __gnu_parallel::_Settings::find_block_size
  254. * There are two main differences between the growing blocks and the
  255. * constant-size blocks variants.
  256. * 1. For GB, the block size grows; for CSB, the block size is fixed.
  257. * 2. For GB, the blocks are allocated dynamically; for CSB, the
  258. * blocks are allocated in a predetermined manner, namely spacial
  259. * round-robin.
  260. */
  261. template<typename _RAIter1,
  262. typename _RAIter2,
  263. typename _Pred,
  264. typename _Selector>
  265. std::pair<_RAIter1, _RAIter2>
  266. __find_template(_RAIter1 __begin1, _RAIter1 __end1,
  267. _RAIter2 __begin2, _Pred __pred, _Selector __selector,
  268. constant_size_blocks_tag)
  269. {
  270. _GLIBCXX_CALL(__end1 - __begin1)
  271. typedef std::iterator_traits<_RAIter1> _TraitsType;
  272. typedef typename _TraitsType::difference_type _DifferenceType;
  273. typedef typename _TraitsType::value_type _ValueType;
  274. const _Settings& __s = _Settings::get();
  275. _DifferenceType __length = __end1 - __begin1;
  276. _DifferenceType __sequential_search_size = std::min<_DifferenceType>
  277. (__length, __s.find_sequential_search_size);
  278. // Try it sequentially first.
  279. std::pair<_RAIter1, _RAIter2>
  280. __find_seq_result = __selector._M_sequential_algorithm
  281. (__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
  282. if (__find_seq_result.first != (__begin1 + __sequential_search_size))
  283. return __find_seq_result;
  284. _DifferenceType __result = __length;
  285. omp_lock_t __result_lock;
  286. omp_init_lock(&__result_lock);
  287. // Not within first __sequential_search_size elements -> start parallel.
  288. _ThreadIndex __num_threads = __get_max_threads();
  289. # pragma omp parallel shared(__result) num_threads(__num_threads)
  290. {
  291. # pragma omp single
  292. __num_threads = omp_get_num_threads();
  293. _ThreadIndex __iam = omp_get_thread_num();
  294. _DifferenceType __block_size = __s.find_initial_block_size;
  295. // First element of thread's current iteration.
  296. _DifferenceType __iteration_start = __sequential_search_size;
  297. // Where to work (initialization).
  298. _DifferenceType __start = __iteration_start + __iam * __block_size;
  299. _DifferenceType __stop = std::min<_DifferenceType>(__length,
  300. __start
  301. + __block_size);
  302. std::pair<_RAIter1, _RAIter2> __local_result;
  303. while (__start < __length)
  304. {
  305. // Get new value of result.
  306. # pragma omp flush(__result)
  307. // No chance to find first element.
  308. if (__result < __start)
  309. break;
  310. __local_result = __selector._M_sequential_algorithm
  311. (__begin1 + __start, __begin1 + __stop,
  312. __begin2 + __start, __pred);
  313. if (__local_result.first != (__begin1 + __stop))
  314. {
  315. omp_set_lock(&__result_lock);
  316. if ((__local_result.first - __begin1) < __result)
  317. __result = __local_result.first - __begin1;
  318. omp_unset_lock(&__result_lock);
  319. // Will not find better value in its interval.
  320. break;
  321. }
  322. __iteration_start += __num_threads * __block_size;
  323. // Where to work.
  324. __start = __iteration_start + __iam * __block_size;
  325. __stop = std::min<_DifferenceType>(__length,
  326. __start + __block_size);
  327. }
  328. } //parallel
  329. omp_destroy_lock(&__result_lock);
  330. // Return iterator on found element.
  331. return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
  332. __begin2 + __result);
  333. }
  334. #endif
  335. } // end namespace
  336. #endif /* _GLIBCXX_PARALLEL_FIND_H */