yafl_math.c 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070
  1. /*******************************************************************************
  2. Copyright 2021 anonimous <shkolnick-kun@gmail.com> and contributors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing,
  8. software distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions
  11. and limitations under the License.
  12. ******************************************************************************/
  13. #include "yafl_math.h"
  14. /* We need some way of printing human readable statuses */
  15. char * yafl_fail_dsc(yaflStatusEn status)
  16. {
  17. switch (status & 0xff0)
  18. {
  19. #define CASE_DSC(err) do { \
  20. case err: \
  21. { \
  22. return #err; \
  23. } \
  24. } while (0)
  25. CASE_DSC(YAFL_ST_INV_ARG_1);
  26. CASE_DSC(YAFL_ST_INV_ARG_2);
  27. CASE_DSC(YAFL_ST_INV_ARG_3);
  28. CASE_DSC(YAFL_ST_INV_ARG_4);
  29. CASE_DSC(YAFL_ST_INV_ARG_5);
  30. CASE_DSC(YAFL_ST_INV_ARG_6);
  31. CASE_DSC(YAFL_ST_INV_ARG_7);
  32. CASE_DSC(YAFL_ST_INV_ARG_8);
  33. CASE_DSC(YAFL_ST_INV_ARG_9);
  34. CASE_DSC(YAFL_ST_INV_ARG_10);
  35. CASE_DSC(YAFL_ST_INV_ARG_11);
  36. CASE_DSC(YAFL_ST_INV_ARG_12);
  37. #undef CASE_DSC
  38. default:
  39. {
  40. return "Internal error!!!";
  41. }
  42. }
  43. }
  44. #define YAFL_DO_VXN(name, op) \
  45. yaflStatusEn name(yaflInt sz, yaflFloat *res, yaflFloat *v, yaflFloat n) \
  46. { \
  47. yaflInt k; \
  48. \
  49. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  50. YAFL_CHECK(v, YAFL_ST_INV_ARG_3); \
  51. \
  52. for (k = 0; k < sz; k++) \
  53. { \
  54. res[k] op v[k] * n; \
  55. } \
  56. return YAFL_ST_OK; \
  57. }
  58. YAFL_DO_VXN(yafl_math_set_vxn, =)
  59. YAFL_DO_VXN(yafl_math_add_vxn, +=)
  60. YAFL_DO_VXN(yafl_math_sub_vxn, -=)
  61. #define YAFL_DO_VRN(name, op) \
  62. yaflStatusEn name(yaflInt sz, yaflFloat *res, yaflFloat *v, yaflFloat n) \
  63. { \
  64. yaflStatusEn status; \
  65. yaflInt k; \
  66. \
  67. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  68. YAFL_CHECK(v, YAFL_ST_INV_ARG_3); \
  69. \
  70. status = YAFL_ST_OK; \
  71. if (n > 0.0) \
  72. { \
  73. if (n < YAFL_EPS) \
  74. { \
  75. n = YAFL_EPS; \
  76. status = YAFL_ST_R; \
  77. } \
  78. } \
  79. else \
  80. { \
  81. if (n > -YAFL_EPS) \
  82. { \
  83. n = -YAFL_EPS; \
  84. status = YAFL_ST_R; \
  85. } \
  86. } \
  87. \
  88. for (k = 0; k < sz; k++) \
  89. { \
  90. res[k] op v[k] / n; \
  91. } \
  92. return status; \
  93. }
  94. YAFL_DO_VRN(yafl_math_set_vrn, =)
  95. YAFL_DO_VRN(yafl_math_add_vrn, +=)
  96. YAFL_DO_VRN(yafl_math_sub_vrn, -=)
  97. #define YAFL_DO_VXV(name, op) \
  98. yaflStatusEn name(yaflInt sz, yaflFloat *res, yaflFloat *a, yaflFloat *b)\
  99. { \
  100. yaflInt k; \
  101. \
  102. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  103. YAFL_CHECK(a, YAFL_ST_INV_ARG_3); \
  104. YAFL_CHECK(b, YAFL_ST_INV_ARG_4); \
  105. \
  106. for (k = 0; k < sz; k++) \
  107. { \
  108. res[k] op a[k] * b[k]; \
  109. } \
  110. return YAFL_ST_OK; \
  111. }
  112. YAFL_DO_VXV(yafl_math_set_vxv, =)
  113. YAFL_DO_VXV(yafl_math_add_vxv, +=)
  114. YAFL_DO_VXV(yafl_math_sub_vxv, -=)
  115. #define YAFL_DO_VRV(name, op) \
  116. yaflStatusEn name(yaflInt sz, yaflFloat *res, yaflFloat *a, yaflFloat *b)\
  117. { \
  118. yaflInt k; \
  119. \
  120. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  121. YAFL_CHECK(a, YAFL_ST_INV_ARG_3); \
  122. YAFL_CHECK(b, YAFL_ST_INV_ARG_4); \
  123. \
  124. for (k = 0; k < sz; k++) \
  125. { \
  126. res[k] op a[k] / b[k]; \
  127. } \
  128. return YAFL_ST_OK; \
  129. }
  130. YAFL_DO_VRV(yafl_math_set_vrv, =)
  131. YAFL_DO_VRV(yafl_math_add_vrv, +=)
  132. YAFL_DO_VRV(yafl_math_sub_vrv, -=)
  133. yaflStatusEn yafl_math_vtv(yaflInt sz, yaflFloat *res, yaflFloat *a, yaflFloat *b)
  134. {
  135. yaflInt k;
  136. yaflFloat r;
  137. YAFL_CHECK(res, YAFL_ST_INV_ARG_2);
  138. YAFL_CHECK(a, YAFL_ST_INV_ARG_3);
  139. YAFL_CHECK(b, YAFL_ST_INV_ARG_4);
  140. r = a[0] * b[0];
  141. for (k = 1; k < sz; k++)
  142. {
  143. r += a[k] * b[k];
  144. }
  145. *res = r;
  146. return YAFL_ST_OK;
  147. }
  148. #define YAFL_DO_VVT(name, op) \
  149. yaflStatusEn name(yaflInt nr, yaflInt nc, yaflFloat *res, yaflFloat *a, yaflFloat *b) \
  150. { \
  151. yaflInt j; \
  152. \
  153. YAFL_CHECK(res, YAFL_ST_INV_ARG_3); \
  154. YAFL_CHECK(a, YAFL_ST_INV_ARG_4); \
  155. YAFL_CHECK(b, YAFL_ST_INV_ARG_5); \
  156. \
  157. for (j = 0; j < nr; j++) \
  158. { \
  159. yaflInt k; \
  160. yaflInt ncj; \
  161. yaflFloat aj; \
  162. \
  163. ncj = nc * j; \
  164. aj = a[j]; \
  165. \
  166. for (k = 0; k < nc; k++) \
  167. { \
  168. res[ncj + k] op aj * b[k]; \
  169. } \
  170. } \
  171. return YAFL_ST_OK; \
  172. }
  173. YAFL_DO_VVT(yafl_math_set_vvt, =)
  174. YAFL_DO_VVT(yafl_math_add_vvt, +=)
  175. YAFL_DO_VVT(yafl_math_sub_vvt, -=)
  176. #define YAFL_DO_VVTXN(name, op) \
  177. yaflStatusEn name(yaflInt nr, yaflInt nc, yaflFloat *res, yaflFloat *a, yaflFloat *b, yaflFloat n) \
  178. { \
  179. yaflInt j; \
  180. \
  181. YAFL_CHECK(res, YAFL_ST_INV_ARG_3); \
  182. YAFL_CHECK(a, YAFL_ST_INV_ARG_4); \
  183. YAFL_CHECK(b, YAFL_ST_INV_ARG_5); \
  184. \
  185. for (j = 0; j < nr; j++) \
  186. { \
  187. yaflInt k; \
  188. yaflInt ncj; \
  189. yaflFloat aj; \
  190. \
  191. ncj = nc * j; \
  192. aj = a[j] * n; \
  193. \
  194. for (k = 0; k < nc; k++) \
  195. { \
  196. res[ncj + k] op aj * b[k]; \
  197. } \
  198. } \
  199. return YAFL_ST_OK; \
  200. }
  201. YAFL_DO_VVTXN(yafl_math_set_vvtxn, =)
  202. YAFL_DO_VVTXN(yafl_math_add_vvtxn, +=)
  203. YAFL_DO_VVTXN(yafl_math_sub_vvtxn, -=)
  204. #define YAFL_DO_MV(name, op1, op2) \
  205. yaflStatusEn name(yaflInt nr, yaflInt nc, yaflFloat *res, yaflFloat *a, yaflFloat *b) \
  206. { \
  207. yaflInt j; \
  208. \
  209. YAFL_CHECK(res, YAFL_ST_INV_ARG_3); \
  210. YAFL_CHECK(a, YAFL_ST_INV_ARG_4); \
  211. YAFL_CHECK(b, YAFL_ST_INV_ARG_5); \
  212. \
  213. for (j = 0; j < nr; j++) \
  214. { \
  215. yaflInt k; \
  216. yaflInt ncj; \
  217. yaflFloat resj; \
  218. \
  219. ncj = nc * j; \
  220. resj = res[j]; \
  221. resj op1 a[ncj] * b[0]; \
  222. \
  223. for (k = 1; k < nc; k++) \
  224. { \
  225. resj op2 a[ncj + k] * b[k]; \
  226. } \
  227. res[j] = resj; \
  228. } \
  229. return YAFL_ST_OK; \
  230. }
  231. YAFL_DO_MV(yafl_math_set_mv, =, +=)
  232. YAFL_DO_MV(yafl_math_add_mv, +=, +=)
  233. YAFL_DO_MV(yafl_math_sub_mv, -=, -=)
  234. #define YAFL_DO_VTM(name, op1, op2) \
  235. yaflStatusEn name(yaflInt nr, yaflInt nc, yaflFloat *res, yaflFloat *a, yaflFloat *b) \
  236. { \
  237. yaflInt j; \
  238. \
  239. YAFL_CHECK(res, YAFL_ST_INV_ARG_3); \
  240. YAFL_CHECK(a, YAFL_ST_INV_ARG_4); \
  241. YAFL_CHECK(b, YAFL_ST_INV_ARG_5); \
  242. \
  243. for (j = 0; j < nc; j++) \
  244. { \
  245. res[j] op1 a[0] * b[j]; \
  246. } \
  247. \
  248. for (j = 1; j < nr; j++) \
  249. { \
  250. yaflInt k; \
  251. yaflInt ncj; \
  252. yaflFloat aj; \
  253. \
  254. ncj = nc * j; \
  255. aj = a[j]; \
  256. \
  257. for (k = 0; k < nc; k++) \
  258. { \
  259. res[k] op2 aj * b[ncj + k]; \
  260. } \
  261. } \
  262. return YAFL_ST_OK; \
  263. }
  264. YAFL_DO_VTM(yafl_math_set_vtm, =, +=)
  265. YAFL_DO_VTM(yafl_math_add_vtm, +=, +=)
  266. YAFL_DO_VTM(yafl_math_sub_vtm, -=, -=)
  267. /*This is right as it is OMP friendly style*/
  268. #define YAFL_DO_MM(name, op1, op2) \
  269. yaflStatusEn name(yaflInt nr, yaflInt ncr, yaflInt nc, yaflFloat *res, yaflFloat *a, yaflFloat *b) \
  270. { \
  271. yaflInt i; \
  272. \
  273. YAFL_CHECK(res, YAFL_ST_INV_ARG_4); \
  274. YAFL_CHECK(a, YAFL_ST_INV_ARG_5); \
  275. YAFL_CHECK(b, YAFL_ST_INV_ARG_6); \
  276. \
  277. for (i = 0; i < nr; i++) \
  278. { \
  279. yaflInt j; \
  280. yaflInt k; \
  281. yaflInt nci; \
  282. yaflInt ncri; \
  283. \
  284. nci = nc * i; \
  285. ncri = ncr * i; \
  286. \
  287. for (k = 0; k < nc; k++) \
  288. { \
  289. res[nci + k] op1 a[ncri] * b[k]; \
  290. } \
  291. \
  292. for (j = 1; j < ncr; j++) \
  293. { \
  294. yaflInt ncj; \
  295. yaflFloat aij; \
  296. \
  297. ncj = nc * j; \
  298. aij = a[ncri + j]; \
  299. \
  300. for (k = 0; k < nc; k++) \
  301. { \
  302. res[nci + k] op2 aij * b[ncj + k]; \
  303. } \
  304. } \
  305. } \
  306. return YAFL_ST_OK; \
  307. }
  308. YAFL_DO_MM(yafl_math_set_mm, =, +=)
  309. YAFL_DO_MM(yafl_math_add_mm, +=, +=)
  310. YAFL_DO_MM(yafl_math_sub_mm, -=, -=)
  311. #define YAFL_DO_VTU(name, op1, op2) \
  312. yaflStatusEn name(yaflInt sz, yaflFloat *res, yaflFloat *a, yaflFloat *b) \
  313. { \
  314. yaflInt j; \
  315. yaflInt szj; \
  316. \
  317. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  318. YAFL_CHECK(a, YAFL_ST_INV_ARG_3); \
  319. YAFL_CHECK(b, YAFL_ST_INV_ARG_4); \
  320. \
  321. for (j = 0; j < sz; j++) \
  322. { \
  323. res[j] op1 a[j]; \
  324. } \
  325. \
  326. for (szj = 0, j = 1; j < sz; szj += j++) \
  327. { \
  328. yaflInt k; \
  329. yaflFloat resj; \
  330. \
  331. resj = res[j]; \
  332. for (k = 0; k < j; k++) \
  333. { \
  334. resj op2 a[k] * b[k + szj]; \
  335. } \
  336. res[j] = resj; \
  337. } \
  338. return YAFL_ST_OK; \
  339. }
  340. YAFL_DO_VTU(yafl_math_set_vtu, =, +=)
  341. YAFL_DO_VTU(yafl_math_add_vtu, +=, +=)
  342. YAFL_DO_VTU(yafl_math_sub_vtu, -=, -=)
  343. #define YAFL_DO_UV(name, op1, op2) \
  344. yaflStatusEn name(yaflInt sz, yaflFloat *res, yaflFloat *a, yaflFloat *b) \
  345. { \
  346. yaflInt j; \
  347. yaflInt szj; \
  348. \
  349. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  350. YAFL_CHECK(a, YAFL_ST_INV_ARG_3); \
  351. YAFL_CHECK(b, YAFL_ST_INV_ARG_4); \
  352. \
  353. for (j = 0; j < sz; j++) \
  354. { \
  355. res[j] op1 b[j]; \
  356. } \
  357. \
  358. for (szj = 0, j = 1; j < sz; szj += j++) \
  359. { \
  360. yaflInt k; \
  361. yaflFloat bj; \
  362. \
  363. bj = b[j]; \
  364. for (k = 0; k < j; k++) \
  365. { \
  366. res[k] op2 a[k + szj] * bj; \
  367. } \
  368. } \
  369. return YAFL_ST_OK; \
  370. }
  371. YAFL_DO_UV(yafl_math_set_uv, =, +=)
  372. YAFL_DO_UV(yafl_math_add_uv, +=, +=)
  373. YAFL_DO_UV(yafl_math_sub_uv, -=, -=)
  374. /*This is right as it is OMP friendly style*/
  375. #define YAFL_DO_MU(name, op1, op2) \
  376. yaflStatusEn name(yaflInt nr, yaflInt nc, yaflFloat *res, yaflFloat *a, yaflFloat *b) \
  377. { \
  378. yaflInt i; \
  379. \
  380. YAFL_CHECK(res, YAFL_ST_INV_ARG_3); \
  381. YAFL_CHECK(a, YAFL_ST_INV_ARG_4); \
  382. YAFL_CHECK(b, YAFL_ST_INV_ARG_5); \
  383. \
  384. for (i = 0; i < nr; i++) \
  385. { \
  386. yaflInt j; \
  387. yaflInt nci; \
  388. yaflInt ncj; \
  389. \
  390. nci = nc * i; \
  391. \
  392. for (j = 0; j < nc; j++) \
  393. { \
  394. res[nci + j] op1 a[nci + j]; \
  395. } \
  396. \
  397. for (ncj = 0, j = 1; j < nc; ncj += j++) \
  398. { \
  399. yaflInt k; \
  400. yaflFloat resij; \
  401. \
  402. resij = res[nci + j]; \
  403. for (k = 0; k < j; k++) \
  404. { \
  405. resij op2 a[nci + k] * b[k + ncj]; \
  406. } \
  407. res[nci + j] = resij; \
  408. } \
  409. } \
  410. return YAFL_ST_OK; \
  411. }
  412. YAFL_DO_MU(yafl_math_set_mu, =, +=)
  413. YAFL_DO_MU(yafl_math_add_mu, +=, +=)
  414. YAFL_DO_MU(yafl_math_sub_mu, -=, -=)
  415. yaflStatusEn yafl_math_set_u(yaflInt sz, yaflFloat *res, yaflFloat *u)
  416. {
  417. yaflInt i;
  418. YAFL_CHECK(res, YAFL_ST_INV_ARG_2);
  419. YAFL_CHECK(u, YAFL_ST_INV_ARG_3);
  420. for (i = 0; i < sz; i++)
  421. {
  422. yaflInt szi;
  423. yaflInt j;
  424. szi = sz * i;
  425. for (j = 0; j < i; j++)
  426. {
  427. res[szi + j] = 0.0;
  428. }
  429. res[szi + j] = 1.0;
  430. for (j++; j < sz; j++)
  431. {
  432. res[szi + j] = u[i + ((j - 1) * j) / 2];
  433. }
  434. }
  435. return YAFL_ST_OK;
  436. }
  437. #define YAFL_DO_U(name, op) \
  438. yaflStatusEn name(yaflInt sz, yaflFloat *res, yaflFloat *u) \
  439. { \
  440. yaflInt i; \
  441. \
  442. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  443. YAFL_CHECK(u, YAFL_ST_INV_ARG_3); \
  444. \
  445. for (i = 0; i < sz; i++) \
  446. { \
  447. yaflInt szi; \
  448. yaflInt j; \
  449. \
  450. res[(sz + 1) * i] op 1.0; \
  451. \
  452. szi = sz * i; \
  453. for (j = i + 1; j < sz; j++) \
  454. { \
  455. res[szi + j] op u[i + ((j - 1) * j) / 2]; \
  456. } \
  457. } \
  458. return YAFL_ST_OK; \
  459. }
  460. YAFL_DO_U(yafl_math_add_u, +=)
  461. YAFL_DO_U(yafl_math_sub_u, -=)
  462. #define YAFL_DO_BM(name, op) \
  463. yaflStatusEn name(yaflInt nc, yaflFloat *res, yaflInt sr, yaflInt sc, yaflFloat *a) \
  464. { \
  465. yaflInt j; \
  466. \
  467. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  468. YAFL_CHECK(a, YAFL_ST_INV_ARG_4); \
  469. \
  470. for (j = 0; j < sr; j++) \
  471. { \
  472. yaflInt k; \
  473. yaflInt ncj; \
  474. yaflInt scj; \
  475. \
  476. ncj = nc * j; \
  477. scj = sc * j; \
  478. \
  479. for (k = 0; k < sc; k++) \
  480. { \
  481. res[ncj + k] op a[scj + k]; \
  482. } \
  483. } \
  484. return YAFL_ST_OK; \
  485. }
  486. YAFL_DO_BM(yafl_math_bset_m, =)
  487. YAFL_DO_BM(yafl_math_badd_m, +=)
  488. YAFL_DO_BM(yafl_math_bsub_m, -=)
  489. yaflStatusEn yafl_math_bset_u(yaflInt nc, yaflFloat *res, yaflInt sz, yaflFloat *u)
  490. {
  491. yaflInt i;
  492. YAFL_CHECK(res, YAFL_ST_INV_ARG_2);
  493. YAFL_CHECK(u, YAFL_ST_INV_ARG_4);
  494. for (i = 0; i < sz; i++)
  495. {
  496. yaflInt nci;
  497. yaflInt j;
  498. nci = nc * i;
  499. for (j = 0; j < i; j++)
  500. {
  501. res[nci + j] = 0.0;
  502. }
  503. res[nci + j] = 1.0;
  504. for (j++; j < sz; j++)
  505. {
  506. res[nci + j] = u[i + ((j - 1) * j) / 2];
  507. }
  508. }
  509. return YAFL_ST_OK;
  510. }
  511. #define YAFL_DO_BU(name, op) \
  512. yaflStatusEn name(yaflInt nc, yaflFloat *res, yaflInt sz, yaflFloat *u) \
  513. { \
  514. yaflInt i; \
  515. \
  516. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  517. YAFL_CHECK(u, YAFL_ST_INV_ARG_4); \
  518. \
  519. for (i = 0; i < sz; i++) \
  520. { \
  521. yaflInt nci; \
  522. yaflInt j; \
  523. \
  524. res[(nc + 1) * i] op 1.0; \
  525. \
  526. nci = nc * i; \
  527. for (j = i + 1; j < sz; j++) \
  528. { \
  529. res[nci + j] op u[i + ((j - 1) * j) / 2]; \
  530. } \
  531. } \
  532. return YAFL_ST_OK; \
  533. }
  534. YAFL_DO_BU(yafl_math_badd_u, +=)
  535. YAFL_DO_BU(yafl_math_bsub_u, -=)
  536. yaflStatusEn yafl_math_bset_ut(yaflInt nc, yaflFloat *res, yaflInt sz, yaflFloat *u)
  537. {
  538. yaflInt i;
  539. yaflInt szi;
  540. YAFL_CHECK(res, YAFL_ST_INV_ARG_2);
  541. YAFL_CHECK(u, YAFL_ST_INV_ARG_4);
  542. for (szi = 0, i = 0; i < sz; szi += i++)
  543. {
  544. yaflInt nci;
  545. yaflInt j;
  546. nci = nc * i;
  547. for (j = 0; j < i; j++)
  548. {
  549. res[nci + j] = u[j + szi];
  550. }
  551. res[nci + j] = 1.0;
  552. for (j++; j < sz; j++)
  553. {
  554. res[nci + j] = 0.0;
  555. }
  556. }
  557. return YAFL_ST_OK;
  558. }
  559. #define YAFL_DO_BUT(name, op) \
  560. yaflStatusEn name(yaflInt nc, yaflFloat *res, yaflInt sz, yaflFloat *u) \
  561. { \
  562. yaflInt i; \
  563. yaflInt szi; \
  564. \
  565. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  566. YAFL_CHECK(u, YAFL_ST_INV_ARG_4); \
  567. \
  568. for (szi = 0, i = 0; i < sz; szi += i++) \
  569. { \
  570. yaflInt nci; \
  571. yaflInt j; \
  572. \
  573. nci = nc * i; \
  574. for (j = 0; j < i; j++) \
  575. { \
  576. res[nci + j] op u[j + szi]; \
  577. } \
  578. \
  579. res[nci + j] op 1.0; \
  580. } \
  581. return YAFL_ST_OK; \
  582. }
  583. YAFL_DO_BUT(yafl_math_badd_ut, +=)
  584. YAFL_DO_BUT(yafl_math_bsub_ut, -=)
  585. #define YAFL_DO_BV(name, op) \
  586. yaflStatusEn name(yaflInt nc, yaflFloat *res, yaflInt sz, yaflFloat *v) \
  587. { \
  588. yaflInt i; \
  589. \
  590. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  591. YAFL_CHECK(v, YAFL_ST_INV_ARG_4); \
  592. \
  593. for (i = 0; i < sz; i++) \
  594. { \
  595. res[nc * i] op v[i]; \
  596. } \
  597. return YAFL_ST_OK; \
  598. }
  599. YAFL_DO_BV(yafl_math_bset_v, =)
  600. YAFL_DO_BV(yafl_math_badd_v, +=)
  601. YAFL_DO_BV(yafl_math_bsub_v, -=)
  602. #define YAFL_DO_BVVT(name, op) \
  603. yaflStatusEn name(yaflInt nc, yaflFloat *res, yaflInt sz, yaflFloat *a, yaflFloat *b) \
  604. { \
  605. yaflInt j; \
  606. \
  607. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  608. YAFL_CHECK(a, YAFL_ST_INV_ARG_4); \
  609. YAFL_CHECK(b, YAFL_ST_INV_ARG_5); \
  610. \
  611. for (j = 0; j < sz; j++) \
  612. { \
  613. yaflInt k; \
  614. yaflInt ncj; \
  615. yaflFloat aj; \
  616. \
  617. ncj = nc * j; \
  618. aj = a[j]; \
  619. \
  620. for (k = 0; k < sz; k++) \
  621. { \
  622. res[ncj + k] op aj * b[k]; \
  623. } \
  624. } \
  625. return YAFL_ST_OK; \
  626. }
  627. YAFL_DO_BVVT(yafl_math_bset_vvt, =)
  628. YAFL_DO_BVVT(yafl_math_badd_vvt, +=)
  629. YAFL_DO_BVVT(yafl_math_bsub_vvt, -=)
  630. #define YAFL_DO_BMU(name, op1, op2) \
  631. yaflStatusEn name(yaflInt rnc, yaflFloat *res, yaflInt nr, yaflInt nc, yaflFloat *a, yaflFloat *b) \
  632. { \
  633. yaflInt i; \
  634. \
  635. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  636. YAFL_CHECK(a, YAFL_ST_INV_ARG_5); \
  637. YAFL_CHECK(b, YAFL_ST_INV_ARG_6); \
  638. YAFL_CHECK(rnc >= nc, YAFL_ST_INV_ARG_1); \
  639. \
  640. for (i = 0; i < nr; i++) \
  641. { \
  642. yaflInt j; \
  643. yaflInt nci; \
  644. yaflInt rnci; \
  645. yaflInt ncj; \
  646. \
  647. nci = nc * i; \
  648. rnci = rnc * i; \
  649. \
  650. for (j = 0; j < nc; j++) \
  651. { \
  652. res[rnci + j] op1 a[nci + j]; \
  653. } \
  654. \
  655. for (ncj = 0, j = 1; j < nc; ncj += j++) \
  656. { \
  657. yaflInt k; \
  658. yaflFloat resij; \
  659. \
  660. resij = res[rnci + j]; \
  661. for (k = 0; k < j; k++) \
  662. { \
  663. resij op2 a[nci + k] * b[k + ncj]; \
  664. } \
  665. res[rnci + j] = resij; \
  666. } \
  667. } \
  668. return YAFL_ST_OK; \
  669. }
  670. YAFL_DO_BMU(yafl_math_bset_mu, =, +=)
  671. YAFL_DO_BMU(yafl_math_badd_mu, +=, +=)
  672. YAFL_DO_BMU(yafl_math_bsub_mu, -=, -=)
  673. #define YAFL_DO_BBU(name, op1, op2) \
  674. yaflStatusEn name(yaflInt rnc, yaflFloat *res, yaflInt nr, yaflInt nc, yaflInt anc, yaflFloat *a, yaflFloat *b) \
  675. { \
  676. yaflInt i; \
  677. \
  678. YAFL_CHECK(res, YAFL_ST_INV_ARG_2); \
  679. YAFL_CHECK(a, YAFL_ST_INV_ARG_6); \
  680. YAFL_CHECK(b, YAFL_ST_INV_ARG_7); \
  681. YAFL_CHECK(rnc >= nc, YAFL_ST_INV_ARG_1); \
  682. YAFL_CHECK(anc >= nc, YAFL_ST_INV_ARG_5); \
  683. \
  684. for (i = 0; i < nr; i++) \
  685. { \
  686. yaflInt j; \
  687. yaflInt anci; \
  688. yaflInt rnci; \
  689. yaflInt ncj; \
  690. \
  691. anci = anc * i; \
  692. rnci = rnc * i; \
  693. \
  694. for (j = 0; j < nc; j++) \
  695. { \
  696. res[rnci + j] op1 a[anci + j]; \
  697. } \
  698. \
  699. for (ncj = 0, j = 1; j < nc; ncj += j++) \
  700. { \
  701. yaflInt k; \
  702. yaflFloat resij; \
  703. \
  704. resij = res[rnci + j]; \
  705. for (k = 0; k < j; k++) \
  706. { \
  707. resij op2 a[anci + k] * b[k + ncj]; \
  708. } \
  709. res[rnci + j] = resij; \
  710. } \
  711. } \
  712. return YAFL_ST_OK; \
  713. }
  714. YAFL_DO_BBU(yafl_math_bset_bu, =, +=)
  715. YAFL_DO_BBU(yafl_math_badd_bu, +=, +=)
  716. YAFL_DO_BBU(yafl_math_bsub_bu, -=, -=)
  717. yaflStatusEn yafl_math_ruv(yaflInt sz, yaflFloat *res, yaflFloat *u)
  718. {
  719. yaflInt j;
  720. yaflInt szj;
  721. YAFL_CHECK(res, YAFL_ST_INV_ARG_2);
  722. YAFL_CHECK(u, YAFL_ST_INV_ARG_3);
  723. for (j = sz - 1, szj = ((j - 1) * j) / 2; j > 0; szj -= --j)
  724. /*for (j = sz - 1; j > 0; j--)*/
  725. {
  726. yaflInt i;
  727. yaflFloat resj;
  728. /*yaflInt szj;
  729. szj = ((j - 1)*j)/2;*/
  730. resj = res[j];
  731. for (i = j - 1; i >= 0; i--)
  732. {
  733. res[i] -= u[i + szj] * resj;
  734. }
  735. }
  736. return YAFL_ST_OK;
  737. }
  738. yaflStatusEn yafl_math_rutv(yaflInt sz, yaflFloat *res, yaflFloat *u)
  739. {
  740. yaflInt j;
  741. YAFL_CHECK(res, YAFL_ST_INV_ARG_2);
  742. YAFL_CHECK(u, YAFL_ST_INV_ARG_3);
  743. for (j = 0; j < sz - 1; j++)
  744. {
  745. yaflInt i;
  746. yaflFloat resj;
  747. resj = res[j];
  748. for (i = j + 1; i < sz; i++)
  749. {
  750. yaflInt szi;
  751. szi = ((i - 1)*i)/2;
  752. res[i] -= u[szi + j] * resj;
  753. }
  754. }
  755. return YAFL_ST_OK;
  756. }
  757. yaflStatusEn yafl_math_rum(yaflInt nr, yaflInt nc, yaflFloat *res, yaflFloat *u)
  758. {
  759. yaflInt j;
  760. yaflInt nrj;
  761. YAFL_CHECK(res, YAFL_ST_INV_ARG_3);
  762. YAFL_CHECK(u, YAFL_ST_INV_ARG_4);
  763. for (j = nr - 1, nrj = ((j - 1) * j) / 2; j > 0; nrj -= --j)
  764. {
  765. yaflInt ncj;
  766. yaflInt i;
  767. ncj = nc * j;
  768. for (i = j - 1; i >= 0; i--)
  769. {
  770. yaflInt k;
  771. yaflInt nci;
  772. yaflFloat uij;
  773. nci = nc * i;
  774. uij = u[i + nrj];
  775. for (k = nc - 1; k >= 0; k--)
  776. {
  777. res[nci + k] -= uij * res[ncj + k];
  778. }
  779. }
  780. }
  781. return YAFL_ST_OK;
  782. }
  783. yaflStatusEn yafl_math_mwgsu(yaflInt nr, yaflInt nc, yaflFloat *res_u, yaflFloat *res_d, yaflFloat *w, yaflFloat *d)
  784. {
  785. yaflStatusEn status = YAFL_ST_OK;
  786. yaflInt j;
  787. yaflInt nrj;
  788. YAFL_CHECK(res_u, YAFL_ST_INV_ARG_3);
  789. YAFL_CHECK(res_d, YAFL_ST_INV_ARG_4);
  790. YAFL_CHECK(w, YAFL_ST_INV_ARG_5);
  791. YAFL_CHECK(d, YAFL_ST_INV_ARG_6);
  792. for (j = nr - 1, nrj = ((j - 1) * j) / 2; j >= 0; nrj -= --j)
  793. {
  794. yaflInt ncj;
  795. yaflInt k;
  796. yaflFloat res_dj;
  797. yaflFloat wjk;
  798. ncj = nc * j;
  799. /*res_d[j] = w[j].dot(d * w[j])*/
  800. wjk = w[ncj + nc - 1];
  801. wjk *= wjk;
  802. res_dj = wjk * d[nc - 1];
  803. for (k = nc - 2; k >= 0; k--)
  804. {
  805. wjk = w[ncj + k];
  806. wjk *= wjk;
  807. res_dj += wjk * d[k];
  808. }
  809. /*Bad Eigenvalue workaround*/
  810. if (res_dj < YAFL_EPS)
  811. {
  812. res_d[j] = YAFL_EPS;
  813. for (k = j - 1; k >= 0; k--)
  814. {
  815. res_u[k + nrj] = 0;
  816. }
  817. status |= YAFL_ST_MSK_REGULARIZED;
  818. continue;
  819. }
  820. /*Good Eigenvalue*/
  821. res_d[j] = res_dj;
  822. for (k = j - 1; k >= 0; k--)
  823. {
  824. yaflInt nck;
  825. yaflInt i;
  826. yaflFloat res_ukj;
  827. nck = nc * k;
  828. /* res_u[k,j] = w[k].dot(d * w[j])/res_d[j] */
  829. res_ukj = w[nck] * d[0] * w[ncj] / res_dj;
  830. for (i = 1; i < nc; i++)
  831. {
  832. res_ukj += w[nck + i] * d[i] * w[ncj + i] / res_dj;
  833. }
  834. res_u[k + nrj] = res_ukj;
  835. /* w[k] -= res_u[k,j] * w[j] */
  836. w[nck + nc - 1] -= res_ukj * w[ncj + nc - 1];
  837. for (i = nc - 2; i >= 0; i--)
  838. {
  839. w[nck + i] -= res_ukj * w[ncj + i];
  840. }
  841. }
  842. }
  843. return status;
  844. }
  845. yaflStatusEn yafl_math_udu_up(yaflInt sz, yaflFloat *res_u, yaflFloat *res_d, yaflFloat alpha, yaflFloat *v)
  846. {
  847. yaflStatusEn status = YAFL_ST_OK;
  848. yaflInt j;
  849. yaflInt szj;
  850. YAFL_CHECK(res_u, YAFL_ST_INV_ARG_2);
  851. YAFL_CHECK(res_d, YAFL_ST_INV_ARG_3);
  852. YAFL_CHECK(v, YAFL_ST_INV_ARG_5);
  853. YAFL_CHECK(alpha >= 0, YAFL_ST_INV_ARG_4);
  854. for (j = sz - 1, szj = ((j - 1) * j) / 2; j >= 0; szj -= --j)
  855. {
  856. yaflInt k;
  857. yaflFloat pj;
  858. yaflFloat dj;
  859. yaflFloat res_dj;
  860. yaflFloat betaj;
  861. dj = res_d[j];
  862. pj = v[j];
  863. res_dj = dj + alpha * pj * pj;
  864. if (res_dj < YAFL_EPS)
  865. {
  866. res_dj = YAFL_EPS;
  867. status |= YAFL_ST_MSK_REGULARIZED;
  868. }
  869. betaj = alpha * pj / res_dj;
  870. /*Update res_d[j]*/
  871. res_d[j] = res_dj;
  872. /*Update res_u[:j,j]*/
  873. for (k = j - 1; k >= 0; k--)
  874. {
  875. /*Buffer vars*/
  876. yaflFloat ukj;
  877. yaflFloat vk;
  878. ukj = res_u[k + szj];
  879. vk = v[k] - pj * ukj;
  880. v[k] = vk;
  881. res_u[k + szj] = ukj + betaj * vk;
  882. }
  883. alpha *= dj / res_dj;
  884. }
  885. return status;
  886. }
  887. yaflStatusEn yafl_math_udu_down(yaflInt sz, yaflFloat *res_u, yaflFloat *res_d, yaflFloat alpha, yaflFloat *v)
  888. {
  889. yaflStatusEn status = YAFL_ST_OK;
  890. yaflInt j;
  891. yaflInt szj;
  892. yaflFloat pj;
  893. yaflFloat dj;
  894. YAFL_CHECK(res_u, YAFL_ST_INV_ARG_2);
  895. YAFL_CHECK(res_d, YAFL_ST_INV_ARG_3);
  896. YAFL_CHECK(v, YAFL_ST_INV_ARG_5);
  897. YAFL_CHECK(alpha >= 0, YAFL_ST_INV_ARG_4);
  898. /*Solve U*p = v*/
  899. yafl_math_ruv(sz, v, res_u);
  900. /*Compute alpha0*/
  901. pj = v[0];
  902. dj = pj * pj / res_d[0];
  903. for (j = 1; j < sz; j++)
  904. {
  905. if (res_d[j] < YAFL_EPS)
  906. {
  907. res_d[j] = YAFL_EPS;
  908. status |= YAFL_ST_MSK_REGULARIZED;
  909. }
  910. pj = v[j];
  911. dj += pj * pj / res_d[j];
  912. }
  913. dj = 1 - alpha * dj;
  914. if (dj < YAFL_EPS)
  915. {
  916. dj = YAFL_EPS;
  917. status |= YAFL_ST_MSK_REGULARIZED;
  918. }
  919. alpha /= dj;
  920. /*Conmpute update*/
  921. for (szj = 0, j = 0; j < sz; szj += j++)
  922. {
  923. yaflInt k;
  924. yaflFloat res_dj;
  925. yaflFloat betaj;
  926. dj = res_d[j];
  927. pj = v[j]; /*We don't need to do v[j] = pj; as it was done in #L774*/
  928. res_dj = dj * dj / (dj + alpha * pj * pj);
  929. /*No need to check dj or res_d[j] as it was set big enough in #L783*/
  930. betaj = alpha * pj / dj; /*Sign "-" will be used in #L825*/
  931. /*Update res_d[j]*/
  932. res_d[j] = res_dj;
  933. for (k = 0; k < j; k++)
  934. {
  935. /*Buffer vars*/
  936. yaflFloat ukj;
  937. yaflFloat vk;
  938. ukj = res_u[k + szj];
  939. vk = v[k];
  940. res_u[k + szj] = ukj - betaj * vk;
  941. v[k] = vk + pj * ukj;
  942. }
  943. alpha *= res_dj / dj;
  944. }
  945. return status;
  946. }