res_speech.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /*
  2. * Asterisk -- An open source telephony toolkit.
  3. *
  4. * Copyright (C) 2006, Digium, Inc.
  5. *
  6. * Joshua Colp <jcolp@digium.com>
  7. *
  8. * See http://www.asterisk.org for more information about
  9. * the Asterisk project. Please do not directly contact
  10. * any of the maintainers of this project for assistance;
  11. * the project provides a web site, mailing lists and IRC
  12. * channels for your use.
  13. *
  14. * This program is free software, distributed under the terms of
  15. * the GNU General Public License Version 2. See the LICENSE file
  16. * at the top of the source tree.
  17. */
  18. /*! \file
  19. *
  20. * \brief Generic Speech Recognition API
  21. *
  22. * \author Joshua Colp <jcolp@digium.com>
  23. */
  24. /*** MODULEINFO
  25. <support_level>core</support_level>
  26. ***/
  27. #include "asterisk.h"
  28. #include "asterisk/channel.h"
  29. #include "asterisk/module.h"
  30. #include "asterisk/lock.h"
  31. #include "asterisk/linkedlists.h"
  32. #include "asterisk/cli.h"
  33. #include "asterisk/term.h"
  34. #include "asterisk/speech.h"
  35. #include "asterisk/format_cache.h"
  36. #include "asterisk/translate.h"
  /*! \brief Every registered speech engine; walked and modified under the list's read/write lock */
  static AST_RWLIST_HEAD_STATIC(engines, ast_speech_engine);

  /*! \brief Engine returned when a lookup supplies no name; static storage starts it out as NULL */
  static struct ast_speech_engine *default_engine;
  39. /*! \brief Find a speech recognition engine of specified name, if NULL then use the default one */
  40. struct ast_speech_engine *ast_speech_find_engine(const char *engine_name)
  41. {
  42. struct ast_speech_engine *engine = NULL;
  43. /* If no name is specified -- use the default engine */
  44. if (ast_strlen_zero(engine_name))
  45. return default_engine;
  46. AST_RWLIST_RDLOCK(&engines);
  47. AST_RWLIST_TRAVERSE(&engines, engine, list) {
  48. if (!strcasecmp(engine->name, engine_name)) {
  49. break;
  50. }
  51. }
  52. AST_RWLIST_UNLOCK(&engines);
  53. return engine;
  54. }
  55. /*! \brief Activate a loaded (either local or global) grammar */
  56. int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name)
  57. {
  58. return (speech->engine->activate ? speech->engine->activate(speech, grammar_name) : -1);
  59. }
  60. /*! \brief Deactivate a loaded grammar on a speech structure */
  61. int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name)
  62. {
  63. return (speech->engine->deactivate ? speech->engine->deactivate(speech, grammar_name) : -1);
  64. }
  65. /*! \brief Load a local grammar on a speech structure */
  66. int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar)
  67. {
  68. return (speech->engine->load ? speech->engine->load(speech, grammar_name, grammar) : -1);
  69. }
  70. /*! \brief Unload a local grammar from a speech structure */
  71. int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name)
  72. {
  73. return (speech->engine->unload ? speech->engine->unload(speech, grammar_name) : -1);
  74. }
  75. /*! \brief Return the results of a recognition from the speech structure */
  76. struct ast_speech_result *ast_speech_results_get(struct ast_speech *speech)
  77. {
  78. return (speech->engine->get ? speech->engine->get(speech) : NULL);
  79. }
  80. /*! \brief Free a list of results */
  81. int ast_speech_results_free(struct ast_speech_result *result)
  82. {
  83. struct ast_speech_result *current_result = result, *prev_result = NULL;
  84. int res = 0;
  85. while (current_result != NULL) {
  86. prev_result = current_result;
  87. /* Deallocate what we can */
  88. if (current_result->text != NULL) {
  89. ast_free(current_result->text);
  90. current_result->text = NULL;
  91. }
  92. if (current_result->grammar != NULL) {
  93. ast_free(current_result->grammar);
  94. current_result->grammar = NULL;
  95. }
  96. /* Move on and then free ourselves */
  97. current_result = AST_LIST_NEXT(current_result, list);
  98. ast_free(prev_result);
  99. prev_result = NULL;
  100. }
  101. return res;
  102. }
  103. /*! \brief Start speech recognition on a speech structure */
  104. void ast_speech_start(struct ast_speech *speech)
  105. {
  106. /* Clear any flags that may affect things */
  107. ast_clear_flag(speech, AST_SPEECH_SPOKE);
  108. ast_clear_flag(speech, AST_SPEECH_QUIET);
  109. ast_clear_flag(speech, AST_SPEECH_HAVE_RESULTS);
  110. /* If results are on the structure, free them since we are starting again */
  111. if (speech->results) {
  112. ast_speech_results_free(speech->results);
  113. speech->results = NULL;
  114. }
  115. /* If the engine needs to start stuff up, do it */
  116. if (speech->engine->start)
  117. speech->engine->start(speech);
  118. return;
  119. }
  120. /*! \brief Write in signed linear audio to be recognized */
  121. int ast_speech_write(struct ast_speech *speech, void *data, int len)
  122. {
  123. /* Make sure the speech engine is ready to accept audio */
  124. if (speech->state != AST_SPEECH_STATE_READY)
  125. return -1;
  126. return speech->engine->write(speech, data, len);
  127. }
  128. /*! \brief Signal to the engine that DTMF was received */
  129. int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
  130. {
  131. int res = 0;
  132. if (speech->state != AST_SPEECH_STATE_READY)
  133. return -1;
  134. if (speech->engine->dtmf != NULL) {
  135. res = speech->engine->dtmf(speech, dtmf);
  136. }
  137. return res;
  138. }
  139. /*! \brief Change an engine specific attribute */
  140. int ast_speech_change(struct ast_speech *speech, const char *name, const char *value)
  141. {
  142. return (speech->engine->change ? speech->engine->change(speech, name, value) : -1);
  143. }
  144. /*! \brief Get an engine specific attribute */
  145. int ast_speech_get_setting(struct ast_speech *speech, const char *name, char *buf, size_t len)
  146. {
  147. return (speech->engine->get_setting ? speech->engine->get_setting(speech, name, buf, len) : -1);
  148. }
  149. /*! \brief Create a new speech structure using the engine specified */
  150. struct ast_speech *ast_speech_new(const char *engine_name, const struct ast_format_cap *cap)
  151. {
  152. struct ast_speech_engine *engine = NULL;
  153. struct ast_speech *new_speech = NULL;
  154. struct ast_format_cap *joint;
  155. RAII_VAR(struct ast_format *, best, NULL, ao2_cleanup);
  156. RAII_VAR(struct ast_format *, best_translated, NULL, ao2_cleanup);
  157. /* Try to find the speech recognition engine that was requested */
  158. if (!(engine = ast_speech_find_engine(engine_name)))
  159. return NULL;
  160. joint = ast_format_cap_alloc(AST_FORMAT_CAP_FLAG_DEFAULT);
  161. if (!joint) {
  162. return NULL;
  163. }
  164. ast_format_cap_get_compatible(engine->formats, cap, joint);
  165. best = ast_format_cap_get_format(joint, 0);
  166. ao2_ref(joint, -1);
  167. if (!best) {
  168. if (ast_format_cap_iscompatible_format(engine->formats, ast_format_slin) != AST_FORMAT_CMP_NOT_EQUAL) {
  169. best = ao2_bump(ast_format_slin);
  170. } else {
  171. /*
  172. * If there is no overlap and the engine does not support slin, find the best
  173. * format to translate to and set that as the 'best' input format for the engine.
  174. * API consumer is responsible for translating to this format.
  175. * Safe to cast cap as ast_translator_best_choice does not modify the caps
  176. */
  177. if (ast_translator_best_choice(engine->formats, (struct ast_format_cap *)cap, &best, &best_translated)) {
  178. /* No overlapping formats and no translatable formats */
  179. return NULL;
  180. }
  181. }
  182. }
  183. /* Allocate our own speech structure, and try to allocate a structure from the engine too */
  184. if (!(new_speech = ast_calloc(1, sizeof(*new_speech)))) {
  185. return NULL;
  186. }
  187. /* Initialize the lock */
  188. ast_mutex_init(&new_speech->lock);
  189. /* Make sure no results are present */
  190. new_speech->results = NULL;
  191. /* Copy over our engine pointer */
  192. new_speech->engine = engine;
  193. /* Can't forget the format audio is going to be in */
  194. new_speech->format = ao2_bump(best);
  195. /* We are not ready to accept audio yet */
  196. ast_speech_change_state(new_speech, AST_SPEECH_STATE_NOT_READY);
  197. /* Pass ourselves to the engine so they can set us up some more and if they error out then do not create a structure */
  198. if (engine->create(new_speech, new_speech->format)) {
  199. ast_mutex_destroy(&new_speech->lock);
  200. ao2_ref(new_speech->format, -1);
  201. ast_free(new_speech);
  202. return NULL;
  203. }
  204. return new_speech;
  205. }
  206. /*! \brief Destroy a speech structure */
  207. int ast_speech_destroy(struct ast_speech *speech)
  208. {
  209. int res = 0;
  210. /* Call our engine so we are destroyed properly */
  211. speech->engine->destroy(speech);
  212. /* Deinitialize the lock */
  213. ast_mutex_destroy(&speech->lock);
  214. /* If results exist on the speech structure, destroy them */
  215. if (speech->results)
  216. ast_speech_results_free(speech->results);
  217. /* If a processing sound is set - free the memory used by it */
  218. if (speech->processing_sound)
  219. ast_free(speech->processing_sound);
  220. ao2_ref(speech->format, -1);
  221. /* Aloha we are done */
  222. ast_free(speech);
  223. return res;
  224. }
  225. /*! \brief Change state of a speech structure */
  226. int ast_speech_change_state(struct ast_speech *speech, int state)
  227. {
  228. int res = 0;
  229. switch (state) {
  230. case AST_SPEECH_STATE_WAIT:
  231. /* The engine heard audio, so they spoke */
  232. ast_set_flag(speech, AST_SPEECH_SPOKE);
  233. default:
  234. speech->state = state;
  235. break;
  236. }
  237. return res;
  238. }
  239. const char *ast_speech_results_type_to_string(enum ast_speech_results_type type)
  240. {
  241. switch (type) {
  242. case AST_SPEECH_RESULTS_TYPE_NORMAL:
  243. return "normal";
  244. case AST_SPEECH_RESULTS_TYPE_NBEST:
  245. return "nbest";
  246. default:
  247. ast_assert(0);
  248. return "unknown";
  249. }
  250. }
  251. /*! \brief Change the type of results we want */
  252. int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type)
  253. {
  254. speech->results_type = results_type;
  255. return (speech->engine->change_results_type ? speech->engine->change_results_type(speech, results_type) : 0);
  256. }
  257. /*! \brief Register a speech recognition engine */
  258. int ast_speech_register(struct ast_speech_engine *engine)
  259. {
  260. int res = 0;
  261. /* Confirm the engine meets the minimum API requirements */
  262. if (!engine->create || !engine->write || !engine->destroy) {
  263. ast_log(LOG_WARNING, "Speech recognition engine '%s' did not meet minimum API requirements.\n", engine->name);
  264. return -1;
  265. }
  266. /* If an engine is already loaded with this name, error out */
  267. if (ast_speech_find_engine(engine->name)) {
  268. ast_log(LOG_WARNING, "Speech recognition engine '%s' already exists.\n", engine->name);
  269. return -1;
  270. }
  271. ast_verb(5, "Registered speech recognition engine '%s'\n", engine->name);
  272. /* Add to the engine linked list and make default if needed */
  273. AST_RWLIST_WRLOCK(&engines);
  274. AST_RWLIST_INSERT_HEAD(&engines, engine, list);
  275. if (!default_engine) {
  276. default_engine = engine;
  277. ast_verb(5, "Made '%s' the default speech recognition engine\n", engine->name);
  278. }
  279. AST_RWLIST_UNLOCK(&engines);
  280. return res;
  281. }
  /*!
   * \brief Unregister a speech recognition engine by name
   *
   * \retval 0 an engine with that name was removed
   * \retval -1 ast_speech_unregister2() returned NULL for that name
   */
  int ast_speech_unregister(const char *engine_name)
  {
      if (ast_speech_unregister2(engine_name)) {
          return 0;
      }

      return -1;
  }
  287. struct ast_speech_engine *ast_speech_unregister2(const char *engine_name)
  288. {
  289. struct ast_speech_engine *engine = NULL;
  290. if (ast_strlen_zero(engine_name)) {
  291. return NULL;
  292. }
  293. AST_RWLIST_WRLOCK(&engines);
  294. AST_RWLIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
  295. if (!strcasecmp(engine->name, engine_name)) {
  296. /* We have our engine... removed it */
  297. AST_RWLIST_REMOVE_CURRENT(list);
  298. /* If this was the default engine, we need to pick a new one */
  299. if (engine == default_engine) {
  300. default_engine = AST_RWLIST_FIRST(&engines);
  301. }
  302. ast_verb(5, "Unregistered speech recognition engine '%s'\n", engine_name);
  303. /* All went well */
  304. break;
  305. }
  306. }
  307. AST_RWLIST_TRAVERSE_SAFE_END;
  308. AST_RWLIST_UNLOCK(&engines);
  309. return engine;
  310. }
  311. void ast_speech_unregister_engines(
  312. int (*should_unregister)(const struct ast_speech_engine *engine, void *data), void *data,
  313. void (*on_unregistered)(void *obj))
  314. {
  315. struct ast_speech_engine *engine = NULL;
  316. if (!should_unregister) {
  317. return;
  318. }
  319. AST_RWLIST_WRLOCK(&engines);
  320. AST_RWLIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
  321. if (should_unregister(engine, data)) {
  322. /* We have our engine... removed it */
  323. AST_RWLIST_REMOVE_CURRENT(list);
  324. /* If this was the default engine, we need to pick a new one */
  325. if (engine == default_engine) {
  326. default_engine = AST_RWLIST_FIRST(&engines);
  327. }
  328. ast_verb(5, "Unregistered speech recognition engine '%s'\n", engine->name);
  329. /* All went well */
  330. if (on_unregistered) {
  331. on_unregistered(engine);
  332. }
  333. }
  334. }
  335. AST_RWLIST_TRAVERSE_SAFE_END;
  336. AST_RWLIST_UNLOCK(&engines);
  337. }
  /*! \brief Module unload hook: this module can not be unloaded, so it always returns -1 */
  static int unload_module(void)
  {
      const int refused = -1;

      return refused;
  }
  343. static int load_module(void)
  344. {
  345. return AST_MODULE_LOAD_SUCCESS;
  346. }
  347. AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_GLOBAL_SYMBOLS | AST_MODFLAG_LOAD_ORDER, "Generic Speech Recognition API",
  348. .support_level = AST_MODULE_SUPPORT_CORE,
  349. .load = load_module,
  350. .unload = unload_module,
  351. .load_pri = AST_MODPRI_APP_DEPEND - 1,
  352. );