// mtmd-audio.h

  1. #pragma once
  2. #include "ggml.h"
  3. #include <cstdint>
  4. #include <vector>
  5. #include <string>
  // Assertion macro used by the audio code; expands to GGML_ASSERT.
  #define WHISPER_ASSERT      GGML_ASSERT

  // Audio front-end constants.
  // NOTE(review): units are not stated in this header — presumably Hz for the
  // sample rate, samples for the FFT size / hop length and seconds for the
  // chunk size; confirm against the implementation.
  #define WHISPER_SAMPLE_RATE 16000
  #define WHISPER_N_FFT       400
  #define WHISPER_HOP_LENGTH  160
  #define WHISPER_CHUNK_SIZE  30

  // Carries the same numeric value as WHISPER_SAMPLE_RATE.
  #define COMMON_SAMPLE_RATE  16000
  namespace whisper_preprocessor {

  // Element type of the `output` vector of preprocess_audio().
  // The integer members default to 0 so that a default-constructed value never
  // carries indeterminate sizes.
  // NOTE(review): n_len / n_len_org / n_mel look like frame counts and the
  // number of mel bins describing the layout of `data` — confirm against the
  // implementation.
  struct whisper_mel {
      int n_len     = 0;
      int n_len_org = 0;
      int n_mel     = 0;

      std::vector<float> data;
  };

  // Filter bank passed to preprocess_audio().
  // The integer members default to 0 so that a default-constructed value never
  // carries indeterminate sizes.
  // NOTE(review): `data` presumably holds n_mel * n_fft coefficients — confirm
  // against the code that builds the filters.
  struct whisper_filters {
      int32_t n_mel = 0;
      int32_t n_fft = 0;

      std::vector<float> data;
  };

  // Declaration only; the definition lives outside this header.
  //   samples   - pointer to the input float samples
  //   n_samples - number of elements readable through `samples`
  //   filters   - filter bank to use
  //   output    - vector receiving whisper_mel entries
  // Returns a bool. NOTE(review): presumably true on success — confirm against
  // the definition.
  extern bool preprocess_audio(
          const float * samples,
          size_t n_samples,
          const whisper_filters & filters,
          std::vector<whisper_mel> & output);

  } // namespace whisper_preprocessor
  30. // TODO @ngxson : move this helper to mtmd-helpers.cpp
  namespace audio_helpers {

  // Declaration only; defined outside this header.
  // NOTE(review): presumably reports whether the `len` bytes at `buf` look like
  // an audio file — confirm against the definition.
  bool is_audio_file(const char * buf, size_t len);

  // Declaration only; defined outside this header.
  //   buf_in              - input byte buffer
  //   len                 - number of bytes readable through `buf_in`
  //   target_sampler_rate - requested rate for the result
  //                         (NOTE(review): presumably in Hz — confirm)
  //   pcmf32_mono         - vector receiving float samples
  // Returns a bool. NOTE(review): presumably true on success — confirm against
  // the definition.
  bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono);

  } // namespace audio_helpers
  39. namespace whisper_precalc_filters {
  40. extern whisper_preprocessor::whisper_filters get_128_bins();
  41. } // namespace whisper_precalc_filters