Skip to content

Commit

Permalink
simplify
Browse files Browse the repository at this point in the history
  • Loading branch information
danemadsen committed Aug 13, 2024
1 parent e2774c8 commit db9e393
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 22 deletions.
6 changes: 0 additions & 6 deletions include/babylon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ namespace DeepPhonemizer {
std::vector<std::string> g2p(const std::string& text);

private:
const std::array<const char *, 1> input_names = {"text"};
const std::array<const char *, 1> output_names = {"output"};

std::string lang;
bool punctuation;
Ort::Session* session;
Expand Down Expand Up @@ -72,9 +69,6 @@ namespace Vits {
void tts(const std::vector<std::string>& phonemes, const std::string& output_path);

private:
const std::array<const char *, 3> input_names = {"input", "input_lengths", "scales"};
const std::array<const char *, 1> output_names = {"output"};

int sample_rate;
std::vector<float> scales;

Expand Down
35 changes: 19 additions & 16 deletions src/phonemizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,25 @@
#include <algorithm>
#include <cmath>

// Names of the model's input and output nodes for the phonemizer.
// NOTE(review): presumably handed to the ONNX Runtime session when running
// inference — confirm at the call site.
const std::array<const char *, 1> input_names{"text"};
const std::array<const char *, 1> output_names{"output"};

/// Converts raw scores into a probability distribution (softmax).
///
/// The largest logit is subtracted from every element before exponentiation,
/// so std::exp only ever receives arguments <= 0 and cannot overflow.
///
/// @param logits Raw scores; may be empty.
/// @return A vector with the same length as `logits` holding
///         exp(logit - max) / sum; an empty vector when `logits` is empty.
std::vector<float> softmax(const std::vector<float>& logits) {
    // max_element returns end() for an empty range; dereferencing it is
    // undefined behaviour, so bail out early.
    if (logits.empty()) {
        return {};
    }

    const float max_logit = *std::max_element(logits.begin(), logits.end());

    // Compute each exponential once, accumulating the normaliser as we go.
    std::vector<float> probabilities(logits.size());
    float sum = 0.0f;
    for (size_t i = 0; i < logits.size(); ++i) {
        probabilities[i] = std::exp(logits[i] - max_logit);
        sum += probabilities[i];
    }

    for (float& probability : probabilities) {
        probability /= sum;
    }

    return probabilities;
}

namespace DeepPhonemizer {
SequenceTokenizer::SequenceTokenizer(const std::vector<std::string>& symbols, const std::vector<std::string>& languages, int char_repeats, bool lowercase, bool append_start_end)
: char_repeats(char_repeats), lowercase(lowercase), append_start_end(append_start_end), pad_token("_"), end_token("<end>") {
Expand Down Expand Up @@ -113,22 +132,6 @@ namespace DeepPhonemizer {
return "<" + language + ">";
}

/// Converts raw scores into a probability distribution (softmax).
///
/// The largest logit is subtracted from every element before exponentiation,
/// so std::exp only ever receives arguments <= 0 and cannot overflow.
///
/// @param logits Raw scores; may be empty.
/// @return A vector with the same length as `logits` holding
///         exp(logit - max) / sum; an empty vector when `logits` is empty.
std::vector<float> softmax(const std::vector<float>& logits) {
    // max_element returns end() for an empty range; dereferencing it is
    // undefined behaviour, so bail out early.
    if (logits.empty()) {
        return {};
    }

    const float max_logit = *std::max_element(logits.begin(), logits.end());

    // Compute each exponential once, accumulating the normaliser as we go.
    std::vector<float> probabilities(logits.size());
    float sum = 0.0f;
    for (size_t i = 0; i < logits.size(); ++i) {
        probabilities[i] = std::exp(logits[i] - max_logit);
        sum += probabilities[i];
    }

    for (float& probability : probabilities) {
        probability /= sum;
    }

    return probabilities;
}

Session::Session(const std::string& model_path, const std::string language, const bool use_punctuation) {
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "DeepPhonemizer");
env.DisableTelemetryEvents();
Expand Down
3 changes: 3 additions & 0 deletions src/voice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#include <algorithm>
#include <cmath>

// Names of the model's input and output nodes for the voice (TTS) model.
// NOTE(review): presumably handed to the ONNX Runtime session when running
// inference — confirm at the call site.
const std::array<const char *, 3> input_names{"input", "input_lengths", "scales"};
const std::array<const char *, 1> output_names{"output"};

struct WavHeader {
uint8_t RIFF[4] = {'R', 'I', 'F', 'F'};
uint32_t chunk_size;
Expand Down

0 comments on commit db9e393

Please sign in to comment.