Skip to content

Commit

Permalink
num to word
Browse files Browse the repository at this point in the history
  • Loading branch information
danemadsen committed Jul 30, 2024
1 parent b5e4ca3 commit 92aaa44
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 5 deletions.
2 changes: 1 addition & 1 deletion example/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
int main() {
babylon_g2p_init("./models/deep_phonemizer.onnx", "en_us", 1);

const char* text = "Hello world. There is 317 characters in this sentence. This is an example program for the Babylon project. Text to speech models can be used to generate speech from text. This is a very powerful tool for many applications. For example, it can be used to generate speech for virtual assistants, audiobooks, and more.";
const char* text = "Hello world. There is 317 characters in this sentence.";

babylon_tts_init("./models/curie.onnx");

Expand Down
131 changes: 127 additions & 4 deletions src/numbers_to_words.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@
#include <vector>
#include <iostream>

// Splits a string into groups of up to three characters, aligned to the
// right-hand end of the string, and returns the groups in left-to-right order.
// Only the first group may be shorter than three characters; an empty input
// produces an empty vector. Example: "1234567" -> {"1", "234", "567"}.
std::vector<std::string> split_into_threes(const std::string& str) {
    std::vector<std::string> groups;
    const std::string::size_type total = str.size();

    // Whatever does not fit into whole triples forms the short leading group.
    const std::string::size_type head = total % 3;
    if (head != 0) {
        groups.push_back(str.substr(0, head));
    }

    // Everything after the leading group divides evenly into triples.
    for (std::string::size_type pos = head; pos < total; pos += 3) {
        groups.push_back(str.substr(pos, 3));
    }

    return groups;
}

std::string number_to_word(int number) {
switch (number) {
case 0:
Expand Down Expand Up @@ -29,12 +48,116 @@ std::string number_to_word(int number) {
}
}

// Maps a tens digit (1..9) to the English name of that multiple of ten:
// 1 -> "ten", 2 -> "twenty", ... 9 -> "ninety".
// Any value outside 1..9 yields an empty string.
std::string tens_to_word(int tens) {
    // Names are stored in digit order; index 0 corresponds to digit 1.
    static const char* const kTensNames[] = {
        "ten",   "twenty",  "thirty", "forty", "fifty",
        "sixty", "seventy", "eighty", "ninety"
    };

    if (tens < 1 || tens > 9) {
        return "";
    }
    return kTensNames[tens - 1];
}

// Maps a two-digit value in the range 10..19 to its English name
// ("ten", "eleven", ... "nineteen"). Any other value yields an empty string.
//
// 10 is included because hundreds_to_words() passes the full last-two-digits
// value whenever the tens digit is 1; without this case, inputs such as 10
// or 110 produced no word at all for the "ten" part.
std::string teens_to_word(int teens) {
    switch (teens) {
        case 10:
            return "ten";
        case 11:
            return "eleven";
        case 12:
            return "twelve";
        case 13:
            return "thirteen";
        case 14:
            return "fourteen";
        case 15:
            return "fifteen";
        case 16:
            return "sixteen";
        case 17:
            return "seventeen";
        case 18:
            return "eighteen";
        case 19:
            return "nineteen";
        default:
            return "";
    }
}

// Spells out a value of up to three digits as English words, e.g.
// 317 -> "three hundred and seventeen", 42 -> "forty two".
//
// The hundreds part is followed by " and" whenever a non-zero remainder
// follows it. A value of 0 produces an empty string, since every branch is
// skipped.
//
// Relies on number_to_word() for single digits, tens_to_word() for multiples
// of ten, and teens_to_word() for 11..19.
std::string hundreds_to_words(int hundreds) {
    std::string result;
    const int hundreds_digit = hundreds / 100;
    const int last_two = hundreds % 100;
    const int tens_digit = last_two / 10;
    const int ones_digit = last_two % 10;

    if (hundreds_digit > 0) {
        result += number_to_word(hundreds_digit) + " hundred";

        if (last_two > 0) {
            result += " and";
        }
    }

    // Appends a word, separated by a single space from anything already
    // emitted.
    auto append_word = [&result](const std::string& word) {
        if (!result.empty()) {
            result += " ";
        }
        result += word;
    };

    if (last_two >= 11 && last_two <= 19) {
        // Eleven through nineteen have irregular single-word names.
        append_word(teens_to_word(last_two));
    } else {
        // Exactly ten is handled here through tens_to_word(1). Routing it
        // through the teens table previously yielded an empty word, so 10
        // came out as "" and 110 as "one hundred and " with a trailing space.
        if (tens_digit > 0) {
            append_word(tens_to_word(tens_digit));
        }
        if (ones_digit > 0) {
            append_word(number_to_word(ones_digit));
        }
    }

    return result;
}

std::vector<std::string> numbers_to_words(const std::string& text) {
std::vector<std::string> result;

for (int i = 0; i < text.length(); i++) {
result.push_back(number_to_word(text[i] - '0'));
std::cout << number_to_word(text[i] - '0') << std::endl;
std::vector<std::string> parts = split_into_threes(text);
std::vector<std::string> suffixes = {
"thousand",
"million",
"billion",
"trillion",
"quadrillion",
"quintillion",
"sextillion",
"septillion",
"octillion",
"nonillion",
"decillion"
};

for (int i = 0; i < parts.size(); i++) {
int number = std::stoi(parts[i]);
result.push_back(hundreds_to_words(number));

if (i > 0 && i < suffixes.size()) {
result.back() += " " + suffixes[i - 1];
}
}

return result;
Expand Down
2 changes: 2 additions & 0 deletions src/phonemizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,11 @@ namespace DeepPhonemizer {
std::vector<std::string> number_words = numbers_to_words(word);

for (const auto& number_word : number_words) {
std::cout << number_word << " ";
word_phonemes = g2p_internal(number_word);
phonemes.insert(phonemes.end(), word_phonemes.begin(), word_phonemes.end());
}
std::cout << std::endl;
}
else {
word_phonemes = g2p_internal(word);
Expand Down

0 comments on commit 92aaa44

Please sign in to comment.