-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from Azure/audiotranscription
AudioTranscription file structure
- Loading branch information
Showing
16 changed files
with
325 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -407,3 +407,4 @@ FodyWeavers.xsd | |
|
||
# JetBrains Rider | ||
*.sln.iml | ||
src/AIHub/appsettings.Development.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
namespace MVCWeb.Controllers; | ||
using System; | ||
using System.Net.Http; | ||
using System.Net.Http.Headers; | ||
using System.Threading.Tasks; | ||
using Newtonsoft.Json; | ||
using Microsoft.AspNetCore.Mvc; | ||
using System.Net; | ||
using Newtonsoft.Json.Linq; | ||
using Microsoft.VisualBasic; | ||
|
||
/// <summary>
/// MVC controller that uploads an MP3 file to an Azure Blob Storage container and
/// transcribes it through the Azure AI Speech batch transcription REST API (v3.1).
/// </summary>
public class AudioTranscriptionController : Controller
{
    private const string ViewName = "AudioTranscription";
    private const string SubscriptionKeyHeader = "Ocp-Apim-Subscription-Key";
    private const string Mp3Extension = ".mp3";

    // Delay between two status checks of a running transcription job.
    private static readonly TimeSpan s_pollInterval = TimeSpan.FromSeconds(10);

    // One shared client for all requests: creating an HttpClient per call can exhaust sockets.
    private static readonly HttpClient s_httpClient = new HttpClient();

    private readonly IConfiguration _config;
    private readonly string _speechRegion;
    private readonly string _speechSubscriptionKey;
    private readonly BlobServiceClient _blobServiceClient;
    private readonly BlobContainerClient _containerClient;
    private readonly Uri _sasUri;
    private readonly AudioTranscriptionModel _model;

    /// <summary>
    /// Reads the Speech and Storage settings from configuration, creates the blob
    /// container client and generates a read-only container SAS valid for one hour.
    /// </summary>
    /// <param name="config">Application configuration providing the
    /// <c>AudioTranscription:*</c> and <c>Storage:ConnectionString</c> values.</param>
    public AudioTranscriptionController(IConfiguration config)
    {
        _config = config;
        _speechRegion = _config.GetValue<string>("AudioTranscription:SpeechLocation");
        _speechSubscriptionKey = _config.GetValue<string>("AudioTranscription:SpeechSubscriptionKey");

        string storageConnectionString = _config.GetValue<string>("Storage:ConnectionString");

        // Assign the field itself; a same-named local here would leave the field null.
        _blobServiceClient = new BlobServiceClient(storageConnectionString);
        _containerClient = _blobServiceClient.GetBlobContainerClient(_config.GetValue<string>("AudioTranscription:ContainerName"));
        _sasUri = _containerClient.GenerateSasUri(Azure.Storage.Sas.BlobContainerSasPermissions.Read, DateTimeOffset.UtcNow.AddHours(1));
        _model = new AudioTranscriptionModel();
    }

    /// <summary>Renders the upload form.</summary>
    public IActionResult AudioTranscription()
    {
        return View();
    }

    /// <summary>
    /// Creates a batch transcription job for the given blob, polls it until it finishes,
    /// downloads the result and puts the recognized text into <c>ViewBag.Message</c>.
    /// </summary>
    /// <param name="audio_url">Absolute URL of a blob inside the configured container.</param>
    /// <param name="imageFile">Not used by this action; kept for signature compatibility.</param>
    /// <returns>The <c>AudioTranscription</c> view.</returns>
    /// <exception cref="HttpRequestException">A Speech service call returned a non-success status.</exception>
    /// <exception cref="InvalidOperationException">A Speech service response was missing expected data.</exception>
    [HttpPost]
    public async Task<IActionResult> TranscribeAudio(string audio_url, IFormFile imageFile)
    {
        // The container SAS token is appended to the URL below, so only accept URLs that
        // point into that container; otherwise the token would be sent to an arbitrary host.
        Uri? audioUri = ResolveContainerBlobUri(audio_url);
        if (audioUri is null)
        {
            ViewBag.Message = "You must provide the URL of an audio file stored in the transcription container";
            return View(ViewName, _model);
        }

        // Stop polling when the browser gives up on the request.
        CancellationToken cancellationToken = HttpContext?.RequestAborted ?? CancellationToken.None;

        string audio = audioUri.GetLeftPart(UriPartial.Path) + _sasUri.Query;

        // CALL 1: create the transcription job (STT 3.1)
        string createUrl = "https://" + _speechRegion + ".api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions";
        JObject created = await SendSpeechRequestAsync(HttpMethod.Post, createUrl, BuildTranscriptionRequest(audio), cancellationToken);
        string? transcriptionUrl = (string?)created["self"];
        if (string.IsNullOrEmpty(transcriptionUrl))
        {
            throw new InvalidOperationException("The Speech service response did not contain the transcription URL.");
        }

        // CALL 2: poll until the job reaches a terminal state
        while (true)
        {
            JObject job = await SendSpeechRequestAsync(HttpMethod.Get, transcriptionUrl, null, cancellationToken);
            string? status = (string?)job["status"];
            Console.WriteLine(status);

            if (string.Equals(status, "Succeeded", StringComparison.Ordinal))
            {
                break;
            }

            if (string.Equals(status, "Failed", StringComparison.Ordinal))
            {
                // A failed job never becomes "Succeeded"; report it instead of polling forever.
                ViewBag.Message = "The transcription could not be completed";
                return View(ViewName, _model);
            }

            await Task.Delay(s_pollInterval, cancellationToken);
        }

        // CALL 3: list the result files and pick the transcription file
        JObject files = await SendSpeechRequestAsync(HttpMethod.Get, transcriptionUrl + "/files/", null, cancellationToken);
        string? contentUrl = FindTranscriptionContentUrl(files);
        if (string.IsNullOrEmpty(contentUrl))
        {
            ViewBag.Message = "The transcription finished but produced no result file";
            return View(ViewName, _model);
        }

        // CALL 4: download the transcription itself
        JObject transcription = await SendSpeechRequestAsync(HttpMethod.Get, contentUrl, null, cancellationToken);

        // SelectToken yields null (instead of throwing) when no phrase was recognized.
        string? lexical = (string?)transcription.SelectToken("combinedRecognizedPhrases[0].lexical");

        //Show transcript results
        ViewBag.Message = "TRANSCRIPTION RESULTS: \n\n" + lexical;

        return View(ViewName, _model);
    }

    public class SpeechToTextResponse
    {
        [JsonProperty("text")]
        public string Text { get; set; } = string.Empty;
    }

    /// <summary>
    /// Uploads the posted MP3 file to the configured blob container and transcribes it.
    /// </summary>
    /// <param name="imageFile">The uploaded audio file (the form field is named <c>imageFile</c>).</param>
    /// <param name="prompt">Not used by this action; kept for signature compatibility.</param>
    /// <returns>The <c>AudioTranscription</c> view, with the outcome in <c>ViewBag.Message</c>.</returns>
    [HttpPost]
    public async Task<IActionResult> UploadFile(IFormFile imageFile, string prompt)
    {
        if (CheckNullValues(imageFile))
        {
            ViewBag.Message = "You must upload an mp3 audio file";
            return View(ViewName, _model);
        }

        // Keep only the file name (no client-side directory part) and drop spaces.
        string blobName = System.IO.Path.GetFileName(imageFile.FileName).Replace(" ", "");

        // Validate before uploading so that rejected files never reach the storage account.
        if (CheckImageExtension(blobName))
        {
            ViewBag.Message = "You must upload an audio file with .mp3 extension";
            return View(ViewName, _model);
        }

        //Upload file to azure storage account
        BlobClient blobClient = _containerClient.GetBlobClient(blobName);
        var uploadOptions = new BlobUploadOptions
        {
            HttpHeaders = new BlobHttpHeaders { ContentType = "audio/mpeg" },
        };
        using (var audioStream = imageFile.OpenReadStream())
        {
            await blobClient.UploadAsync(audioStream, uploadOptions);
        }

        //Transcribe the uploaded blob
        IActionResult result = await TranscribeAudio(blobClient.Uri.AbsoluteUri, imageFile);
        ViewBag.Waiting = null;

        return result;
    }

    [ResponseCache(Duration = 0, Location = ResponseCacheLocation.None, NoStore = true)]
    public IActionResult Error()
    {
        return View(new ErrorViewModel { RequestId = Activity.Current?.Id ?? HttpContext.TraceIdentifier });
    }

    /// <summary>Returns <c>true</c> when no file was posted.</summary>
    private bool CheckNullValues(IFormFile imageFile)
    {
        return imageFile is null;
    }

    /// <summary>
    /// Returns <c>true</c> when the given file name or URL does NOT end with the
    /// <c>.mp3</c> extension (case-insensitive), i.e. when the file must be rejected.
    /// </summary>
    private bool CheckImageExtension(string blobUri)
    {
        // Compare the real extension; a substring test would accept names such as "a.mp3.exe".
        string? extension = System.IO.Path.GetExtension(blobUri);
        return !string.Equals(extension, Mp3Extension, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Parses <paramref name="audio_url"/> and returns it only when it is an absolute URL
    /// that addresses a blob inside the configured container; otherwise returns <c>null</c>.
    /// </summary>
    private Uri? ResolveContainerBlobUri(string audio_url)
    {
        if (!Uri.TryCreate(audio_url, UriKind.Absolute, out Uri? candidate))
        {
            return null;
        }

        Uri containerUri = _containerClient.Uri;
        bool sameAuthority = string.Equals(
            candidate.GetLeftPart(UriPartial.Authority),
            containerUri.GetLeftPart(UriPartial.Authority),
            StringComparison.OrdinalIgnoreCase);

        string containerPath = containerUri.AbsolutePath.TrimEnd('/') + "/";
        bool insideContainer =
            candidate.AbsolutePath.Length > containerPath.Length &&
            candidate.AbsolutePath.StartsWith(containerPath, StringComparison.Ordinal);

        return sameAuthority && insideContainer ? candidate : null;
    }

    /// <summary>
    /// Builds the JSON body of the "create transcription" request. The document is built
    /// as a JSON tree so that the audio URL is always escaped correctly.
    /// </summary>
    private static string BuildTranscriptionRequest(string audio)
    {
        var payload = new JObject
        {
            ["contentUrls"] = new JArray(audio),
            ["locale"] = "es-es",
            ["displayName"] = "My Transcription",
            ["model"] = JValue.CreateNull(),
            ["properties"] = new JObject
            {
                ["wordLevelTimestampsEnabled"] = true,
                ["languageIdentification"] = new JObject
                {
                    ["candidateLocales"] = new JArray("en-US", "de-DE", "es-ES"),
                },
            },
        };

        return payload.ToString(Formatting.None);
    }

    /// <summary>
    /// Returns the <c>links.contentUrl</c> of the first entry whose <c>kind</c> is
    /// <c>Transcription</c>, falling back to the first entry; <c>null</c> when the list is empty.
    /// </summary>
    private static string? FindTranscriptionContentUrl(JObject files)
    {
        if (files["values"] is not JArray entries || entries.Count == 0)
        {
            return null;
        }

        foreach (JToken entry in entries)
        {
            string? kind = (string?)entry.SelectToken("kind");
            if (string.Equals(kind, "Transcription", StringComparison.OrdinalIgnoreCase))
            {
                return (string?)entry.SelectToken("links.contentUrl");
            }
        }

        return (string?)entries[0].SelectToken("links.contentUrl");
    }

    /// <summary>
    /// Sends one request carrying the subscription key header, verifies the status code
    /// and returns the response body parsed as a JSON object.
    /// </summary>
    /// <param name="method">HTTP method to use.</param>
    /// <param name="requestUri">Absolute request URL.</param>
    /// <param name="jsonBody">JSON request body, or <c>null</c> to send no body.</param>
    /// <param name="cancellationToken">Token that cancels the request.</param>
    private async Task<JObject> SendSpeechRequestAsync(HttpMethod method, string requestUri, string? jsonBody, CancellationToken cancellationToken)
    {
        using var request = new HttpRequestMessage(method, requestUri);
        request.Headers.Add(SubscriptionKeyHeader, _speechSubscriptionKey);
        if (jsonBody is not null)
        {
            request.Content = new StringContent(jsonBody, null, "application/json");
        }

        using HttpResponseMessage response = await s_httpClient.SendAsync(request, cancellationToken);
        response.EnsureSuccessStatusCode();

        string body = await response.Content.ReadAsStringAsync();
        return JsonConvert.DeserializeObject<JObject>(body)
            ?? throw new InvalidOperationException("The service returned an empty response body.");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
namespace MVCWeb.Models; | ||
|
||
/// <summary>
/// View model for the audio transcription page. Every member is optional and
/// stays <c>null</c> until it is explicitly assigned.
/// </summary>
public class AudioTranscriptionModel
{
    // NOTE(review): the four numeric members below look like content-classification
    // scores carried over from another feature's model - confirm they are needed here.
    public int? Severity { get; set; }
    public int? Violence { get; set; }
    public int? SelfHarm { get; set; }
    public int? Hate { get; set; }

    // Free-text members.
    public string? Prompt { get; set; }
    public string? Image { get; set; }
    public string? Message { get; set; }
}
72 changes: 72 additions & 0 deletions
72
src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
@* Upload form for the audio transcription feature; shows the outcome placed in ViewBag.Message. *@
@{
    ViewData["Title"] = "Audio Transcription";
}

<div class="text-center">
    <svg style="fill: var(--main-color)" xmlns="http://www.w3.org/2000/svg" height="4em"
        viewBox="0 0 512 512"><!--! Font Awesome Free 6.4.2 by @@fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
        <path d="M64 0C28.7 0 0 28.7 0 64V448c0 35.3 28.7 64 64 64H320c35.3 0 64-28.7 64-64V160H256c-17.7 0-32-14.3-32-32V0H64zM256 0V128H384L256 0zm2 226.3c37.1 22.4 62 63.1 62 109.7s-24.9 87.3-62 109.7c-7.6 4.6-17.4 2.1-22-5.4s-2.1-17.4 5.4-22C269.4 401.5 288 370.9 288 336s-18.6-65.5-46.5-82.3c-7.6-4.6-10-14.4-5.4-22s14.4-10 22-5.4zm-91.9 30.9c6 2.5 9.9 8.3 9.9 14.8V400c0 6.5-3.9 12.3-9.9 14.8s-12.9 1.1-17.4-3.5L113.4 376H80c-8.8 0-16-7.2-16-16V312c0-8.8 7.2-16 16-16h33.4l35.3-35.3c4.6-4.6 11.5-5.9 17.4-3.5zm51 34.9c6.6-5.9 16.7-5.3 22.6 1.3C249.8 304.6 256 319.6 256 336s-6.2 31.4-16.3 42.7c-5.9 6.6-16 7.1-22.6 1.3s-7.1-16-1.3-22.6c5.1-5.7 8.1-13.1 8.1-21.3s-3.1-15.7-8.1-21.3c-5.9-6.6-5.3-16.7 1.3-22.6z"/>
    </svg>
    <h1 class="sectionTitle">Audio Transcription</h1>
    <p class="sectionSubTitle">Analiza tus audios usando Azure AI Speech</p>
    <p class="sectionDetails">Sólo necesitas subir un audio (.mp3).</p>

</div>

@if (ViewBag.Message != null)
{
    <div class="row justify-content-center mt-5">
        <div class="col-md-6">
            @* The message contains transcribed text, so it is written through Razor's
               automatic HTML encoding; "white-space: pre-line" renders its line breaks. *@
            <div class="alert alert-primary" role="alert" style="white-space: pre-line">@ViewBag.Message</div>
        </div>
    </div>
}
<form asp-controller="AudioTranscription" asp-action="UploadFile" method="post" enctype="multipart/form-data">
    @* <div class="row justify-content-center mt-5">
        <span class="form-group">
            <label for="fname">Upload your image to analyze:</label><br>
            <input type="text" class="form-control" id="image_url" name="image_url" value="" style="width: 70%;"/>
            <input type="file" class="form-control-file" id="imageFile" name="imageFile" />
        </div>
        <button type="submit" class="btn btn-primary">Upload Image</button>
    </div> *@

    <div class="col-md-6">

        <div class="form-group">
            <label for="imageFile">Audio File:</label><br>
            <input type="file" class="form-control-file" id="imageFile" name="imageFile" />
            <br />
        </div>
        <div id="loadingPanel" style="display: none;">Transcribing...</div>
        <button type="submit" class="btn btn-primary" onclick="submitForm()">Transcribe audio</button>

    </div>
    <script>
        function submitForm() {
            // Disable the button
            var btn = document.querySelector('button[type="submit"]');
            btn.disabled = true;
            // Show the loading panel
            var loadingPanel = document.getElementById('loadingPanel');
            loadingPanel.style.display = 'block';
            // Submit the form
            var form = document.querySelector('form');
            form.submit();
        }
        window.onload = function () {
            // Enable the button
            var btn = document.querySelector('button[type="submit"]');
            btn.disabled = false;
            // Hide the loading panel
            var loadingPanel = document.getElementById('loadingPanel');
            loadingPanel.style.display = 'none';
        }
    </script>
</form>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.