Skip to content

Commit

Permalink
Merge pull request #4 from Azure/audiotranscription
Browse files Browse the repository at this point in the history
AudioTranscription file structure
  • Loading branch information
rag2111 authored Jan 11, 2024
2 parents 9645b36 + 5b044b3 commit 873998a
Show file tree
Hide file tree
Showing 16 changed files with 325 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -407,3 +407,4 @@ FodyWeavers.xsd

# JetBrains Rider
*.sln.iml
src/AIHub/appsettings.Development.json
210 changes: 210 additions & 0 deletions src/AIHub/Controllers/AudioTranscriptionController.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
namespace MVCWeb.Controllers;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Threading.Tasks;
using Newtonsoft.Json;
using Microsoft.AspNetCore.Mvc;
using System.Net;
using Newtonsoft.Json.Linq;
using Microsoft.VisualBasic;

/// <summary>
/// MVC controller behind the Audio Transcription page. It uploads an .mp3 file to the
/// configured blob container and transcribes it with the Azure AI Speech
/// "speech to text" v3.1 batch REST API, showing the transcript through <c>ViewBag.Message</c>.
/// </summary>
public class AudioTranscriptionController : Controller
{
    // One shared client for every request: creating an HttpClient per call can exhaust sockets.
    // Authentication is attached per request message, so no per-user state lives on the client.
    private static readonly HttpClient s_httpClient = new HttpClient();

    // Delay between two status checks of a running transcription job.
    private static readonly TimeSpan s_pollInterval = TimeSpan.FromSeconds(10);

    // Upper bound on status checks (180 x 10 s = 30 minutes) so a stuck job cannot hold the request forever.
    private const int MaxPollAttempts = 180;

    private readonly IConfiguration _config;
    private readonly string _speechRegion;
    private readonly string _speechSubscriptionKey;
    private readonly string _storageConnectionString;
    private readonly BlobContainerClient _containerClient;

    // Container-level read SAS; only its query string is used, appended to blob URLs.
    private readonly Uri _sasUri;

    private readonly AudioTranscriptionModel _model;

    /// <summary>
    /// Reads the Speech and Storage settings from configuration, creates the blob container
    /// client and generates a read-only SAS for the container that expires after one hour.
    /// </summary>
    /// <param name="config">Application configuration holding the "AudioTranscription:*" and "Storage:ConnectionString" keys.</param>
    public AudioTranscriptionController(IConfiguration config)
    {
        _config = config;
        _speechRegion = _config.GetValue<string>("AudioTranscription:SpeechLocation");
        _speechSubscriptionKey = _config.GetValue<string>("AudioTranscription:SpeechSubscriptionKey");
        _storageConnectionString = _config.GetValue<string>("Storage:ConnectionString");

        // The service client is only needed to obtain the container client, so it stays a local.
        var blobServiceClient = new BlobServiceClient(_storageConnectionString);
        _containerClient = blobServiceClient.GetBlobContainerClient(_config.GetValue<string>("AudioTranscription:ContainerName"));
        _sasUri = _containerClient.GenerateSasUri(Azure.Storage.Sas.BlobContainerSasPermissions.Read, DateTimeOffset.UtcNow.AddHours(1));
        _model = new AudioTranscriptionModel();
    }

    /// <summary>Shows the empty Audio Transcription page.</summary>
    public IActionResult AudioTranscription()
    {
        return View();
    }

    /// <summary>
    /// Transcribes an audio blob: creates a batch transcription job, polls it until it
    /// finishes, downloads the result file and puts the lexical transcript in <c>ViewBag.Message</c>.
    /// </summary>
    /// <param name="audio_url">Absolute URL (without query string) of a blob inside the configured container.</param>
    /// <param name="imageFile">Uploaded form file; not used by this action.</param>
    /// <returns>The "AudioTranscription" view with either the transcript or an error message.</returns>
    /// <exception cref="HttpRequestException">A Speech REST call returned a non-success status code.</exception>
    [HttpPost]
    public async Task<IActionResult> TranscribeAudio(string audio_url, IFormFile imageFile)
    {
        // The container SAS is appended to this URL, so it must never be sent to a foreign location.
        if (!IsBlobInContainer(audio_url))
        {
            ViewBag.Message = "The audio file must be a blob stored in the configured storage container";
            return View("AudioTranscription", _model);
        }

        // Stop polling when the browser abandons the request (HttpContext is null outside a request).
        CancellationToken cancellationToken = HttpContext?.RequestAborted ?? CancellationToken.None;
        string audio = audio_url + _sasUri.Query;

        // CALL 1: create the transcription job
        string transcriptionUrl = await CreateTranscriptionAsync(audio, cancellationToken);

        // CALL 2: wait until the job reaches a final state
        string finalStatus = await WaitForTranscriptionAsync(transcriptionUrl, cancellationToken);
        if (finalStatus != "Succeeded")
        {
            ViewBag.Message = "The transcription could not be completed (status: " + (finalStatus ?? "timed out") + ")";
            return View("AudioTranscription", _model);
        }

        // CALL 3: locate the result file
        string contentUrl = await GetTranscriptionContentUrlAsync(transcriptionUrl, cancellationToken);
        if (string.IsNullOrEmpty(contentUrl))
        {
            ViewBag.Message = "The transcription finished but no result file was returned";
            return View("AudioTranscription", _model);
        }

        // CALL 4: download the transcript
        string transcript = await GetTranscriptTextAsync(contentUrl, cancellationToken);

        //Show transcript results
        ViewBag.Message = "TRANSCRIPTION RESULTS: \n\n" + transcript;

        return View("AudioTranscription", _model);
    }

    /// <summary>JSON shape with a single "text" property; not used by the actions in this controller.</summary>
    public class SpeechToTextResponse
    {
        [JsonProperty("text")]
        public string Text { get; set; }
    }

    /// <summary>
    /// Validates the uploaded file, stores it in the blob container as "audio/mpeg" and
    /// transcribes it through <see cref="TranscribeAudio"/>.
    /// </summary>
    /// <param name="imageFile">The uploaded audio file (the form field is named "imageFile").</param>
    /// <param name="prompt">Not used by this action.</param>
    /// <returns>The "AudioTranscription" view with the transcript or a validation message.</returns>
    [HttpPost]
    public async Task<IActionResult> UploadFile(IFormFile imageFile, string prompt)
    {
        //Check no file
        if (CheckNullValues(imageFile))
        {
            ViewBag.Message = "You must upload an mp3 audio file";
            return View("AudioTranscription");
        }

        // The client-supplied name is untrusted: keep only the file-name part, then drop spaces.
        string blobName = System.IO.Path.GetFileName(imageFile.FileName ?? string.Empty).Replace(" ", "");

        // Validate before uploading so rejected files never reach the storage account.
        if (CheckImageExtension(blobName))
        {
            ViewBag.Message = "You must upload an audio file with .mp3 extension";
            return View("AudioTranscription", _model);
        }

        //Upload file to azure storage account
        BlobClient blobClient = _containerClient.GetBlobClient(blobName);
        var uploadOptions = new BlobUploadOptions
        {
            HttpHeaders = new BlobHttpHeaders { ContentType = "audio/mpeg" },
        };
        await using (var audioStream = imageFile.OpenReadStream())
        {
            await blobClient.UploadAsync(audioStream, uploadOptions);
        }

        //Transcribe the uploaded blob; TranscribeAudio fills ViewBag.Message
        await TranscribeAudio(blobClient.Uri.AbsoluteUri, imageFile);
        ViewBag.Waiting = null;

        return View("AudioTranscription", _model);
    }

    /// <summary>Shows the error view with the current request identifier; never cached.</summary>
    [ResponseCache(Duration = 0, Location = ResponseCacheLocation.None, NoStore = true)]
    public IActionResult Error()
    {
        return View(new ErrorViewModel { RequestId = Activity.Current?.Id ?? HttpContext.TraceIdentifier });
    }

    /// <summary>Creates the batch transcription job and returns its "self" URL.</summary>
    private async Task<string> CreateTranscriptionAsync(string audioUrlWithSas, CancellationToken cancellationToken)
    {
        // Built as a JSON object so the URL is escaped properly instead of being pasted into a string.
        var body = new JObject
        {
            ["contentUrls"] = new JArray(audioUrlWithSas),
            ["locale"] = "es-es",
            ["displayName"] = "My Transcription",
            ["model"] = JValue.CreateNull(),
            ["properties"] = new JObject
            {
                ["wordLevelTimestampsEnabled"] = true,
                ["languageIdentification"] = new JObject
                {
                    ["candidateLocales"] = new JArray("en-US", "de-DE", "es-ES"),
                },
            },
        };

        string endpoint = "https://" + _speechRegion + ".api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions";
        using var request = new HttpRequestMessage(HttpMethod.Post, endpoint)
        {
            Content = new StringContent(body.ToString(Formatting.None), System.Text.Encoding.UTF8, "application/json"),
        };

        JObject created = await SendSpeechRequestAsync(request, cancellationToken);
        return created.Value<string>("self")
            ?? throw new InvalidOperationException("The Speech service response did not contain the transcription URL.");
    }

    /// <summary>
    /// Polls the job until its status is "Succeeded" or "Failed".
    /// Returns that status, or null when <see cref="MaxPollAttempts"/> checks were not enough.
    /// </summary>
    private async Task<string> WaitForTranscriptionAsync(string transcriptionUrl, CancellationToken cancellationToken)
    {
        for (int attempt = 0; attempt < MaxPollAttempts; attempt++)
        {
            using var request = new HttpRequestMessage(HttpMethod.Get, transcriptionUrl);
            JObject job = await SendSpeechRequestAsync(request, cancellationToken);
            string status = job.Value<string>("status");

            if (status == "Succeeded" || status == "Failed")
            {
                return status;
            }

            // Task.Delay frees the thread while waiting, unlike Thread.Sleep.
            await Task.Delay(s_pollInterval, cancellationToken);
        }

        return null;
    }

    /// <summary>
    /// Lists the files of a finished job and returns the content URL of the entry whose kind is
    /// "Transcription", falling back to the first entry; null when the list is empty.
    /// </summary>
    private async Task<string> GetTranscriptionContentUrlAsync(string transcriptionUrl, CancellationToken cancellationToken)
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, transcriptionUrl + "/files/");
        JObject files = await SendSpeechRequestAsync(request, cancellationToken);

        JToken selected = null;
        if (files["values"] is JArray entries)
        {
            foreach (JToken entry in entries)
            {
                selected ??= entry;

                // NOTE(review): the list can also hold a report file; prefer the transcription entry — confirm kinds against the API docs.
                if ((string)entry["kind"] == "Transcription")
                {
                    selected = entry;
                    break;
                }
            }
        }

        return (string)selected?.SelectToken("links.contentUrl");
    }

    /// <summary>Downloads the result file and returns the lexical text of the first combined phrase, or an empty string when there is none.</summary>
    private async Task<string> GetTranscriptTextAsync(string contentUrl, CancellationToken cancellationToken)
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, contentUrl);
        JObject result = await SendSpeechRequestAsync(request, cancellationToken);

        // SelectToken returns null instead of throwing when the audio produced no phrases.
        return (string)result.SelectToken("combinedRecognizedPhrases[0].lexical") ?? string.Empty;
    }

    /// <summary>Adds the subscription key header, sends the request and parses the JSON body of a successful response.</summary>
    private async Task<JObject> SendSpeechRequestAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        request.Headers.Add("Ocp-Apim-Subscription-Key", _speechSubscriptionKey);

        using HttpResponseMessage response = await s_httpClient.SendAsync(request, cancellationToken);
        response.EnsureSuccessStatusCode();

        string payload = await response.Content.ReadAsStringAsync();
        return JObject.Parse(payload);
    }

    /// <summary>
    /// True when <paramref name="audioUrl"/> is an absolute URL without a query string that
    /// points below the configured container on the same scheme, host and port.
    /// </summary>
    private bool IsBlobInContainer(string audioUrl)
    {
        if (!Uri.TryCreate(audioUrl, UriKind.Absolute, out var audioUri))
        {
            return false;
        }

        Uri containerUri = _containerClient.Uri;
        string containerPath = containerUri.AbsolutePath.TrimEnd('/') + "/";

        bool sameServer = Uri.Compare(
            audioUri,
            containerUri,
            UriComponents.SchemeAndServer,
            UriFormat.UriEscaped,
            StringComparison.OrdinalIgnoreCase) == 0;

        // A query string is rejected because the SAS query is appended verbatim afterwards.
        return sameServer
            && string.IsNullOrEmpty(audioUri.Query)
            && audioUri.AbsolutePath.StartsWith(containerPath, StringComparison.Ordinal);
    }

    /// <summary>Returns true when no file was posted or the posted file is empty.</summary>
    private bool CheckNullValues(IFormFile imageFile)
    {
        return imageFile is null || imageFile.Length == 0;
    }

    /// <summary>
    /// Returns true when the name is NOT acceptable, i.e. its extension is not ".mp3"
    /// (case-insensitive). The extension must be the final one, so "a.mp3.exe" is rejected.
    /// </summary>
    private bool CheckImageExtension(string blobUri)
    {
        string extension = System.IO.Path.GetExtension(blobUri);
        return !string.Equals(extension, ".mp3", StringComparison.OrdinalIgnoreCase);
    }
}
14 changes: 14 additions & 0 deletions src/AIHub/Models/AudioTranscriptionModel.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
namespace MVCWeb.Models;

/// <summary>
/// View model handed to the AudioTranscription view. Every property is optional and
/// defaults to null.
/// </summary>
/// <remarks>
/// NOTE(review): the score-like properties (Severity, Violence, SelfHarm, Hate) look like
/// they were carried over from a content-moderation model — confirm whether the audio
/// transcription feature needs them.
/// </remarks>
public class AudioTranscriptionModel
{
    /// <summary>Optional severity value.</summary>
    public int? Severity { get; set; }

    /// <summary>Optional violence value.</summary>
    public int? Violence { get; set; }

    /// <summary>Optional self-harm value.</summary>
    public int? SelfHarm { get; set; }

    /// <summary>Optional hate value.</summary>
    public int? Hate { get; set; }

    /// <summary>Optional prompt text.</summary>
    public string? Prompt { get; set; }

    /// <summary>Optional image reference.</summary>
    public string? Image { get; set; }

    /// <summary>Optional message text.</summary>
    public string? Message { get; set; }
}
72 changes: 72 additions & 0 deletions src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
@{
    ViewData["Title"] = "Audio Transcription";
}

<div class="text-center">
    <svg style="fill: var(--main-color)" xmlns="http://www.w3.org/2000/svg" height="4em"
        viewBox="0 0 512 512"><!--! Font Awesome Free 6.4.2 by @@fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
        <path d="M64 0C28.7 0 0 28.7 0 64V448c0 35.3 28.7 64 64 64H320c35.3 0 64-28.7 64-64V160H256c-17.7 0-32-14.3-32-32V0H64zM256 0V128H384L256 0zm2 226.3c37.1 22.4 62 63.1 62 109.7s-24.9 87.3-62 109.7c-7.6 4.6-17.4 2.1-22-5.4s-2.1-17.4 5.4-22C269.4 401.5 288 370.9 288 336s-18.6-65.5-46.5-82.3c-7.6-4.6-10-14.4-5.4-22s14.4-10 22-5.4zm-91.9 30.9c6 2.5 9.9 8.3 9.9 14.8V400c0 6.5-3.9 12.3-9.9 14.8s-12.9 1.1-17.4-3.5L113.4 376H80c-8.8 0-16-7.2-16-16V312c0-8.8 7.2-16 16-16h33.4l35.3-35.3c4.6-4.6 11.5-5.9 17.4-3.5zm51 34.9c6.6-5.9 16.7-5.3 22.6 1.3C249.8 304.6 256 319.6 256 336s-6.2 31.4-16.3 42.7c-5.9 6.6-16 7.1-22.6 1.3s-7.1-16-1.3-22.6c5.1-5.7 8.1-13.1 8.1-21.3s-3.1-15.7-8.1-21.3c-5.9-6.6-5.3-16.7 1.3-22.6z"/>
    </svg>
    <h1 class="sectionTitle">Audio Transcription</h1>
    <p class="sectionSubTitle">Analiza tus audios usando Azure AI Speech</p>
    <p class="sectionDetails">Sólo necesitas subir un audio (.mp3).</p>

</div>

@if (ViewBag.Message != null)
{
    <div class="row justify-content-center mt-5">
        <div class="col-md-6">
            @* The message is HTML-encoded by Razor; "white-space: pre-line" renders its line breaks
               without injecting raw markup. The content stays on the tag line so no extra blank line shows. *@
            <div class="alert alert-primary" role="alert" style="white-space: pre-line;">@ViewBag.Message</div>
        </div>
    </div>
}
<form id="audioTranscriptionForm" asp-controller="AudioTranscription" asp-action="UploadFile" method="post" enctype="multipart/form-data">
    <div class="col-md-6">

        <div class="form-group">
            <label for="imageFile">Audio File:</label><br />
            <input type="file" class="form-control-file" id="imageFile" name="imageFile" />
            <br />
        </div>
        <div id="loadingPanel" style="display: none;">Transcribing...</div>
        <button type="submit" id="transcribeButton" class="btn btn-primary">Transcribe audio</button>

    </div>
</form>
<script>
    (function () {
        var form = document.getElementById('audioTranscriptionForm');
        var btn = document.getElementById('transcribeButton');
        var loadingPanel = document.getElementById('loadingPanel');

        // React to the form's own submit event: the browser performs the single submission,
        // this handler only switches the page to its "busy" state.
        form.addEventListener('submit', function () {
            // Disable the button
            btn.disabled = true;
            // Show the loading panel
            loadingPanel.style.display = 'block';
        });

        // Restore the idle state once the page has loaded.
        window.addEventListener('load', function () {
            // Enable the button
            btn.disabled = false;
            // Hide the loading panel
            loadingPanel.style.display = 'none';
        });
    })();
</script>
21 changes: 17 additions & 4 deletions src/AIHub/Views/Home/Index.cshtml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@
</div>
</a>
</li>
<li key="audiotranscription">
<a asp-area="" style="text-decoration: none" asp-controller="AudioTranscription" asp-action="AudioTranscription">
<div class="homeBlock">
<svg style="fill: var(--main-color)" xmlns="http://www.w3.org/2000/svg" height="2em"
viewBox="0 0 512 512"><!--! Font Awesome Free 6.4.2 by @@fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
<path d="M64 0C28.7 0 0 28.7 0 64V448c0 35.3 28.7 64 64 64H320c35.3 0 64-28.7 64-64V160H256c-17.7 0-32-14.3-32-32V0H64zM256 0V128H384L256 0zm2 226.3c37.1 22.4 62 63.1 62 109.7s-24.9 87.3-62 109.7c-7.6 4.6-17.4 2.1-22-5.4s-2.1-17.4 5.4-22C269.4 401.5 288 370.9 288 336s-18.6-65.5-46.5-82.3c-7.6-4.6-10-14.4-5.4-22s14.4-10 22-5.4zm-91.9 30.9c6 2.5 9.9 8.3 9.9 14.8V400c0 6.5-3.9 12.3-9.9 14.8s-12.9 1.1-17.4-3.5L113.4 376H80c-8.8 0-16-7.2-16-16V312c0-8.8 7.2-16 16-16h33.4l35.3-35.3c4.6-4.6 11.5-5.9 17.4-3.5zm51 34.9c6.6-5.9 16.7-5.3 22.6 1.3C249.8 304.6 256 319.6 256 336s-6.2 31.4-16.3 42.7c-5.9 6.6-16 7.1-22.6 1.3s-7.1-16-1.3-22.6c5.1-5.7 8.1-13.1 8.1-21.3s-3.1-15.7-8.1-21.3c-5.9-6.6-5.3-16.7 1.3-22.6z"/>
</svg>
<p class="homeBlockText">Audio Transcription</p>
<p class="homeBlockTextDetail">Transcribe audio files</p>
</div>
</a>
</li>
<li key="callcenter">
<a asp-area="" style="text-decoration: none" asp-controller="CallCenter" asp-action="CallCenter">
<div class="homeBlock">
Expand All @@ -48,6 +60,8 @@
</div>
</a>
</li>
</ul>
<ul class="homeBlockNavList">
<li key="brandanalyzer">
<a asp-area="" style="text-decoration: none" asp-controller="BrandAnalyzer" asp-action="BrandAnalyzer">
<div class="homeBlock">
Expand All @@ -57,7 +71,7 @@
d="M36.8 192H603.2c20.3 0 36.8-16.5 36.8-36.8c0-7.3-2.2-14.4-6.2-20.4L558.2 21.4C549.3 8 534.4 0 518.3 0H121.7c-16 0-31 8-39.9 21.4L6.2 134.7c-4 6.1-6.2 13.2-6.2 20.4C0 175.5 16.5 192 36.8 192zM64 224V384v80c0 26.5 21.5 48 48 48H336c26.5 0 48-21.5 48-48V384 224H320V384H128V224H64zm448 0V480c0 17.7 14.3 32 32 32s32-14.3 32-32V224H512z" />
</svg>
<p class="homeBlockText">Brand Analyzer</p>
<p class="homeBlockTextDetail">Brand Analyzer: Analyze your brand's internet reputation</p>
<p class="homeBlockTextDetail">Analyze your brand's internet reputation</p>
</div>
</a>
</li>
Expand All @@ -70,12 +84,11 @@
d="M320 464c8.8 0 16-7.2 16-16V160H256c-17.7 0-32-14.3-32-32V48H64c-8.8 0-16 7.2-16 16V448c0 8.8 7.2 16 16 16H320zM0 64C0 28.7 28.7 0 64 0H229.5c17 0 33.3 6.7 45.3 18.7l90.5 90.5c12 12 18.7 28.3 18.7 45.3V448c0 35.3-28.7 64-64 64H64c-35.3 0-64-28.7-64-64V64z" />
</svg>
<p class="homeBlockText">Form Analyzer</p>
<p class="homeBlockTextDetail">Analiza tus documentos</p>
<p class="homeBlockTextDetail">Summarize and ask questions to your documents</p>
</div>
</a>
</li>
</ul>
<ul class="homeBlockNavList">


<li key="textmodetator">
<a asp-area="" style="text-decoration: none" asp-controller="ContentSafety" asp-action="TextModerator">
Expand Down
Loading

0 comments on commit 873998a

Please sign in to comment.