i am new to R and trying to understand Rshiny to build UIs. I am trying to create a UI for my python app that transcribes mulitple wav files. There are two parts below, first my python app and the second my shiny app in R which uses reticulate to call my transcribe.py app. For some reason though, i do not receive any output.
My Python app works perfectly and does NOT need code review.However, the Rshiny app does not execute the python app correctly to produce the desired result. The objective is to let the user transcribe the files from the UI and decide if they want to download the csv.
I have a python app for transcribing files called transcribe.py-
import os
import json
import time
# import threading
from pathlib import Path
import concurrent.futures
# from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
# Replace with your api key.
my_api_key = "abc123"
# You can add a directory path to Path() if you want to run
# the project from a different folder at some point.
directory = Path().absolute()
authenticator = IAMAuthenticator(my_api_key)
service = SpeechToTextV1(authenticator=authenticator)
service.set_service_url('https://api.us-east.speech-to-text.watson.cloud.ibm.com')
# I used this URL.
# service.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')
models = service.list_models().get_result()
#print(json.dumps(models, indent=2))
model = service.get_model('en-US_BroadbandModel').get_result()
#print(json.dumps(model, indent=2))
# get data to a csv
########################RUN THIS PART SECOND#####################################
def process_data(json_data, output_path):
print(f"Processing: {output_path.stem}")
cols = ["transcript", "confidence"]
dfdata = [[t[cols[0]], t[cols[1]]] for r in json_data.get('results') for t in r.get("alternatives")]
df0 = pd.DataFrame(data = dfdata, columns = cols)
df1 = pd.DataFrame(json_data.get("speaker_labels")).drop(["final", "confidence"], axis=1)
# test3 = pd.concat([df0, df1], axis=1)
test3 = pd.merge(df0, df1, left_index = True, right_index = True)
# sentiment
print(f"Getting sentiment for: {output_path.stem}")
transcript = test3["transcript"]
transcript.dropna(inplace=True)
analyzer = SentimentIntensityAnalyzer()
text = transcript
scores = [analyzer.polarity_scores(txt) for txt in text]
# data = pd.DataFrame(text, columns = ["Text"])
data = transcript.to_frame(name="Text")
data2 = pd.DataFrame(scores)
# final_dataset= pd.concat([data, data2], axis=1)
final_dataset = pd.merge(data, data2, left_index = True, right_index = True)
# test4 = pd.concat([test3, final_dataset], axis=1)
test4 = pd.merge(test3, final_dataset, left_index = True, right_index = True)
test4.drop("Text", axis=1, inplace=True)
test4.rename(columns = {
"neg": "Negative",
"pos": "Positive",
"neu": "Neutral",
}, inplace=True)
# This is the name of the output csv file
test4.to_csv(output_path, index = False)
def process_audio_file(filename, output_type = "csv"):
audio_file_path = directory.joinpath(filename)
# Update output path to consider `output_type` parameter.
out_path = directory.joinpath(f"{audio_file_path.stem}.{output_type}")
print(f"Current file: '{filename}'")
with open(audio_file_path, "rb") as audio_file:
data = service.recognize(
audio = audio_file,
speaker_labels = True,
content_type = "audio/wav",
inactivity_timeout = -1,
model = "en-US_NarrowbandModel",
continuous = True,
).get_result()
print(f"Speech-to-text complete for: '{filename}'")
# Return data and output path as collection.
return [data, out_path]
def main():
print("Running main()...")
# Default num. workers == min(32, os.cpu_count() + 4)
n_workers = os.cpu_count() + 2
# Create generator for all .wav files in folder (and subfolders).
file_gen = directory.glob("**/*.wav")
with concurrent.futures.ThreadPoolExecutor(max_workers = n_workers) as executor:
futures = {executor.submit(process_audio_file, f) for f in file_gen}
for future in concurrent.futures.as_completed(futures):
pkg = future.result()
process_data(*pkg)
if __name__ == "__main__":
print(f"Program to process audio files has started.")
t_start = time.perf_counter()
main()
t_stop = time.perf_counter()
print(f"Done! Processing completed in {t_stop - t_start} seconds.")
In Rstudio, I tried -
R.UI file
library(shiny)
library(reticulate) # for reading Python code
library(dplyr)
library(stringr)
library(formattable) # for adding color to tables
library(shinybusy) # for busy bar
library(DT) # for dataTableOutput
use_python("/usr/lib/python3")
ui <- fluidPage(
add_busy_bar(color = "#5d98ff"),
fileInput("wavFile", "SELECT .WAV FILE", accept = ".wav"),
uiOutput("downloadData"),
dataTableOutput("transcript"),
)
R.Server file
server <- function(input, output) {
# .WAV File Selector ------------------------------------------------------
file <- reactive({
file <- input$wavFile # Get file from user input
gsub("\\\\","/",file$datapath) # Access the file path. Convert back slashes to forward slashes.
})
# Transcribe and Clean ----------------------------------------------------
transcript <- reactive({
req(input$wavFile) # Require a file before proceeding
source_python('transcribe.py') # Load the Python function # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
transcript <- data.frame(transcribe(file())) # Transcribe the file # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
# load('transcript.rdata') # Loads a dummy transcript # UNCOMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
transcript$transcript <- unlist(transcript$transcript) # Transcript field comes in as a list. Unlist it.
transcript <- transcript[which(!(is.na(transcript$confidence))),] # Remove empty lines
names(transcript) <- str_to_title(names(transcript)) # Capitalize column headers
transcript # Return the transcript
})
# Use a server-side download button ---------------------------------------
# ...so that the download button only appears after transcription
output$downloadData <- renderUI({
req(transcript())
downloadButton("handleDownload","Download CSV")
})
output$handleDownload <- downloadHandler(
filename = function() {
paste('Transcript ',Sys.Date(), ".csv", sep = "")
},
content = function(file) {
write.csv(transcript(), file, row.names = FALSE)
}
)
# Transcript table --------------------------------------------------------
output$transcript <- renderDataTable({
as.datatable(formattable(
transcript() %>%
select(Transcript,
Confidence,
Negative,
Positive
),
list(Confidence = color_tile('#ffffff','#a2b3c8'),
Negative = color_tile('#ffffff', '#e74446'),
Positive = color_tile('#ffffff', "#499650")
)
), rownames = FALSE, options =list(paging = FALSE)
)
})
# END ---------------------------------------------------------------------
}
from How do I make/convert my python app into an Rshiny app? Its a brainteaser! Unable to find what change UI needs in R
No comments:
Post a Comment