Sunday, 28 March 2021

How do I make/convert my python app into an Rshiny app? Its a brainteaser! Unable to find what change UI needs in R

i am new to R and trying to understand Rshiny to build UIs. I am trying to create a UI for my python app that transcribes mulitple wav files. There are two parts below, first my python app and the second my shiny app in R which uses reticulate to call my transcribe.py app. For some reason though, i do not receive any output.

My Python app works perfectly and does NOT need code review.However, the Rshiny app does not execute the python app correctly to produce the desired result. The objective is to let the user transcribe the files from the UI and decide if they want to download the csv.

I have a python app for transcribing files called transcribe.py-

import os
import json
import time
# import threading
from pathlib import Path

import concurrent.futures

# from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import pandas as pd

# Replace with your api key.
my_api_key = "abc123"

# You can add a directory path to Path() if you want to run
# the project from a different folder at some point.
directory = Path().absolute()


authenticator = IAMAuthenticator(my_api_key)

service = SpeechToTextV1(authenticator=authenticator)
service.set_service_url('https://api.us-east.speech-to-text.watson.cloud.ibm.com')
# I used this URL.
# service.set_service_url('https://stream.watsonplatform.net/speech-to-text/api') 


models = service.list_models().get_result()
#print(json.dumps(models, indent=2))

model = service.get_model('en-US_BroadbandModel').get_result()
#print(json.dumps(model, indent=2))



# get data to a csv
########################RUN THIS PART SECOND#####################################


def process_data(json_data, output_path):

    print(f"Processing: {output_path.stem}")

    cols = ["transcript", "confidence"]

    dfdata = [[t[cols[0]], t[cols[1]]] for r in json_data.get('results') for t in r.get("alternatives")]

    df0 = pd.DataFrame(data = dfdata, columns = cols)

    df1 = pd.DataFrame(json_data.get("speaker_labels")).drop(["final", "confidence"], axis=1)


    # test3 = pd.concat([df0, df1], axis=1)
    test3 = pd.merge(df0, df1, left_index = True, right_index = True)


    # sentiment
    print(f"Getting sentiment for: {output_path.stem}")
    transcript = test3["transcript"]
    transcript.dropna(inplace=True)

    analyzer = SentimentIntensityAnalyzer()
    text = transcript
    scores = [analyzer.polarity_scores(txt) for txt in text]

    # data = pd.DataFrame(text, columns = ["Text"])
    data = transcript.to_frame(name="Text")
    data2 = pd.DataFrame(scores)


    # final_dataset= pd.concat([data, data2], axis=1)
    final_dataset = pd.merge(data, data2, left_index = True, right_index = True)

    # test4 = pd.concat([test3, final_dataset], axis=1)
    test4 = pd.merge(test3, final_dataset, left_index = True, right_index = True)

    test4.drop("Text", axis=1, inplace=True)

    test4.rename(columns = {
            "neg": "Negative",
            "pos": "Positive",
            "neu": "Neutral",
            }, inplace=True)

    # This is the name of the output csv file
    test4.to_csv(output_path, index = False)


def process_audio_file(filename, output_type = "csv"):

    audio_file_path = directory.joinpath(filename)

    # Update output path to consider `output_type` parameter.
    out_path = directory.joinpath(f"{audio_file_path.stem}.{output_type}")

    print(f"Current file: '{filename}'")

    with open(audio_file_path, "rb") as audio_file:
        data = service.recognize(
                audio = audio_file,
                speaker_labels = True,
                content_type = "audio/wav",
                inactivity_timeout = -1,
                model = "en-US_NarrowbandModel",
                continuous = True,
            ).get_result()

    print(f"Speech-to-text complete for: '{filename}'")

    # Return data and output path as collection.
    return [data, out_path]


def main():
    print("Running main()...")

    # Default num. workers == min(32, os.cpu_count() + 4)
    n_workers = os.cpu_count() + 2

    # Create generator for all .wav files in folder (and subfolders).
    file_gen = directory.glob("**/*.wav")

    with concurrent.futures.ThreadPoolExecutor(max_workers = n_workers) as executor:
        futures = {executor.submit(process_audio_file, f) for f in file_gen}
        for future in concurrent.futures.as_completed(futures):
            pkg = future.result()
            process_data(*pkg)


if __name__ == "__main__":

    print(f"Program to process audio files has started.")

    t_start = time.perf_counter()

    main()

    t_stop = time.perf_counter()
    print(f"Done! Processing completed in {t_stop - t_start} seconds.")

In Rstudio, I tried -

R.UI file

library(shiny)
library(reticulate) # for reading Python code
library(dplyr)
library(stringr) 
library(formattable) # for adding color to tables
library(shinybusy) # for busy bar
library(DT) # for dataTableOutput

use_python("/usr/lib/python3")

ui <- fluidPage(
  add_busy_bar(color = "#5d98ff"),
  fileInput("wavFile", "SELECT .WAV FILE", accept = ".wav"),
  uiOutput("downloadData"),
  dataTableOutput("transcript"),
  
)

R.Server file

server <- function(input, output) {
  
  # .WAV File Selector ------------------------------------------------------
  
  file <- reactive({
    file <- input$wavFile # Get file from user input
    gsub("\\\\","/",file$datapath) # Access the file path. Convert back slashes to forward slashes.
  })
  
  
  # Transcribe and Clean ----------------------------------------------------
  
  transcript <- reactive({
    
    req(input$wavFile) # Require a file before proceeding
    
    source_python('transcribe.py') # Load the Python function           # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
    transcript <- data.frame(transcribe(file())) # Transcribe the file  # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
    # load('transcript.rdata') # Loads a dummy transcript               # UNCOMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
    
    transcript$transcript <- unlist(transcript$transcript) # Transcript field comes in as a list. Unlist it.
    transcript <- transcript[which(!(is.na(transcript$confidence))),] # Remove empty lines
    names(transcript) <- str_to_title(names(transcript)) # Capitalize column headers
    
    transcript # Return the transcript
    
  })
  
  
  # Use a server-side download button ---------------------------------------
  
  # ...so that the download button only appears after transcription
  
  output$downloadData <- renderUI({
    req(transcript())
    downloadButton("handleDownload","Download CSV")
  })
  
  output$handleDownload <- downloadHandler(
    filename = function() {
      paste('Transcript ',Sys.Date(), ".csv", sep = "")
    },
    content = function(file) {
      write.csv(transcript(), file, row.names = FALSE)
    }
  )
  
  
  # Transcript table --------------------------------------------------------
  
  output$transcript <- renderDataTable({ 
    as.datatable(formattable(
      transcript() %>%
        select(Transcript,
               Confidence,
               Negative,
               Positive
        ),
      list(Confidence = color_tile('#ffffff','#a2b3c8'),
           Negative = color_tile('#ffffff', '#e74446'),
           Positive = color_tile('#ffffff', "#499650")
      )
    ), rownames = FALSE, options =list(paging = FALSE)
    )
  })
  
  
  # END ---------------------------------------------------------------------
  
}


from How do I make/convert my python app into an Rshiny app? Its a brainteaser! Unable to find what change UI needs in R

No comments:

Post a Comment