Commit 6f867da8 authored by Federico Mestrone's avatar Federico Mestrone
Browse files

Speech-to-Text demo in Java

parent e722de56
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the Speech-to-Text demo.
  Produces a plain jar whose manifest points at the console entry point,
  a self-contained "one-jar" bundle, and supports running via `mvn exec:java`.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>gcp.cm.bigdata</groupId>
    <artifactId>speech-to-text</artifactId>
    <version>1.0.0</version>
    <properties>
        <!-- The sources are compiled as Java 8 -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>
    <dependencies>
        <!-- Cloud Speech client (the code uses the v1p1beta1 package) -->
        <dependency>
            <groupId>com.google.cloud</groupId>
            <artifactId>google-cloud-speech</artifactId>
            <version>0.87.0-beta</version>
        </dependency>
        <!-- Cloud Storage client used to list audio files and store transcripts -->
        <dependency>
            <groupId>com.google.cloud</groupId>
            <artifactId>google-cloud-storage</artifactId>
            <version>1.69.0</version>
        </dependency>
        <!-- Command-line argument parsing -->
        <dependency>
            <groupId>com.beust</groupId>
            <artifactId>jcommander</artifactId>
            <version>1.72</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Regular jar with classpath and Main-Class manifest entries -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.0.2</version>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>gcp.cm.mlapi.speech.ConsoleApp</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
            <!-- Bundles the application and its dependencies into a single jar -->
            <plugin>
                <groupId>com.jolira</groupId>
                <artifactId>onejar-maven-plugin</artifactId>
                <version>1.4.4</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>one-jar</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Allows launching the application with `mvn exec:java` -->
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>1.6.0</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>java</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <!-- Must match the entry point declared in the jar manifest above;
                         the previous placeholder class is not part of this project. -->
                    <mainClass>gcp.cm.mlapi.speech.ConsoleApp</mainClass>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
\ No newline at end of file
package gcp.cm.mlapi.speech;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.google.cloud.speech.v1p1beta1.*;
import com.google.cloud.storage.StorageException;
import java.util.List;
/**
 * Command-line entry point of the Speech-to-Text demo.
 *
 * <p>Lists the audio files found under a Cloud Storage folder, asks the Cloud Speech API
 * for a transcript of each one, and writes every transcript back to the same bucket
 * next to its audio file. All settings have defaults and can be overridden through the
 * command-line options declared below.
 */
public class ConsoleApp {
    @Parameter(names={"--bucket", "-b"}, description = "The bucket to read from and write to")
    private String bucket = "gcp-bigdataml";
    @Parameter(names={"--folder", "-f"}, description = "The folder to read from and write to")
    private String folder = "audio";
    @Parameter(names={"--extension", "--ext", "-x"}, description = "The audio extension for the files to convert")
    private String suffix = "wav";
    @Parameter(names={"--outputExtension", "--out", "-o"}, description = "The extension to attache to the script files")
    private String extension = "txt";
    @Parameter(names={"--encoding", "--enc", "-c"}, description = "The audio encoding of the files to convert")
    private RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.LINEAR16;
    @Parameter(names={"--sampleRate", "--rate", "-r"}, description = "The audio sampling rate of the files to convert in Hertz")
    private int sampleRateHertz = 16000;
    @Parameter(names={"--language", "--lang", "-l"}, description = "The language of the speech")
    private String languageCode = "it-IT";
    @Parameter(names = {"--help", "-h"}, help = true, description = "Show this help screen")
    private boolean isHelp = false;

    /**
     * Parses the command line and either prints the usage screen or runs the conversion.
     *
     * @param args the raw command-line arguments
     */
    public static void main(String[] args) {
        ConsoleApp app = new ConsoleApp();
        JCommander jcmd = new JCommander(app);
        jcmd.setProgramName("speech-to-text");
        jcmd.parse(args);
        if (app.isHelp) {
            jcmd.usage();
        } else {
            app.run();
        }
    }

    /**
     * Transcribes every pending audio file and stores the resulting scripts.
     *
     * <p>A file whose recognition fails is reported and skipped, so one bad file
     * does not abort the rest of the batch.
     */
    private void run() {
        // Prepare the client for Cloud Storage
        StorageSupport storage = new StorageSupport(bucket, folder, extension);
        // Get the list of files to be processed
        List<String> files = storage.listFilesToProcess(suffix);
        if (files.isEmpty()) {
            System.out.println("> No files to be processed");
            return;
        }
        // Prepare the configuration for Cloud Speech API
        RecognitionConfig config = RecognitionConfig.newBuilder()
                .setEncoding(encoding)
                .setSampleRateHertz(sampleRateHertz)
                .setLanguageCode(languageCode)
                .setEnableAutomaticPunctuation(true)
                .build();
        // Process the files
        try (SpeechSupport recognition = new SpeechSupport(config)) {
            for (String name : files) {
                // Prepare the audio information
                String uri = String.format("gs://%s/%s", storage.getBucket(), name);
                RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(uri).build();
                // Get the script for the audio file
                System.out.print("> Recognizing text for " + name + " ");
                String script = recognition.recognize(audio);
                if (script == null) {
                    // recognize() signals failure with null (it has already reported the cause);
                    // storing null would raise a NullPointerException and stop the whole run.
                    System.err.println("! Could not recognize " + name + ", skipping it");
                    continue;
                }
                System.out.println("> Writing script for " + name);
                try {
                    storage.store(name, script);
                } catch (StorageException e) {
                    System.err.println("! Could not store " + name + " due to " + e.getMessage());
                }
            }
        }
    }
}
package gcp.cm.mlapi.speech;
import com.google.api.gax.longrunning.OperationFuture;
import com.google.api.gax.rpc.InvalidArgumentException;
import com.google.api.gax.rpc.PermissionDeniedException;
import com.google.cloud.speech.v1p1beta1.*;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
/**
 * Thin wrapper around a {@link SpeechClient} that transcribes audio with a fixed
 * {@link RecognitionConfig}.
 *
 * <p>Instances own the underlying client and must be closed, typically through
 * try-with-resources.
 */
public class SpeechSupport implements AutoCloseable {
    /** Pause between two checks of the long-running operation: 10 ticks make 1 minute. */
    private static final long POLL_INTERVAL_MILLIS = 6000L;

    private final RecognitionConfig config;
    SpeechClient speechClient;

    /**
     * Creates the Speech client that will be used for every recognition request.
     *
     * @param config the recognition settings applied to every request
     * @throws RuntimeException if the Speech client cannot be created
     */
    public SpeechSupport(RecognitionConfig config) {
        this.config = config;
        try {
            speechClient = SpeechClient.create();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs a long-running recognition on the given audio and waits for it to finish,
     * printing a progress tick to standard output at every poll.
     *
     * @param audio the audio to transcribe
     * @return the transcript, one line per recognition result (using the first alternative
     *         of each result), or {@code null} if the recognition failed or was interrupted
     */
    public String recognize(RecognitionAudio audio) {
        try {
            OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
                    speechClient.longRunningRecognizeAsync(config, audio);
            while (!operation.isDone()) {
                System.out.print(">");
                Thread.sleep(POLL_INTERVAL_MILLIS);
            }
            System.out.println("|");
            LongRunningRecognizeResponse response = operation.get();
            StringBuilder transcript = new StringBuilder();
            for (SpeechRecognitionResult result : response.getResultsList()) {
                // A result without alternatives has nothing to contribute; indexing it would throw.
                if (result.getAlternativesCount() == 0) {
                    continue;
                }
                transcript.append(result.getAlternatives(0).getTranscript().trim());
                transcript.append("\n");
            }
            return transcript.toString();
        } catch (PermissionDeniedException e) {
            System.err.println("Access denied to GCP resource : " + e.getMessage());
            return null;
        } catch (InvalidArgumentException e) {
            System.err.println("Invalid argument to GCP API : " + e.getMessage());
            return null;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so that callers can notice the interruption too.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            return null;
        } catch (ExecutionException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Releases the underlying Speech client. */
    @Override
    public void close() {
        speechClient.close();
    }
}
package gcp.cm.mlapi.speech;
import com.google.api.gax.paging.Page;
import com.google.cloud.storage.*;
import java.util.ArrayList;
import java.util.List;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
 * Helper around Cloud Storage that finds the audio files still to be transcribed
 * and stores the resulting scripts.
 *
 * <p>A script is stored under the name of its audio file followed by a dot and the
 * configured output extension; an audio file is considered processed when such an
 * object already exists in the bucket.
 */
public class StorageSupport {
    private final String bucket;
    private final String folder;
    private final String extension;
    private final Storage storage;

    /**
     * Creates a helper bound to one bucket and folder, using the default storage options.
     *
     * @param bucket    the bucket to read from and write to
     * @param folder    the object-name prefix under which audio files are looked up
     * @param extension the extension appended to the name of each stored script
     */
    public StorageSupport(String bucket, String folder, String extension) {
        this.bucket = bucket;
        this.folder = folder;
        this.extension = extension;
        this.storage = StorageOptions.getDefaultInstance().getService();
    }

    /**
     * Tells whether an object with the given name exists in the bucket.
     *
     * @param name the full object name
     * @return {@code true} if the lookup returned an object, {@code false} otherwise
     */
    public boolean fileExists(String name) {
        // Only the name field is requested: the check needs existence, not the metadata.
        Blob blob = storage.get(bucket, name, Storage.BlobGetOption.fields(Storage.BlobField.NAME));
        return blob != null;
    }

    /**
     * Lists the objects under the folder prefix whose name ends with the given suffix
     * and for which no script has been stored yet.
     *
     * @param suffix the ending that object names must have to be selected
     * @return the names of the objects still to be processed, possibly empty
     */
    public List<String> listFilesToProcess(String suffix) {
        Page<Blob> blobs = storage.list(bucket, Storage.BlobListOption.prefix(folder));
        List<String> results = new ArrayList<>();
        for (Blob blob : blobs.iterateAll()) {
            String name = blob.getName();
            if (name.endsWith(suffix) && !fileExists(name + "." + extension)) {
                results.add(name);
            }
        }
        return results;
    }

    /**
     * Stores the given text as a UTF-8 {@code text/plain} object named after the audio file.
     *
     * @param name     the name of the audio object the script belongs to
     * @param contents the script text; must not be {@code null}
     * @throws NullPointerException if {@code contents} is {@code null}
     */
    public void store(String name, String contents) {
        if (contents == null) {
            // Fail with an explicit message instead of an anonymous NPE from getBytes().
            throw new NullPointerException("contents must not be null (script for " + name + ")");
        }
        BlobId blobId = BlobId.of(bucket, name + "." + extension);
        BlobInfo blobInfo = BlobInfo.newBuilder(blobId).setContentType("text/plain").build();
        storage.create(blobInfo, contents.getBytes(UTF_8));
    }

    /** @return the bucket this helper reads from and writes to */
    public String getBucket() {
        return bucket;
    }

    /** @return the object-name prefix used when listing audio files */
    public String getFolder() {
        return folder;
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment