genproto: google.golang.org/genproto/googleapis/cloud/speech/v1

package speech

import "google.golang.org/genproto/googleapis/cloud/speech/v1"

Index

Package Files

cloud_speech.pb.go

Variables

var (
    RecognitionConfig_AudioEncoding_name = map[int32]string{
        0:  "ENCODING_UNSPECIFIED",
        1:  "LINEAR16",
        2:  "FLAC",
        3:  "MULAW",
        4:  "AMR",
        5:  "AMR_WB",
        6:  "OGG_OPUS",
        7:  "SPEEX_WITH_HEADER_BYTE",
    }
    RecognitionConfig_AudioEncoding_value = map[string]int32{
        "ENCODING_UNSPECIFIED":   0,
        "LINEAR16":               1,
        "FLAC":                   2,
        "MULAW":                  3,
        "AMR":                    4,
        "AMR_WB":                 5,
        "OGG_OPUS":               6,
        "SPEEX_WITH_HEADER_BYTE": 7,
    }
)

Enum value maps for RecognitionConfig_AudioEncoding.

var (
    RecognitionMetadata_InteractionType_name = map[int32]string{
        0:  "INTERACTION_TYPE_UNSPECIFIED",
        1:  "DISCUSSION",
        2:  "PRESENTATION",
        3:  "PHONE_CALL",
        4:  "VOICEMAIL",
        5:  "PROFESSIONALLY_PRODUCED",
        6:  "VOICE_SEARCH",
        7:  "VOICE_COMMAND",
        8:  "DICTATION",
    }
    RecognitionMetadata_InteractionType_value = map[string]int32{
        "INTERACTION_TYPE_UNSPECIFIED": 0,
        "DISCUSSION":                   1,
        "PRESENTATION":                 2,
        "PHONE_CALL":                   3,
        "VOICEMAIL":                    4,
        "PROFESSIONALLY_PRODUCED":      5,
        "VOICE_SEARCH":                 6,
        "VOICE_COMMAND":                7,
        "DICTATION":                    8,
    }
)

Enum value maps for RecognitionMetadata_InteractionType.

var (
    RecognitionMetadata_MicrophoneDistance_name = map[int32]string{
        0:  "MICROPHONE_DISTANCE_UNSPECIFIED",
        1:  "NEARFIELD",
        2:  "MIDFIELD",
        3:  "FARFIELD",
    }
    RecognitionMetadata_MicrophoneDistance_value = map[string]int32{
        "MICROPHONE_DISTANCE_UNSPECIFIED": 0,
        "NEARFIELD":                       1,
        "MIDFIELD":                        2,
        "FARFIELD":                        3,
    }
)

Enum value maps for RecognitionMetadata_MicrophoneDistance.

var (
    RecognitionMetadata_OriginalMediaType_name = map[int32]string{
        0:  "ORIGINAL_MEDIA_TYPE_UNSPECIFIED",
        1:  "AUDIO",
        2:  "VIDEO",
    }
    RecognitionMetadata_OriginalMediaType_value = map[string]int32{
        "ORIGINAL_MEDIA_TYPE_UNSPECIFIED": 0,
        "AUDIO":                           1,
        "VIDEO":                           2,
    }
)

Enum value maps for RecognitionMetadata_OriginalMediaType.

var (
    RecognitionMetadata_RecordingDeviceType_name = map[int32]string{
        0:  "RECORDING_DEVICE_TYPE_UNSPECIFIED",
        1:  "SMARTPHONE",
        2:  "PC",
        3:  "PHONE_LINE",
        4:  "VEHICLE",
        5:  "OTHER_OUTDOOR_DEVICE",
        6:  "OTHER_INDOOR_DEVICE",
    }
    RecognitionMetadata_RecordingDeviceType_value = map[string]int32{
        "RECORDING_DEVICE_TYPE_UNSPECIFIED": 0,
        "SMARTPHONE":                        1,
        "PC":                                2,
        "PHONE_LINE":                        3,
        "VEHICLE":                           4,
        "OTHER_OUTDOOR_DEVICE":              5,
        "OTHER_INDOOR_DEVICE":               6,
    }
)

Enum value maps for RecognitionMetadata_RecordingDeviceType.

var (
    StreamingRecognizeResponse_SpeechEventType_name = map[int32]string{
        0:  "SPEECH_EVENT_UNSPECIFIED",
        1:  "END_OF_SINGLE_UTTERANCE",
    }
    StreamingRecognizeResponse_SpeechEventType_value = map[string]int32{
        "SPEECH_EVENT_UNSPECIFIED": 0,
        "END_OF_SINGLE_UTTERANCE":  1,
    }
)

Enum value maps for StreamingRecognizeResponse_SpeechEventType.

var File_google_cloud_speech_v1_cloud_speech_proto protoreflect.FileDescriptor

func RegisterSpeechServer Uses

func RegisterSpeechServer(s *grpc.Server, srv SpeechServer)
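
A minimal, self-contained sketch of serving this API yourself (not part of the generated code; `mySpeechServer`, the port, and the stubbed-out methods are hypothetical):

package main

import (
    "context"
    "log"
    "net"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
    "google.golang.org/genproto/googleapis/longrunning"
    "google.golang.org/grpc"
    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
)

// mySpeechServer is a hypothetical SpeechServer implementation.
type mySpeechServer struct{}

func (mySpeechServer) Recognize(ctx context.Context, req *speechpb.RecognizeRequest) (*speechpb.RecognizeResponse, error) {
    return &speechpb.RecognizeResponse{}, nil // real recognition logic goes here
}

func (mySpeechServer) LongRunningRecognize(ctx context.Context, req *speechpb.LongRunningRecognizeRequest) (*longrunning.Operation, error) {
    return nil, status.Error(codes.Unimplemented, "not implemented")
}

func (mySpeechServer) StreamingRecognize(stream speechpb.Speech_StreamingRecognizeServer) error {
    return status.Error(codes.Unimplemented, "not implemented")
}

func main() {
    lis, err := net.Listen("tcp", ":50051")
    if err != nil {
        log.Fatal(err)
    }
    s := grpc.NewServer()
    speechpb.RegisterSpeechServer(s, mySpeechServer{})
    log.Fatal(s.Serve(lis))
}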

type LongRunningRecognizeMetadata Uses

type LongRunningRecognizeMetadata struct {

    // Approximate percentage of audio processed thus far. Guaranteed to be 100
    // when the audio is fully processed and the results are available.
    ProgressPercent int32 `protobuf:"varint,1,opt,name=progress_percent,json=progressPercent,proto3" json:"progress_percent,omitempty"`
    // Time when the request was received.
    StartTime *timestamp.Timestamp `protobuf:"bytes,2,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"`
    // Time of the most recent processing update.
    LastUpdateTime *timestamp.Timestamp `protobuf:"bytes,3,opt,name=last_update_time,json=lastUpdateTime,proto3" json:"last_update_time,omitempty"`
    // contains filtered or unexported fields
}

Describes the progress of a long-running `LongRunningRecognize` call. It is included in the `metadata` field of the `Operation` returned by the `GetOperation` call of the `google::longrunning::Operations` service.
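
A sketch of reading that metadata, assuming an `op` previously returned by `GetOperation` (or by `LongRunningRecognize` itself); `reportProgress` is a hypothetical helper, and the `ptypes.UnmarshalAny` call assumes the github.com/golang/protobuf compatibility layer:

import (
    "log"

    "github.com/golang/protobuf/ptypes"
    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
    "google.golang.org/genproto/googleapis/longrunning"
)

// reportProgress unpacks LongRunningRecognizeMetadata from the Operation's
// metadata Any and logs the completion percentage.
func reportProgress(op *longrunning.Operation) {
    var md speechpb.LongRunningRecognizeMetadata
    if err := ptypes.UnmarshalAny(op.GetMetadata(), &md); err != nil {
        log.Fatal(err)
    }
    log.Printf("progress: %d%% (started %v)", md.GetProgressPercent(), md.GetStartTime())
}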

func (*LongRunningRecognizeMetadata) Descriptor Uses

func (*LongRunningRecognizeMetadata) Descriptor() ([]byte, []int)

Deprecated: Use LongRunningRecognizeMetadata.ProtoReflect.Descriptor instead.

func (*LongRunningRecognizeMetadata) GetLastUpdateTime Uses

func (x *LongRunningRecognizeMetadata) GetLastUpdateTime() *timestamp.Timestamp

func (*LongRunningRecognizeMetadata) GetProgressPercent Uses

func (x *LongRunningRecognizeMetadata) GetProgressPercent() int32

func (*LongRunningRecognizeMetadata) GetStartTime Uses

func (x *LongRunningRecognizeMetadata) GetStartTime() *timestamp.Timestamp

func (*LongRunningRecognizeMetadata) ProtoMessage Uses

func (*LongRunningRecognizeMetadata) ProtoMessage()

func (*LongRunningRecognizeMetadata) ProtoReflect Uses

func (x *LongRunningRecognizeMetadata) ProtoReflect() protoreflect.Message

func (*LongRunningRecognizeMetadata) Reset Uses

func (x *LongRunningRecognizeMetadata) Reset()

func (*LongRunningRecognizeMetadata) String Uses

func (x *LongRunningRecognizeMetadata) String() string

type LongRunningRecognizeRequest Uses

type LongRunningRecognizeRequest struct {

    // Required. Provides information to the recognizer that specifies how to
    // process the request.
    Config *RecognitionConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"`
    // Required. The audio data to be recognized.
    Audio *RecognitionAudio `protobuf:"bytes,2,opt,name=audio,proto3" json:"audio,omitempty"`
    // contains filtered or unexported fields
}

The top-level message sent by the client for the `LongRunningRecognize` method.
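
A sketch of building and sending this request; `startLongRunning` is a hypothetical helper and the gs:// URI is a placeholder:

import (
    "context"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
    "google.golang.org/genproto/googleapis/longrunning"
)

// startLongRunning kicks off asynchronous recognition of a Cloud Storage object.
func startLongRunning(ctx context.Context, client speechpb.SpeechClient) (*longrunning.Operation, error) {
    return client.LongRunningRecognize(ctx, &speechpb.LongRunningRecognizeRequest{
        Config: &speechpb.RecognitionConfig{
            Encoding:        speechpb.RecognitionConfig_LINEAR16,
            SampleRateHertz: 16000,
            LanguageCode:    "en-US",
        },
        Audio: &speechpb.RecognitionAudio{
            AudioSource: &speechpb.RecognitionAudio_Uri{Uri: "gs://my-bucket/audio.raw"},
        },
    })
}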

func (*LongRunningRecognizeRequest) Descriptor Uses

func (*LongRunningRecognizeRequest) Descriptor() ([]byte, []int)

Deprecated: Use LongRunningRecognizeRequest.ProtoReflect.Descriptor instead.

func (*LongRunningRecognizeRequest) GetAudio Uses

func (x *LongRunningRecognizeRequest) GetAudio() *RecognitionAudio

func (*LongRunningRecognizeRequest) GetConfig Uses

func (x *LongRunningRecognizeRequest) GetConfig() *RecognitionConfig

func (*LongRunningRecognizeRequest) ProtoMessage Uses

func (*LongRunningRecognizeRequest) ProtoMessage()

func (*LongRunningRecognizeRequest) ProtoReflect Uses

func (x *LongRunningRecognizeRequest) ProtoReflect() protoreflect.Message

func (*LongRunningRecognizeRequest) Reset Uses

func (x *LongRunningRecognizeRequest) Reset()

func (*LongRunningRecognizeRequest) String Uses

func (x *LongRunningRecognizeRequest) String() string

type LongRunningRecognizeResponse Uses

type LongRunningRecognizeResponse struct {

    // Sequential list of transcription results corresponding to
    // sequential portions of audio.
    Results []*SpeechRecognitionResult `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"`
    // contains filtered or unexported fields
}

The only message returned to the client by the `LongRunningRecognize` method. It contains the result as zero or more sequential `SpeechRecognitionResult` messages. It is included in the `result.response` field of the `Operation` returned by the `GetOperation` call of the `google::longrunning::Operations` service.
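
A sketch of unpacking that response from a completed `Operation`; `finalResults` is a hypothetical helper, and a caller would check `op.GetDone()` and `op.GetError()` first:

import (
    "github.com/golang/protobuf/ptypes"
    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
    "google.golang.org/genproto/googleapis/longrunning"
)

// finalResults unpacks the LongRunningRecognizeResponse from the Operation's
// result.response Any.
func finalResults(op *longrunning.Operation) (*speechpb.LongRunningRecognizeResponse, error) {
    var resp speechpb.LongRunningRecognizeResponse
    if err := ptypes.UnmarshalAny(op.GetResponse(), &resp); err != nil {
        return nil, err
    }
    return &resp, nil
}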

func (*LongRunningRecognizeResponse) Descriptor Uses

func (*LongRunningRecognizeResponse) Descriptor() ([]byte, []int)

Deprecated: Use LongRunningRecognizeResponse.ProtoReflect.Descriptor instead.

func (*LongRunningRecognizeResponse) GetResults Uses

func (x *LongRunningRecognizeResponse) GetResults() []*SpeechRecognitionResult

func (*LongRunningRecognizeResponse) ProtoMessage Uses

func (*LongRunningRecognizeResponse) ProtoMessage()

func (*LongRunningRecognizeResponse) ProtoReflect Uses

func (x *LongRunningRecognizeResponse) ProtoReflect() protoreflect.Message

func (*LongRunningRecognizeResponse) Reset Uses

func (x *LongRunningRecognizeResponse) Reset()

func (*LongRunningRecognizeResponse) String Uses

func (x *LongRunningRecognizeResponse) String() string

type RecognitionAudio Uses

type RecognitionAudio struct {

    // The audio source, which is either inline content or a Google Cloud
    // Storage uri.
    //
    // Types that are assignable to AudioSource:
    //	*RecognitionAudio_Content
    //	*RecognitionAudio_Uri
    AudioSource isRecognitionAudio_AudioSource `protobuf_oneof:"audio_source"`
    // contains filtered or unexported fields
}

Contains audio data in the encoding specified in the `RecognitionConfig`. Either `content` or `uri` must be supplied. Supplying both or neither returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
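
A sketch of the two mutually exclusive oneof variants; the constructor names are hypothetical:

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"

// audioFromBytes wraps inline audio bytes (subject to the content limits above).
func audioFromBytes(pcm []byte) *speechpb.RecognitionAudio {
    return &speechpb.RecognitionAudio{
        AudioSource: &speechpb.RecognitionAudio_Content{Content: pcm},
    }
}

// audioFromGCS references a Cloud Storage object; uri must look like
// "gs://bucket_name/object_name".
func audioFromGCS(uri string) *speechpb.RecognitionAudio {
    return &speechpb.RecognitionAudio{
        AudioSource: &speechpb.RecognitionAudio_Uri{Uri: uri},
    }
}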

func (*RecognitionAudio) Descriptor Uses

func (*RecognitionAudio) Descriptor() ([]byte, []int)

Deprecated: Use RecognitionAudio.ProtoReflect.Descriptor instead.

func (*RecognitionAudio) GetAudioSource Uses

func (m *RecognitionAudio) GetAudioSource() isRecognitionAudio_AudioSource

func (*RecognitionAudio) GetContent Uses

func (x *RecognitionAudio) GetContent() []byte

func (*RecognitionAudio) GetUri Uses

func (x *RecognitionAudio) GetUri() string

func (*RecognitionAudio) ProtoMessage Uses

func (*RecognitionAudio) ProtoMessage()

func (*RecognitionAudio) ProtoReflect Uses

func (x *RecognitionAudio) ProtoReflect() protoreflect.Message

func (*RecognitionAudio) Reset Uses

func (x *RecognitionAudio) Reset()

func (*RecognitionAudio) String Uses

func (x *RecognitionAudio) String() string

type RecognitionAudio_Content Uses

type RecognitionAudio_Content struct {
    // The audio data bytes encoded as specified in
    // `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
    // pure binary representation, whereas JSON representations use base64.
    Content []byte `protobuf:"bytes,1,opt,name=content,proto3,oneof"`
}

type RecognitionAudio_Uri Uses

type RecognitionAudio_Uri struct {
    // URI that points to a file that contains audio data bytes as specified in
    // `RecognitionConfig`. The file must not be compressed (for example, gzip).
    // Currently, only Google Cloud Storage URIs are
    // supported, which must be specified in the following format:
    // `gs://bucket_name/object_name` (other URI formats return
    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
    // [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
    Uri string `protobuf:"bytes,2,opt,name=uri,proto3,oneof"`
}

type RecognitionConfig Uses

type RecognitionConfig struct {

    // Encoding of audio data sent in all `RecognitionAudio` messages.
    // This field is optional for `FLAC` and `WAV` audio files and required
    // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
    Encoding RecognitionConfig_AudioEncoding `protobuf:"varint,1,opt,name=encoding,proto3,enum=google.cloud.speech.v1.RecognitionConfig_AudioEncoding" json:"encoding,omitempty"`
    // Sample rate in Hertz of the audio data sent in all
    // `RecognitionAudio` messages. Valid values are: 8000-48000.
    // 16000 is optimal. For best results, set the sampling rate of the audio
    // source to 16000 Hz. If that's not possible, use the native sample rate of
    // the audio source (instead of re-sampling).
    // This field is optional for FLAC and WAV audio files, but is
    // required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
    SampleRateHertz int32 `protobuf:"varint,2,opt,name=sample_rate_hertz,json=sampleRateHertz,proto3" json:"sample_rate_hertz,omitempty"`
    // The number of channels in the input audio data.
    // ONLY set this for MULTI-CHANNEL recognition.
    // Valid values for LINEAR16 and FLAC are `1`-`8`.
    // Valid values for OGG_OPUS are `1`-`254`.
    // Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
    // If `0` or omitted, defaults to one channel (mono).
    // Note: We only recognize the first channel by default.
    // To perform independent recognition on each channel, set
    // `enable_separate_recognition_per_channel` to `true`.
    AudioChannelCount int32 `protobuf:"varint,7,opt,name=audio_channel_count,json=audioChannelCount,proto3" json:"audio_channel_count,omitempty"`
    // This needs to be set to `true` explicitly and `audio_channel_count` > 1
    // to get each channel recognized separately. The recognition result will
    // contain a `channel_tag` field to state which channel that result belongs
    // to. If this is not true, we will only recognize the first channel. The
    // request is billed cumulatively for all channels recognized:
    // `audio_channel_count` multiplied by the length of the audio.
    EnableSeparateRecognitionPerChannel bool `protobuf:"varint,12,opt,name=enable_separate_recognition_per_channel,json=enableSeparateRecognitionPerChannel,proto3" json:"enable_separate_recognition_per_channel,omitempty"`
    // Required. The language of the supplied audio as a
    // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
    // Example: "en-US".
    // See [Language
    // Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
    // of the currently supported language codes.
    LanguageCode string `protobuf:"bytes,3,opt,name=language_code,json=languageCode,proto3" json:"language_code,omitempty"`
    // Maximum number of recognition hypotheses to be returned.
    // Specifically, the maximum number of `SpeechRecognitionAlternative` messages
    // within each `SpeechRecognitionResult`.
    // The server may return fewer than `max_alternatives`.
    // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
    // one. If omitted, will return a maximum of one.
    MaxAlternatives int32 `protobuf:"varint,4,opt,name=max_alternatives,json=maxAlternatives,proto3" json:"max_alternatives,omitempty"`
    // If set to `true`, the server will attempt to filter out
    // profanities, replacing all but the initial character in each filtered word
    // with asterisks, e.g. "f***". If set to `false` or omitted, profanities
    // won't be filtered out.
    ProfanityFilter bool `protobuf:"varint,5,opt,name=profanity_filter,json=profanityFilter,proto3" json:"profanity_filter,omitempty"`
    // Array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
    // A means to provide context to assist the speech recognition. For more
    // information, see
    // [speech
    // adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
    SpeechContexts []*SpeechContext `protobuf:"bytes,6,rep,name=speech_contexts,json=speechContexts,proto3" json:"speech_contexts,omitempty"`
    // If `true`, the top result includes a list of words and
    // the start and end time offsets (timestamps) for those words. If
    // `false`, no word-level time offset information is returned. The default is
    // `false`.
    EnableWordTimeOffsets bool `protobuf:"varint,8,opt,name=enable_word_time_offsets,json=enableWordTimeOffsets,proto3" json:"enable_word_time_offsets,omitempty"`
    // If 'true', adds punctuation to recognition result hypotheses.
    // This feature is only available in select languages. Setting this for
    // requests in other languages has no effect at all.
    // The default 'false' value does not add punctuation to result hypotheses.
    // Note: This is currently offered as an experimental service, complimentary
    // to all users. In the future this may be exclusively available as a
    // premium feature.
    EnableAutomaticPunctuation bool `protobuf:"varint,11,opt,name=enable_automatic_punctuation,json=enableAutomaticPunctuation,proto3" json:"enable_automatic_punctuation,omitempty"`
    // Config to enable speaker diarization and set additional
    // parameters to make diarization better suited for your application.
    // Note: When this is enabled, we send all the words from the beginning of the
    // audio for the top alternative in every consecutive STREAMING response.
    // This is done in order to improve our speaker tags as our models learn to
    // identify the speakers in the conversation over time.
    // For non-streaming requests, the diarization results will be provided only
    // in the top alternative of the FINAL SpeechRecognitionResult.
    DiarizationConfig *SpeakerDiarizationConfig `protobuf:"bytes,19,opt,name=diarization_config,json=diarizationConfig,proto3" json:"diarization_config,omitempty"`
    // Metadata regarding this request.
    Metadata *RecognitionMetadata `protobuf:"bytes,9,opt,name=metadata,proto3" json:"metadata,omitempty"`
    // Which model to select for the given request. Select the model
    // best suited to your domain to get best results. If a model is not
    // explicitly specified, then we auto-select a model based on the parameters
    // in the RecognitionConfig.
    // <table>
    //   <tr>
    //     <td><b>Model</b></td>
    //     <td><b>Description</b></td>
    //   </tr>
    //   <tr>
    //     <td><code>command_and_search</code></td>
    //     <td>Best for short queries such as voice commands or voice search.</td>
    //   </tr>
    //   <tr>
    //     <td><code>phone_call</code></td>
    //     <td>Best for audio that originated from a phone call (typically
    //     recorded at an 8khz sampling rate).</td>
    //   </tr>
    //   <tr>
    //     <td><code>video</code></td>
    //     <td>Best for audio that originated from video or includes multiple
    //         speakers. Ideally the audio is recorded at a 16khz or greater
    //         sampling rate. This is a premium model that costs more than the
    //         standard rate.</td>
    //   </tr>
    //   <tr>
    //     <td><code>default</code></td>
    //     <td>Best for audio that is not one of the specific audio models.
    //         For example, long-form audio. Ideally the audio is high-fidelity,
    //         recorded at a 16khz or greater sampling rate.</td>
    //   </tr>
    // </table>
    Model string `protobuf:"bytes,13,opt,name=model,proto3" json:"model,omitempty"`
    // Set to true to use an enhanced model for speech recognition.
    // If `use_enhanced` is set to true and the `model` field is not set, then
    // an appropriate enhanced model is chosen if an enhanced model exists for
    // the audio.
    //
    // If `use_enhanced` is true and an enhanced version of the specified model
    // does not exist, then the speech is recognized using the standard version
    // of the specified model.
    UseEnhanced bool `protobuf:"varint,14,opt,name=use_enhanced,json=useEnhanced,proto3" json:"use_enhanced,omitempty"`
    // contains filtered or unexported fields
}

Provides information to the recognizer that specifies how to process the request.
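
A populated config for illustration (a sketch; the field values are examples, not recommendations):

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"

var exampleConfig = &speechpb.RecognitionConfig{
    Encoding:                   speechpb.RecognitionConfig_LINEAR16,
    SampleRateHertz:            16000,
    LanguageCode:               "en-US", // the only required field
    MaxAlternatives:            3,
    EnableWordTimeOffsets:      true,
    EnableAutomaticPunctuation: true,
    Model:                      "phone_call",
    UseEnhanced:                true,
}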

func (*RecognitionConfig) Descriptor Uses

func (*RecognitionConfig) Descriptor() ([]byte, []int)

Deprecated: Use RecognitionConfig.ProtoReflect.Descriptor instead.

func (*RecognitionConfig) GetAudioChannelCount Uses

func (x *RecognitionConfig) GetAudioChannelCount() int32

func (*RecognitionConfig) GetDiarizationConfig Uses

func (x *RecognitionConfig) GetDiarizationConfig() *SpeakerDiarizationConfig

func (*RecognitionConfig) GetEnableAutomaticPunctuation Uses

func (x *RecognitionConfig) GetEnableAutomaticPunctuation() bool

func (*RecognitionConfig) GetEnableSeparateRecognitionPerChannel Uses

func (x *RecognitionConfig) GetEnableSeparateRecognitionPerChannel() bool

func (*RecognitionConfig) GetEnableWordTimeOffsets Uses

func (x *RecognitionConfig) GetEnableWordTimeOffsets() bool

func (*RecognitionConfig) GetEncoding Uses

func (x *RecognitionConfig) GetEncoding() RecognitionConfig_AudioEncoding

func (*RecognitionConfig) GetLanguageCode Uses

func (x *RecognitionConfig) GetLanguageCode() string

func (*RecognitionConfig) GetMaxAlternatives Uses

func (x *RecognitionConfig) GetMaxAlternatives() int32

func (*RecognitionConfig) GetMetadata Uses

func (x *RecognitionConfig) GetMetadata() *RecognitionMetadata

func (*RecognitionConfig) GetModel Uses

func (x *RecognitionConfig) GetModel() string

func (*RecognitionConfig) GetProfanityFilter Uses

func (x *RecognitionConfig) GetProfanityFilter() bool

func (*RecognitionConfig) GetSampleRateHertz Uses

func (x *RecognitionConfig) GetSampleRateHertz() int32

func (*RecognitionConfig) GetSpeechContexts Uses

func (x *RecognitionConfig) GetSpeechContexts() []*SpeechContext

func (*RecognitionConfig) GetUseEnhanced Uses

func (x *RecognitionConfig) GetUseEnhanced() bool

func (*RecognitionConfig) ProtoMessage Uses

func (*RecognitionConfig) ProtoMessage()

func (*RecognitionConfig) ProtoReflect Uses

func (x *RecognitionConfig) ProtoReflect() protoreflect.Message

func (*RecognitionConfig) Reset Uses

func (x *RecognitionConfig) Reset()

func (*RecognitionConfig) String Uses

func (x *RecognitionConfig) String() string

type RecognitionConfig_AudioEncoding Uses

type RecognitionConfig_AudioEncoding int32

The encoding of the audio data sent in the request.

All encodings support only 1 channel (mono) audio, unless the `audio_channel_count` and `enable_separate_recognition_per_channel` fields are set.

For best results, the audio source should be captured and transmitted using a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech recognition can be reduced if lossy codecs are used to capture or transmit audio, particularly if background noise is present. Lossy codecs include `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.

The `FLAC` and `WAV` audio file formats include a header that describes the included audio content. You can request recognition for `WAV` files that contain either `LINEAR16` or `MULAW` encoded audio. If you send audio in `FLAC` or `WAV` format in your request, you do not need to specify an `AudioEncoding`; the audio encoding format is determined from the file header. If you specify an `AudioEncoding` when you send `FLAC` or `WAV` audio, the encoding configuration must match the encoding described in the audio header; otherwise the request returns a [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
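
For example, a config for FLAC input can rely on the header (a sketch; the variable name is hypothetical):

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"

// Encoding and SampleRateHertz are deliberately left at their zero values:
// for FLAC and WAV input they are read from the file header.
var flacConfig = &speechpb.RecognitionConfig{
    LanguageCode: "en-US",
}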

const (
    // Not specified.
    RecognitionConfig_ENCODING_UNSPECIFIED RecognitionConfig_AudioEncoding = 0
    // Uncompressed 16-bit signed little-endian samples (Linear PCM).
    RecognitionConfig_LINEAR16 RecognitionConfig_AudioEncoding = 1
    // `FLAC` (Free Lossless Audio
    // Codec) is the recommended encoding because it is
    // lossless--therefore recognition is not compromised--and
    // requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
    // encoding supports 16-bit and 24-bit samples; however, not all fields in
    // `STREAMINFO` are supported.
    RecognitionConfig_FLAC RecognitionConfig_AudioEncoding = 2
    // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
    RecognitionConfig_MULAW RecognitionConfig_AudioEncoding = 3
    // Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
    RecognitionConfig_AMR RecognitionConfig_AudioEncoding = 4
    // Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
    RecognitionConfig_AMR_WB RecognitionConfig_AudioEncoding = 5
    // Opus encoded audio frames in Ogg container
    // ([OggOpus](https://wiki.xiph.org/OggOpus)).
    // `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.
    RecognitionConfig_OGG_OPUS RecognitionConfig_AudioEncoding = 6
    // Although the use of lossy encodings is not recommended, if a very low
    // bitrate encoding is required, `OGG_OPUS` is highly preferred over
    // Speex encoding. The [Speex](https://speex.org/) encoding supported by
    // Cloud Speech API has a header byte in each block, as in MIME type
    // `audio/x-speex-with-header-byte`.
    // It is a variant of the RTP Speex encoding defined in
    // [RFC 5574](https://tools.ietf.org/html/rfc5574).
    // The stream is a sequence of blocks, one block per RTP packet. Each block
    // starts with a byte containing the length of the block, in bytes, followed
    // by one or more frames of Speex data, padded to an integral number of
    // bytes (octets) as specified in RFC 5574. In other words, each RTP header
    // is replaced with a single byte containing the block length. Only Speex
    // wideband is supported. `sample_rate_hertz` must be 16000.
    RecognitionConfig_SPEEX_WITH_HEADER_BYTE RecognitionConfig_AudioEncoding = 7
)

func (RecognitionConfig_AudioEncoding) Descriptor Uses

func (RecognitionConfig_AudioEncoding) Descriptor() protoreflect.EnumDescriptor

func (RecognitionConfig_AudioEncoding) Enum Uses

func (x RecognitionConfig_AudioEncoding) Enum() *RecognitionConfig_AudioEncoding

func (RecognitionConfig_AudioEncoding) EnumDescriptor Uses

func (RecognitionConfig_AudioEncoding) EnumDescriptor() ([]byte, []int)

Deprecated: Use RecognitionConfig_AudioEncoding.Descriptor instead.

func (RecognitionConfig_AudioEncoding) Number Uses

func (x RecognitionConfig_AudioEncoding) Number() protoreflect.EnumNumber

func (RecognitionConfig_AudioEncoding) String Uses

func (x RecognitionConfig_AudioEncoding) String() string

func (RecognitionConfig_AudioEncoding) Type Uses

func (RecognitionConfig_AudioEncoding) Type() protoreflect.EnumType

type RecognitionMetadata Uses

type RecognitionMetadata struct {

    // The use case most closely describing the audio content to be recognized.
    InteractionType RecognitionMetadata_InteractionType `protobuf:"varint,1,opt,name=interaction_type,json=interactionType,proto3,enum=google.cloud.speech.v1.RecognitionMetadata_InteractionType" json:"interaction_type,omitempty"`
    // The industry vertical to which this speech recognition request most
    // closely applies. This is most indicative of the topics contained
    // in the audio.  Use the 6-digit NAICS code to identify the industry
    // vertical - see https://www.naics.com/search/.
    IndustryNaicsCodeOfAudio uint32 `protobuf:"varint,3,opt,name=industry_naics_code_of_audio,json=industryNaicsCodeOfAudio,proto3" json:"industry_naics_code_of_audio,omitempty"`
    // The audio type that most closely describes the audio being recognized.
    MicrophoneDistance RecognitionMetadata_MicrophoneDistance `protobuf:"varint,4,opt,name=microphone_distance,json=microphoneDistance,proto3,enum=google.cloud.speech.v1.RecognitionMetadata_MicrophoneDistance" json:"microphone_distance,omitempty"`
    // The original media the speech was recorded on.
    OriginalMediaType RecognitionMetadata_OriginalMediaType `protobuf:"varint,5,opt,name=original_media_type,json=originalMediaType,proto3,enum=google.cloud.speech.v1.RecognitionMetadata_OriginalMediaType" json:"original_media_type,omitempty"`
    // The type of device the speech was recorded with.
    RecordingDeviceType RecognitionMetadata_RecordingDeviceType `protobuf:"varint,6,opt,name=recording_device_type,json=recordingDeviceType,proto3,enum=google.cloud.speech.v1.RecognitionMetadata_RecordingDeviceType" json:"recording_device_type,omitempty"`
    // The device used to make the recording. Examples: 'Nexus 5X',
    // 'Polycom SoundStation IP 6000', 'POTS', 'VoIP', or
    // 'Cardioid Microphone'.
    RecordingDeviceName string `protobuf:"bytes,7,opt,name=recording_device_name,json=recordingDeviceName,proto3" json:"recording_device_name,omitempty"`
    // Mime type of the original audio file.  For example `audio/m4a`,
    // `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
    // A list of possible audio mime types is maintained at
    // http://www.iana.org/assignments/media-types/media-types.xhtml#audio
    OriginalMimeType string `protobuf:"bytes,8,opt,name=original_mime_type,json=originalMimeType,proto3" json:"original_mime_type,omitempty"`
    // Description of the content, e.g. "Recordings of federal supreme court
    // hearings from 2012".
    AudioTopic string `protobuf:"bytes,10,opt,name=audio_topic,json=audioTopic,proto3" json:"audio_topic,omitempty"`
    // contains filtered or unexported fields
}

Description of audio data to be recognized.
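
A sketch of attaching metadata to a request (all values are illustrative; the variable would be assigned to RecognitionConfig.Metadata):

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"

var exampleMetadata = &speechpb.RecognitionMetadata{
    InteractionType:     speechpb.RecognitionMetadata_PHONE_CALL,
    MicrophoneDistance:  speechpb.RecognitionMetadata_NEARFIELD,
    OriginalMediaType:   speechpb.RecognitionMetadata_AUDIO,
    RecordingDeviceType: speechpb.RecognitionMetadata_PHONE_LINE,
    RecordingDeviceName: "POTS",
    OriginalMimeType:    "audio/mp3",
    AudioTopic:          "customer support calls",
}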

func (*RecognitionMetadata) Descriptor Uses

func (*RecognitionMetadata) Descriptor() ([]byte, []int)

Deprecated: Use RecognitionMetadata.ProtoReflect.Descriptor instead.

func (*RecognitionMetadata) GetAudioTopic Uses

func (x *RecognitionMetadata) GetAudioTopic() string

func (*RecognitionMetadata) GetIndustryNaicsCodeOfAudio Uses

func (x *RecognitionMetadata) GetIndustryNaicsCodeOfAudio() uint32

func (*RecognitionMetadata) GetInteractionType Uses

func (x *RecognitionMetadata) GetInteractionType() RecognitionMetadata_InteractionType

func (*RecognitionMetadata) GetMicrophoneDistance Uses

func (x *RecognitionMetadata) GetMicrophoneDistance() RecognitionMetadata_MicrophoneDistance

func (*RecognitionMetadata) GetOriginalMediaType Uses

func (x *RecognitionMetadata) GetOriginalMediaType() RecognitionMetadata_OriginalMediaType

func (*RecognitionMetadata) GetOriginalMimeType Uses

func (x *RecognitionMetadata) GetOriginalMimeType() string

func (*RecognitionMetadata) GetRecordingDeviceName Uses

func (x *RecognitionMetadata) GetRecordingDeviceName() string

func (*RecognitionMetadata) GetRecordingDeviceType Uses

func (x *RecognitionMetadata) GetRecordingDeviceType() RecognitionMetadata_RecordingDeviceType

func (*RecognitionMetadata) ProtoMessage Uses

func (*RecognitionMetadata) ProtoMessage()

func (*RecognitionMetadata) ProtoReflect Uses

func (x *RecognitionMetadata) ProtoReflect() protoreflect.Message

func (*RecognitionMetadata) Reset Uses

func (x *RecognitionMetadata) Reset()

func (*RecognitionMetadata) String Uses

func (x *RecognitionMetadata) String() string

type RecognitionMetadata_InteractionType Uses

type RecognitionMetadata_InteractionType int32

Use case categories that the audio recognition request can be described by.

const (
    // Use case is either unknown or is something other than one of the other
    // values below.
    RecognitionMetadata_INTERACTION_TYPE_UNSPECIFIED RecognitionMetadata_InteractionType = 0
    // Multiple people in a conversation or discussion. For example, in a
    // meeting with two or more people actively participating. Typically
    // all the primary people speaking would be in the same room (if not,
    // see PHONE_CALL).
    RecognitionMetadata_DISCUSSION RecognitionMetadata_InteractionType = 1
    // One or more persons lecturing or presenting to others, mostly
    // uninterrupted.
    RecognitionMetadata_PRESENTATION RecognitionMetadata_InteractionType = 2
    // A phone-call or video-conference in which two or more people, who are
    // not in the same room, are actively participating.
    RecognitionMetadata_PHONE_CALL RecognitionMetadata_InteractionType = 3
    // A recorded message intended for another person to listen to.
    RecognitionMetadata_VOICEMAIL RecognitionMetadata_InteractionType = 4
    // Professionally produced audio (e.g. a TV show or podcast).
    RecognitionMetadata_PROFESSIONALLY_PRODUCED RecognitionMetadata_InteractionType = 5
    // Transcribe spoken questions and queries into text.
    RecognitionMetadata_VOICE_SEARCH RecognitionMetadata_InteractionType = 6
    // Transcribe voice commands, such as for controlling a device.
    RecognitionMetadata_VOICE_COMMAND RecognitionMetadata_InteractionType = 7
    // Transcribe speech to text to create a written document, such as a
    // text-message, email or report.
    RecognitionMetadata_DICTATION RecognitionMetadata_InteractionType = 8
)

func (RecognitionMetadata_InteractionType) Descriptor Uses

func (RecognitionMetadata_InteractionType) Descriptor() protoreflect.EnumDescriptor

func (RecognitionMetadata_InteractionType) Enum Uses

func (x RecognitionMetadata_InteractionType) Enum() *RecognitionMetadata_InteractionType

func (RecognitionMetadata_InteractionType) EnumDescriptor Uses

func (RecognitionMetadata_InteractionType) EnumDescriptor() ([]byte, []int)

Deprecated: Use RecognitionMetadata_InteractionType.Descriptor instead.

func (RecognitionMetadata_InteractionType) Number Uses

func (x RecognitionMetadata_InteractionType) Number() protoreflect.EnumNumber

func (RecognitionMetadata_InteractionType) String Uses

func (x RecognitionMetadata_InteractionType) String() string

func (RecognitionMetadata_InteractionType) Type Uses

func (RecognitionMetadata_InteractionType) Type() protoreflect.EnumType

type RecognitionMetadata_MicrophoneDistance Uses

type RecognitionMetadata_MicrophoneDistance int32

Enumerates the types of capture settings describing an audio file.

const (
    // Audio type is not known.
    RecognitionMetadata_MICROPHONE_DISTANCE_UNSPECIFIED RecognitionMetadata_MicrophoneDistance = 0
    // The audio was captured from a closely placed microphone, e.g. a phone,
    // dictaphone, or handheld microphone. Generally, the speaker is within
    // 1 meter of the microphone.
    RecognitionMetadata_NEARFIELD RecognitionMetadata_MicrophoneDistance = 1
    // The speaker is within 3 meters of the microphone.
    RecognitionMetadata_MIDFIELD RecognitionMetadata_MicrophoneDistance = 2
    // The speaker is more than 3 meters away from the microphone.
    RecognitionMetadata_FARFIELD RecognitionMetadata_MicrophoneDistance = 3
)

func (RecognitionMetadata_MicrophoneDistance) Descriptor Uses

func (RecognitionMetadata_MicrophoneDistance) Descriptor() protoreflect.EnumDescriptor

func (RecognitionMetadata_MicrophoneDistance) Enum Uses

func (x RecognitionMetadata_MicrophoneDistance) Enum() *RecognitionMetadata_MicrophoneDistance

func (RecognitionMetadata_MicrophoneDistance) EnumDescriptor Uses

func (RecognitionMetadata_MicrophoneDistance) EnumDescriptor() ([]byte, []int)

Deprecated: Use RecognitionMetadata_MicrophoneDistance.Descriptor instead.

func (RecognitionMetadata_MicrophoneDistance) Number Uses

func (x RecognitionMetadata_MicrophoneDistance) Number() protoreflect.EnumNumber

func (RecognitionMetadata_MicrophoneDistance) String Uses

func (x RecognitionMetadata_MicrophoneDistance) String() string

func (RecognitionMetadata_MicrophoneDistance) Type Uses

func (RecognitionMetadata_MicrophoneDistance) Type() protoreflect.EnumType

type RecognitionMetadata_OriginalMediaType Uses

type RecognitionMetadata_OriginalMediaType int32

The original media the speech was recorded on.

const (
    // Unknown original media type.
    RecognitionMetadata_ORIGINAL_MEDIA_TYPE_UNSPECIFIED RecognitionMetadata_OriginalMediaType = 0
    // The speech data is an audio recording.
    RecognitionMetadata_AUDIO RecognitionMetadata_OriginalMediaType = 1
    // The speech data was originally recorded on a video.
    RecognitionMetadata_VIDEO RecognitionMetadata_OriginalMediaType = 2
)

func (RecognitionMetadata_OriginalMediaType) Descriptor Uses

func (RecognitionMetadata_OriginalMediaType) Descriptor() protoreflect.EnumDescriptor

func (RecognitionMetadata_OriginalMediaType) Enum Uses

func (x RecognitionMetadata_OriginalMediaType) Enum() *RecognitionMetadata_OriginalMediaType

func (RecognitionMetadata_OriginalMediaType) EnumDescriptor Uses

func (RecognitionMetadata_OriginalMediaType) EnumDescriptor() ([]byte, []int)

Deprecated: Use RecognitionMetadata_OriginalMediaType.Descriptor instead.

func (RecognitionMetadata_OriginalMediaType) Number Uses

func (x RecognitionMetadata_OriginalMediaType) Number() protoreflect.EnumNumber

func (RecognitionMetadata_OriginalMediaType) String Uses

func (x RecognitionMetadata_OriginalMediaType) String() string

func (RecognitionMetadata_OriginalMediaType) Type Uses

func (RecognitionMetadata_OriginalMediaType) Type() protoreflect.EnumType

type RecognitionMetadata_RecordingDeviceType Uses

type RecognitionMetadata_RecordingDeviceType int32

The type of device the speech was recorded with.

const (
    // The recording device is unknown.
    RecognitionMetadata_RECORDING_DEVICE_TYPE_UNSPECIFIED RecognitionMetadata_RecordingDeviceType = 0
    // Speech was recorded on a smartphone.
    RecognitionMetadata_SMARTPHONE RecognitionMetadata_RecordingDeviceType = 1
    // Speech was recorded using a personal computer or tablet.
    RecognitionMetadata_PC RecognitionMetadata_RecordingDeviceType = 2
    // Speech was recorded over a phone line.
    RecognitionMetadata_PHONE_LINE RecognitionMetadata_RecordingDeviceType = 3
    // Speech was recorded in a vehicle.
    RecognitionMetadata_VEHICLE RecognitionMetadata_RecordingDeviceType = 4
    // Speech was recorded outdoors.
    RecognitionMetadata_OTHER_OUTDOOR_DEVICE RecognitionMetadata_RecordingDeviceType = 5
    // Speech was recorded indoors.
    RecognitionMetadata_OTHER_INDOOR_DEVICE RecognitionMetadata_RecordingDeviceType = 6
)

func (RecognitionMetadata_RecordingDeviceType) Descriptor Uses

func (RecognitionMetadata_RecordingDeviceType) Descriptor() protoreflect.EnumDescriptor

func (RecognitionMetadata_RecordingDeviceType) Enum Uses

func (x RecognitionMetadata_RecordingDeviceType) Enum() *RecognitionMetadata_RecordingDeviceType

func (RecognitionMetadata_RecordingDeviceType) EnumDescriptor Uses

func (RecognitionMetadata_RecordingDeviceType) EnumDescriptor() ([]byte, []int)

Deprecated: Use RecognitionMetadata_RecordingDeviceType.Descriptor instead.

func (RecognitionMetadata_RecordingDeviceType) Number Uses

func (x RecognitionMetadata_RecordingDeviceType) Number() protoreflect.EnumNumber

func (RecognitionMetadata_RecordingDeviceType) String Uses

func (x RecognitionMetadata_RecordingDeviceType) String() string

func (RecognitionMetadata_RecordingDeviceType) Type Uses

func (RecognitionMetadata_RecordingDeviceType) Type() protoreflect.EnumType

type RecognizeRequest Uses

type RecognizeRequest struct {

    // Required. Provides information to the recognizer that specifies how to
    // process the request.
    Config *RecognitionConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"`
    // Required. The audio data to be recognized.
    Audio *RecognitionAudio `protobuf:"bytes,2,opt,name=audio,proto3" json:"audio,omitempty"`
    // contains filtered or unexported fields
}

The top-level message sent by the client for the `Recognize` method.
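
A sketch of the synchronous call; `recognizeSync` is a hypothetical helper and the encoding parameters are examples:

import (
    "context"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)

// recognizeSync sends inline audio and blocks until recognition completes.
func recognizeSync(ctx context.Context, client speechpb.SpeechClient, pcm []byte) (*speechpb.RecognizeResponse, error) {
    return client.Recognize(ctx, &speechpb.RecognizeRequest{
        Config: &speechpb.RecognitionConfig{
            Encoding:        speechpb.RecognitionConfig_LINEAR16,
            SampleRateHertz: 16000,
            LanguageCode:    "en-US",
        },
        Audio: &speechpb.RecognitionAudio{
            AudioSource: &speechpb.RecognitionAudio_Content{Content: pcm},
        },
    })
}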

func (*RecognizeRequest) Descriptor Uses

func (*RecognizeRequest) Descriptor() ([]byte, []int)

Deprecated: Use RecognizeRequest.ProtoReflect.Descriptor instead.

func (*RecognizeRequest) GetAudio Uses

func (x *RecognizeRequest) GetAudio() *RecognitionAudio

func (*RecognizeRequest) GetConfig Uses

func (x *RecognizeRequest) GetConfig() *RecognitionConfig

func (*RecognizeRequest) ProtoMessage Uses

func (*RecognizeRequest) ProtoMessage()

func (*RecognizeRequest) ProtoReflect Uses

func (x *RecognizeRequest) ProtoReflect() protoreflect.Message

func (*RecognizeRequest) Reset Uses

func (x *RecognizeRequest) Reset()

func (*RecognizeRequest) String Uses

func (x *RecognizeRequest) String() string

type RecognizeResponse Uses

type RecognizeResponse struct {

    // Sequential list of transcription results corresponding to
    // sequential portions of audio.
    Results []*SpeechRecognitionResult `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"`
    // contains filtered or unexported fields
}

The only message returned to the client by the `Recognize` method. It contains the result as zero or more sequential `SpeechRecognitionResult` messages.
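
A sketch of consuming the response; `printResults` is a hypothetical helper:

import (
    "fmt"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)

// printResults walks the sequential results and their ranked alternatives.
func printResults(resp *speechpb.RecognizeResponse) {
    for _, result := range resp.GetResults() {
        for i, alt := range result.GetAlternatives() {
            fmt.Printf("rank %d (confidence %.2f): %s\n",
                i, alt.GetConfidence(), alt.GetTranscript())
        }
    }
}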

func (*RecognizeResponse) Descriptor Uses

func (*RecognizeResponse) Descriptor() ([]byte, []int)

Deprecated: Use RecognizeResponse.ProtoReflect.Descriptor instead.

func (*RecognizeResponse) GetResults Uses

func (x *RecognizeResponse) GetResults() []*SpeechRecognitionResult

func (*RecognizeResponse) ProtoMessage Uses

func (*RecognizeResponse) ProtoMessage()

func (*RecognizeResponse) ProtoReflect Uses

func (x *RecognizeResponse) ProtoReflect() protoreflect.Message

func (*RecognizeResponse) Reset Uses

func (x *RecognizeResponse) Reset()

func (*RecognizeResponse) String Uses

func (x *RecognizeResponse) String() string

type SpeakerDiarizationConfig Uses

type SpeakerDiarizationConfig struct {

    // If 'true', enables speaker detection for each recognized word in
    // the top alternative of the recognition result using a speaker_tag provided
    // in the WordInfo.
    EnableSpeakerDiarization bool `protobuf:"varint,1,opt,name=enable_speaker_diarization,json=enableSpeakerDiarization,proto3" json:"enable_speaker_diarization,omitempty"`
    // Minimum number of speakers in the conversation. This range gives you more
    // flexibility by allowing the system to automatically determine the correct
    // number of speakers. If not set, the default value is 2.
    MinSpeakerCount int32 `protobuf:"varint,2,opt,name=min_speaker_count,json=minSpeakerCount,proto3" json:"min_speaker_count,omitempty"`
    // Maximum number of speakers in the conversation. This range gives you more
    // flexibility by allowing the system to automatically determine the correct
    // number of speakers. If not set, the default value is 6.
    MaxSpeakerCount int32 `protobuf:"varint,3,opt,name=max_speaker_count,json=maxSpeakerCount,proto3" json:"max_speaker_count,omitempty"`
    // Unused.
    //
    // Deprecated: Do not use.
    SpeakerTag int32 `protobuf:"varint,5,opt,name=speaker_tag,json=speakerTag,proto3" json:"speaker_tag,omitempty"`
    // contains filtered or unexported fields
}

Config to enable speaker diarization.
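
A sketch of wiring diarization into a RecognitionConfig; `withDiarization` is a hypothetical helper and the speaker counts are illustrative:

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"

// withDiarization enables per-word speaker tags on an existing config.
func withDiarization(config *speechpb.RecognitionConfig) {
    config.DiarizationConfig = &speechpb.SpeakerDiarizationConfig{
        EnableSpeakerDiarization: true,
        MinSpeakerCount:          2,
        MaxSpeakerCount:          4,
    }
}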

func (*SpeakerDiarizationConfig) Descriptor Uses

func (*SpeakerDiarizationConfig) Descriptor() ([]byte, []int)

Deprecated: Use SpeakerDiarizationConfig.ProtoReflect.Descriptor instead.

func (*SpeakerDiarizationConfig) GetEnableSpeakerDiarization Uses

func (x *SpeakerDiarizationConfig) GetEnableSpeakerDiarization() bool

func (*SpeakerDiarizationConfig) GetMaxSpeakerCount Uses

func (x *SpeakerDiarizationConfig) GetMaxSpeakerCount() int32

func (*SpeakerDiarizationConfig) GetMinSpeakerCount Uses

func (x *SpeakerDiarizationConfig) GetMinSpeakerCount() int32

func (*SpeakerDiarizationConfig) GetSpeakerTag Uses

func (x *SpeakerDiarizationConfig) GetSpeakerTag() int32

Deprecated: Do not use.

func (*SpeakerDiarizationConfig) ProtoMessage Uses

func (*SpeakerDiarizationConfig) ProtoMessage()

func (*SpeakerDiarizationConfig) ProtoReflect Uses

func (x *SpeakerDiarizationConfig) ProtoReflect() protoreflect.Message

func (*SpeakerDiarizationConfig) Reset Uses

func (x *SpeakerDiarizationConfig) Reset()

func (*SpeakerDiarizationConfig) String Uses

func (x *SpeakerDiarizationConfig) String() string

type SpeechClient Uses

type SpeechClient interface {
    // Performs synchronous speech recognition: receive results after all audio
    // has been sent and processed.
    Recognize(ctx context.Context, in *RecognizeRequest, opts ...grpc.CallOption) (*RecognizeResponse, error)
    // Performs asynchronous speech recognition: receive results via the
    // google.longrunning.Operations interface. Returns either an
    // `Operation.error` or an `Operation.response` which contains
    // a `LongRunningRecognizeResponse` message.
    // For more information on asynchronous speech recognition, see the
    // [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
    LongRunningRecognize(ctx context.Context, in *LongRunningRecognizeRequest, opts ...grpc.CallOption) (*longrunning.Operation, error)
    // Performs bidirectional streaming speech recognition: receive results while
    // sending audio. This method is only available via the gRPC API (not REST).
    StreamingRecognize(ctx context.Context, opts ...grpc.CallOption) (Speech_StreamingRecognizeClient, error)
}

SpeechClient is the client API for Speech service.

For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.

func NewSpeechClient Uses

func NewSpeechClient(cc grpc.ClientConnInterface) SpeechClient
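
A minimal connection sketch. Per-RPC OAuth credentials are omitted for brevity; production code would attach them (for example via google.golang.org/grpc/credentials/oauth) or use the higher-level cloud.google.com/go/speech/apiv1 wrapper instead of dialing directly:

import (
    "log"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"
)

// newClient dials the public endpoint over TLS (system root CAs) and wraps
// the connection in a SpeechClient. Authentication is intentionally omitted.
func newClient() speechpb.SpeechClient {
    conn, err := grpc.Dial("speech.googleapis.com:443",
        grpc.WithTransportCredentials(credentials.NewClientTLSFromCert(nil, "")))
    if err != nil {
        log.Fatal(err)
    }
    return speechpb.NewSpeechClient(conn)
}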

type SpeechContext Uses

type SpeechContext struct {

    // A list of strings containing word and phrase "hints" so that
    // the speech recognition is more likely to recognize them. This can be used
    // to improve the accuracy for specific words and phrases, for example, if
    // specific commands are typically spoken by the user. This can also be used
    // to add additional words to the vocabulary of the recognizer. See
    // [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
    //
    // List items can also be set to classes for groups of words that represent
    // common concepts that occur in natural language. For example, rather than
    // providing phrase hints for every month of the year, using the $MONTH class
    // improves the likelihood of correctly transcribing audio that includes
    // months.
    Phrases []string `protobuf:"bytes,1,rep,name=phrases,proto3" json:"phrases,omitempty"`
    // contains filtered or unexported fields
}

Provides "hints" to the speech recognizer to favor specific words and phrases in the results.

func (*SpeechContext) Descriptor Uses

func (*SpeechContext) Descriptor() ([]byte, []int)

Deprecated: Use SpeechContext.ProtoReflect.Descriptor instead.

func (*SpeechContext) GetPhrases Uses

func (x *SpeechContext) GetPhrases() []string

func (*SpeechContext) ProtoMessage Uses

func (*SpeechContext) ProtoMessage()

func (*SpeechContext) ProtoReflect Uses

func (x *SpeechContext) ProtoReflect() protoreflect.Message

func (*SpeechContext) Reset Uses

func (x *SpeechContext) Reset()

func (*SpeechContext) String Uses

func (x *SpeechContext) String() string

type SpeechRecognitionAlternative Uses

type SpeechRecognitionAlternative struct {

    // Transcript text representing the words that the user spoke.
    Transcript string `protobuf:"bytes,1,opt,name=transcript,proto3" json:"transcript,omitempty"`
    // The confidence estimate between 0.0 and 1.0. A higher number
    // indicates an estimated greater likelihood that the recognized words are
    // correct. This field is set only for the top alternative of a non-streaming
    // result or of a streaming result where `is_final=true`.
    // This field is not guaranteed to be accurate and users should not rely on it
    // to be always provided.
    // The default of 0.0 is a sentinel value indicating `confidence` was not set.
    Confidence float32 `protobuf:"fixed32,2,opt,name=confidence,proto3" json:"confidence,omitempty"`
    // A list of word-specific information for each recognized word.
    // Note: When `enable_speaker_diarization` is true, you will see all the words
    // from the beginning of the audio.
    Words []*WordInfo `protobuf:"bytes,3,rep,name=words,proto3" json:"words,omitempty"`
    // contains filtered or unexported fields
}

Alternative hypotheses (a.k.a. n-best list).

func (*SpeechRecognitionAlternative) Descriptor Uses

func (*SpeechRecognitionAlternative) Descriptor() ([]byte, []int)

Deprecated: Use SpeechRecognitionAlternative.ProtoReflect.Descriptor instead.

func (*SpeechRecognitionAlternative) GetConfidence Uses

func (x *SpeechRecognitionAlternative) GetConfidence() float32

func (*SpeechRecognitionAlternative) GetTranscript Uses

func (x *SpeechRecognitionAlternative) GetTranscript() string

func (*SpeechRecognitionAlternative) GetWords Uses

func (x *SpeechRecognitionAlternative) GetWords() []*WordInfo

func (*SpeechRecognitionAlternative) ProtoMessage Uses

func (*SpeechRecognitionAlternative) ProtoMessage()

func (*SpeechRecognitionAlternative) ProtoReflect Uses

func (x *SpeechRecognitionAlternative) ProtoReflect() protoreflect.Message

func (*SpeechRecognitionAlternative) Reset Uses

func (x *SpeechRecognitionAlternative) Reset()

func (*SpeechRecognitionAlternative) String Uses

func (x *SpeechRecognitionAlternative) String() string

type SpeechRecognitionResult Uses

type SpeechRecognitionResult struct {

    // May contain one or more recognition hypotheses (up to the
    // maximum specified in `max_alternatives`).
    // These alternatives are ordered in terms of accuracy, with the top (first)
    // alternative being the most probable, as ranked by the recognizer.
    Alternatives []*SpeechRecognitionAlternative `protobuf:"bytes,1,rep,name=alternatives,proto3" json:"alternatives,omitempty"`
    // For multi-channel audio, this is the channel number corresponding to the
    // recognized result for the audio from that channel.
    // For audio_channel_count = N, its output values can range from '1' to 'N'.
    ChannelTag int32 `protobuf:"varint,2,opt,name=channel_tag,json=channelTag,proto3" json:"channel_tag,omitempty"`
    // contains filtered or unexported fields
}

A speech recognition result corresponding to a portion of the audio.

func (*SpeechRecognitionResult) Descriptor Uses

func (*SpeechRecognitionResult) Descriptor() ([]byte, []int)

Deprecated: Use SpeechRecognitionResult.ProtoReflect.Descriptor instead.

func (*SpeechRecognitionResult) GetAlternatives Uses

func (x *SpeechRecognitionResult) GetAlternatives() []*SpeechRecognitionAlternative

func (*SpeechRecognitionResult) GetChannelTag Uses

func (x *SpeechRecognitionResult) GetChannelTag() int32

func (*SpeechRecognitionResult) ProtoMessage Uses

func (*SpeechRecognitionResult) ProtoMessage()

func (*SpeechRecognitionResult) ProtoReflect Uses

func (x *SpeechRecognitionResult) ProtoReflect() protoreflect.Message

func (*SpeechRecognitionResult) Reset Uses

func (x *SpeechRecognitionResult) Reset()

func (*SpeechRecognitionResult) String Uses

func (x *SpeechRecognitionResult) String() string

type SpeechServer Uses

type SpeechServer interface {
    // Performs synchronous speech recognition: receive results after all audio
    // has been sent and processed.
    Recognize(context.Context, *RecognizeRequest) (*RecognizeResponse, error)
    // Performs asynchronous speech recognition: receive results via the
    // google.longrunning.Operations interface. Returns either an
    // `Operation.error` or an `Operation.response` which contains
    // a `LongRunningRecognizeResponse` message.
    // For more information on asynchronous speech recognition, see the
    // [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
    LongRunningRecognize(context.Context, *LongRunningRecognizeRequest) (*longrunning.Operation, error)
    // Performs bidirectional streaming speech recognition: receive results while
    // sending audio. This method is only available via the gRPC API (not REST).
    StreamingRecognize(Speech_StreamingRecognizeServer) error
}

SpeechServer is the server API for Speech service.

type Speech_StreamingRecognizeClient Uses

type Speech_StreamingRecognizeClient interface {
    Send(*StreamingRecognizeRequest) error
    Recv() (*StreamingRecognizeResponse, error)
    grpc.ClientStream
}

type Speech_StreamingRecognizeServer Uses

type Speech_StreamingRecognizeServer interface {
    Send(*StreamingRecognizeResponse) error
    Recv() (*StreamingRecognizeRequest, error)
    grpc.ServerStream
}

type StreamingRecognitionConfig Uses

type StreamingRecognitionConfig struct {

    // Required. Provides information to the recognizer that specifies how to
    // process the request.
    Config *RecognitionConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"`
    // If `false` or omitted, the recognizer will perform continuous
    // recognition (continuing to wait for and process audio even if the user
    // pauses speaking) until the client closes the input stream (gRPC API) or
    // until the maximum time limit has been reached. May return multiple
    // `StreamingRecognitionResult`s with the `is_final` flag set to `true`.
    //
    // If `true`, the recognizer will detect a single spoken utterance. When it
    // detects that the user has paused or stopped speaking, it will return an
    // `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no
    // more than one `StreamingRecognitionResult` with the `is_final` flag set to
    // `true`.
    SingleUtterance bool `protobuf:"varint,2,opt,name=single_utterance,json=singleUtterance,proto3" json:"single_utterance,omitempty"`
    // If `true`, interim results (tentative hypotheses) may be
    // returned as they become available (these interim results are indicated with
    // the `is_final=false` flag).
    // If `false` or omitted, only `is_final=true` result(s) are returned.
    InterimResults bool `protobuf:"varint,3,opt,name=interim_results,json=interimResults,proto3" json:"interim_results,omitempty"`
    // contains filtered or unexported fields
}

Provides information to the recognizer that specifies how to process the request.

func (*StreamingRecognitionConfig) Descriptor Uses

func (*StreamingRecognitionConfig) Descriptor() ([]byte, []int)

Deprecated: Use StreamingRecognitionConfig.ProtoReflect.Descriptor instead.

func (*StreamingRecognitionConfig) GetConfig Uses

func (x *StreamingRecognitionConfig) GetConfig() *RecognitionConfig

func (*StreamingRecognitionConfig) GetInterimResults Uses

func (x *StreamingRecognitionConfig) GetInterimResults() bool

func (*StreamingRecognitionConfig) GetSingleUtterance Uses

func (x *StreamingRecognitionConfig) GetSingleUtterance() bool

func (*StreamingRecognitionConfig) ProtoMessage Uses

func (*StreamingRecognitionConfig) ProtoMessage()

func (*StreamingRecognitionConfig) ProtoReflect Uses

func (x *StreamingRecognitionConfig) ProtoReflect() protoreflect.Message

func (*StreamingRecognitionConfig) Reset Uses

func (x *StreamingRecognitionConfig) Reset()

func (*StreamingRecognitionConfig) String Uses

func (x *StreamingRecognitionConfig) String() string

type StreamingRecognitionResult Uses

type StreamingRecognitionResult struct {

    // May contain one or more recognition hypotheses (up to the
    // maximum specified in `max_alternatives`).
    // These alternatives are ordered in terms of accuracy, with the top (first)
    // alternative being the most probable, as ranked by the recognizer.
    Alternatives []*SpeechRecognitionAlternative `protobuf:"bytes,1,rep,name=alternatives,proto3" json:"alternatives,omitempty"`
    // If `false`, this `StreamingRecognitionResult` represents an
    // interim result that may change. If `true`, this is the final time the
    // speech service will return this particular `StreamingRecognitionResult`;
    // the recognizer will not return any further hypotheses for this portion of
    // the transcript and corresponding audio.
    IsFinal bool `protobuf:"varint,2,opt,name=is_final,json=isFinal,proto3" json:"is_final,omitempty"`
    // An estimate of the likelihood that the recognizer will not
    // change its guess about this interim result. Values range from 0.0
    // (completely unstable) to 1.0 (completely stable).
    // This field is only provided for interim results (`is_final=false`).
    // The default of 0.0 is a sentinel value indicating `stability` was not set.
    Stability float32 `protobuf:"fixed32,3,opt,name=stability,proto3" json:"stability,omitempty"`
    // Time offset of the end of this result relative to the
    // beginning of the audio.
    ResultEndTime *duration.Duration `protobuf:"bytes,4,opt,name=result_end_time,json=resultEndTime,proto3" json:"result_end_time,omitempty"`
    // For multi-channel audio, this is the channel number corresponding to the
    // recognized result for the audio from that channel.
    // For audio_channel_count = N, its output values can range from '1' to 'N'.
    ChannelTag int32 `protobuf:"varint,5,opt,name=channel_tag,json=channelTag,proto3" json:"channel_tag,omitempty"`
    // The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of
    // the language in this result. This language code was detected as the most
    // likely language spoken in the audio.
    LanguageCode string `protobuf:"bytes,6,opt,name=language_code,json=languageCode,proto3" json:"language_code,omitempty"`
    // contains filtered or unexported fields
}

A streaming speech recognition result corresponding to a portion of the audio that is currently being processed.
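A hedged sketch of how a client might act on one of these results; the 0.8 stability threshold is an arbitrary illustrative value, and `fmt` is assumed to be imported:

    // handleResult treats interim and final results differently. Alternatives
    // are ordered most probable first, so index 0 is the top hypothesis.
    func handleResult(res *speechpb.StreamingRecognitionResult) {
        alts := res.GetAlternatives()
        if len(alts) == 0 {
            return
        }
        if res.GetIsFinal() {
            fmt.Printf("final: %q\n", alts[0].GetTranscript())
        } else if res.GetStability() > 0.8 { // show only stable interim text
            fmt.Printf("interim: %q\n", alts[0].GetTranscript())
        }
    }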

func (*StreamingRecognitionResult) Descriptor Uses

func (*StreamingRecognitionResult) Descriptor() ([]byte, []int)

Deprecated: Use StreamingRecognitionResult.ProtoReflect.Descriptor instead.

func (*StreamingRecognitionResult) GetAlternatives Uses

func (x *StreamingRecognitionResult) GetAlternatives() []*SpeechRecognitionAlternative

func (*StreamingRecognitionResult) GetChannelTag Uses

func (x *StreamingRecognitionResult) GetChannelTag() int32

func (*StreamingRecognitionResult) GetIsFinal Uses

func (x *StreamingRecognitionResult) GetIsFinal() bool

func (*StreamingRecognitionResult) GetLanguageCode Uses

func (x *StreamingRecognitionResult) GetLanguageCode() string

func (*StreamingRecognitionResult) GetResultEndTime Uses

func (x *StreamingRecognitionResult) GetResultEndTime() *duration.Duration

func (*StreamingRecognitionResult) GetStability Uses

func (x *StreamingRecognitionResult) GetStability() float32

func (*StreamingRecognitionResult) ProtoMessage Uses

func (*StreamingRecognitionResult) ProtoMessage()

func (*StreamingRecognitionResult) ProtoReflect Uses

func (x *StreamingRecognitionResult) ProtoReflect() protoreflect.Message

func (*StreamingRecognitionResult) Reset Uses

func (x *StreamingRecognitionResult) Reset()

func (*StreamingRecognitionResult) String Uses

func (x *StreamingRecognitionResult) String() string

type StreamingRecognizeRequest Uses

type StreamingRecognizeRequest struct {

    // The streaming request, which is either a streaming config or audio content.
    //
    // Types that are assignable to StreamingRequest:
    //	*StreamingRecognizeRequest_StreamingConfig
    //	*StreamingRecognizeRequest_AudioContent
    StreamingRequest isStreamingRecognizeRequest_StreamingRequest `protobuf_oneof:"streaming_request"`
    // contains filtered or unexported fields
}

The top-level message sent by the client for the `StreamingRecognize` method. Multiple `StreamingRecognizeRequest` messages are sent. The first message must contain a `streaming_config` message and must not contain `audio_content`. All subsequent messages must contain `audio_content` and must not contain a `streaming_config` message.
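A sketch of that required ordering, assuming a stream obtained from `SpeechClient.StreamingRecognize` and a hypothetical `chunks` channel of pre-encoded audio bytes:

    // sendAudio sends the mandatory streaming_config message first, then
    // forwards audio_content-only messages until chunks is closed.
    func sendAudio(stream speechpb.Speech_StreamingRecognizeClient,
        cfg *speechpb.StreamingRecognitionConfig, chunks <-chan []byte) error {
        // First message: streaming_config, never audio_content.
        first := &speechpb.StreamingRecognizeRequest{
            StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{StreamingConfig: cfg},
        }
        if err := stream.Send(first); err != nil {
            return err
        }
        // Subsequent messages: audio_content, never streaming_config.
        for chunk := range chunks {
            req := &speechpb.StreamingRecognizeRequest{
                StreamingRequest: &speechpb.StreamingRecognizeRequest_AudioContent{AudioContent: chunk},
            }
            if err := stream.Send(req); err != nil {
                return err
            }
        }
        return stream.CloseSend() // half-close: no more audio will be sent
    }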

func (*StreamingRecognizeRequest) Descriptor Uses

func (*StreamingRecognizeRequest) Descriptor() ([]byte, []int)

Deprecated: Use StreamingRecognizeRequest.ProtoReflect.Descriptor instead.

func (*StreamingRecognizeRequest) GetAudioContent Uses

func (x *StreamingRecognizeRequest) GetAudioContent() []byte

func (*StreamingRecognizeRequest) GetStreamingConfig Uses

func (x *StreamingRecognizeRequest) GetStreamingConfig() *StreamingRecognitionConfig

func (*StreamingRecognizeRequest) GetStreamingRequest Uses

func (m *StreamingRecognizeRequest) GetStreamingRequest() isStreamingRecognizeRequest_StreamingRequest

func (*StreamingRecognizeRequest) ProtoMessage Uses

func (*StreamingRecognizeRequest) ProtoMessage()

func (*StreamingRecognizeRequest) ProtoReflect Uses

func (x *StreamingRecognizeRequest) ProtoReflect() protoreflect.Message

func (*StreamingRecognizeRequest) Reset Uses

func (x *StreamingRecognizeRequest) Reset()

func (*StreamingRecognizeRequest) String Uses

func (x *StreamingRecognizeRequest) String() string

type StreamingRecognizeRequest_AudioContent Uses

type StreamingRecognizeRequest_AudioContent struct {
    // The audio data to be recognized. Sequential chunks of audio data are sent
    // in sequential `StreamingRecognizeRequest` messages. The first
    // `StreamingRecognizeRequest` message must not contain `audio_content` data
    // and all subsequent `StreamingRecognizeRequest` messages must contain
    // `audio_content` data. The audio bytes must be encoded as specified in
    // `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
    // pure binary representation (not base64). See
    // [content limits](https://cloud.google.com/speech-to-text/quotas#content).
    AudioContent []byte `protobuf:"bytes,2,opt,name=audio_content,json=audioContent,proto3,oneof"`
}

type StreamingRecognizeRequest_StreamingConfig Uses

type StreamingRecognizeRequest_StreamingConfig struct {
    // Provides information to the recognizer that specifies how to process the
    // request. The first `StreamingRecognizeRequest` message must contain a
    // `streaming_config` message.
    StreamingConfig *StreamingRecognitionConfig `protobuf:"bytes,1,opt,name=streaming_config,json=streamingConfig,proto3,oneof"`
}

type StreamingRecognizeResponse Uses

type StreamingRecognizeResponse struct {

    // If set, returns a [google.rpc.Status][google.rpc.Status] message that
    // specifies the error for the operation.
    Error *status.Status `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"`
    // This repeated list contains zero or more results that
    // correspond to consecutive portions of the audio currently being processed.
    // It contains zero or one `is_final=true` result (the newly settled portion),
    // followed by zero or more `is_final=false` results (the interim results).
    Results []*StreamingRecognitionResult `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"`
    // Indicates the type of speech event.
    SpeechEventType StreamingRecognizeResponse_SpeechEventType `protobuf:"varint,4,opt,name=speech_event_type,json=speechEventType,proto3,enum=google.cloud.speech.v1.StreamingRecognizeResponse_SpeechEventType" json:"speech_event_type,omitempty"`
    // contains filtered or unexported fields
}

`StreamingRecognizeResponse` is the only message returned to the client by `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse` messages is streamed back to the client. If there is no recognizable audio and `single_utterance` is set to false, then no messages are streamed back to the client.

Here's an example of a series of seven `StreamingRecognizeResponse`s that might be returned while processing audio:

1. results { alternatives { transcript: "tube" } stability: 0.01 }

2. results { alternatives { transcript: "to be a" } stability: 0.01 }

3. results { alternatives { transcript: "to be" } stability: 0.9 }
   results { alternatives { transcript: " or not to be" } stability: 0.01 }

4. results { alternatives { transcript: "to be or not to be"
                            confidence: 0.92 }
             alternatives { transcript: "to bee or not to bee" }
   is_final: true }

5. results { alternatives { transcript: " that's" } stability: 0.01 }

6. results { alternatives { transcript: " that is" } stability: 0.9 }
   results { alternatives { transcript: " the question" } stability: 0.01 }

7. results { alternatives { transcript: " that is the question"
                            confidence: 0.98 }
             alternatives { transcript: " that was the question" }
   is_final: true }

Notes:

- Only two of the above responses, #4 and #7, contain final results; they are
  indicated by `is_final: true`. Concatenating these generates the full
  transcript: "to be or not to be that is the question" (see the receive-loop
  sketch after these notes).

- The others contain interim `results`. #3 and #6 contain two interim
  `results`: the first portion has high stability and is less likely to
  change; the second portion has low stability and is very likely to change.
  A UI designer might choose to show only high-stability `results`.

- The specific `stability` and `confidence` values shown above are for
  illustration only; actual values may vary.

- In each response, only one of these fields will be set: `error`,
  `speech_event_type`, or one or more (repeated) `results`.
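A minimal receive-loop sketch matching these notes, assuming `io` and `fmt` are imported and the stream comes from `SpeechClient.StreamingRecognize`:

    // receiveTranscript concatenates the top alternative of every
    // is_final=true result, mirroring the concatenation described above.
    func receiveTranscript(stream speechpb.Speech_StreamingRecognizeClient) (string, error) {
        var transcript string
        for {
            resp, err := stream.Recv()
            if err == io.EOF {
                return transcript, nil // server closed the stream
            }
            if err != nil {
                return transcript, err
            }
            if s := resp.GetError(); s != nil {
                return transcript, fmt.Errorf("recognize: %s", s.GetMessage())
            }
            for _, res := range resp.GetResults() {
                if res.GetIsFinal() && len(res.GetAlternatives()) > 0 {
                    transcript += res.GetAlternatives()[0].GetTranscript()
                }
            }
        }
    }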

func (*StreamingRecognizeResponse) Descriptor Uses

func (*StreamingRecognizeResponse) Descriptor() ([]byte, []int)

Deprecated: Use StreamingRecognizeResponse.ProtoReflect.Descriptor instead.

func (*StreamingRecognizeResponse) GetError Uses

func (x *StreamingRecognizeResponse) GetError() *status.Status

func (*StreamingRecognizeResponse) GetResults Uses

func (x *StreamingRecognizeResponse) GetResults() []*StreamingRecognitionResult

func (*StreamingRecognizeResponse) GetSpeechEventType Uses

func (x *StreamingRecognizeResponse) GetSpeechEventType() StreamingRecognizeResponse_SpeechEventType

func (*StreamingRecognizeResponse) ProtoMessage Uses

func (*StreamingRecognizeResponse) ProtoMessage()

func (*StreamingRecognizeResponse) ProtoReflect Uses

func (x *StreamingRecognizeResponse) ProtoReflect() protoreflect.Message

func (*StreamingRecognizeResponse) Reset Uses

func (x *StreamingRecognizeResponse) Reset()

func (*StreamingRecognizeResponse) String Uses

func (x *StreamingRecognizeResponse) String() string

type StreamingRecognizeResponse_SpeechEventType Uses

type StreamingRecognizeResponse_SpeechEventType int32

Indicates the type of speech event.

const (
    // No speech event specified.
    StreamingRecognizeResponse_SPEECH_EVENT_UNSPECIFIED StreamingRecognizeResponse_SpeechEventType = 0
    // This event indicates that the server has detected the end of the user's
    // speech utterance and expects no additional speech. Therefore, the server
    // will not process additional audio (although it may subsequently return
    // additional results). The client should stop sending additional audio
    // data, half-close the gRPC connection, and wait for any additional results
    // until the server closes the gRPC connection. This event is only sent if
    // `single_utterance` was set to `true`, and is not used otherwise.
    StreamingRecognizeResponse_END_OF_SINGLE_UTTERANCE StreamingRecognizeResponse_SpeechEventType = 1
)
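A sketch of the reaction the comment above prescribes, assuming `resp` comes from a receive loop like the one shown earlier; `maybeStopSending` is a hypothetical helper name:

    // maybeStopSending half-closes the send side of the stream when the
    // server signals END_OF_SINGLE_UTTERANCE; the caller should keep calling
    // stream.Recv() afterwards until the server closes the stream.
    func maybeStopSending(stream speechpb.Speech_StreamingRecognizeClient,
        resp *speechpb.StreamingRecognizeResponse) error {
        if resp.GetSpeechEventType() == speechpb.StreamingRecognizeResponse_END_OF_SINGLE_UTTERANCE {
            return stream.CloseSend() // no more audio; results may still arrive
        }
        return nil
    }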

func (StreamingRecognizeResponse_SpeechEventType) Descriptor Uses

func (StreamingRecognizeResponse_SpeechEventType) Descriptor() protoreflect.EnumDescriptor

func (StreamingRecognizeResponse_SpeechEventType) Enum Uses

func (x StreamingRecognizeResponse_SpeechEventType) Enum() *StreamingRecognizeResponse_SpeechEventType

func (StreamingRecognizeResponse_SpeechEventType) EnumDescriptor Uses

func (StreamingRecognizeResponse_SpeechEventType) EnumDescriptor() ([]byte, []int)

Deprecated: Use StreamingRecognizeResponse_SpeechEventType.Descriptor instead.

func (StreamingRecognizeResponse_SpeechEventType) Number Uses

func (x StreamingRecognizeResponse_SpeechEventType) Number() protoreflect.EnumNumber

func (StreamingRecognizeResponse_SpeechEventType) String Uses

func (x StreamingRecognizeResponse_SpeechEventType) String() string

func (StreamingRecognizeResponse_SpeechEventType) Type Uses

func (StreamingRecognizeResponse_SpeechEventType) Type() protoreflect.EnumType

type UnimplementedSpeechServer Uses

type UnimplementedSpeechServer struct {
}

UnimplementedSpeechServer can be embedded to have forward compatible implementations.
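A sketch of that embedding; `mySpeechServer` is a hypothetical type, and `context` is assumed to be imported:

    // mySpeechServer overrides only Recognize. LongRunningRecognize and
    // StreamingRecognize fall through to the embedded stubs, which return
    // an Unimplemented error, keeping the type forward compatible.
    type mySpeechServer struct {
        speechpb.UnimplementedSpeechServer
    }

    func (s *mySpeechServer) Recognize(ctx context.Context, req *speechpb.RecognizeRequest) (*speechpb.RecognizeResponse, error) {
        return &speechpb.RecognizeResponse{}, nil // placeholder implementation
    }

Registering `&mySpeechServer{}` with `RegisterSpeechServer` then satisfies `SpeechServer` even as new RPCs are added to the service.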

func (*UnimplementedSpeechServer) LongRunningRecognize Uses

func (*UnimplementedSpeechServer) LongRunningRecognize(context.Context, *LongRunningRecognizeRequest) (*longrunning.Operation, error)

func (*UnimplementedSpeechServer) Recognize Uses

func (*UnimplementedSpeechServer) Recognize(context.Context, *RecognizeRequest) (*RecognizeResponse, error)

func (*UnimplementedSpeechServer) StreamingRecognize Uses

func (*UnimplementedSpeechServer) StreamingRecognize(Speech_StreamingRecognizeServer) error

type WordInfo Uses

type WordInfo struct {

    // Time offset relative to the beginning of the audio,
    // and corresponding to the start of the spoken word.
    // This field is only set if `enable_word_time_offsets=true` and only
    // in the top hypothesis.
    // This is an experimental feature and the accuracy of the time offset can
    // vary.
    StartTime *duration.Duration `protobuf:"bytes,1,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"`
    // Time offset relative to the beginning of the audio,
    // and corresponding to the end of the spoken word.
    // This field is only set if `enable_word_time_offsets=true` and only
    // in the top hypothesis.
    // This is an experimental feature and the accuracy of the time offset can
    // vary.
    EndTime *duration.Duration `protobuf:"bytes,2,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"`
    // The word corresponding to this set of information.
    Word string `protobuf:"bytes,3,opt,name=word,proto3" json:"word,omitempty"`
    // A distinct integer value is assigned for every speaker within
    // the audio. This field specifies which one of those speakers was detected
    // to have spoken this word. Values range from '1' to
    // diarization_speaker_count. speaker_tag is set only if
    // enable_speaker_diarization = 'true', and only in the top alternative.
    SpeakerTag int32 `protobuf:"varint,5,opt,name=speaker_tag,json=speakerTag,proto3" json:"speaker_tag,omitempty"`
    // contains filtered or unexported fields
}

Word-specific information for recognized words.
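A sketch of printing word timings from an alternative's `Words` list, assuming `fmt` is imported; note the getters return the legacy `ptypes` duration type, whose `Seconds`/`Nanos` values are read directly here:

    // printWords prints each word with its speaker tag and time offsets,
    // truncated to millisecond precision.
    func printWords(words []*speechpb.WordInfo) {
        for _, w := range words {
            st, et := w.GetStartTime(), w.GetEndTime()
            fmt.Printf("%q speaker=%d %d.%03ds - %d.%03ds\n",
                w.GetWord(), w.GetSpeakerTag(),
                st.GetSeconds(), st.GetNanos()/1000000,
                et.GetSeconds(), et.GetNanos()/1000000)
        }
    }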

func (*WordInfo) Descriptor Uses

func (*WordInfo) Descriptor() ([]byte, []int)

Deprecated: Use WordInfo.ProtoReflect.Descriptor instead.

func (*WordInfo) GetEndTime Uses

func (x *WordInfo) GetEndTime() *duration.Duration

func (*WordInfo) GetSpeakerTag Uses

func (x *WordInfo) GetSpeakerTag() int32

func (*WordInfo) GetStartTime Uses

func (x *WordInfo) GetStartTime() *duration.Duration

func (*WordInfo) GetWord Uses

func (x *WordInfo) GetWord() string

func (*WordInfo) ProtoMessage Uses

func (*WordInfo) ProtoMessage()

func (*WordInfo) ProtoReflect Uses

func (x *WordInfo) ProtoReflect() protoreflect.Message

func (*WordInfo) Reset Uses

func (x *WordInfo) Reset()

func (*WordInfo) String Uses

func (x *WordInfo) String() string

Package speech imports 15 packages and is imported by 14 packages. Updated 2020-06-12.