Documentation ¶
Index ¶
- Variables
- func RegisterFrontierServer(s grpc.ServiceRegistrar, srv FrontierServer)
- type Cookie
- func (*Cookie) Descriptor() ([]byte, []int) deprecated
- func (x *Cookie) GetDomain() string
- func (x *Cookie) GetExpires() float64
- func (x *Cookie) GetHttpOnly() bool
- func (x *Cookie) GetName() string
- func (x *Cookie) GetPath() string
- func (x *Cookie) GetSameSite() string
- func (x *Cookie) GetSecure() bool
- func (x *Cookie) GetSession() bool
- func (x *Cookie) GetSize() int32
- func (x *Cookie) GetValue() string
- func (*Cookie) ProtoMessage()
- func (x *Cookie) ProtoReflect() protoreflect.Message
- func (x *Cookie) Reset()
- func (x *Cookie) String() string
- type CountResponse
- type CrawlExecutionId
- type CrawlExecutionStatus
- func (*CrawlExecutionStatus) Descriptor() ([]byte, []int) deprecated
- func (x *CrawlExecutionStatus) GetBytesCrawled() int64
- func (x *CrawlExecutionStatus) GetCreatedTime() *timestamppb.Timestamp
- func (x *CrawlExecutionStatus) GetCurrentUriId() []string
- func (x *CrawlExecutionStatus) GetDesiredState() CrawlExecutionStatus_State
- func (x *CrawlExecutionStatus) GetDocumentsCrawled() int64
- func (x *CrawlExecutionStatus) GetDocumentsDenied() int64
- func (x *CrawlExecutionStatus) GetDocumentsFailed() int64
- func (x *CrawlExecutionStatus) GetDocumentsOutOfScope() int64
- func (x *CrawlExecutionStatus) GetDocumentsRetried() int64
- func (x *CrawlExecutionStatus) GetEndTime() *timestamppb.Timestamp
- func (x *CrawlExecutionStatus) GetError() *v11.Error
- func (x *CrawlExecutionStatus) GetId() string
- func (x *CrawlExecutionStatus) GetJobExecutionId() string
- func (x *CrawlExecutionStatus) GetJobId() string
- func (x *CrawlExecutionStatus) GetLastChangeTime() *timestamppb.Timestamp
- func (x *CrawlExecutionStatus) GetSeedId() string
- func (x *CrawlExecutionStatus) GetStartTime() *timestamppb.Timestamp
- func (x *CrawlExecutionStatus) GetState() CrawlExecutionStatus_State
- func (x *CrawlExecutionStatus) GetUrisCrawled() int64
- func (*CrawlExecutionStatus) ProtoMessage()
- func (x *CrawlExecutionStatus) ProtoReflect() protoreflect.Message
- func (x *CrawlExecutionStatus) Reset()
- func (x *CrawlExecutionStatus) String() string
- type CrawlExecutionStatusChange
- func (*CrawlExecutionStatusChange) Descriptor() ([]byte, []int) deprecated
- func (x *CrawlExecutionStatusChange) GetAddBytesCrawled() int64
- func (x *CrawlExecutionStatusChange) GetAddCurrentUri() *QueuedUri
- func (x *CrawlExecutionStatusChange) GetAddDocumentsCrawled() int64
- func (x *CrawlExecutionStatusChange) GetAddDocumentsDenied() int64
- func (x *CrawlExecutionStatusChange) GetAddDocumentsFailed() int64
- func (x *CrawlExecutionStatusChange) GetAddDocumentsOutOfScope() int64
- func (x *CrawlExecutionStatusChange) GetAddDocumentsRetried() int64
- func (x *CrawlExecutionStatusChange) GetAddUrisCrawled() int64
- func (x *CrawlExecutionStatusChange) GetDeleteCurrentUri() *QueuedUri
- func (x *CrawlExecutionStatusChange) GetEndTime() *timestamppb.Timestamp
- func (x *CrawlExecutionStatusChange) GetError() *v11.Error
- func (x *CrawlExecutionStatusChange) GetId() string
- func (x *CrawlExecutionStatusChange) GetState() CrawlExecutionStatus_State
- func (*CrawlExecutionStatusChange) ProtoMessage()
- func (x *CrawlExecutionStatusChange) ProtoReflect() protoreflect.Message
- func (x *CrawlExecutionStatusChange) Reset()
- func (x *CrawlExecutionStatusChange) String() string
- type CrawlExecutionStatus_State
- func (CrawlExecutionStatus_State) Descriptor() protoreflect.EnumDescriptor
- func (x CrawlExecutionStatus_State) Enum() *CrawlExecutionStatus_State
- func (CrawlExecutionStatus_State) EnumDescriptor() ([]byte, []int) deprecated
- func (x CrawlExecutionStatus_State) Number() protoreflect.EnumNumber
- func (x CrawlExecutionStatus_State) String() string
- func (CrawlExecutionStatus_State) Type() protoreflect.EnumType
- type CrawlHostGroup
- func (*CrawlHostGroup) Descriptor() ([]byte, []int) deprecated
- func (x *CrawlHostGroup) GetCurrentUriId() string
- func (x *CrawlHostGroup) GetDelayFactor() float32
- func (x *CrawlHostGroup) GetFetchStartTimeStamp() *timestamppb.Timestamp
- func (x *CrawlHostGroup) GetId() string
- func (x *CrawlHostGroup) GetMaxRetries() int32
- func (x *CrawlHostGroup) GetMaxTimeBetweenPageLoadMs() int64
- func (x *CrawlHostGroup) GetMinTimeBetweenPageLoadMs() int64
- func (x *CrawlHostGroup) GetQueuedUriCount() int64
- func (x *CrawlHostGroup) GetRetryDelaySeconds() int32
- func (x *CrawlHostGroup) GetSessionToken() string
- func (*CrawlHostGroup) ProtoMessage()
- func (x *CrawlHostGroup) ProtoReflect() protoreflect.Message
- func (x *CrawlHostGroup) Reset()
- func (x *CrawlHostGroup) String() string
- type CrawlSeedRequest
- func (*CrawlSeedRequest) Descriptor() ([]byte, []int) deprecated
- func (x *CrawlSeedRequest) GetJob() *v1.ConfigObject
- func (x *CrawlSeedRequest) GetJobExecutionId() string
- func (x *CrawlSeedRequest) GetSeed() *v1.ConfigObject
- func (x *CrawlSeedRequest) GetTimeout() *timestamppb.Timestamp
- func (*CrawlSeedRequest) ProtoMessage()
- func (x *CrawlSeedRequest) ProtoReflect() protoreflect.Message
- func (x *CrawlSeedRequest) Reset()
- func (x *CrawlSeedRequest) String() string
- type FrontierClient
- type FrontierServer
- type Frontier_PageCompletedClient
- type Frontier_PageCompletedServer
- type JobExecutionStatus
- func (*JobExecutionStatus) Descriptor() ([]byte, []int) deprecated
- func (x *JobExecutionStatus) GetBytesCrawled() int64
- func (x *JobExecutionStatus) GetDesiredState() JobExecutionStatus_State
- func (x *JobExecutionStatus) GetDocumentsCrawled() int64
- func (x *JobExecutionStatus) GetDocumentsDenied() int64
- func (x *JobExecutionStatus) GetDocumentsFailed() int64
- func (x *JobExecutionStatus) GetDocumentsOutOfScope() int64
- func (x *JobExecutionStatus) GetDocumentsRetried() int64
- func (x *JobExecutionStatus) GetEndTime() *timestamppb.Timestamp
- func (x *JobExecutionStatus) GetError() *v11.Error
- func (x *JobExecutionStatus) GetExecutionsState() map[string]int32
- func (x *JobExecutionStatus) GetId() string
- func (x *JobExecutionStatus) GetJobId() string
- func (x *JobExecutionStatus) GetStartTime() *timestamppb.Timestamp
- func (x *JobExecutionStatus) GetState() JobExecutionStatus_State
- func (x *JobExecutionStatus) GetUrisCrawled() int64
- func (*JobExecutionStatus) ProtoMessage()
- func (x *JobExecutionStatus) ProtoReflect() protoreflect.Message
- func (x *JobExecutionStatus) Reset()
- func (x *JobExecutionStatus) String() string
- type JobExecutionStatus_State
- func (JobExecutionStatus_State) Descriptor() protoreflect.EnumDescriptor
- func (x JobExecutionStatus_State) Enum() *JobExecutionStatus_State
- func (JobExecutionStatus_State) EnumDescriptor() ([]byte, []int) deprecated
- func (x JobExecutionStatus_State) Number() protoreflect.EnumNumber
- func (x JobExecutionStatus_State) String() string
- func (JobExecutionStatus_State) Type() protoreflect.EnumType
- type PageHarvest
- func (*PageHarvest) Descriptor() ([]byte, []int) deprecated
- func (x *PageHarvest) GetError() *v11.Error
- func (x *PageHarvest) GetMetrics() *PageHarvest_Metrics
- func (m *PageHarvest) GetMsg() isPageHarvest_Msg
- func (x *PageHarvest) GetOutlink() *QueuedUri
- func (x *PageHarvest) GetSessionToken() string
- func (*PageHarvest) ProtoMessage()
- func (x *PageHarvest) ProtoReflect() protoreflect.Message
- func (x *PageHarvest) Reset()
- func (x *PageHarvest) String() string
- type PageHarvestSpec
- func (*PageHarvestSpec) Descriptor() ([]byte, []int) deprecated
- func (x *PageHarvestSpec) GetCrawlConfig() *v1.ConfigObject
- func (x *PageHarvestSpec) GetQueuedUri() *QueuedUri
- func (x *PageHarvestSpec) GetSessionToken() string
- func (*PageHarvestSpec) ProtoMessage()
- func (x *PageHarvestSpec) ProtoReflect() protoreflect.Message
- func (x *PageHarvestSpec) Reset()
- func (x *PageHarvestSpec) String() string
- type PageHarvest_Error
- type PageHarvest_Metrics
- func (*PageHarvest_Metrics) Descriptor() ([]byte, []int) deprecated
- func (x *PageHarvest_Metrics) GetBytesDownloaded() int64
- func (x *PageHarvest_Metrics) GetUriCount() int32
- func (*PageHarvest_Metrics) ProtoMessage()
- func (x *PageHarvest_Metrics) ProtoReflect() protoreflect.Message
- func (x *PageHarvest_Metrics) Reset()
- func (x *PageHarvest_Metrics) String() string
- type PageHarvest_Metrics_
- type PageHarvest_Outlink
- type QueuedUri
- func (*QueuedUri) Descriptor() ([]byte, []int) deprecated
- func (x *QueuedUri) GetAnnotation() []*v1.Annotation
- func (x *QueuedUri) GetCookies() []*Cookie
- func (x *QueuedUri) GetCrawlHostGroupId() string
- func (x *QueuedUri) GetDiscoveredTimeStamp() *timestamppb.Timestamp
- func (x *QueuedUri) GetDiscoveryPath() string
- func (x *QueuedUri) GetEarliestFetchTimeStamp() *timestamppb.Timestamp
- func (x *QueuedUri) GetError() *v11.Error
- func (x *QueuedUri) GetExecutionId() string
- func (x *QueuedUri) GetFetchStartTimeStamp() *timestamppb.Timestamp
- func (x *QueuedUri) GetId() string
- func (x *QueuedUri) GetIp() string
- func (x *QueuedUri) GetJobExecutionId() string
- func (x *QueuedUri) GetPageFetchTimeMs() int64
- func (x *QueuedUri) GetPolitenessRef() *v1.ConfigRef
- func (x *QueuedUri) GetPriorityWeight() float64
- func (x *QueuedUri) GetReferrer() string
- func (x *QueuedUri) GetRetries() int32
- func (x *QueuedUri) GetSeedUri() string
- func (x *QueuedUri) GetSequence() int64
- func (x *QueuedUri) GetUnresolved() bool
- func (x *QueuedUri) GetUri() string
- func (*QueuedUri) ProtoMessage()
- func (x *QueuedUri) ProtoReflect() protoreflect.Message
- func (x *QueuedUri) Reset()
- func (x *QueuedUri) String() string
- type UnimplementedFrontierServer
- func (UnimplementedFrontierServer) BusyCrawlHostGroupCount(context.Context, *emptypb.Empty) (*CountResponse, error)
- func (UnimplementedFrontierServer) CrawlSeed(context.Context, *CrawlSeedRequest) (*CrawlExecutionId, error)
- func (UnimplementedFrontierServer) GetNextPage(context.Context, *emptypb.Empty) (*PageHarvestSpec, error)
- func (UnimplementedFrontierServer) PageCompleted(Frontier_PageCompletedServer) error
- func (UnimplementedFrontierServer) QueueCountForCrawlExecution(context.Context, *CrawlExecutionId) (*CountResponse, error)
- func (UnimplementedFrontierServer) QueueCountForCrawlHostGroup(context.Context, *CrawlHostGroup) (*CountResponse, error)
- func (UnimplementedFrontierServer) QueueCountTotal(context.Context, *emptypb.Empty) (*CountResponse, error)
- type UnsafeFrontierServer
Constants ¶
This section is empty.
Variables ¶
var ( CrawlExecutionStatus_State_name = map[int32]string{ 0: "UNDEFINED", 1: "CREATED", 2: "FETCHING", 3: "SLEEPING", 4: "FINISHED", 5: "ABORTED_TIMEOUT", 6: "ABORTED_SIZE", 7: "ABORTED_MANUAL", 8: "FAILED", 9: "DIED", } CrawlExecutionStatus_State_value = map[string]int32{ "UNDEFINED": 0, "CREATED": 1, "FETCHING": 2, "SLEEPING": 3, "FINISHED": 4, "ABORTED_TIMEOUT": 5, "ABORTED_SIZE": 6, "ABORTED_MANUAL": 7, "FAILED": 8, "DIED": 9, } )
Enum value maps for CrawlExecutionStatus_State.
var ( JobExecutionStatus_State_name = map[int32]string{ 0: "UNDEFINED", 1: "CREATED", 2: "RUNNING", 3: "FINISHED", 4: "ABORTED_MANUAL", 5: "FAILED", 6: "DIED", } JobExecutionStatus_State_value = map[string]int32{ "UNDEFINED": 0, "CREATED": 1, "RUNNING": 2, "FINISHED": 3, "ABORTED_MANUAL": 4, "FAILED": 5, "DIED": 6, } )
Enum value maps for JobExecutionStatus_State.
var File_frontier_v1_frontier_proto protoreflect.FileDescriptor
var File_frontier_v1_resources_proto protoreflect.FileDescriptor
var Frontier_ServiceDesc = grpc.ServiceDesc{ ServiceName: "veidemann.api.frontier.v1.Frontier", HandlerType: (*FrontierServer)(nil), Methods: []grpc.MethodDesc{ { MethodName: "CrawlSeed", Handler: _Frontier_CrawlSeed_Handler, }, { MethodName: "GetNextPage", Handler: _Frontier_GetNextPage_Handler, }, { MethodName: "BusyCrawlHostGroupCount", Handler: _Frontier_BusyCrawlHostGroupCount_Handler, }, { MethodName: "QueueCountTotal", Handler: _Frontier_QueueCountTotal_Handler, }, { MethodName: "QueueCountForCrawlExecution", Handler: _Frontier_QueueCountForCrawlExecution_Handler, }, { MethodName: "QueueCountForCrawlHostGroup", Handler: _Frontier_QueueCountForCrawlHostGroup_Handler, }, }, Streams: []grpc.StreamDesc{ { StreamName: "PageCompleted", Handler: _Frontier_PageCompleted_Handler, ClientStreams: true, }, }, Metadata: "frontier/v1/frontier.proto", }
Frontier_ServiceDesc is the grpc.ServiceDesc for Frontier service. It's only intended for direct use with grpc.RegisterService, and not to be introspected or modified (even as a copy).
Functions ¶
func RegisterFrontierServer ¶
func RegisterFrontierServer(s grpc.ServiceRegistrar, srv FrontierServer)
Types ¶
type Cookie ¶
type Cookie struct { // Cookie name. Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // Cookie value. Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` // Cookie domain. Domain string `protobuf:"bytes,3,opt,name=domain,proto3" json:"domain,omitempty"` // Cookie path. Path string `protobuf:"bytes,4,opt,name=path,proto3" json:"path,omitempty"` // Cookie expiration date as the number of seconds since the UNIX epoch. Expires float64 `protobuf:"fixed64,5,opt,name=expires,proto3" json:"expires,omitempty"` // Cookie size. Size int32 `protobuf:"varint,6,opt,name=size,proto3" json:"size,omitempty"` // True if cookie is http-only. HttpOnly bool `protobuf:"varint,7,opt,name=http_only,json=httpOnly,proto3" json:"http_only,omitempty"` // True if cookie is secure. Secure bool `protobuf:"varint,8,opt,name=secure,proto3" json:"secure,omitempty"` // True in case of session cookie. Session bool `protobuf:"varint,9,opt,name=session,proto3" json:"session,omitempty"` // Cookie SameSite type. SameSite string `protobuf:"bytes,10,opt,name=same_site,json=sameSite,proto3" json:"same_site,omitempty"` // contains filtered or unexported fields }
func (*Cookie) Descriptor
deprecated
func (*Cookie) GetExpires ¶
func (*Cookie) GetHttpOnly ¶
func (*Cookie) GetSameSite ¶
func (*Cookie) GetSession ¶
func (*Cookie) ProtoMessage ¶
func (*Cookie) ProtoMessage()
func (*Cookie) ProtoReflect ¶
func (x *Cookie) ProtoReflect() protoreflect.Message
type CountResponse ¶
type CountResponse struct { Count int64 `protobuf:"varint,1,opt,name=count,proto3" json:"count,omitempty"` // contains filtered or unexported fields }
func (*CountResponse) Descriptor
deprecated
func (*CountResponse) Descriptor() ([]byte, []int)
Deprecated: Use CountResponse.ProtoReflect.Descriptor instead.
func (*CountResponse) GetCount ¶
func (x *CountResponse) GetCount() int64
func (*CountResponse) ProtoMessage ¶
func (*CountResponse) ProtoMessage()
func (*CountResponse) ProtoReflect ¶
func (x *CountResponse) ProtoReflect() protoreflect.Message
func (*CountResponse) Reset ¶
func (x *CountResponse) Reset()
func (*CountResponse) String ¶
func (x *CountResponse) String() string
type CrawlExecutionId ¶
type CrawlExecutionId struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` // contains filtered or unexported fields }
The execution id for a seed crawl
func (*CrawlExecutionId) Descriptor
deprecated
func (*CrawlExecutionId) Descriptor() ([]byte, []int)
Deprecated: Use CrawlExecutionId.ProtoReflect.Descriptor instead.
func (*CrawlExecutionId) GetId ¶
func (x *CrawlExecutionId) GetId() string
func (*CrawlExecutionId) ProtoMessage ¶
func (*CrawlExecutionId) ProtoMessage()
func (*CrawlExecutionId) ProtoReflect ¶
func (x *CrawlExecutionId) ProtoReflect() protoreflect.Message
func (*CrawlExecutionId) Reset ¶
func (x *CrawlExecutionId) Reset()
func (*CrawlExecutionId) String ¶
func (x *CrawlExecutionId) String() string
type CrawlExecutionStatus ¶
type CrawlExecutionStatus struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` State CrawlExecutionStatus_State `protobuf:"varint,2,opt,name=state,proto3,enum=veidemann.api.frontier.v1.CrawlExecutionStatus_State" json:"state,omitempty"` JobId string `protobuf:"bytes,3,opt,name=job_id,json=jobId,proto3" json:"job_id,omitempty"` SeedId string `protobuf:"bytes,4,opt,name=seed_id,json=seedId,proto3" json:"seed_id,omitempty"` StartTime *timestamppb.Timestamp `protobuf:"bytes,6,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` // When this crawl execution started crawling EndTime *timestamppb.Timestamp `protobuf:"bytes,7,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"` // When this crawl execution ended DocumentsCrawled int64 `protobuf:"varint,8,opt,name=documents_crawled,json=documentsCrawled,proto3" json:"documents_crawled,omitempty"` BytesCrawled int64 `protobuf:"varint,9,opt,name=bytes_crawled,json=bytesCrawled,proto3" json:"bytes_crawled,omitempty"` UrisCrawled int64 `protobuf:"varint,10,opt,name=uris_crawled,json=urisCrawled,proto3" json:"uris_crawled,omitempty"` DocumentsFailed int64 `protobuf:"varint,11,opt,name=documents_failed,json=documentsFailed,proto3" json:"documents_failed,omitempty"` DocumentsOutOfScope int64 `protobuf:"varint,12,opt,name=documents_out_of_scope,json=documentsOutOfScope,proto3" json:"documents_out_of_scope,omitempty"` DocumentsRetried int64 `protobuf:"varint,13,opt,name=documents_retried,json=documentsRetried,proto3" json:"documents_retried,omitempty"` DocumentsDenied int64 `protobuf:"varint,14,opt,name=documents_denied,json=documentsDenied,proto3" json:"documents_denied,omitempty"` LastChangeTime *timestamppb.Timestamp `protobuf:"bytes,15,opt,name=last_change_time,json=lastChangeTime,proto3" json:"last_change_time,omitempty"` // When this record was last updated CreatedTime *timestamppb.Timestamp `protobuf:"bytes,16,opt,name=created_time,json=createdTime,proto3" 
json:"created_time,omitempty"` // When this crawl execution was created CurrentUriId []string `protobuf:"bytes,20,rep,name=current_uri_id,json=currentUriId,proto3" json:"current_uri_id,omitempty"` JobExecutionId string `protobuf:"bytes,21,opt,name=job_execution_id,json=jobExecutionId,proto3" json:"job_execution_id,omitempty"` Error *v11.Error `protobuf:"bytes,22,opt,name=error,proto3" json:"error,omitempty"` // Extra description of error state DesiredState CrawlExecutionStatus_State `` // Used when an external process wants to abort execution /* 157-byte string literal not displayed */ // contains filtered or unexported fields }
Metadata about a crawl execution. A crawl execution is the complete harvest of a seed as specified in the connected job's configuration.
func (*CrawlExecutionStatus) Descriptor
deprecated
func (*CrawlExecutionStatus) Descriptor() ([]byte, []int)
Deprecated: Use CrawlExecutionStatus.ProtoReflect.Descriptor instead.
func (*CrawlExecutionStatus) GetBytesCrawled ¶
func (x *CrawlExecutionStatus) GetBytesCrawled() int64
func (*CrawlExecutionStatus) GetCreatedTime ¶
func (x *CrawlExecutionStatus) GetCreatedTime() *timestamppb.Timestamp
func (*CrawlExecutionStatus) GetCurrentUriId ¶
func (x *CrawlExecutionStatus) GetCurrentUriId() []string
func (*CrawlExecutionStatus) GetDesiredState ¶
func (x *CrawlExecutionStatus) GetDesiredState() CrawlExecutionStatus_State
func (*CrawlExecutionStatus) GetDocumentsCrawled ¶
func (x *CrawlExecutionStatus) GetDocumentsCrawled() int64
func (*CrawlExecutionStatus) GetDocumentsDenied ¶
func (x *CrawlExecutionStatus) GetDocumentsDenied() int64
func (*CrawlExecutionStatus) GetDocumentsFailed ¶
func (x *CrawlExecutionStatus) GetDocumentsFailed() int64
func (*CrawlExecutionStatus) GetDocumentsOutOfScope ¶
func (x *CrawlExecutionStatus) GetDocumentsOutOfScope() int64
func (*CrawlExecutionStatus) GetDocumentsRetried ¶
func (x *CrawlExecutionStatus) GetDocumentsRetried() int64
func (*CrawlExecutionStatus) GetEndTime ¶
func (x *CrawlExecutionStatus) GetEndTime() *timestamppb.Timestamp
func (*CrawlExecutionStatus) GetError ¶
func (x *CrawlExecutionStatus) GetError() *v11.Error
func (*CrawlExecutionStatus) GetId ¶
func (x *CrawlExecutionStatus) GetId() string
func (*CrawlExecutionStatus) GetJobExecutionId ¶
func (x *CrawlExecutionStatus) GetJobExecutionId() string
func (*CrawlExecutionStatus) GetJobId ¶
func (x *CrawlExecutionStatus) GetJobId() string
func (*CrawlExecutionStatus) GetLastChangeTime ¶
func (x *CrawlExecutionStatus) GetLastChangeTime() *timestamppb.Timestamp
func (*CrawlExecutionStatus) GetSeedId ¶
func (x *CrawlExecutionStatus) GetSeedId() string
func (*CrawlExecutionStatus) GetStartTime ¶
func (x *CrawlExecutionStatus) GetStartTime() *timestamppb.Timestamp
func (*CrawlExecutionStatus) GetState ¶
func (x *CrawlExecutionStatus) GetState() CrawlExecutionStatus_State
func (*CrawlExecutionStatus) GetUrisCrawled ¶
func (x *CrawlExecutionStatus) GetUrisCrawled() int64
func (*CrawlExecutionStatus) ProtoMessage ¶
func (*CrawlExecutionStatus) ProtoMessage()
func (*CrawlExecutionStatus) ProtoReflect ¶
func (x *CrawlExecutionStatus) ProtoReflect() protoreflect.Message
func (*CrawlExecutionStatus) Reset ¶
func (x *CrawlExecutionStatus) Reset()
func (*CrawlExecutionStatus) String ¶
func (x *CrawlExecutionStatus) String() string
type CrawlExecutionStatusChange ¶
type CrawlExecutionStatusChange struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` State CrawlExecutionStatus_State `protobuf:"varint,2,opt,name=state,proto3,enum=veidemann.api.frontier.v1.CrawlExecutionStatus_State" json:"state,omitempty"` EndTime *timestamppb.Timestamp `protobuf:"bytes,4,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"` // When this crawl execution ended AddDocumentsCrawled int64 `protobuf:"varint,5,opt,name=add_documents_crawled,json=addDocumentsCrawled,proto3" json:"add_documents_crawled,omitempty"` AddBytesCrawled int64 `protobuf:"varint,6,opt,name=add_bytes_crawled,json=addBytesCrawled,proto3" json:"add_bytes_crawled,omitempty"` AddUrisCrawled int64 `protobuf:"varint,7,opt,name=add_uris_crawled,json=addUrisCrawled,proto3" json:"add_uris_crawled,omitempty"` AddDocumentsFailed int64 `protobuf:"varint,8,opt,name=add_documents_failed,json=addDocumentsFailed,proto3" json:"add_documents_failed,omitempty"` AddDocumentsOutOfScope int64 `` /* 134-byte string literal not displayed */ AddDocumentsRetried int64 `protobuf:"varint,10,opt,name=add_documents_retried,json=addDocumentsRetried,proto3" json:"add_documents_retried,omitempty"` AddDocumentsDenied int64 `protobuf:"varint,11,opt,name=add_documents_denied,json=addDocumentsDenied,proto3" json:"add_documents_denied,omitempty"` AddCurrentUri *QueuedUri `protobuf:"bytes,12,opt,name=add_current_uri,json=addCurrentUri,proto3" json:"add_current_uri,omitempty"` DeleteCurrentUri *QueuedUri `protobuf:"bytes,13,opt,name=delete_current_uri,json=deleteCurrentUri,proto3" json:"delete_current_uri,omitempty"` Error *v11.Error `protobuf:"bytes,14,opt,name=error,proto3" json:"error,omitempty"` // Extra description of error state // contains filtered or unexported fields }
func (*CrawlExecutionStatusChange) Descriptor
deprecated
func (*CrawlExecutionStatusChange) Descriptor() ([]byte, []int)
Deprecated: Use CrawlExecutionStatusChange.ProtoReflect.Descriptor instead.
func (*CrawlExecutionStatusChange) GetAddBytesCrawled ¶
func (x *CrawlExecutionStatusChange) GetAddBytesCrawled() int64
func (*CrawlExecutionStatusChange) GetAddCurrentUri ¶
func (x *CrawlExecutionStatusChange) GetAddCurrentUri() *QueuedUri
func (*CrawlExecutionStatusChange) GetAddDocumentsCrawled ¶
func (x *CrawlExecutionStatusChange) GetAddDocumentsCrawled() int64
func (*CrawlExecutionStatusChange) GetAddDocumentsDenied ¶
func (x *CrawlExecutionStatusChange) GetAddDocumentsDenied() int64
func (*CrawlExecutionStatusChange) GetAddDocumentsFailed ¶
func (x *CrawlExecutionStatusChange) GetAddDocumentsFailed() int64
func (*CrawlExecutionStatusChange) GetAddDocumentsOutOfScope ¶
func (x *CrawlExecutionStatusChange) GetAddDocumentsOutOfScope() int64
func (*CrawlExecutionStatusChange) GetAddDocumentsRetried ¶
func (x *CrawlExecutionStatusChange) GetAddDocumentsRetried() int64
func (*CrawlExecutionStatusChange) GetAddUrisCrawled ¶
func (x *CrawlExecutionStatusChange) GetAddUrisCrawled() int64
func (*CrawlExecutionStatusChange) GetDeleteCurrentUri ¶
func (x *CrawlExecutionStatusChange) GetDeleteCurrentUri() *QueuedUri
func (*CrawlExecutionStatusChange) GetEndTime ¶
func (x *CrawlExecutionStatusChange) GetEndTime() *timestamppb.Timestamp
func (*CrawlExecutionStatusChange) GetError ¶
func (x *CrawlExecutionStatusChange) GetError() *v11.Error
func (*CrawlExecutionStatusChange) GetId ¶
func (x *CrawlExecutionStatusChange) GetId() string
func (*CrawlExecutionStatusChange) GetState ¶
func (x *CrawlExecutionStatusChange) GetState() CrawlExecutionStatus_State
func (*CrawlExecutionStatusChange) ProtoMessage ¶
func (*CrawlExecutionStatusChange) ProtoMessage()
func (*CrawlExecutionStatusChange) ProtoReflect ¶
func (x *CrawlExecutionStatusChange) ProtoReflect() protoreflect.Message
func (*CrawlExecutionStatusChange) Reset ¶
func (x *CrawlExecutionStatusChange) Reset()
func (*CrawlExecutionStatusChange) String ¶
func (x *CrawlExecutionStatusChange) String() string
type CrawlExecutionStatus_State ¶
type CrawlExecutionStatus_State int32
const ( CrawlExecutionStatus_UNDEFINED CrawlExecutionStatus_State = 0 CrawlExecutionStatus_CREATED CrawlExecutionStatus_State = 1 CrawlExecutionStatus_FETCHING CrawlExecutionStatus_State = 2 CrawlExecutionStatus_SLEEPING CrawlExecutionStatus_State = 3 CrawlExecutionStatus_FINISHED CrawlExecutionStatus_State = 4 CrawlExecutionStatus_ABORTED_TIMEOUT CrawlExecutionStatus_State = 5 CrawlExecutionStatus_ABORTED_SIZE CrawlExecutionStatus_State = 6 CrawlExecutionStatus_ABORTED_MANUAL CrawlExecutionStatus_State = 7 CrawlExecutionStatus_FAILED CrawlExecutionStatus_State = 8 CrawlExecutionStatus_DIED CrawlExecutionStatus_State = 9 )
func (CrawlExecutionStatus_State) Descriptor ¶
func (CrawlExecutionStatus_State) Descriptor() protoreflect.EnumDescriptor
func (CrawlExecutionStatus_State) Enum ¶
func (x CrawlExecutionStatus_State) Enum() *CrawlExecutionStatus_State
func (CrawlExecutionStatus_State) EnumDescriptor
deprecated
func (CrawlExecutionStatus_State) EnumDescriptor() ([]byte, []int)
Deprecated: Use CrawlExecutionStatus_State.Descriptor instead.
func (CrawlExecutionStatus_State) Number ¶
func (x CrawlExecutionStatus_State) Number() protoreflect.EnumNumber
func (CrawlExecutionStatus_State) String ¶
func (x CrawlExecutionStatus_State) String() string
func (CrawlExecutionStatus_State) Type ¶
func (CrawlExecutionStatus_State) Type() protoreflect.EnumType
type CrawlHostGroup ¶
type CrawlHostGroup struct { // If IP-address is not resolved this is SHA-1 of URL. // After IP-resolution this is SHA-1 hash of IP or CrawlHostGroupConfig id if one such config matched. Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` // Lower limit for time between pageloads from this CrawlHostGroup. MinTimeBetweenPageLoadMs int64 `` /* 142-byte string literal not displayed */ // Upper limit for time between pageloads from this CrawlHostGroup. // This is the upper limit for calculation of delay time, but actual time might be higher depending on // the harvester's capacity. MaxTimeBetweenPageLoadMs int64 `` /* 142-byte string literal not displayed */ // The fetch time of the URI is multiplied with this value to get the delay time before fetching the next URI. // If min_time_between_page_load_ms and/or max_time_between_page_load_ms are set, then those values are used as // the upper/lower limits for delay. // If delay_factor is unset or zero, then a delay_factor of one is assumed. If delay_factor is negative, // a delay_factor of zero is assumed. DelayFactor float32 `protobuf:"fixed32,4,opt,name=delay_factor,json=delayFactor,proto3" json:"delay_factor,omitempty"` // The maximum number of retries before giving up fetching a uri. MaxRetries int32 `protobuf:"varint,5,opt,name=max_retries,json=maxRetries,proto3" json:"max_retries,omitempty"` // The minimum time before a failed page load is retried. RetryDelaySeconds int32 `protobuf:"varint,6,opt,name=retry_delay_seconds,json=retryDelaySeconds,proto3" json:"retry_delay_seconds,omitempty"` // The number of queued URIs belonging to this CrawlHostGroup QueuedUriCount int64 `protobuf:"varint,7,opt,name=queued_uri_count,json=queuedUriCount,proto3" json:"queued_uri_count,omitempty"` // If this CrawlHostGroup is busy, this field contains the id of the uri currently being fetched. 
CurrentUriId string `protobuf:"bytes,8,opt,name=current_uri_id,json=currentUriId,proto3" json:"current_uri_id,omitempty"` // Token to guard against two harvesters responding to the same request. SessionToken string `protobuf:"bytes,9,opt,name=session_token,json=sessionToken,proto3" json:"session_token,omitempty"` // The time when frontier sent a PageHarvestSpec to a harvester. FetchStartTimeStamp *timestamppb.Timestamp `protobuf:"bytes,10,opt,name=fetch_start_time_stamp,json=fetchStartTimeStamp,proto3" json:"fetch_start_time_stamp,omitempty"` // contains filtered or unexported fields }
func (*CrawlHostGroup) Descriptor
deprecated
func (*CrawlHostGroup) Descriptor() ([]byte, []int)
Deprecated: Use CrawlHostGroup.ProtoReflect.Descriptor instead.
func (*CrawlHostGroup) GetCurrentUriId ¶
func (x *CrawlHostGroup) GetCurrentUriId() string
func (*CrawlHostGroup) GetDelayFactor ¶
func (x *CrawlHostGroup) GetDelayFactor() float32
func (*CrawlHostGroup) GetFetchStartTimeStamp ¶
func (x *CrawlHostGroup) GetFetchStartTimeStamp() *timestamppb.Timestamp
func (*CrawlHostGroup) GetId ¶
func (x *CrawlHostGroup) GetId() string
func (*CrawlHostGroup) GetMaxRetries ¶
func (x *CrawlHostGroup) GetMaxRetries() int32
func (*CrawlHostGroup) GetMaxTimeBetweenPageLoadMs ¶
func (x *CrawlHostGroup) GetMaxTimeBetweenPageLoadMs() int64
func (*CrawlHostGroup) GetMinTimeBetweenPageLoadMs ¶
func (x *CrawlHostGroup) GetMinTimeBetweenPageLoadMs() int64
func (*CrawlHostGroup) GetQueuedUriCount ¶
func (x *CrawlHostGroup) GetQueuedUriCount() int64
func (*CrawlHostGroup) GetRetryDelaySeconds ¶
func (x *CrawlHostGroup) GetRetryDelaySeconds() int32
func (*CrawlHostGroup) GetSessionToken ¶
func (x *CrawlHostGroup) GetSessionToken() string
func (*CrawlHostGroup) ProtoMessage ¶
func (*CrawlHostGroup) ProtoMessage()
func (*CrawlHostGroup) ProtoReflect ¶
func (x *CrawlHostGroup) ProtoReflect() protoreflect.Message
func (*CrawlHostGroup) Reset ¶
func (x *CrawlHostGroup) Reset()
func (*CrawlHostGroup) String ¶
func (x *CrawlHostGroup) String() string
type CrawlSeedRequest ¶
type CrawlSeedRequest struct { JobExecutionId string `protobuf:"bytes,1,opt,name=job_execution_id,json=jobExecutionId,proto3" json:"job_execution_id,omitempty"` Job *v1.ConfigObject `protobuf:"bytes,5,opt,name=job,proto3" json:"job,omitempty"` Seed *v1.ConfigObject `protobuf:"bytes,6,opt,name=seed,proto3" json:"seed,omitempty"` // When this seed should stop crawling. Absence of this value indicates no timeout Timeout *timestamppb.Timestamp `protobuf:"bytes,7,opt,name=timeout,proto3" json:"timeout,omitempty"` // contains filtered or unexported fields }
func (*CrawlSeedRequest) Descriptor
deprecated
func (*CrawlSeedRequest) Descriptor() ([]byte, []int)
Deprecated: Use CrawlSeedRequest.ProtoReflect.Descriptor instead.
func (*CrawlSeedRequest) GetJob ¶
func (x *CrawlSeedRequest) GetJob() *v1.ConfigObject
func (*CrawlSeedRequest) GetJobExecutionId ¶
func (x *CrawlSeedRequest) GetJobExecutionId() string
func (*CrawlSeedRequest) GetSeed ¶
func (x *CrawlSeedRequest) GetSeed() *v1.ConfigObject
func (*CrawlSeedRequest) GetTimeout ¶
func (x *CrawlSeedRequest) GetTimeout() *timestamppb.Timestamp
func (*CrawlSeedRequest) ProtoMessage ¶
func (*CrawlSeedRequest) ProtoMessage()
func (*CrawlSeedRequest) ProtoReflect ¶
func (x *CrawlSeedRequest) ProtoReflect() protoreflect.Message
func (*CrawlSeedRequest) Reset ¶
func (x *CrawlSeedRequest) Reset()
func (*CrawlSeedRequest) String ¶
func (x *CrawlSeedRequest) String() string
type FrontierClient ¶
type FrontierClient interface { // Start crawling seed CrawlSeed(ctx context.Context, in *CrawlSeedRequest, opts ...grpc.CallOption) (*CrawlExecutionId, error) // Request a URI from the Frontier's queue. // Used by a Harvester to fetch a new page. If no URI is ready for harvesting, Frontier should return // gRPC status NOT_FOUND. Harvester should then retry the request after a reasonable backoff time. GetNextPage(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*PageHarvestSpec, error) // Inform Frontier that a page fetch was finished. // Contains metrics, outlinks and error as a stream of messages. Client closes stream when finished. PageCompleted(ctx context.Context, opts ...grpc.CallOption) (Frontier_PageCompletedClient, error) // The number of busy CrawlHostGroups which essentially is the number of web pages currently downloading BusyCrawlHostGroupCount(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*CountResponse, error) // Total number of queued URIs QueueCountTotal(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*CountResponse, error) // Number of queued URIs for a CrawlExecution QueueCountForCrawlExecution(ctx context.Context, in *CrawlExecutionId, opts ...grpc.CallOption) (*CountResponse, error) // Number of queued URIs for a CrawlHostGroup QueueCountForCrawlHostGroup(ctx context.Context, in *CrawlHostGroup, opts ...grpc.CallOption) (*CountResponse, error) }
FrontierClient is the client API for Frontier service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
func NewFrontierClient ¶
func NewFrontierClient(cc grpc.ClientConnInterface) FrontierClient
type FrontierServer ¶
type FrontierServer interface { // Start crawling seed CrawlSeed(context.Context, *CrawlSeedRequest) (*CrawlExecutionId, error) // Request a URI from the Frontier's queue. // Used by a Harvester to fetch a new page. If no URI is ready for harvesting, Frontier should return // gRPC status NOT_FOUND. Harvester should then retry the request after a reasonable backoff time. GetNextPage(context.Context, *emptypb.Empty) (*PageHarvestSpec, error) // Inform Frontier that a page fetch was finished. // Contains metrics, outlinks and error as a stream of messages. Client closes stream when finished. PageCompleted(Frontier_PageCompletedServer) error // The number of busy CrawlHostGroups which essentially is the number of web pages currently downloading BusyCrawlHostGroupCount(context.Context, *emptypb.Empty) (*CountResponse, error) // Total number of queued URIs QueueCountTotal(context.Context, *emptypb.Empty) (*CountResponse, error) // Number of queued URIs for a CrawlExecution QueueCountForCrawlExecution(context.Context, *CrawlExecutionId) (*CountResponse, error) // Number of queued URIs for a CrawlHostGroup QueueCountForCrawlHostGroup(context.Context, *CrawlHostGroup) (*CountResponse, error) // contains filtered or unexported methods }
FrontierServer is the server API for Frontier service. All implementations must embed UnimplementedFrontierServer for forward compatibility.
type Frontier_PageCompletedClient ¶
type Frontier_PageCompletedClient interface { Send(*PageHarvest) error CloseAndRecv() (*emptypb.Empty, error) grpc.ClientStream }
type Frontier_PageCompletedServer ¶
type Frontier_PageCompletedServer interface { SendAndClose(*emptypb.Empty) error Recv() (*PageHarvest, error) grpc.ServerStream }
type JobExecutionStatus ¶
type JobExecutionStatus struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` JobId string `protobuf:"bytes,2,opt,name=job_id,json=jobId,proto3" json:"job_id,omitempty"` State JobExecutionStatus_State `protobuf:"varint,3,opt,name=state,proto3,enum=veidemann.api.frontier.v1.JobExecutionStatus_State" json:"state,omitempty"` ExecutionsState map[string]int32 `` /* 195-byte string literal not displayed */ StartTime *timestamppb.Timestamp `protobuf:"bytes,6,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` EndTime *timestamppb.Timestamp `protobuf:"bytes,7,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"` DocumentsCrawled int64 `protobuf:"varint,8,opt,name=documents_crawled,json=documentsCrawled,proto3" json:"documents_crawled,omitempty"` BytesCrawled int64 `protobuf:"varint,9,opt,name=bytes_crawled,json=bytesCrawled,proto3" json:"bytes_crawled,omitempty"` UrisCrawled int64 `protobuf:"varint,10,opt,name=uris_crawled,json=urisCrawled,proto3" json:"uris_crawled,omitempty"` DocumentsFailed int64 `protobuf:"varint,11,opt,name=documents_failed,json=documentsFailed,proto3" json:"documents_failed,omitempty"` DocumentsOutOfScope int64 `protobuf:"varint,12,opt,name=documents_out_of_scope,json=documentsOutOfScope,proto3" json:"documents_out_of_scope,omitempty"` DocumentsRetried int64 `protobuf:"varint,13,opt,name=documents_retried,json=documentsRetried,proto3" json:"documents_retried,omitempty"` DocumentsDenied int64 `protobuf:"varint,14,opt,name=documents_denied,json=documentsDenied,proto3" json:"documents_denied,omitempty"` Error *v11.Error `protobuf:"bytes,15,opt,name=error,proto3" json:"error,omitempty"` // Extra description of error state DesiredState JobExecutionStatus_State `` // Used when an external process wants to abort execution /* 155-byte string literal not displayed */ // contains filtered or unexported fields }
Metadata about an execution of a job. A job execution is the sum of all crawl executions for a job at a specific time.
func (*JobExecutionStatus) Descriptor
deprecated
func (*JobExecutionStatus) Descriptor() ([]byte, []int)
Deprecated: Use JobExecutionStatus.ProtoReflect.Descriptor instead.
func (*JobExecutionStatus) GetBytesCrawled ¶
func (x *JobExecutionStatus) GetBytesCrawled() int64
func (*JobExecutionStatus) GetDesiredState ¶
func (x *JobExecutionStatus) GetDesiredState() JobExecutionStatus_State
func (*JobExecutionStatus) GetDocumentsCrawled ¶
func (x *JobExecutionStatus) GetDocumentsCrawled() int64
func (*JobExecutionStatus) GetDocumentsDenied ¶
func (x *JobExecutionStatus) GetDocumentsDenied() int64
func (*JobExecutionStatus) GetDocumentsFailed ¶
func (x *JobExecutionStatus) GetDocumentsFailed() int64
func (*JobExecutionStatus) GetDocumentsOutOfScope ¶
func (x *JobExecutionStatus) GetDocumentsOutOfScope() int64
func (*JobExecutionStatus) GetDocumentsRetried ¶
func (x *JobExecutionStatus) GetDocumentsRetried() int64
func (*JobExecutionStatus) GetEndTime ¶
func (x *JobExecutionStatus) GetEndTime() *timestamppb.Timestamp
func (*JobExecutionStatus) GetError ¶
func (x *JobExecutionStatus) GetError() *v11.Error
func (*JobExecutionStatus) GetExecutionsState ¶
func (x *JobExecutionStatus) GetExecutionsState() map[string]int32
func (*JobExecutionStatus) GetId ¶
func (x *JobExecutionStatus) GetId() string
func (*JobExecutionStatus) GetJobId ¶
func (x *JobExecutionStatus) GetJobId() string
func (*JobExecutionStatus) GetStartTime ¶
func (x *JobExecutionStatus) GetStartTime() *timestamppb.Timestamp
func (*JobExecutionStatus) GetState ¶
func (x *JobExecutionStatus) GetState() JobExecutionStatus_State
func (*JobExecutionStatus) GetUrisCrawled ¶
func (x *JobExecutionStatus) GetUrisCrawled() int64
func (*JobExecutionStatus) ProtoMessage ¶
func (*JobExecutionStatus) ProtoMessage()
func (*JobExecutionStatus) ProtoReflect ¶
func (x *JobExecutionStatus) ProtoReflect() protoreflect.Message
func (*JobExecutionStatus) Reset ¶
func (x *JobExecutionStatus) Reset()
func (*JobExecutionStatus) String ¶
func (x *JobExecutionStatus) String() string
type JobExecutionStatus_State ¶
type JobExecutionStatus_State int32
const ( JobExecutionStatus_UNDEFINED JobExecutionStatus_State = 0 JobExecutionStatus_CREATED JobExecutionStatus_State = 1 JobExecutionStatus_RUNNING JobExecutionStatus_State = 2 JobExecutionStatus_FINISHED JobExecutionStatus_State = 3 JobExecutionStatus_ABORTED_MANUAL JobExecutionStatus_State = 4 JobExecutionStatus_FAILED JobExecutionStatus_State = 5 JobExecutionStatus_DIED JobExecutionStatus_State = 6 )
func (JobExecutionStatus_State) Descriptor ¶
func (JobExecutionStatus_State) Descriptor() protoreflect.EnumDescriptor
func (JobExecutionStatus_State) Enum ¶
func (x JobExecutionStatus_State) Enum() *JobExecutionStatus_State
func (JobExecutionStatus_State) EnumDescriptor
deprecated
func (JobExecutionStatus_State) EnumDescriptor() ([]byte, []int)
Deprecated: Use JobExecutionStatus_State.Descriptor instead.
func (JobExecutionStatus_State) Number ¶
func (x JobExecutionStatus_State) Number() protoreflect.EnumNumber
func (JobExecutionStatus_State) String ¶
func (x JobExecutionStatus_State) String() string
func (JobExecutionStatus_State) Type ¶
func (JobExecutionStatus_State) Type() protoreflect.EnumType
type PageHarvest ¶
type PageHarvest struct { // Types that are assignable to Msg: // // *PageHarvest_Metrics_ // *PageHarvest_Outlink // *PageHarvest_Error Msg isPageHarvest_Msg `protobuf_oneof:"msg"` // Session token from the PageHarvestSpec. SessionToken string `protobuf:"bytes,5,opt,name=session_token,json=sessionToken,proto3" json:"session_token,omitempty"` // contains filtered or unexported fields }
Message sent from Harvester to return the harvest result. When the fetch is done, a stream of PageHarvest objects is returned: The first object contains metrics. Subsequent objects contain outlinks until all outlinks are sent. Finally, the client should complete the request.
func (*PageHarvest) Descriptor
deprecated
func (*PageHarvest) Descriptor() ([]byte, []int)
Deprecated: Use PageHarvest.ProtoReflect.Descriptor instead.
func (*PageHarvest) GetError ¶
func (x *PageHarvest) GetError() *v11.Error
func (*PageHarvest) GetMetrics ¶
func (x *PageHarvest) GetMetrics() *PageHarvest_Metrics
func (*PageHarvest) GetMsg ¶
func (m *PageHarvest) GetMsg() isPageHarvest_Msg
func (*PageHarvest) GetOutlink ¶
func (x *PageHarvest) GetOutlink() *QueuedUri
func (*PageHarvest) GetSessionToken ¶
func (x *PageHarvest) GetSessionToken() string
func (*PageHarvest) ProtoMessage ¶
func (*PageHarvest) ProtoMessage()
func (*PageHarvest) ProtoReflect ¶
func (x *PageHarvest) ProtoReflect() protoreflect.Message
func (*PageHarvest) Reset ¶
func (x *PageHarvest) Reset()
func (*PageHarvest) String ¶
func (x *PageHarvest) String() string
type PageHarvestSpec ¶
type PageHarvestSpec struct { // The URI to fetch QueuedUri *QueuedUri `protobuf:"bytes,1,opt,name=queued_uri,json=queuedUri,proto3" json:"queued_uri,omitempty"` // The configuration for the fetch CrawlConfig *v1.ConfigObject `protobuf:"bytes,2,opt,name=crawl_config,json=crawlConfig,proto3" json:"crawl_config,omitempty"` // Session token for this request. // The Harvester is responsible for setting the same session token in all responses to this request. SessionToken string `protobuf:"bytes,3,opt,name=session_token,json=sessionToken,proto3" json:"session_token,omitempty"` // contains filtered or unexported fields }
A specification of the page to fetch.
func (*PageHarvestSpec) Descriptor
deprecated
func (*PageHarvestSpec) Descriptor() ([]byte, []int)
Deprecated: Use PageHarvestSpec.ProtoReflect.Descriptor instead.
func (*PageHarvestSpec) GetCrawlConfig ¶
func (x *PageHarvestSpec) GetCrawlConfig() *v1.ConfigObject
func (*PageHarvestSpec) GetQueuedUri ¶
func (x *PageHarvestSpec) GetQueuedUri() *QueuedUri
func (*PageHarvestSpec) GetSessionToken ¶
func (x *PageHarvestSpec) GetSessionToken() string
func (*PageHarvestSpec) ProtoMessage ¶
func (*PageHarvestSpec) ProtoMessage()
func (*PageHarvestSpec) ProtoReflect ¶
func (x *PageHarvestSpec) ProtoReflect() protoreflect.Message
func (*PageHarvestSpec) Reset ¶
func (x *PageHarvestSpec) Reset()
func (*PageHarvestSpec) String ¶
func (x *PageHarvestSpec) String() string
type PageHarvest_Error ¶
type PageHarvest_Metrics ¶
type PageHarvest_Metrics struct { // The number of uri's downloaded. The requested uri + embedded resources UriCount int32 `protobuf:"varint,1,opt,name=uri_count,json=uriCount,proto3" json:"uri_count,omitempty"` // Byte count for the resources downloaded. Includes embedded resources BytesDownloaded int64 `protobuf:"varint,2,opt,name=bytes_downloaded,json=bytesDownloaded,proto3" json:"bytes_downloaded,omitempty"` // contains filtered or unexported fields }
func (*PageHarvest_Metrics) Descriptor
deprecated
func (*PageHarvest_Metrics) Descriptor() ([]byte, []int)
Deprecated: Use PageHarvest_Metrics.ProtoReflect.Descriptor instead.
func (*PageHarvest_Metrics) GetBytesDownloaded ¶
func (x *PageHarvest_Metrics) GetBytesDownloaded() int64
func (*PageHarvest_Metrics) GetUriCount ¶
func (x *PageHarvest_Metrics) GetUriCount() int32
func (*PageHarvest_Metrics) ProtoMessage ¶
func (*PageHarvest_Metrics) ProtoMessage()
func (*PageHarvest_Metrics) ProtoReflect ¶
func (x *PageHarvest_Metrics) ProtoReflect() protoreflect.Message
func (*PageHarvest_Metrics) Reset ¶
func (x *PageHarvest_Metrics) Reset()
func (*PageHarvest_Metrics) String ¶
func (x *PageHarvest_Metrics) String() string
type PageHarvest_Metrics_ ¶
type PageHarvest_Metrics_ struct { // Collected metrics for the page fetched Metrics *PageHarvest_Metrics `protobuf:"bytes,2,opt,name=metrics,proto3,oneof"` }
type PageHarvest_Outlink ¶
type PageHarvest_Outlink struct { // The outlinks found in the harvested page Outlink *QueuedUri `protobuf:"bytes,3,opt,name=outlink,proto3,oneof"` }
type QueuedUri ¶
type QueuedUri struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` ExecutionId string `protobuf:"bytes,2,opt,name=execution_id,json=executionId,proto3" json:"execution_id,omitempty"` DiscoveredTimeStamp *timestamppb.Timestamp `protobuf:"bytes,3,opt,name=discovered_time_stamp,json=discoveredTimeStamp,proto3" json:"discovered_time_stamp,omitempty"` // Sequence number to order the fetch of uris from a seed Sequence int64 `protobuf:"varint,4,opt,name=sequence,proto3" json:"sequence,omitempty"` Uri string `protobuf:"bytes,5,opt,name=uri,proto3" json:"uri,omitempty"` Ip string `protobuf:"bytes,7,opt,name=ip,proto3" json:"ip,omitempty"` // * // Get the discoveryPath, // // R - Redirect // E - Embed // X - Speculative embed (aggressive/Javascript link extraction) // L - Link // P - Prerequisite (as for DNS or robots.txt before another URI) DiscoveryPath string `protobuf:"bytes,8,opt,name=discovery_path,json=discoveryPath,proto3" json:"discovery_path,omitempty"` Referrer string `protobuf:"bytes,9,opt,name=referrer,proto3" json:"referrer,omitempty"` Cookies []*Cookie `protobuf:"bytes,10,rep,name=cookies,proto3" json:"cookies,omitempty"` PageFetchTimeMs int64 `protobuf:"varint,11,opt,name=page_fetch_time_ms,json=pageFetchTimeMs,proto3" json:"page_fetch_time_ms,omitempty"` // The time used to fetch and render the uri including dependencies Retries int32 `protobuf:"varint,12,opt,name=retries,proto3" json:"retries,omitempty"` // Number of times this uri has been scheduled for retry. 
EarliestFetchTimeStamp *timestamppb.Timestamp `` // Do not fetch this uri before this time /* 132-byte string literal not displayed */ CrawlHostGroupId string `protobuf:"bytes,14,opt,name=crawl_host_group_id,json=crawlHostGroupId,proto3" json:"crawl_host_group_id,omitempty"` // The Crawl Host Group calculated for this uri PolitenessRef *v1.ConfigRef `protobuf:"bytes,15,opt,name=politeness_ref,json=politenessRef,proto3" json:"politeness_ref,omitempty"` // Ref to the politeness config used when discovering this uri Error *v11.Error `protobuf:"bytes,16,opt,name=error,proto3" json:"error,omitempty"` // Contains the error reason if fetch failed // Id of the job execution for this uri. // A job execution is an instance of a job JobExecutionId string `protobuf:"bytes,17,opt,name=job_execution_id,json=jobExecutionId,proto3" json:"job_execution_id,omitempty"` Unresolved bool `protobuf:"varint,18,opt,name=unresolved,proto3" json:"unresolved,omitempty"` // If true, then this uri is just added to the queue and no resolution of ip or robots.txt checks are done yet. FetchStartTimeStamp *timestamppb.Timestamp `protobuf:"bytes,19,opt,name=fetch_start_time_stamp,json=fetchStartTimeStamp,proto3" json:"fetch_start_time_stamp,omitempty"` // The weighting between jobs when two jobs compete on fetching resources from the same hosts. // Copied from CrawlConfig for efficiency. PriorityWeight float64 `protobuf:"fixed64,20,opt,name=priority_weight,json=priorityWeight,proto3" json:"priority_weight,omitempty"` // The seed uri which was the starting point for this uri. SeedUri string `protobuf:"bytes,21,opt,name=seed_uri,json=seedUri,proto3" json:"seed_uri,omitempty"` // Annotations used as parameters to scripts. Annotation []*v1.Annotation `protobuf:"bytes,22,rep,name=annotation,proto3" json:"annotation,omitempty"` // contains filtered or unexported fields }
func (*QueuedUri) Descriptor
deprecated
func (*QueuedUri) GetAnnotation ¶
func (x *QueuedUri) GetAnnotation() []*v1.Annotation
func (*QueuedUri) GetCookies ¶
func (*QueuedUri) GetCrawlHostGroupId ¶
func (*QueuedUri) GetDiscoveredTimeStamp ¶
func (x *QueuedUri) GetDiscoveredTimeStamp() *timestamppb.Timestamp
func (*QueuedUri) GetDiscoveryPath ¶
func (*QueuedUri) GetEarliestFetchTimeStamp ¶
func (x *QueuedUri) GetEarliestFetchTimeStamp() *timestamppb.Timestamp
func (*QueuedUri) GetExecutionId ¶
func (*QueuedUri) GetFetchStartTimeStamp ¶
func (x *QueuedUri) GetFetchStartTimeStamp() *timestamppb.Timestamp
func (*QueuedUri) GetJobExecutionId ¶
func (*QueuedUri) GetPageFetchTimeMs ¶
func (*QueuedUri) GetPolitenessRef ¶
func (*QueuedUri) GetPriorityWeight ¶
func (*QueuedUri) GetReferrer ¶
func (*QueuedUri) GetRetries ¶
func (*QueuedUri) GetSeedUri ¶
func (*QueuedUri) GetSequence ¶
func (*QueuedUri) GetUnresolved ¶
func (*QueuedUri) ProtoMessage ¶
func (*QueuedUri) ProtoMessage()
func (*QueuedUri) ProtoReflect ¶
func (x *QueuedUri) ProtoReflect() protoreflect.Message
type UnimplementedFrontierServer ¶
type UnimplementedFrontierServer struct { }
UnimplementedFrontierServer must be embedded to have forward compatible implementations.
func (UnimplementedFrontierServer) BusyCrawlHostGroupCount ¶
func (UnimplementedFrontierServer) BusyCrawlHostGroupCount(context.Context, *emptypb.Empty) (*CountResponse, error)
func (UnimplementedFrontierServer) CrawlSeed ¶
func (UnimplementedFrontierServer) CrawlSeed(context.Context, *CrawlSeedRequest) (*CrawlExecutionId, error)
func (UnimplementedFrontierServer) GetNextPage ¶
func (UnimplementedFrontierServer) GetNextPage(context.Context, *emptypb.Empty) (*PageHarvestSpec, error)
func (UnimplementedFrontierServer) PageCompleted ¶
func (UnimplementedFrontierServer) PageCompleted(Frontier_PageCompletedServer) error
func (UnimplementedFrontierServer) QueueCountForCrawlExecution ¶
func (UnimplementedFrontierServer) QueueCountForCrawlExecution(context.Context, *CrawlExecutionId) (*CountResponse, error)
func (UnimplementedFrontierServer) QueueCountForCrawlHostGroup ¶
func (UnimplementedFrontierServer) QueueCountForCrawlHostGroup(context.Context, *CrawlHostGroup) (*CountResponse, error)
func (UnimplementedFrontierServer) QueueCountTotal ¶
func (UnimplementedFrontierServer) QueueCountTotal(context.Context, *emptypb.Empty) (*CountResponse, error)
type UnsafeFrontierServer ¶
type UnsafeFrontierServer interface {
// contains filtered or unexported methods
}
UnsafeFrontierServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, as added methods to FrontierServer will result in compilation errors.