---
# CustomResourceDefinition for the InferenceService kind
# (group serving.kubeflow.org, version v1alpha2, namespaced).
#
# NOTE(review): the controller-tools.k8s.io label suggests this manifest is
# generated output -- presumably it should be regenerated from the API types
# rather than edited by hand; confirm against the project's build tooling.
#
# NOTE(review): apiextensions.k8s.io/v1beta1 is no longer served as of
# Kubernetes 1.22. Moving to apiextensions.k8s.io/v1 needs spec.versions[]
# with a per-version schema and a structural schema, so it is left as-is here.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  creationTimestamp: null
  labels:
    controller-tools.k8s.io: "1.0"
  name: inferenceservices.serving.kubeflow.org
spec:
  # Extra columns for tabular listings, each read from the object by JSONPath.
  additionalPrinterColumns:
  - JSONPath: .status.url
    name: URL
    type: string
  - JSONPath: .status.conditions[?(@.type=='Ready')].status
    name: Ready
    type: string
  - JSONPath: .status.traffic
    name: Default Traffic
    type: integer
  - JSONPath: .status.canaryTraffic
    name: Canary Traffic
    type: integer
  - JSONPath: .metadata.creationTimestamp
    name: Age
    type: date
  group: serving.kubeflow.org
  names:
    kind: InferenceService
    plural: inferenceservices
    shortNames:
    - inferenceservice
  scope: Namespaced
  validation:
    openAPIV3Schema:
      properties:
        apiVersion:
          description: 'APIVersion defines the versioned schema of this representation
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
          type: string
        metadata:
          type: object
        spec:
          # "canary" and "default" below carry the same endpoint schema
          # (explainer / predictor / transformer); only "default" is required.
          properties:
            canary:
              description: Canary defines an alternate endpoints to route a percentage
                of traffic.
              properties:
                explainer:
                  description: Explainer defines the model explanation service spec,
                    explainer service calls to predictor or transformer if it is specified.
                  properties:
                    alibi:
                      description: Spec for alibi explainer
                      properties:
                        config:
                          description: Inline custom parameter settings for explainer
                          type: object
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Defaults to latest Alibi Version
                          type: string
                        storageUri:
                          description: The location of a trained explanation model
                          type: string
                        type:
                          description: The type of Alibi explainer
                          type: string
                      required:
                      - type
                      type: object
                    custom:
                      description: Spec for a custom explainer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
                predictor:
                  description: Predictor defines the model serving spec +required
                  properties:
                    custom:
                      description: Spec for a custom predictor
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    onnx:
                      description: Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    pytorch:
                      description: Spec for PyTorch predictor
                      properties:
                        modelClassName:
                          description: Defaults PyTorch model class name to 'PyTorchModel'
                          type: string
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                    sklearn:
                      description: Spec for SKLearn predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorflow:
                      description: Spec for Tensorflow Serving (https://github.com/tensorflow/serving)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map.
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorrt:
                      description: Spec for TensorRT Inference Server (https://github.com/NVIDIA/tensorrt-inference-server)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    xgboost:
                      description: Spec for XGBoost predictor
                      properties:
                        nthread:
                          description: Number of thread to be used by XGBoost
                          format: int64
                          type: integer
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                  type: object
                transformer:
                  description: Transformer defines the pre/post processing before
                    and after the predictor call, transformer service calls to predictor
                    service.
                  properties:
                    custom:
                      description: Spec for a custom transformer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
              required:
              - predictor
              type: object
            canaryTrafficPercent:
              description: CanaryTrafficPercent defines the percentage of traffic
                going to canary InferenceService endpoints
              format: int64
              type: integer
            default:
              description: Default defines default InferenceService endpoints +required
              properties:
                explainer:
                  description: Explainer defines the model explanation service spec,
                    explainer service calls to predictor or transformer if it is specified.
                  properties:
                    alibi:
                      description: Spec for alibi explainer
                      properties:
                        config:
                          description: Inline custom parameter settings for explainer
                          type: object
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Defaults to latest Alibi Version
                          type: string
                        storageUri:
                          description: The location of a trained explanation model
                          type: string
                        type:
                          description: The type of Alibi explainer
                          type: string
                      required:
                      - type
                      type: object
                    custom:
                      description: Spec for a custom explainer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
                predictor:
                  description: Predictor defines the model serving spec +required
                  properties:
                    custom:
                      description: Spec for a custom predictor
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    onnx:
                      description: Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    pytorch:
                      description: Spec for PyTorch predictor
                      properties:
                        modelClassName:
                          description: Defaults PyTorch model class name to 'PyTorchModel'
                          type: string
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                    sklearn:
                      description: Spec for SKLearn predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorflow:
                      description: Spec for Tensorflow Serving (https://github.com/tensorflow/serving)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map.
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorrt:
                      description: Spec for TensorRT Inference Server (https://github.com/NVIDIA/tensorrt-inference-server)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    xgboost:
                      description: Spec for XGBoost predictor
                      properties:
                        nthread:
                          description: Number of thread to be used by XGBoost
                          format: int64
                          type: integer
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                  type: object
                transformer:
                  description: Transformer defines the pre/post processing before
                    and after the predictor call, transformer service calls to predictor
                    service.
                  properties:
                    custom:
                      description: Spec for a custom transformer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
              required:
              - predictor
              type: object
          required:
          - default
          type: object
        status:
          # These fields back the printer columns declared above
          # (.status.url, .status.conditions, .status.traffic, .status.canaryTraffic).
          properties:
            canary:
              description: Statuses for the canary endpoints of the InferenceService
              type: object
            canaryTraffic:
              description: Traffic percentage that goes to canary services
              format: int64
              type: integer
            conditions:
              description: Conditions the latest available observations of a resource's
                current state. +patchMergeKey=type +patchStrategy=merge
              items:
                properties:
                  lastTransitionTime:
                    description: LastTransitionTime is the last time the condition
                      transitioned from one status to another. We use VolatileTime
                      in place of metav1.Time to exclude this from creating equality.Semantic
                      differences (all other things held constant).
                    type: string
                  message:
                    description: A human readable message indicating details about
                      the transition.
                    type: string
                  reason:
                    description: The reason for the condition's last transition.
                    type: string
                  severity:
                    description: Severity with which to treat failures of this type
                      of condition. When this is not specified, it defaults to Error.
                    type: string
                  status:
                    description: Status of the condition, one of True, False, Unknown.
                      +required
                    type: string
                  type:
                    description: Type of condition. +required
                    type: string
                required:
                - type
                - status
                type: object
              type: array
            default:
              description: Statuses for the default endpoints of the InferenceService
              type: object
            observedGeneration:
              description: ObservedGeneration is the 'Generation' of the Service that
                was last processed by the controller.
              format: int64
              type: integer
            traffic:
              description: Traffic percentage that goes to default services
              format: int64
              type: integer
            url:
              description: URL of the InferenceService
              type: string
          type: object
  version: v1alpha2
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []