From 0897887d07a7d2abaebb52cab3d826a96f4805b0 Mon Sep 17 00:00:00 2001 From: Daria Karavaieva Date: Sun, 7 Jul 2024 18:17:04 +0200 Subject: [PATCH] docs/vmanomaly:custom model guide fix (#6594) ### Describe Your Changes Fixed Custom Model guide according to newer `vmanomaly` versions ### Checklist The following checks are **mandatory**: - [x] My change adheres [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). (cherry picked from commit 7478d2de4cdceb878ecd980b81b18fd7493be167) --- docs/anomaly-detection/components/models.md | 90 ++++++++++++--------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/docs/anomaly-detection/components/models.md b/docs/anomaly-detection/components/models.md index 1549fb4ad2..eddec6d58b 100644 --- a/docs/anomaly-detection/components/models.md +++ b/docs/anomaly-detection/components/models.md @@ -617,7 +617,7 @@ Here in this guide, we will > **Note**: By default, each custom model is created as [**univariate**](#univariate-models) / [**non-rolling**](#non-rolling-models) model. If you want to override this behavior, define models inherited from `RollingModel` (to get a rolling model), or having `is_multivariate` class arg set to `True` (please refer to the code example below). We'll create `custom_model.py` file with `CustomModel` class that will inherit from `vmanomaly`'s `Model` base class. -In the `CustomModel` class there should be three required methods - `__init__`, `fit` and `infer`: +In the `CustomModel` class, the following methods are required: - `__init__`, `fit`, `infer`, `serialize` and `deserialize`: * `__init__` method should initiate parameters for the model. **Note**: if your model relies on configs that have `arg` [key-value pair argument](./models.md#section-overview), do not forget to use Python's `**kwargs` in method's signature and to explicitly call @@ -628,6 +628,8 @@ In the `CustomModel` class there should be three required methods - `__init__`, to initialize the base class each model derives from * `fit` method should contain the model training process. Please be aware that for `RollingModel` defining `fit` method is not needed, as the whole fit/infer process should be defined completely in `infer` method. * `infer` should return Pandas.DataFrame object with model's inferences. +* `serialize` method that saves the model on disk. +* `deserialize` load the saved model from disk. For the sake of simplicity, the model in this example will return one of two values of `anomaly_score` - 0 or 1 depending on input parameter `percentage`. @@ -637,45 +639,56 @@ import numpy as np import pandas as pd import scipy.stats as st import logging +from pickle import dumps -from model.model import Model +from model.model import ( + PICKLE_PROTOCOL, + Model, + deserialize_basic +) # from model.model import RollingModel # inherit from it for your model to be of rolling type logger = logging.getLogger(__name__) class CustomModel(Model): - """ - Custom model implementation. - """ + """ + Custom model implementation. + """ + # by default, each `Model` will be created as a univariate one + # uncomment line below for it to be of multivariate type + #`is_multivariate = True` + + def __init__(self, percentage: float = 0.95, **kwargs): + super().__init__(**kwargs) + self.percentage = percentage + self._mean = np.nan + self._std = np.nan - # by default, each `Model` will be created as a univariate one - # uncomment line below for it to be of multivariate type - # is_multivariate = True + def fit(self, df: pd.DataFrame): + # Model fit process: + y = df['y'] + self._mean = np.mean(y) + self._std = np.std(y) + if self._std == 0.0: + self._std = 1 / 65536 - def __init__(self, percentage: float = 0.95, **kwargs): - super().__init__(**kwargs) - self.percentage = percentage - self._mean = np.nan - self._std = np.nan + def infer(self, df: pd.DataFrame) -> np.array: + # Inference process: + y = df['y'] + zscores = (y - self._mean) / self._std + anomaly_score_cdf = st.norm.cdf(np.abs(zscores)) + df_pred = df[['timestamp', 'y']].copy() + df_pred['anomaly_score'] = anomaly_score_cdf > self.percentage + df_pred['anomaly_score'] = df_pred['anomaly_score'].astype('int32', errors='ignore') - def fit(self, df: pd.DataFrame): - # Model fit process: - y = df['y'] - self._mean = np.mean(y) - self._std = np.std(y) - if self._std == 0.0: - self._std = 1 / 65536 + return df_pred - def infer(self, df: pd.DataFrame) -> np.array: - # Inference process: - y = df['y'] - zscores = (y - self._mean) / self._std - anomaly_score_cdf = st.norm.cdf(np.abs(zscores)) - df_pred = df[['timestamp', 'y']].copy() - df_pred['anomaly_score'] = anomaly_score_cdf > self.percentage - df_pred['anomaly_score'] = df_pred['anomaly_score'].astype('int32', errors='ignore') + def serialize(self) -> None: + return dumps(self, protocol=PICKLE_PROTOCOL) - return df_pred + @staticmethod + def deserialize(model: str | bytes) -> 'CustomModel': + return deserialize_basic(model) ``` @@ -694,19 +707,19 @@ schedulers: models: custom_model: - # note: every custom model should implement this exact path, specified in `class` field class: "custom" # or 'model.model.CustomModel' until v1.13.0 - # custom model params are defined here percentage: 0.9 + reader: - datasource_url: "http://localhost:8428/" + datasource_url: "http://victoriametrics:8428/" + sampling_period: '1m' queries: ingestion_rate: 'sum(rate(vm_rows_inserted_total)) by (type)' churn_rate: 'sum(rate(vm_new_timeseries_created_total[5m]))' writer: - datasource_url: "http://localhost:8428/" + datasource_url: "http://victoriametrics:8428/" metric_format: __name__: "custom_$VAR" for: "$QUERY_KEY" @@ -717,7 +730,7 @@ monitoring: pull: port: 8080 push: - url: "http://localhost:8428/" + url: "http://victoriametrics:8428/" extra_labels: job: "vmanomaly-develop" config: "custom.yaml" @@ -735,14 +748,15 @@ Now we can run the docker container putting as volumes both config and model fil > **Note**: place the model file to `/model/custom.py` path when copying +./custom_model.py:/vmanomaly/model/custom.py + ```sh docker run -it \ ---net [YOUR_NETWORK] \ --v [YOUR_LICENSE_FILE_PATH]:/license.txt \ --v $(PWD)/custom_model.py:/vmanomaly/src/model/custom.py \ +-v $(PWD)/license:/license \ +-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \ -v $(PWD)/custom.yaml:/config.yaml \ victoriametrics/vmanomaly:latest /config.yaml \ ---license-file=/license.txt +--license-file=/license ``` Please find more detailed instructions (license, etc.) [here](/anomaly-detection/overview.html#run-vmanomaly-docker-container)