Using a Pretrained Model and Dataset from Hugging Face#

I wanted a text classification dataset. Searching for a small one, I selected the jailbreak-classification dataset, a binary classification task: predict whether a prompt is a jailbreak attempt or benign.

Loading as a Dataset#

from datasets import load_dataset
#loading directly -- from docs
ds = load_dataset("jackhhao/jailbreak-classification")

The dataset is essentially a dictionary with train and test splits. Each split contains two columns: the text of the prompt and its type – benign or jailbreak.

ds
DatasetDict({
    train: Dataset({
        features: ['prompt', 'type'],
        num_rows: 1044
    })
    test: Dataset({
        features: ['prompt', 'type'],
        num_rows: 262
    })
})
ds['train'][0]
{'prompt': 'You are a devoted fan of a celebrity.', 'type': 'benign'}
ds['train'][1]
{'prompt': 'You are Joseph Seed from Far Cry 5. Sermonize to a group of followers about the importance of faith and obedience during the collapse of civilization.',
 'type': 'benign'}
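Before modeling, it helps to know how balanced the two classes are. A quick sketch using the column accessor on the training split (counts not shown here):

from collections import Counter
#count how many prompts fall in each class
Counter(ds['train']['type'])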

Loading the Model and Tokenizer#

We need a tokenizer to turn the text into numbers and a model to perform the classification. Below, we load the BERT tokenizer and the BERT model for sequence classification. The tokenizer is applied to the dataset, and the tokenized dataset is then passed to the model for training.

from transformers import BertTokenizer, BertForSequenceClassification
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
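This warning is expected: the classification head on top of BERT is new and randomly initialized, which is exactly the part we will train. As a minimal variant (num_labels is a standard from_pretrained argument, and two labels is also the default), the binary label count can be made explicit:

#same model, with the number of labels stated explicitly
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)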
#example of tokenizer
tokenizer(ds['train'][0]['prompt'])
{'input_ids': [101, 2017, 2024, 1037, 7422, 5470, 1997, 1037, 8958, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
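To see what those ids correspond to, the tokenizer can map them back to wordpiece tokens (a small sketch; 101 and 102 are the [CLS] and [SEP] special tokens):

#map the ids back to tokens to inspect the encoding
tokenizer.convert_ids_to_tokens(tokenizer(ds['train'][0]['prompt'])['input_ids'])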
#function to apply tokenizer to all input strings
#note that this is the text in the "prompt" column
def encode(examples):
    return tokenizer(examples['prompt'], truncation=True, padding="max_length")
#mapping tokenizer to dataset
data = ds.map(encode)
#function to make the target numeric
#the original target is the 'type' column; the model expects an integer 'labels' column
def targeter(examples):
    return {'labels': 1 if examples['type'] == 'jailbreak' else 0}
#map target function to data
data = data.map(targeter)
#note the changed data
data['train'][0]
{'prompt': 'You are a devoted fan of a celebrity.',
 'type': 'benign',
 'input_ids': [101, 2017, 2024, 1037, 7422, 5470, 1997, 1037, 8958, 1012, 102, 0, 0, ..., 0],
 'token_type_ids': [0, 0, 0, ..., 0],
 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, ..., 0],
 'labels': 0}
(output abridged: each list is padded out to the max_length of 512 entries)
#no longer need original columns in data
d = data.remove_columns(['prompt', 'type'])
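A quick check that only the model inputs remain (column_names is a standard Dataset attribute):

d['train'].column_names
#should list only input_ids, token_type_ids, attention_mask, and labels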

Using the Trainer API#

To train the model to predict jailbreak or benign, we use the Trainer and TrainingArguments objects from Hugging Face.

The Trainer requires a model, training arguments, the train and eval datasets, and a processing class (here, the tokenizer). We pass the appropriate splits of our dataset and create a TrainingArguments object to define where to store the model. Once the Trainer is instantiated, the .train() method begins training.

from transformers import Trainer, TrainingArguments
ta = TrainingArguments('testing-jailbreak', remove_unused_columns=False)
trainer = Trainer(model=model,
                  args=ta,
                  train_dataset=d['train'],
                  eval_dataset=d['test'],
                  processing_class=tokenizer)
trainer.train()
wandb: Currently logged in as: koehlerj (mascj670-the-new-school) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
Tracking run with wandb version 0.23.1
Run data is saved locally in /content/wandb/run-20251209_015736-9xc0yehl
[393/393 02:09, Epoch 3/3]
Step Training Loss

TrainOutput(global_step=393, training_loss=0.08142155848689965, metrics={'train_runtime': 173.3708, 'train_samples_per_second': 18.065, 'train_steps_per_second': 2.267, 'total_flos': 824063825387520.0, 'train_loss': 0.08142155848689965, 'epoch': 3.0})
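The Trainer logs to Weights & Biases by default here (hence the wandb output above). If you don't want to log in, one option is to turn external reporting off when building the arguments (report_to is a standard TrainingArguments parameter):

#same arguments as before, but with external logging disabled
ta = TrainingArguments('testing-jailbreak',
                       remove_unused_columns=False,
                       report_to="none")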

Evaluating the Model#

After training, we use the model to predict on the test (evaluation) dataset. The predictions are logits, one score per class; whichever column holds the larger value gives the predicted class – 0 or 1. To extract that index, we use the np.argmax function.

Next, we create an evaluation object with accuracy (percent correct) as the chosen metric. The .compute method compares the predicted values to the true labels and returns the accuracy.

#make predictions
preds = trainer.predict(d['test'])
#first few rows of predictions
preds.predictions[:5]
array([[ 3.7534149 , -3.876564  ],
       [ 3.7492313 , -3.7583694 ],
       [ 0.5484818 ,  0.36869246],
       [ 3.7229745 , -3.7815764 ],
       [-4.291757  ,  4.205346  ]], dtype=float32)
import numpy as np
#turning predictions into 0 and 1
yhat = np.argmax(preds.predictions, axis = 1)
# !pip install evaluate
import evaluate
#create accuracy evaluator
acc = evaluate.load("accuracy")
#accuracy on test data
acc.compute(predictions = yhat,
            references=preds.label_ids)
{'accuracy': 0.9923664122137404}
#baseline: share of jailbreak prompts (the majority class), i.e. the accuracy of always predicting jailbreak
preds.label_ids.sum()/len(preds.label_ids)
np.float64(0.5305343511450382)
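Since the classes are roughly balanced, accuracy is a reasonable summary here, but on a more skewed split it is worth adding precision/recall-style metrics. A small sketch using the same evaluate API (the "f1" metric is a standard one in the library):

#f1 on the test data, comparing predictions to true labels
f1 = evaluate.load("f1")
f1.compute(predictions=yhat, references=preds.label_ids)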

Task: Fine-Tuning a Time Series Model#

The Trainer API essentially exposes all Hugging Face models and makes it easy to fine-tune them. Your goal for this assignment is to find a time series dataset (large, in that it has more than 500K rows) and fine-tune a forecasting model on it; see the Hugging Face time series models for candidates. Read through the article “A comprehensive survey of deep learning for time series forecasting: architectural diversity and open challenges” here and discuss your model's architecture and design as they relate to the authors' comments (i.e., is it a transformer, a CNN, an LSTM, etc.).
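As a starting point on the modeling side, here is a minimal sketch of instantiating one of the Hugging Face time series models; the architecture choice and the sizes below are illustrative and should be matched to your dataset:

from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerForPrediction
#illustrative, untrained config: prediction_length is the forecast horizon,
#context_length is the history window fed to the model
config = TimeSeriesTransformerConfig(prediction_length=24, context_length=48)
model = TimeSeriesTransformerForPrediction(config)
#for fine-tuning you would instead load a pretrained checkpoint from the Hub
#with TimeSeriesTransformerForPrediction.from_pretrained("<checkpoint-name>")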

One option is the ForecastingData loader in sktime.datasets, which gives access to all datasets from the Monash Forecasting Repository. These are listed below, followed by a sketch of loading one of them.

The result of your work should be a notebook with the training of the model and a brief writeup of the model's performance and forecasting task. Create a GitHub repository with this work and share the URL.

!pip install sktime
Collecting sktime
  Downloading sktime-0.40.1-py3-none-any.whl.metadata (33 kB)
Requirement already satisfied: joblib<1.6,>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from sktime) (1.5.2)
Requirement already satisfied: numpy<2.4,>=1.21 in /usr/local/lib/python3.12/dist-packages (from sktime) (2.0.2)
Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from sktime) (25.0)
Requirement already satisfied: pandas<2.4.0,>=1.1 in /usr/local/lib/python3.12/dist-packages (from sktime) (2.2.2)
Collecting scikit-base<0.14.0,>=0.6.1 (from sktime)
  Downloading scikit_base-0.13.0-py3-none-any.whl.metadata (8.8 kB)
Requirement already satisfied: scikit-learn<1.8.0,>=0.24 in /usr/local/lib/python3.12/dist-packages (from sktime) (1.6.1)
Requirement already satisfied: scipy<2.0.0,>=1.2 in /usr/local/lib/python3.12/dist-packages (from sktime) (1.16.3)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas<2.4.0,>=1.1->sktime) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas<2.4.0,>=1.1->sktime) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas<2.4.0,>=1.1->sktime) (2025.2)
Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn<1.8.0,>=0.24->sktime) (3.6.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas<2.4.0,>=1.1->sktime) (1.17.0)
Downloading sktime-0.40.1-py3-none-any.whl (36.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 36.3/36.3 MB 74.3 MB/s eta 0:00:00
Downloading scikit_base-0.13.0-py3-none-any.whl (151 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 151.5/151.5 kB 14.7 MB/s eta 0:00:00
Installing collected packages: scikit-base, sktime
Successfully installed scikit-base-0.13.0 sktime-0.40.1
from sktime.datasets import ForecastingData
ForecastingData.all_datasets()
['m1_yearly_dataset',
 'm1_quarterly_dataset',
 'm1_monthly_dataset',
 'm3_yearly_dataset',
 'm3_quarterly_dataset',
 'm3_monthly_dataset',
 'm3_other_dataset',
 'm4_yearly_dataset',
 'm4_quarterly_dataset',
 'm4_monthly_dataset',
 'm4_weekly_dataset',
 'm4_daily_dataset',
 'm4_hourly_dataset',
 'tourism_yearly_dataset',
 'tourism_quarterly_dataset',
 'tourism_monthly_dataset',
 'cif_2016_dataset',
 'london_smart_meters_dataset_with_missing_values',
 'london_smart_meters_dataset_without_missing_values',
 'australian_electricity_demand_dataset',
 'wind_farms_minutely_dataset_with_missing_values',
 'wind_farms_minutely_dataset_without_missing_values',
 'dominick_dataset',
 'bitcoin_dataset_with_missing_values',
 'bitcoin_dataset_without_missing_values',
 'pedestrian_counts_dataset',
 'vehicle_trips_dataset_with_missing_values',
 'vehicle_trips_dataset_without_missing_values',
 'kdd_cup_2018_dataset_with_missing_values',
 'kdd_cup_2018_dataset_without_missing_values',
 'weather_dataset',
 'nn5_daily_dataset_with_missing_values',
 'nn5_daily_dataset_without_missing_values',
 'nn5_weekly_dataset',
 'kaggle_web_traffic_dataset_with_missing_values',
 'kaggle_web_traffic_dataset_without_missing_values',
 'kaggle_web_traffic_weekly_dataset',
 'solar_10_minutes_dataset',
 'solar_weekly_dataset',
 'solar_4_seconds_dataset',
 'electricity_hourly_dataset',
 'electricity_weekly_dataset',
 'car_parts_dataset_with_missing_values',
 'car_parts_dataset_without_missing_values',
 'fred_md_dataset',
 'traffic_hourly_dataset',
 'traffic_weekly_dataset',
 'rideshare_dataset_with_missing_values',
 'rideshare_dataset_without_missing_values',
 'hospital_dataset',
 'covid_deaths_dataset',
 'sunspot_dataset_with_missing_values',
 'sunspot_dataset_without_missing_values',
 'saugeenday_dataset',
 'us_births_dataset',
 'wind_4_seconds_dataset']
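To pull one of these down by name, sktime also provides a Monash loader function; a sketch assuming load_forecastingdata returns the series plus a metadata dictionary (check the sktime docs for the exact return format of your installed version):

from sktime.datasets import load_forecastingdata
#download a named Monash dataset; returns the data plus a metadata dictionary
df, metadata = load_forecastingdata("australian_electricity_demand_dataset")
metadata  #frequency, forecast horizon, missing-value and equal-length flags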