


pip install ray[tune] -i https://pypi.tuna.tsinghua.edu.cn/simple # pip install ray 安装总是time out
# C:\ProgramData\Anaconda3\envs\torch\lib\site-packages\ray\_private\compat.py", line 14, in patch_redis_empty_recv
#    import redis     ModuleNotFoundError: No module named 'redis' redis 是一个 Key-Value 数据库
pip install redis -i https://pypi.tuna.tsinghua.edu.cn/simple

Quick Start Example


$\min f(x) = a^2 + b$ —— 度量标准(f)和模式(min)
       程序为a和b定义一个搜索空间,并让Ray Tune在该空间中搜索最优值。




项目 解释 更多
Search Space 搜索空间
Trainable 类似目标函数 # Pass in a Trainable class or function, along with a search space "config". tune.run(trainable, config={"a": 2, "b": 4})
Search Algorithms 搜索函数
Schedulers 训练策略 tune.run(trainable, config, num_samples, scheduler)
Analyses 结果分析


# 安装ray https://blog.csdn.net/weixin_45211921/article/details/117452963
pip3 install pytest-runner
pip3 install ray
pip install ray[default]# windows 下需要多的步骤

Quick Start Example

# https://docs.ray.io/en/master/tune/index.html
from ray import tune# 1. Define an objective function.
def objective(config):
    """Compute the metric that Tune optimizes for one trial.

    Args:
        config: Mapping with the sampled hyperparameters ``"a"`` and ``"b"``.

    Returns:
        A dict with the single key ``"score"`` holding ``a ** 2 + b``.
    """
    score = config["a"] ** 2 + config["b"]
    return {"score": score}


# 2. Define a search space.
search_space = {
    "a": tune.grid_search([0.001, 0.01, 0.1, 1.0]),
    "b": tune.choice([1, 2, 3]),
}

# 3. Start a Tune run and print the best result.
# To sample the search space several times, pass num_samples, e.g.
# tune.run(trainable, config={"a": 2, "b": 4}, num_samples=10).
analysis = tune.run(objective, config=search_space)

# The best config is selected by the metric ("score") and the mode ("min").
print(analysis.get_best_config(metric="score", mode="min"))


# https://docs.ray.io/en/latest/tune/key-concepts.html
analysis = tune.run(
    trainable,
    config=config,
    metric="score",
    mode="min",
    search_alg=BayesOptSearch(random_search_steps=4),
    stop={"training_iteration": 20},
)

# Retrieve the best results.
best_trial = analysis.best_trial  # Get best trial
best_config = analysis.best_config  # Get best trial's hyperparameters
best_logdir = analysis.best_logdir  # Get best trial's logdir
best_checkpoint = analysis.best_checkpoint  # Get best trial's best checkpoint
best_result = analysis.best_result  # Get best trial's last results
best_result_df = analysis.best_result_df  # Get best result as pandas dataframe

# Alternatively, fetch all results directly for analysis.
# Get a dataframe with the last results for each trial
df_results = analysis.results_df
# Get a dataframe of results for a specific score or mode
df = analysis.dataframe(metric="score", mode="max")


== Status == 略去状态信息
#  tune.run 将执行所有 trials (除非出错).
| Trial name            | status   | loc            |     a |   b |
| objective_47380_00000 | RUNNING  | | 0.001 |   3 |
| objective_47380_00001 | PENDING  |                | 0.01  |   3 |
| objective_47380_00002 | PENDING  |                | 0.1   |   3 |
| objective_47380_00003 | PENDING  |                | 1     |   1 |
| Trial name            | status     | loc            |     a |   b |   iter |   total time (s) |   score |
| objective_47380_00001 | RUNNING    | | 0.01  |   3 |      1 |      0           |  3.0001 |
| objective_47380_00002 | RUNNING    | | 0.1   |   3 |        |                  |         |
| objective_47380_00003 | PENDING    |                | 1     |   1 |        |                  |         |
| objective_47380_00000 | TERMINATED | | 0.001 |   3 |      1 |      0.000999928 |  3      |
| Trial name            | status     | loc            |     a |   b |   iter |   total time (s) |   score |
| objective_47380_00000 | TERMINATED | | 0.001 |   3 |      1 |      0.000999928 |  3      |
| objective_47380_00001 | TERMINATED | | 0.01  |   3 |      1 |      0           |  3.0001 |
| objective_47380_00002 | TERMINATED | | 0.1   |   3 |      1 |      0           |  3.01   |
| objective_47380_00003 | TERMINATED | | 1     |   1 |      1 |      0           |  2      |
12.21 seconds (11.50 seconds for the tuning loop)# 环境 虚拟机 win7 8GB 内存 无gpu
{'a': 1.0, 'b': 1}
Process finished with exit code 0


# https://docs.ray.io/en/latest/tune/getting-started.html#tune-tutorial
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import tune
from ray.tune.schedulers import ASHAScheduler


class ConvNet(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by a 10-way linear layer."""

    def __init__(self):
        super(ConvNet, self).__init__()
        # In this example, we don't change the model architecture
        # due to simplicity.
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3)
        self.fc = nn.Linear(192, 10)

    def forward(self, x):
        """Return log-softmax scores over the 10 outputs for a batch ``x``."""
        x = F.relu(F.max_pool2d(self.conv1(x), 3))
        # Flatten to 192 features per sample, matching the input size of
        # ``self.fc`` (3 channels * 8 * 8 for 1x28x28 inputs).
        x = x.view(-1, 192)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


# Change these values if you want the training to run quicker or slower.
EPOCH_SIZE = 1  # 512
TEST_SIZE = 1  # 256


def train(model, optimizer, train_loader):
    """Run one truncated training pass of ``model`` over ``train_loader``.

    The pass stops as soon as ``batch_idx * len(data)`` exceeds
    ``EPOCH_SIZE``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # We set this just for the example to run quickly.
        if batch_idx * len(data) > EPOCH_SIZE:
            return
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()


def test(model, data_loader):
    """Return the fraction of correct predictions of ``model``.

    Evaluation stops as soon as ``batch_idx * len(data)`` exceeds
    ``TEST_SIZE``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(data_loader):
            # We set this just for the example to run quickly.
            if batch_idx * len(data) > TEST_SIZE:
                break
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    return correct / total


def train_mnist(config):
    """Trainable for Tune: fit ``ConvNet`` on MNIST and report accuracy.

    Args:
        config: Mapping with the sampled ``"lr"`` and ``"momentum"`` values
            passed to the SGD optimizer.
    """
    # Data Setup
    mnist_transforms = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )

    train_loader = DataLoader(
        datasets.MNIST(
            "~/data", train=True, download=True, transform=mnist_transforms
        ),
        batch_size=64,
        shuffle=True,
    )
    test_loader = DataLoader(
        datasets.MNIST("~/data", train=False, transform=mnist_transforms),
        batch_size=64,
        shuffle=True,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = ConvNet()
    model.to(device)

    optimizer = optim.SGD(
        model.parameters(), lr=config["lr"], momentum=config["momentum"]
    )
    for i in range(10):
        train(model, optimizer, train_loader)
        acc = test(model, test_loader)

        # Send the current training result back to Tune
        tune.report(mean_accuracy=acc)

        if i % 5 == 0:
            # This saves the model to the trial directory
            torch.save(model.state_dict(), "./model.pth")


search_space = {
    "lr": tune.sample_from(lambda spec: 10 ** (-10 * np.random.rand())),
    "momentum": tune.uniform(0.1, 0.9),
}

# Uncomment this to enable distributed execution
# `ray.init(address="auto")`

# Download the dataset first (this waits for files such as
# http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to download).
datasets.MNIST("~/data", train=True, download=True)

analysis = tune.run(train_mnist, config=search_space)  # TODO

# Plot the mean_accuracy curve of every trial.
dfs = analysis.trial_dataframes
for d in dfs.values():
    d.mean_accuracy.plot()


# First run: pip install bayesian-optimization
from ray.tune.suggest.bayesopt import BayesOptSearch

# Define the search space
search_space = {"a": tune.uniform(0, 1), "b": tune.uniform(0, 20)}

algo = BayesOptSearch(random_search_steps=4)

tune.run(
    trainable,
    config=search_space,
    metric="score",
    mode="min",
    search_alg=algo,
    stop={"training_iteration": 20},
)





Code Example

Random search/grid search

Random search/grid search



Bayesian/Bandit Optimization


AX Example


Blended Search


Blendsearch Example


Cost-Frugal hyperparameter Optimization


CFO Example


Scalable Bayesian Optimization


Dragonfly Example


Bayesian Optimization


SkOpt Example


Tree-Parzen Estimators


Running Tune experiments with HyperOpt


Bayesian Optimization


BayesOpt Example


Bayesian Opt/HyperBand


BOHB Example


Gradient-free Optimization


Nevergrad Example


Optuna search algorithms


Optuna Example


Zeroth-order Optimization


ZOOpt Example


Closed source


SigOpt Example


Heteroscedastic Evolutionary Bayesian Optimization


HEBO Example

日程表与早停 Schedulers

如果未指定调度程序,Tune 将默认使用先进先出 (FIFO) 调度程序:它只是按照搜索算法选出试验的先后顺序依次运行这些试验,并且不会早停。

from ray.tune.schedulers import HyperBandScheduler

# Create HyperBand scheduler and maximize the score
hyperband = HyperBandScheduler(metric="score", mode="max")

config = {"a": tune.uniform(0, 1), "b": tune.uniform(0, 1)}

tune.run(trainable, config=config, num_samples=20, scheduler=hyperband)


== Status ==
Current time: 2022-**-** **:**:** (running for 00:04:28.04)
Memory usage on this node: 7.0/* GiB
Using FIFO scheduling algorithm.
Resources requested: 1.0/2 CPUs, 0/0 GPUs, 0.0/0.82 GiB heap, 0.0/0.41 GiB objects
Result logdir: C:\Users\Administrator\ray_results\train_mnist_2022-05-25_10-04-34
Number of trials: 1/1 (1 RUNNING)
| Trial name              | status   | loc            |          lr |   momentum |
| train_mnist_05ccd_00000 | RUNNING  | | 5.27504e-09 |   0.653214 |
+-------------------------+----------+----------------+-------------+------------+
Result for train_mnist_05ccd_00000:
  date: 2022-**-**_**-**-** # 时间戳
  done: false
  iterations_since_restore: 1
  mean_accuracy: 0.03125
  node_ip: 1
  trial_id: 05ccd_00000
Result for train_mnist_05ccd_00000:
  date: 2022-**-**_**-**-**
  done: true
  experiment_id: 20149107d1784b8e83e2f3139e19e512
  experiment_tag: 0_lr=5.275e-09,momentum=0.65321
  hostname: YYDN-20211110NL
  iterations_since_restore: 10
  mean_accuracy: 0.109375
  node_ip: 0.006000518798828125
== Status ==
Current time: 2022-05-25 10:09:05 (running for 00:04:29.67)
Memory usage on this node: 7.1/8.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/2 CPUs, 0/0 GPUs, 0.0/0.82 GiB heap, 0.0/0.41 GiB objects
Number of trials: 1/1 (1 TERMINATED)
| Trial name              | status     | loc            |          lr |   momentum |      acc |   iter |   total time (s) |
| train_mnist_05ccd_00000 | TERMINATED | | 5.27504e-09 |   0.653214 | 0.109375 |     10 |          1.00106 |
+-------------------------+------------+----------------+-------------+------------+----------+--------+------------------+
2022-05-25 10:09:05,136 INFO tune.py:701 -- Total run time: 271.33 seconds (269.49 seconds for the tuning loop).
Process finished with exit code -1



# https://docs.ray.io/en/latest/tune/api_docs/search_space.html#tune-sample-docs
config = {
    # Sample a float uniformly between -5.0 and -1.0
    "uniform": tune.uniform(-5, -1),

    # Sample a float uniformly between 3.2 and 5.4,
    # rounding to increments of 0.2
    "quniform": tune.quniform(3.2, 5.4, 0.2),

    # Sample a float uniformly between 0.0001 and 0.01, while
    # sampling in log space
    "loguniform": tune.loguniform(1e-4, 1e-2),

    # Sample a float uniformly between 0.0001 and 0.1, while
    # sampling in log space and rounding to increments of 0.00005
    "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5),

    # Sample a random float from a normal distribution with
    # mean=10 and sd=2
    "randn": tune.randn(10, 2),

    # Sample a random float from a normal distribution with
    # mean=10 and sd=2, rounding to increments of 0.2
    "qrandn": tune.qrandn(10, 2, 0.2),

    # Sample a integer uniformly between -9 (inclusive) and 15 (exclusive)
    "randint": tune.randint(-9, 15),

    # Sample a random uniformly between -21 (inclusive) and 12 (inclusive (!))
    # rounding to increments of 3 (includes 12)
    "qrandint": tune.qrandint(-21, 12, 3),

    # Sample a integer uniformly between 1 (inclusive) and 10 (exclusive),
    # while sampling in log space
    "lograndint": tune.lograndint(1, 10),

    # Sample a integer uniformly between 1 (inclusive) and 10 (inclusive (!)),
    # while sampling in log space and rounding to increments of 2
    "qlograndint": tune.qlograndint(1, 10, 2),

    # Sample an option uniformly from the specified choices
    "choice": tune.choice(["a", "b", "c"]),

    # Sample from a random function, in this case one that
    # depends on another value from the search space
    "func": tune.sample_from(lambda spec: spec.config.uniform * 0.01),

    # Do a grid search over these values. Every value will be sampled
    # `num_samples` times (`num_samples` is the parameter you pass to `tune.run()`)
    "grid": tune.grid_search([32, 64, 128]),
}

Tune: A Research Platform for Distributed Model Selection and Training



pytorch tune:这篇文章是对channel进行了优化


How to use Tune with PyTorch(ray的官方文档)

pytorch tune

