# MPI/stable_baselinesインストール

# coding: utf-8
# ////////////////////////////////////////////////////////////////////////////
# ///【CartPole-v1】                                                       ///
# ///                                                                      ///
# ///  【改訂履歴】                                                        ///
# /// V0.00 2022/07/05 プロトタイプ                                        ///
# ////////////////////////////////////////////////////////////////////////////
# When run from a file (i.e. `__file__` is defined), make the parent directory
# and the grandparent directory importable by appending them to sys.path.
if "__file__" in globals():
    import os
    import sys

    # Same two entries, in the same order, as appending them one by one:
    # "<dir>/.." first, then "<dir>/.." followed by the literal "//..//".
    sys.path.extend(
        os.path.join(os.path.dirname(__file__), "..") + tail
        for tail in ("", "//..//")
    )
# from common.nlp_util import *
# tensorflow_warning_cancel()
import gym
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2

# Train a PPO2 agent with an MLP policy for 1000 timesteps, then render one
# rollout driven by the trained policy's deterministic predictions.
# NOTE(review): the file header mentions CartPole-v1 but the environment
# created here is "CartPole-v0" -- confirm which one is intended.

# The factory builds its own gym env instead of closing over the global name
# `env`, which is rebound to the vectorized wrapper on this very line.
env = DummyVecEnv([lambda: gym.make("CartPole-v0")])
try:
    model = PPO2("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=1000)

    state = env.reset()
    while True:
        env.render()
        action, _ = model.predict(state, deterministic=True)
        state, rewards, done, info = env.step(action)
        # Only one sub-environment was wrapped above, so test its flag
        # explicitly rather than relying on the truthiness of the whole
        # `done` container returned by the vectorized step.
        if done[0]:
            break
finally:
    # Always release the environment (including anything opened by
    # render()), even if training or the rollout raises or is interrupted.
    env.close()

# [result]
# (py36) d:\VisualStudio2017\Python3.5_GPU\DeZero\DeZero_DQN\ch10>python case00_hello_baselines.py
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorflow\python\framework\dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)typ# e'.
#   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorflow\python\framework\dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)typ# e'.
#   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorflow\python\framework\dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)typ# e'.
#   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorflow\python\framework\dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)typ# e'.
#   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorflow\python\framework\dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)typ# e'.
#   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorflow\python\framework\dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)typ# e'.
#   np_resource = np.dtype([("resource", np.ubyte, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '# (1,)type'.
#   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '# (1,)type'.
#   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '# (1,)type'.
#   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '# (1,)type'.
#   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '# (1,)type'.
#   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
# C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '# (1,)type'.
#   np_resource = np.dtype([("resource", np.ubyte, 1)])
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\common\tf_util.py:57: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.
# 
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\common\tf_util.py:66: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.
# 
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\common\policies.py:115: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.
# 
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\common\input.py:25: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
# 
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\common\policies.py:562: flatten (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
# Instructions for updating:
# Use keras.layers.flatten instead.
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\a2c\utils.py:156: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.
# 
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\ppo2\ppo2.py:193: The name tf.summary.scalar is deprecated. Please use tf.compat.v1.summary.scalar instead.
# 
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
# Instructions for updating:
# Use tf.where in 2.0, which has the same broadcast rule as np.where
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\ppo2\ppo2.py:209: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.
# 
# WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\py36\lib\site-packages\stable_baselines\ppo2\ppo2.py:245: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.
# 
# --------------------------------------
# | approxkl           | 0.00021873586 |
# | clipfrac           | 0.0           |
# | explained_variance | -0.00127      |
# | fps                | 253           |
# | n_updates          | 1             |
# | policy_entropy     | 0.69292223    |
# | policy_loss        | -0.0035118635 |
# | serial_timesteps   | 128           |
# | time_elapsed       | 0             |
# | total_timesteps    | 128           |
# | value_loss         | 34.327312     |
# --------------------------------------
# --------------------------------------
# | approxkl           | 6.346035e-05  |
# | clipfrac           | 0.0           |
# | explained_variance | 0.0164        |
# | fps                | 787           |
# | n_updates          | 2             |
# | policy_entropy     | 0.6923793     |
# | policy_loss        | -0.0008587773 |
# | serial_timesteps   | 256           |
# | time_elapsed       | 0.505         |
# | total_timesteps    | 256           |
# | value_loss         | 48.824497     |
# --------------------------------------
# -------------------------------------
# | approxkl           | 6.731033e-06 |
# | clipfrac           | 0.0          |
# | explained_variance | 0.0103       |
# | fps                | 787          |
# | n_updates          | 3            |
# | policy_entropy     | 0.69191784   |
# | policy_loss        | 3.005017e-05 |
# | serial_timesteps   | 384          |
# | time_elapsed       | 0.668        |
# | total_timesteps    | 384          |
# | value_loss         | 48.761276    |
# -------------------------------------
# ---------------------------------------
# | approxkl           | 2.8752806e-06  |
# | clipfrac           | 0.0            |
# | explained_variance | -0.00833       |
# | fps                | 773            |
# | n_updates          | 4              |
# | policy_entropy     | 0.6919998      |
# | policy_loss        | -0.00010425784 |
# | serial_timesteps   | 512            |
# | time_elapsed       | 0.83           |
# | total_timesteps    | 512            |
# | value_loss         | 41.801987      |
# ---------------------------------------
# --------------------------------------
# | approxkl           | 1.0969635e-06 |
# | clipfrac           | 0.0           |
# | explained_variance | 0.00405       |
# | fps                | 792           |
# | n_updates          | 5             |
# | policy_entropy     | 0.691302      |
# | policy_loss        | 2.1914486e-05 |
# | serial_timesteps   | 640           |
# | time_elapsed       | 0.997         |
# | total_timesteps    | 640           |
# | value_loss         | 33.806343     |
# --------------------------------------
# ---------------------------------------
# | approxkl           | 2.9547932e-06  |
# | clipfrac           | 0.0            |
# | explained_variance | -0.00746       |
# | fps                | 787            |
# | n_updates          | 6              |
# | policy_entropy     | 0.69167846     |
# | policy_loss        | -0.00022557745 |
# | serial_timesteps   | 768            |
# | time_elapsed       | 1.16           |
# | total_timesteps    | 768            |
# | value_loss         | 40.427795      |
# ---------------------------------------
# --------------------------------------
# | approxkl           | 6.883509e-05  |
# | clipfrac           | 0.0           |
# | explained_variance | -0.0229       |
# | fps                | 731           |
# | n_updates          | 7             |
# | policy_entropy     | 0.6907279     |
# | policy_loss        | -0.0021043923 |
# | serial_timesteps   | 896           |
# | time_elapsed       | 1.32          |
# | total_timesteps    | 896           |
# | value_loss         | 40.58045      |
# --------------------------------------
# 06. MPI/stable_baselinesインストール