mirror of
https://github.com/google-deepmind/deepmind-research.git
synced 2025-12-06 09:02:05 +08:00
682 lines
26 KiB
Python
682 lines
26 KiB
Python
# Copyright 2020 Deepmind Technologies Limited.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""A prop-carry task that transition between multiple phases."""
|
|
|
|
import collections
|
|
import colorsys
|
|
import enum
|
|
|
|
from absl import logging
|
|
from dm_control import composer
|
|
from dm_control import mjcf
|
|
from dm_control.composer.observation import observable
|
|
from dm_control.locomotion.arenas import floors
|
|
from dm_control.locomotion.mocap import loader as mocap_loader
|
|
from dm_control.mujoco.wrapper import mjbindings
|
|
import numpy as np
|
|
|
|
from catch_carry import arm_opener
|
|
from catch_carry import mocap_data
|
|
from catch_carry import props
|
|
from catch_carry import trajectories
|
|
|
|
# Physics integration timestep; the control timestep comes from the mocap
# trajectories (see PhasedBoxCarry.__init__ / set_timesteps).
_PHYSICS_TIMESTEP = 0.005

# Maximum number of physics steps to run when settling props onto pedestals
# during episode initialization.
_MAX_SETTLE_STEPS = 1000

# Maximum velocity for prop to be considered settled.
# Used during episode initialization only.
_SETTLE_QVEL_TOL = 1e-5

# Magnitude of the sparse reward.
_SPARSE_REWARD = 1.0

# Maximum distance for walkers to be considered to be "near" a pedestal/target.
_TARGET_TOL = 0.65

# Defines how pedestals are placed around the arena.
# Pedestals are placed at constant angle intervals around the arena's center.
_BASE_PEDESTAL_DIST = 3  # Base distance from center.
_PEDESTAL_DIST_DELTA = 0.5  # Maximum variation on the base distance.

# Base hue-luminosity-saturation of the pedestal colors.
# We rotate through the hue for each pedestal created in the environment.
_BASE_PEDESTAL_H = 0.1
_BASE_PEDESTAL_L = 0.3
_BASE_PEDESTAL_S = 0.7

# Pedestal luminosity when active.
_ACTIVATED_PEDESTAL_L = 0.8

# Half-extents (x, y, z) of each pedestal's box geom, in meters.
_PEDESTAL_SIZE = (0.2, 0.2, 0.02)

# RGBA used for every pedestal when `use_single_pedestal_color` is set.
_SINGLE_PEDESTAL_COLOR = colorsys.hls_to_rgb(.3, .15, .35) + (1.0,)

# Keys of the symbolic state dictionaries (target and current state):
# walker-near-pedestal, walker-holding-prop, and prop-on-pedestal relations.
WALKER_PEDESTAL = 'walker_pedestal'
WALKER_PROP = 'walker_prop'
PROP_PEDESTAL = 'prop_pedestal'
# Observation-key prefixes. NOTE(review): not referenced elsewhere in this
# file — presumably consumed by external observation wrappers; verify.
TARGET_STATE = 'target_state/'
CURRENT_STATE = 'meta/current_state/'
|
|
|
|
|
|
def _is_same_state(state_1, state_2):
|
|
if state_1.keys() != state_2.keys():
|
|
return False
|
|
for k in state_1:
|
|
if not np.all(state_1[k] == state_2[k]):
|
|
return False
|
|
return True
|
|
|
|
|
|
def _singleton_or_none(iterable):
|
|
iterator = iter(iterable)
|
|
try:
|
|
return next(iterator)
|
|
except StopIteration:
|
|
return None
|
|
|
|
|
|
def _generate_pedestal_colors(num_pedestals):
  """Returns one RGBA tuple per pedestal, rotating the hue around the wheel."""

  def _rgba_for(index):
    # Spread hues evenly, wrapping back into [0, 1] when we pass the top.
    hue = _BASE_PEDESTAL_H + index / num_pedestals
    while hue > 1:
      hue -= 1
    return colorsys.hls_to_rgb(hue, _BASE_PEDESTAL_L, _BASE_PEDESTAL_S) + (1.0,)

  return [_rgba_for(i) for i in range(num_pedestals)]
|
|
|
|
|
|
# Bundle returned by the transition manager's `initialize_episode`: the mocap
# clip segment to start from, the focal prop's index, and the index of the
# pedestal used to position the walker (and possibly the prop) at episode start.
InitializationParameters = collections.namedtuple(
    'InitializationParameters', ('clip_segment', 'prop_id', 'pedestal_id'))
|
|
|
|
|
|
def _rotate_vector_by_quaternion(vec, quat):
  """Returns a new 3-vector: `vec` rotated by the quaternion `quat` (w,x,y,z)."""
  rotated = np.empty(3)
  # MuJoCo writes the rotated vector into `rotated` in place.
  mjbindings.mjlib.mju_rotVecQuat(rotated, np.asarray(vec), np.asarray(quat))
  return rotated
|
|
|
|
|
|
@enum.unique
class WarehousePhase(enum.Enum):
  """Phases of the warehouse task's fetch/carry cycle.

  Values are one-hot encoded (num_unique=5) for the 'target_phase' observable.
  """
  TERMINATED = 0     # Episode over (also the manager's pre-episode state).
  GOTOTARGET = 1     # Walk to the currently targeted pedestal.
  PICKUP = 2         # Pick the focal prop up off its pedestal.
  CARRYTOTARGET = 3  # Carry the held prop toward a free pedestal.
  PUTDOWN = 4        # Place the held prop onto the target pedestal.
|
|
|
|
|
|
def _find_random_free_pedestal_id(target_state, random_state):
|
|
free_pedestals = (
|
|
np.where(np.logical_not(np.any(target_state, axis=0)))[0])
|
|
return random_state.choice(free_pedestals)
|
|
|
|
|
|
def _find_random_occupied_pedestal_id(target_state, random_state):
|
|
occupied_pedestals = (
|
|
np.where(np.any(target_state, axis=0))[0])
|
|
return random_state.choice(occupied_pedestals)
|
|
|
|
|
|
def one_hot(values, num_unique):
  """Returns one-hot row(s) of length `num_unique` for the given index(es).

  A scalar input yields a squeezed (num_unique,) vector; a sequence of k
  indices yields a (k, num_unique) matrix.
  """
  flat_indices = np.array(values).reshape(-1)
  identity = np.eye(num_unique)
  return np.squeeze(identity[flat_indices])
|
|
|
|
|
|
class SinglePropFourPhases(object):
  """A phase manager that transitions between four phases for a single prop.

  The cycle is GOTOTARGET -> PICKUP -> CARRYTOTARGET -> PUTDOWN ->
  GOTOTARGET -> ... On each successful phase the manager rewrites the
  symbolic `target_state` dict (WALKER_PEDESTAL, WALKER_PROP and
  PROP_PEDESTAL relations) to describe the goal of the next phase.
  """

  def __init__(self, fixed_initialization_phase=None):
    """Initializes the phase manager.

    Args:
      fixed_initialization_phase: a `WarehousePhase` in which to always start
        episodes, or `None` to sample the initial phase at random.
    """
    self._phase = WarehousePhase.TERMINATED
    self._fixed_initialization_phase = fixed_initialization_phase
    # Fix: initialize these so that the `prop_id`/`pedestal_id` properties
    # are safe to query before `initialize_episode` has been called
    # (previously they raised AttributeError).
    self._prop_id = None
    self._pedestal_id = None

  def initialize_episode(self, target_state, random_state):
    """Randomly initializes an episode into one of the four phases.

    Args:
      target_state: dict of relation arrays to be rewritten in place.
      random_state: a `numpy.random.RandomState` instance.

    Returns:
      An `InitializationParameters` describing the mocap segment, focal prop
      and the pedestal to initialize the walker (and possibly prop) against.
    """

    if self._fixed_initialization_phase is None:
      self._phase = random_state.choice([
          WarehousePhase.GOTOTARGET, WarehousePhase.PICKUP,
          WarehousePhase.CARRYTOTARGET, WarehousePhase.PUTDOWN
      ])
    else:
      self._phase = self._fixed_initialization_phase
    # Focus on a random prop; it starts on the pedestal recorded for it in
    # the PROP_PEDESTAL relation.
    self._prop_id = random_state.randint(len(target_state[PROP_PEDESTAL]))
    self._pedestal_id = np.nonzero(
        target_state[PROP_PEDESTAL][self._prop_id])[0][0]
    pedestal_id_for_initialization = self._pedestal_id

    if self._phase == WarehousePhase.GOTOTARGET:
      clip_segment = trajectories.ClipSegment.APPROACH
      target_state[WALKER_PROP][:] = 0
      target_state[WALKER_PEDESTAL][self._pedestal_id] = 1
    elif self._phase == WarehousePhase.PICKUP:
      clip_segment = trajectories.ClipSegment.PICKUP
      target_state[WALKER_PROP][self._prop_id] = 1
      target_state[WALKER_PEDESTAL][self._pedestal_id] = 1
      # Set self._pedestal_id to the next pedestal after pickup is successful.
      self._pedestal_id = _find_random_free_pedestal_id(
          target_state[PROP_PEDESTAL], random_state)
      target_state[PROP_PEDESTAL][self._prop_id, :] = 0
    elif self._phase == WarehousePhase.CARRYTOTARGET:
      clip_segment = random_state.choice([
          trajectories.ClipSegment.CARRY1, trajectories.ClipSegment.CARRY2])
      self._pedestal_id = _find_random_free_pedestal_id(
          target_state[PROP_PEDESTAL], random_state)
      if clip_segment == trajectories.ClipSegment.CARRY2:
        # For CARRY2 segments the walker/prop are initialized relative to the
        # destination pedestal rather than the prop's original pedestal.
        pedestal_id_for_initialization = self._pedestal_id
      target_state[WALKER_PROP][self._prop_id] = 1
      target_state[WALKER_PEDESTAL][self._pedestal_id] = 1
      target_state[PROP_PEDESTAL][self._prop_id, :] = 0
    elif self._phase == WarehousePhase.PUTDOWN:
      clip_segment = trajectories.ClipSegment.PUTDOWN
      target_state[WALKER_PROP][:] = 0
      target_state[WALKER_PEDESTAL][self._pedestal_id] = 1

    return InitializationParameters(
        clip_segment, self._prop_id, pedestal_id_for_initialization)

  def on_success(self, target_state, random_state):
    """Transitions into the next phase upon success of current phase.

    Args:
      target_state: dict of relation arrays to be rewritten in place.
      random_state: a `numpy.random.RandomState` instance.

    Returns:
      The new `WarehousePhase`.
    """
    if self._phase == WarehousePhase.GOTOTARGET:
      if self._prop_id is not None:
        self._phase = WarehousePhase.PICKUP
        # Set self._pedestal_id to the next pedestal after pickup is successful.
        self._pedestal_id = (
            _find_random_free_pedestal_id(
                target_state[PROP_PEDESTAL], random_state))
        target_state[WALKER_PROP][self._prop_id] = 1
        target_state[PROP_PEDESTAL][self._prop_id, :] = 0
      else:
        # If you go to an empty pedestal, go to pedestal with a prop.
        self._pedestal_id = (
            _find_random_occupied_pedestal_id(
                target_state[PROP_PEDESTAL], random_state))
        target_state[WALKER_PEDESTAL][:] = 0
        target_state[WALKER_PEDESTAL][self._pedestal_id] = 1
        self._prop_id = np.argwhere(
            target_state[PROP_PEDESTAL][:, self._pedestal_id])[0, 0]
    elif self._phase == WarehousePhase.PICKUP:
      self._phase = WarehousePhase.CARRYTOTARGET
      target_state[WALKER_PEDESTAL][:] = 0
      target_state[WALKER_PEDESTAL][self._pedestal_id] = 1
    elif self._phase == WarehousePhase.CARRYTOTARGET:
      self._phase = WarehousePhase.PUTDOWN
      target_state[WALKER_PROP][:] = 0
      target_state[PROP_PEDESTAL][self._prop_id, self._pedestal_id] = 1
    elif self._phase == WarehousePhase.PUTDOWN:
      self._phase = WarehousePhase.GOTOTARGET
      # Set self._pedestal_id to the next pedestal after putdown is successful.
      self._pedestal_id = (
          _find_random_free_pedestal_id(
              target_state[PROP_PEDESTAL], random_state))
      self._prop_id = None
      target_state[WALKER_PEDESTAL][:] = 0
      target_state[WALKER_PEDESTAL][self._pedestal_id] = 1
    return self._phase

  @property
  def phase(self):
    """The current `WarehousePhase`."""
    return self._phase

  @property
  def prop_id(self):
    """Index of the focal prop, or `None` when no prop is targeted."""
    return self._prop_id

  @property
  def pedestal_id(self):
    """Index of the targeted pedestal, or `None` before the first episode."""
    return self._pedestal_id
|
|
|
|
|
|
class PhasedBoxCarry(composer.Task):
  """A prop-carry task that transitions between multiple phases.

  The task maintains two symbolic states (ordered dicts of relation arrays):
  `_target_state`, what the transition manager wants to be true, and
  `_current_state`, what is physically true (recomputed from contacts and
  distances each step). A sparse reward is given on the step where the two
  first match, at which point the manager advances to the next phase.
  """

  def __init__(
      self,
      walker,
      num_props,
      num_pedestals,
      proto_modifier=None,
      transition_class=SinglePropFourPhases,
      min_prop_gap=0.05,
      pedestal_height_range=(0.45, 0.75),
      log_transitions=False,
      negative_reward_on_failure_termination=True,
      use_single_pedestal_color=True,
      priority_friction=False,
      fixed_initialization_phase=None):
    """Initialize phased/instructed box-carrying ("warehouse") task.

    Args:
      walker: the walker to be used in this task.
      num_props: the number of props in the task scene.
      num_pedestals: the number of floating shelves (pedestals) in the task
        scene.
      proto_modifier: function to modify trajectory proto.
      transition_class: the object that handles the transition logic.
      min_prop_gap: arms are automatically opened to leave a gap around the prop
        to avoid problematic collisions upon initialization.
      pedestal_height_range: range of heights for the pedestal.
      log_transitions: logging/printing of transitions.
      negative_reward_on_failure_termination: boolean for whether to provide
        negative sparse rewards on failure termination.
      use_single_pedestal_color: boolean option for pedestals being the same
        color or different colors.
      priority_friction: sets friction priority thereby making prop objects have
        higher friction.
      fixed_initialization_phase: an instance of the `WarehousePhase` enum that
        specifies the phase in which to always initialize the task, or `None` if
        the initial task phase should be chosen randomly for each episode.
    """
    self._num_props = num_props
    self._num_pedestals = num_pedestals
    self._proto_modifier = proto_modifier
    self._transition_manager = transition_class(
        fixed_initialization_phase=fixed_initialization_phase)
    self._min_prop_gap = min_prop_gap
    self._pedestal_height_range = pedestal_height_range
    self._log_transitions = log_transitions
    # Symbolic goal state, rewritten by the transition manager.
    self._target_state = collections.OrderedDict([
        (WALKER_PEDESTAL, np.zeros(num_pedestals)),
        (WALKER_PROP, np.zeros(num_props)),
        (PROP_PEDESTAL, np.zeros([num_props, num_pedestals]))
    ])
    # Symbolic physical state, recomputed in `_update_current_state`.
    self._current_state = collections.OrderedDict([
        (WALKER_PEDESTAL, np.zeros(num_pedestals)),
        (WALKER_PROP, np.zeros(num_props)),
        (PROP_PEDESTAL, np.zeros([num_props, num_pedestals]))
    ])
    self._negative_reward_on_failure_termination = (
        negative_reward_on_failure_termination)
    self._priority_friction = priority_friction

    # Use only clips that involve a medium pedestal with a small or large box.
    clips = sorted(
        set(mocap_data.medium_pedestal())
        & (set(mocap_data.small_box()) | set(mocap_data.large_box())))
    loader = mocap_loader.HDF5TrajectoryLoader(
        mocap_data.H5_PATH, trajectories.SinglePropCarrySegmentedTrajectory)
    self._trajectories = [
        loader.get_trajectory(clip.clip_identifier) for clip in clips]

    self._arena = floors.Floor()

    self._walker = walker
    # Geoms used for failure detection (only feet may touch the ground) and
    # for both-hands-on-prop detection in `_update_current_state`.
    self._feet_geoms = (
        walker.mjcf_model.find('body', 'lfoot').find_all('geom') +
        walker.mjcf_model.find('body', 'rfoot').find_all('geom'))
    self._lhand_geoms = (
        walker.mjcf_model.find('body', 'lhand').find_all('geom'))
    self._rhand_geoms = (
        walker.mjcf_model.find('body', 'rhand').find_all('geom'))
    self._trajectories[0].configure_walkers([self._walker])
    walker.create_root_joints(self._arena.attach(walker))

    # All clips must agree on the control timestep.
    control_timestep = self._trajectories[0].dt
    for i, trajectory in enumerate(self._trajectories):
      if trajectory.dt != control_timestep:
        raise ValueError(
            'Inconsistent control timestep: '
            'trajectories[{}].dt == {} but trajectories[0].dt == {}'
            .format(i, trajectory.dt, control_timestep))
    self.set_timesteps(control_timestep, _PHYSICS_TIMESTEP)

    if use_single_pedestal_color:
      self._pedestal_colors = [_SINGLE_PEDESTAL_COLOR] * num_pedestals
    else:
      self._pedestal_colors = _generate_pedestal_colors(num_pedestals)
    self._pedestals = [props.Pedestal(_PEDESTAL_SIZE, rgba)
                       for rgba in self._pedestal_colors]
    for pedestal in self._pedestals:
      self._arena.attach(pedestal)

    # Placeholder props so observables can be wired up; the real props are
    # re-created per episode in `initialize_episode_mjcf`.
    self._props = [
        self._trajectories[0].create_props(
            priority_friction=self._priority_friction)[0]
        for _ in range(num_props)
    ]
    for prop in self._props:
      self._arena.add_free_entity(prop)

    self._task_observables = collections.OrderedDict()

    # One-hot encoding of the current phase (5 phases incl. TERMINATED).
    self._task_observables['target_phase'] = observable.Generic(
        lambda _: one_hot(self._transition_manager.phase.value, num_unique=5))

    def ego_prop_xpos(physics):
      # Focal prop position in the walker's egocentric frame; zeros when no
      # prop is currently focal.
      prop_id = self._focal_prop_id
      if prop_id is None:
        return np.zeros((3,))
      prop = self._props[prop_id]
      prop_xpos, _ = prop.get_pose(physics)
      walker_xpos = physics.bind(self._walker.root_body).xpos
      return self._walker.transform_vec_to_egocentric_frame(
          physics, prop_xpos - walker_xpos)
    self._task_observables['target_prop/xpos'] = (
        observable.Generic(ego_prop_xpos))

    def prop_zaxis(physics):
      # Third column of the focal prop's rotation matrix (world z-axis of the
      # prop); zeros when no prop is currently focal.
      prop_id = self._focal_prop_id
      if prop_id is None:
        return np.zeros((3,))
      prop = self._props[prop_id]
      prop_xmat = physics.bind(
          mjcf.get_attachment_frame(prop.mjcf_model)).xmat
      return prop_xmat[[2, 5, 8]]
    self._task_observables['target_prop/zaxis'] = (
        observable.Generic(prop_zaxis))

    def ego_pedestal_xpos(physics):
      # Focal pedestal position in the walker's egocentric frame.
      pedestal_id = self._focal_pedestal_id
      if pedestal_id is None:
        return np.zeros((3,))
      pedestal = self._pedestals[pedestal_id]
      pedestal_xpos, _ = pedestal.get_pose(physics)
      walker_xpos = physics.bind(self._walker.root_body).xpos
      return self._walker.transform_vec_to_egocentric_frame(
          physics, pedestal_xpos - walker_xpos)
    self._task_observables['target_pedestal/xpos'] = (
        observable.Generic(ego_pedestal_xpos))

    for obs in (self._walker.observables.proprioception +
                self._walker.observables.kinematic_sensors +
                self._walker.observables.dynamic_sensors +
                list(self._task_observables.values())):
      obs.enabled = True

    self._focal_prop_id = None
    self._focal_pedestal_id = None

  @property
  def root_entity(self):
    """The arena containing the walker, props and pedestals."""
    return self._arena

  @property
  def task_observables(self):
    """Ordered dict of task-specific observables."""
    return self._task_observables

  @property
  def name(self):
    return 'warehouse'

  def initialize_episode_mjcf(self, random_state):
    """Resets episode bookkeeping, repositions pedestals, re-creates props."""
    self._reward = 0.0
    self._discount = 1.0
    self._should_terminate = False
    # NOTE(review): this attribute appears to be write-only; `after_step`
    # reads `_previous_step_success`, which is set in `initialize_episode`.
    # Looks vestigial — confirm before removing.
    self._before_step_success = False
    for target_value in self._target_state.values():
      target_value[:] = 0
    # Place pedestals at even angular intervals with randomized radial
    # distance and height.
    for pedestal_id, pedestal in enumerate(self._pedestals):
      angle = 2 * np.pi * pedestal_id / len(self._pedestals)
      dist = (_BASE_PEDESTAL_DIST +
              _PEDESTAL_DIST_DELTA * random_state.uniform(-1, 1))

      height = random_state.uniform(*self._pedestal_height_range)
      # Subtract the half-height so the pedestal TOP sits at `height`.
      pedestal_pos = [dist * np.cos(angle), dist * np.sin(angle),
                      height - pedestal.geom.size[2]]
      mjcf.get_attachment_frame(pedestal.mjcf_model).pos = pedestal_pos

    # Re-create props from freshly sampled (and possibly modified) clips.
    for prop in self._props:
      prop.detach()
    self._props = []
    self._trajectory_for_prop = []

    for prop_id in range(self._num_props):
      trajectory = random_state.choice(self._trajectories)
      if self._proto_modifier:
        trajectory = trajectory.get_modified_trajectory(
            self._proto_modifier, random_state=random_state)
      prop = trajectory.create_props(
          priority_friction=self._priority_friction)[0]
      prop.mjcf_model.model = 'prop_{}'.format(prop_id)
      self._arena.add_free_entity(prop)
      self._props.append(prop)
      self._trajectory_for_prop.append(trajectory)

  def _settle_props(self, physics):
    """Steps physics until props rest motionless in the target configuration.

    Prop velocities are zeroed first. Stepping stops once the current symbolic
    state matches the target AND every prop free-joint velocity is below
    `_SETTLE_QVEL_TOL`, or after `_MAX_SETTLE_STEPS` steps. The simulation
    clock is then reset so settling consumes no episode time.
    """
    prop_freejoints = [mjcf.get_attachment_frame(prop.mjcf_model).freejoint
                       for prop in self._props]
    physics.bind(prop_freejoints).qvel = 0
    physics.forward()
    for _ in range(_MAX_SETTLE_STEPS):
      self._update_current_state(physics)
      success = self._evaluate_target_state()
      stopped = max(abs(physics.bind(prop_freejoints).qvel)) < _SETTLE_QVEL_TOL
      if success and stopped:
        break
      else:
        physics.step()
    physics.data.time = 0

  def initialize_episode(self, physics, random_state):
    """Places props on pedestals and the walker per the sampled mocap clip."""
    # Cache geom ids for the contact tests in `after_step` and
    # `_update_current_state`.
    self._ground_geomid = physics.bind(
        self._arena.mjcf_model.worldbody.geom[0]).element_id
    self._feet_geomids = set(physics.bind(self._feet_geoms).element_id)
    self._lhand_geomids = set(physics.bind(self._lhand_geoms).element_id)
    self._rhand_geomids = set(physics.bind(self._rhand_geoms).element_id)

    # Assign each prop to a distinct, randomly chosen free pedestal.
    for prop_id in range(len(self._props)):
      pedestal_id = _find_random_free_pedestal_id(
          self._target_state[PROP_PEDESTAL], random_state)
      # NOTE(review): `pedestal` is unused in this loop.
      pedestal = self._pedestals[pedestal_id]
      self._target_state[PROP_PEDESTAL][prop_id, pedestal_id] = 1

    # Drop each prop onto its assigned pedestal at the clip's relative pose,
    # with a small random horizontal offset, then let physics settle.
    for prop_id, prop in enumerate(self._props):
      trajectory = self._trajectory_for_prop[prop_id]
      pedestal_id = np.nonzero(
          self._target_state[PROP_PEDESTAL][prop_id])[0][0]
      pedestal = self._pedestals[pedestal_id]
      pedestal_pos, _ = pedestal.get_pose(physics)
      pedestal_delta = np.array(
          pedestal_pos - trajectory.infer_pedestal_positions()[0])
      pedestal_delta[2] += pedestal.geom.size[2]
      prop_timestep = trajectory.get_timestep_data(0).props[0]
      prop_pos = prop_timestep.position + np.array(pedestal_delta)
      prop_quat = prop_timestep.quaternion
      prop_pos[:2] += random_state.uniform(
          -pedestal.geom.size[:2] / 2, pedestal.geom.size[:2] / 2)
      prop.set_pose(physics, prop_pos, prop_quat)
    self._settle_props(physics)

    # Sample the starting phase and the corresponding clip segment.
    init_params = self._transition_manager.initialize_episode(
        self._target_state, random_state)
    if self._log_transitions:
      logging.info(init_params)
    self._on_transition(physics)

    init_prop = self._props[init_params.prop_id]
    init_pedestal = self._pedestals[init_params.pedestal_id]
    self._init_prop_id = init_params.prop_id
    self._init_pedestal_id = init_params.pedestal_id
    init_trajectory = self._trajectory_for_prop[init_params.prop_id]
    init_timestep = init_trajectory.get_random_timestep_in_segment(
        init_params.clip_segment, random_state)

    # Build the rigid transform (horizontal translation + yaw) that maps the
    # mocap clip's pedestal onto the initialization pedestal in this arena.
    trajectory_pedestal_pos = init_trajectory.infer_pedestal_positions()[0]
    init_pedestal_pos = np.array(init_pedestal.get_pose(physics)[0])
    delta_pos = init_pedestal_pos - trajectory_pedestal_pos
    delta_pos[2] = 0
    delta_angle = np.pi + np.arctan2(init_pedestal_pos[1], init_pedestal_pos[0])
    delta_quat = (np.cos(delta_angle / 2), 0, 0, np.sin(delta_angle / 2))

    trajectory_pedestal_to_walker = (
        init_timestep.walkers[0].position - trajectory_pedestal_pos)
    rotated_pedestal_to_walker = _rotate_vector_by_quaternion(
        trajectory_pedestal_to_walker, delta_quat)

    # Pose the walker at the clip's pedestal-relative offset, then shift the
    # whole pose (and velocities) by the pedestal transform.
    self._walker.set_pose(
        physics,
        position=trajectory_pedestal_pos + rotated_pedestal_to_walker,
        quaternion=init_timestep.walkers[0].quaternion)
    self._walker.set_velocity(
        physics, velocity=init_timestep.walkers[0].velocity,
        angular_velocity=init_timestep.walkers[0].angular_velocity)
    self._walker.shift_pose(
        physics, position=delta_pos, quaternion=delta_quat,
        rotate_velocity=True)
    physics.bind(self._walker.mocap_joints).qpos = (
        init_timestep.walkers[0].joints)
    physics.bind(self._walker.mocap_joints).qvel = (
        init_timestep.walkers[0].joints_velocity)

    # In these segments the prop's pose also comes from the clip (it is in or
    # near the walker's hands rather than resting on a pedestal).
    if init_params.clip_segment in (trajectories.ClipSegment.CARRY1,
                                    trajectories.ClipSegment.CARRY2,
                                    trajectories.ClipSegment.PUTDOWN):
      trajectory_pedestal_to_prop = (
          init_timestep.props[0].position - trajectory_pedestal_pos)
      rotated_pedestal_to_prop = _rotate_vector_by_quaternion(
          trajectory_pedestal_to_prop, delta_quat)
      init_prop.set_pose(
          physics,
          position=trajectory_pedestal_pos + rotated_pedestal_to_prop,
          quaternion=init_timestep.props[0].quaternion)
      init_prop.set_velocity(
          physics, velocity=init_timestep.props[0].velocity,
          angular_velocity=init_timestep.props[0].angular_velocity)
      init_prop.shift_pose(
          physics, position=delta_pos,
          quaternion=delta_quat, rotate_velocity=True)

      # If we have moved the pedestal upwards during height initialization,
      # the prop may now be lodged inside it. We fix that here.
      if init_pedestal_pos[2] > trajectory_pedestal_pos[2]:
        init_prop_geomid = physics.bind(init_prop.geom).element_id
        init_pedestal_geomid = physics.bind(init_pedestal.geom).element_id
        disallowed_contact = sorted((init_prop_geomid, init_pedestal_geomid))
        def has_disallowed_contact():
          # True while the prop's geom is still touching the pedestal's geom.
          physics.forward()
          for contact in physics.data.contact:
            if sorted((contact.geom1, contact.geom2)) == disallowed_contact:
              return True
          return False
        # Nudge the prop up 1mm at a time until it clears the pedestal.
        while has_disallowed_contact():
          init_prop.shift_pose(physics, (0, 0, 0.001))

    self._move_arms_if_necessary(physics)
    self._update_current_state(physics)
    self._previous_step_success = self._evaluate_target_state()

    self._focal_prop_id = self._init_prop_id
    self._focal_pedestal_id = self._init_pedestal_id

  def _move_arms_if_necessary(self, physics):
    """Opens the walker's arms so no prop/pedestal starts inside them."""
    if self._min_prop_gap is not None:
      for entity in self._props + self._pedestals:
        try:
          arm_opener.open_arms_for_prop(
              physics, self._walker.left_arm_root, self._walker.right_arm_root,
              entity.mjcf_model, self._min_prop_gap)
        except RuntimeError as e:
          # Re-raise as a composer error so the episode is re-initialized.
          raise composer.EpisodeInitializationError(e)

  def after_step(self, physics, random_state):
    """Computes reward, phase transitions and termination after each step."""
    # First we check for failure termination.
    # Any contact between the ground and a non-foot geom is a failure.
    for contact in physics.data.contact:
      if ((contact.geom1 == self._ground_geomid and
           contact.geom2 not in self._feet_geomids) or
          (contact.geom2 == self._ground_geomid and
           contact.geom1 not in self._feet_geomids)):
        if self._negative_reward_on_failure_termination:
          self._reward = -_SPARSE_REWARD
        else:
          self._reward = 0.0
        self._should_terminate = True
        self._discount = 0.0
        return

    # Then check for normal reward and state transitions.
    self._update_current_state(physics)
    success = self._evaluate_target_state()
    # Sparse reward only on the rising edge of success.
    if success and not self._previous_step_success:
      self._reward = _SPARSE_REWARD
      new_phase = (
          self._transition_manager.on_success(self._target_state, random_state))
      self._should_terminate = (new_phase == WarehousePhase.TERMINATED)
      self._on_transition(physics)
      self._previous_step_success = self._evaluate_target_state()
    else:
      self._reward = 0.0

  def _on_transition(self, physics):
    """Refreshes focal prop/pedestal ids and pedestal highlight colors."""
    self._focal_prop_id = self._transition_manager.prop_id
    self._focal_pedestal_id = self._transition_manager.pedestal_id
    if self._log_transitions:
      logging.info('target_state:\n%s', self._target_state)
    for pedestal_id, pedestal_active in enumerate(
        self._target_state[WALKER_PEDESTAL]):
      r, g, b, a = self._pedestal_colors[pedestal_id]
      if pedestal_active:
        # Brighten the targeted pedestal by raising its luminosity.
        h, _, s = colorsys.rgb_to_hls(r, g, b)
        r, g, b = colorsys.hls_to_rgb(h, _ACTIVATED_PEDESTAL_L, s)
      physics.bind(self._pedestals[pedestal_id].geom).rgba = (r, g, b, a)

  def get_reward(self, physics):
    """Returns the sparse reward computed in `after_step`."""
    return self._reward

  def get_discount(self, physics):
    """Returns 0.0 on failure termination, 1.0 otherwise."""
    return self._discount

  def should_terminate_episode(self, physics):
    return self._should_terminate

  def _update_current_state(self, physics):
    """Recomputes the symbolic current state from the physics state."""
    for current_state_value in self._current_state.values():
      current_state_value[:] = 0

    # Check if the walker is near each pedestal (horizontal distance only).
    walker_pos, _ = self._walker.get_pose(physics)
    for pedestal_id, pedestal in enumerate(self._pedestals):
      target_pos, _ = pedestal.get_pose(physics)
      walker_to_target_dist = np.linalg.norm(walker_pos[:2] - target_pos[:2])
      if walker_to_target_dist <= _TARGET_TOL:
        self._current_state[WALKER_PEDESTAL][pedestal_id] = 1

    # Map geom ids back to prop/pedestal indices for contact attribution.
    prop_geomids = {
        physics.bind(prop.geom).element_id: prop_id
        for prop_id, prop in enumerate(self._props)}
    pedestal_geomids = {
        physics.bind(pedestal.geom).element_id: pedestal_id
        for pedestal_id, pedestal in enumerate(self._pedestals)}

    prop_pedestal_contact_counts = np.zeros(
        [self._num_props, self._num_pedestals])
    prop_lhand_contact = [False] * self._num_props
    prop_rhand_contact = [False] * self._num_props
    for contact in physics.data.contact:
      prop_id = prop_geomids.get(contact.geom1, prop_geomids.get(contact.geom2))
      pedestal_id = pedestal_geomids.get(
          contact.geom1, pedestal_geomids.get(contact.geom2))
      has_lhand = (contact.geom1 in self._lhand_geomids or
                   contact.geom2 in self._lhand_geomids)
      has_rhand = (contact.geom1 in self._rhand_geomids or
                   contact.geom2 in self._rhand_geomids)
      if prop_id is not None and pedestal_id is not None:
        prop_pedestal_contact_counts[prop_id, pedestal_id] += 1
      if prop_id is not None and has_lhand:
        prop_lhand_contact[prop_id] = True
      if prop_id is not None and has_rhand:
        prop_rhand_contact[prop_id] = True

    # A prop counts as "held" when both hands touch it, and as "on a
    # pedestal" when it makes at least 4 contact points with that pedestal.
    for prop_id in range(self._num_props):
      if prop_lhand_contact[prop_id] and prop_rhand_contact[prop_id]:
        self._current_state[WALKER_PROP][prop_id] = 1
      pedestal_contact_counts = prop_pedestal_contact_counts[prop_id]
      for pedestal_id in range(self._num_pedestals):
        if pedestal_contact_counts[pedestal_id] >= 4:
          self._current_state[PROP_PEDESTAL][prop_id, pedestal_id] = 1

  def _evaluate_target_state(self):
    """Returns True iff the current symbolic state matches the target."""
    return _is_same_state(self._current_state, self._target_state)