Skip to content

Commit 0febbcc

Browse files
authored
Merge pull request #138 from utn-mi/136-read-depth-data-from-realsense-cam
feat(sim): depth image data read out of sim and forwarded to frameset and camera env
2 parents 4b650e5 + df37e32 commit 0febbcc

7 files changed

Lines changed: 91 additions & 31 deletions

File tree

python/rcsss/_core/sim.pyi

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ class FrameSet:
121121
@property
122122
def color_frames(self) -> dict[str, numpy.ndarray[M, numpy.dtype[numpy.uint8]]]: ...
123123
@property
124+
def depth_frames(self) -> dict[str, numpy.ndarray[M, numpy.dtype[numpy.float32]]]: ...
125+
@property
124126
def timestamp(self) -> float: ...
125127

126128
class FrankaHand(rcsss._core.common.Gripper):

python/rcsss/camera/sim.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,20 @@ def _cpp_to_python_frames(self, cpp_frameset: _FrameSet | None) -> FrameSet | No
7575
if cpp_frameset is None:
7676
return None
7777
frames: dict[str, Frame] = {}
78-
for frame_name, cpp_frame in cpp_frameset.color_frames.items():
79-
# copy, reshape and flip the frame
80-
np_frame = np.copy(cpp_frame).reshape(self._cfg.resolution_height, self._cfg.resolution_width, 3)[::-1]
81-
cameraframe = CameraFrame(color=DataFrame(data=np_frame, timestamp=cpp_frameset.timestamp))
78+
c_frames_iter = cpp_frameset.color_frames.items()
79+
d_frames_iter = cpp_frameset.depth_frames.items()
80+
for (color_name, color_frame), (depth_name, depth_frame) in zip(c_frames_iter, d_frames_iter, strict=True):
81+
assert color_name == depth_name
82+
color_np_frame = np.copy(color_frame).reshape(self._cfg.resolution_height, self._cfg.resolution_width, 3)[
83+
::-1
84+
]
85+
depth_np_frame = np.copy(depth_frame).reshape(self._cfg.resolution_height, self._cfg.resolution_width, 1)
86+
cameraframe = CameraFrame(
87+
color=DataFrame(data=color_np_frame, timestamp=cpp_frameset.timestamp),
88+
depth=DataFrame(data=depth_np_frame, timestamp=cpp_frameset.timestamp),
89+
)
8290
frame = Frame(camera=cameraframe, avg_timestamp=cpp_frameset.timestamp)
83-
frames[frame_name] = frame
91+
frames[color_name] = frame
8492
return FrameSet(frames=frames, avg_timestamp=cpp_frameset.timestamp)
8593

8694
@property
@@ -90,7 +98,7 @@ def config(self) -> SimCameraSetConfig:
9098
@property
9199
def camera_names(self) -> list[str]:
92100
"""Should return a list of the activated human readable names of the cameras."""
93-
return [camera.identifier for camera in self._cfg.cameras.values()]
101+
return list(self._cfg.cameras.keys())
94102

95103
@property
96104
def name_to_identifier(self) -> dict[str, str]:

python/rcsss/envs/base.py

Lines changed: 64 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -97,16 +97,19 @@ class GripperDictType(RCSpaceType):
9797
class CameraDictType(RCSpaceType):
9898
frames: dict[
9999
Annotated[str, "camera_names"],
100-
Annotated[
101-
np.ndarray,
102-
# needs to be filled with values downstream
103-
lambda height, width: gym.spaces.Box(
104-
low=0,
105-
high=255,
106-
shape=(height, width, 3),
107-
dtype=np.uint8,
108-
),
109-
"frame",
100+
dict[
101+
Annotated[str, "camera_type"], # "rgb" or "depth"
102+
Annotated[
103+
np.ndarray,
104+
# needs to be filled with values downstream
105+
lambda height, width, color_dim=3, dtype=np.uint8, low=0, high=255: gym.spaces.Box(
106+
low=low,
107+
high=high,
108+
shape=(height, width, color_dim),
109+
dtype=dtype,
110+
),
111+
"frame",
112+
],
110113
],
111114
]
112115

@@ -387,22 +390,46 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
387390

388391

389392
class CameraSetWrapper(ActObsInfoWrapper):
390-
def __init__(self, env, camera_set: BaseCameraSet):
393+
RGB_KEY = "rgb"
394+
DEPTH_KEY = "depth"
395+
396+
def __init__(self, env, camera_set: BaseCameraSet, include_depth: bool = False):
391397
super().__init__(env)
392398
self.unwrapped: FR3Env
393399
self.camera_set = camera_set
400+
self.include_depth = include_depth
394401

395402
self.observation_space: gym.spaces.Dict
403+
# rgb is always included
404+
params: dict = {
405+
"frame": {
406+
"height": camera_set.config.resolution_height,
407+
"width": camera_set.config.resolution_width,
408+
}
409+
}
410+
if self.include_depth:
411+
# depth is optional
412+
params.update(
413+
{
414+
f"/{name}/{self.DEPTH_KEY}/frame": {
415+
"height": camera_set.config.resolution_height,
416+
"width": camera_set.config.resolution_width,
417+
"color_dim": 1,
418+
"dtype": np.float32,
419+
"low": 0.0,
420+
"high": 1.0,
421+
}
422+
for name in camera_set.camera_names
423+
}
424+
)
396425
self.observation_space.spaces.update(
397426
get_space(
398427
CameraDictType,
399-
child_dict_keys_to_unfold={"camera_names": camera_set.camera_names},
400-
params={
401-
"frame": {
402-
"height": camera_set.config.resolution_height,
403-
"width": camera_set.config.resolution_height,
404-
}
428+
child_dict_keys_to_unfold={
429+
"camera_names": camera_set.camera_names,
430+
"camera_type": [self.RGB_KEY, self.DEPTH_KEY] if self.include_depth else [self.RGB_KEY],
405431
},
432+
params=params,
406433
).spaces
407434
)
408435
self.camera_key = get_space_keys(CameraDictType)[0]
@@ -419,11 +446,27 @@ def observation(self, observation: dict, info: dict[str, Any]) -> tuple[dict[str
419446
observation[self.camera_key] = {}
420447
info["camera_available"] = False
421448
return observation, info
422-
assert frameset is not None, "No frame available."
423-
color_frame_dict: dict[str, np.ndarray] = {
424-
camera_name: frame.camera.color.data for camera_name, frame in frameset.frames.items()
449+
450+
def check_depth(depth):
451+
if self.include_depth and depth is None:
452+
msg = "Depth is not available in data but still requested."
453+
raise ValueError(msg)
454+
return self.include_depth
455+
456+
frame_dict: dict[str, dict[str, np.ndarray]] = {
457+
camera_name: (
458+
{
459+
self.RGB_KEY: frame.camera.color.data,
460+
}
461+
if check_depth(frame.camera.depth)
462+
else {
463+
self.RGB_KEY: frame.camera.color.data,
464+
self.DEPTH_KEY: frame.camera.depth.data, # type: ignore
465+
}
466+
)
467+
for camera_name, frame in frameset.frames.items()
425468
}
426-
observation[self.camera_key] = color_frame_dict
469+
observation[self.camera_key] = frame_dict
427470

428471
info["camera_available"] = True
429472
if frameset.avg_timestamp is not None:

python/rcsss/envs/factories.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def fr3_sim_env(
138138

139139
if camera_set_cfg is not None:
140140
camera_set = SimCameraSet(simulation, camera_set_cfg)
141-
env = CameraSetWrapper(env, camera_set)
141+
env = CameraSetWrapper(env, camera_set, include_depth=True)
142142

143143
if gripper_cfg is not None:
144144
gripper = sim.FrankaHand(simulation, "0", gripper_cfg)

src/pybind/rcsss.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ PYBIND11_MODULE(_core, m) {
481481
py::class_<rcs::sim::FrameSet>(sim, "FrameSet")
482482
.def(py::init<>())
483483
.def_readonly("color_frames", &rcs::sim::FrameSet::color_frames)
484+
.def_readonly("depth_frames", &rcs::sim::FrameSet::depth_frames)
484485
.def_readonly("timestamp", &rcs::sim::FrameSet::timestamp);
485486
py::class_<rcs::sim::SimCameraSet>(sim, "SimCameraSet")
486487
.def(py::init<std::shared_ptr<rcs::sim::Sim>,

src/sim/camera.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ void SimCameraSet::frame_callback(const std::string& id, mjrContext& ctx,
7777
int W = viewport.width;
7878
int H = viewport.height;
7979

80-
// allocate rgb buffers
80+
// allocate rgb and depth buffers
8181
ColorFrame frame = ColorFrame::Zero(3 * W * H);
82-
82+
DepthFrame depth = DepthFrame::Zero(1 * W * H);
8383
// update abstract scene
8484
// TODO: we might be able to call this once for all cameras
8585
// there is also a mjv_updateCamera function
@@ -91,18 +91,22 @@ void SimCameraSet::frame_callback(const std::string& id, mjrContext& ctx,
9191
mjr_render(viewport, &scene, &ctx);
9292

9393
// read rgb and depth buffers
94-
mjr_readPixels(frame.data(), NULL, viewport, &ctx);
94+
// depth documentation can be found here:
95+
// https://registry.khronos.org/OpenGL-Refpages/gl4/html/glReadPixels.xhtml
96+
mjr_readPixels(frame.data(), depth.data(), viewport, &ctx);
9597

9698
auto ts = this->sim->d->time;
9799
std::lock_guard<std::mutex> lock(buffer_lock);
98100
// The following code assumes that all render callbacks for a timestep
99101
// happen directly after each other
100102
if (this->last_ts == ts) {
101103
buffer[buffer.size() - 1].color_frames[id] = frame;
104+
buffer[buffer.size() - 1].depth_frames[id] = depth;
102105
} else {
103106
FrameSet fs;
104107
fs.timestamp = ts;
105108
fs.color_frames[id] = frame;
109+
fs.depth_frames[id] = depth;
106110
buffer.push_back(fs);
107111
this->last_ts = ts;
108112
}

src/sim/camera.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@ struct SimCameraSetConfig {
3636

3737
// (H,W,3)
3838
typedef Eigen::Matrix<unsigned char, Eigen::Dynamic, 1> ColorFrame;
39+
typedef Eigen::Matrix<float, Eigen::Dynamic, 1> DepthFrame;
3940

4041
struct FrameSet {
4142
std::unordered_map<std::string, ColorFrame> color_frames;
43+
std::unordered_map<std::string, DepthFrame> depth_frames;
4244
mjtNum timestamp;
4345
};
4446

0 commit comments

Comments
 (0)