# guodong's blog

## 三维重建:可微分的渲染工具tf_mesh_renderer源码阅读

tf_mesh_renderer是可微分的渲染工具，并用TensorFlow来实现。项目仓库地址：tf_mesh_renderer

This is a differentiable, 3D mesh renderer using TensorFlow. This is not an official Google product.

C++渲染内核的输入是一组3D顶点和一组三角形，其中每个三角形由三个顶点的id组成；渲染器的输出是每个像素对应的三角形id和重心权重。在重心权重图像中，每个像素的值是该像素中心点相对于覆盖该像素的三角形（由三角形id确定）的重心权重。渲染器还提供像素中心的重心权重关于顶点位置的导数。

# 2. camera_utils.py

```python
"""Collection of TF functions for managing 3D camera matrices."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import tensorflow as tf

def perspective(aspect_ratio, fov_y, near_clip, far_clip):
    """Builds a batch of perspective projection matrices.

    Mimics gluPerspective (third_party/GL/glu/include/GLU/glu.h).

    Args:
      aspect_ratio: float, the image aspect ratio (width / height).
      fov_y: 1-D float32 Tensor with shape [batch_size]; vertical field of view
        of the output, in degrees.
      near_clip: 1-D float32 Tensor with shape [batch_size]; distance to the
        near clipping plane.
      far_clip: 1-D float32 Tensor with shape [batch_size]; distance to the far
        clipping plane.

    Returns:
      A [batch_size, 4, 4] float tensor that maps right-handed points in eye
      space to left-handed points in clip space.
    """
    # Multiplying by pi / 360 converts degrees to radians and halves the angle
    # in a single step; the reciprocal of the tangent is the cotangent.
    cot_half_fov = 1.0 / tf.tan(fov_y * (math.pi / 360.0))
    clip_depth = far_clip - near_clip
    z_scale = -(far_clip + near_clip) / clip_depth  # row 3, column 3
    z_offset = -2.0 * (far_clip * near_clip / clip_depth)  # row 3, column 4

    zero = tf.zeros_like(z_offset, dtype=tf.float32)
    x_scale = cot_half_fov / aspect_ratio
    minus_one = -tf.ones_like(z_offset, dtype=tf.float32)

    # Each entry is a [batch_size] vector; the rows are laid out in row-major
    # order so that the reshape below recovers a 4x4 grid of entries.
    rows = [
        [x_scale, zero, zero, zero],
        [zero, cot_half_fov, zero, zero],
        [zero, zero, z_scale, z_offset],
        [zero, zero, minus_one, zero],
    ]
    flat_entries = tf.concat(
        [entry for row in rows for entry in row], axis=0)

    # [16 * batch_size] -> [4, 4, batch_size] -> [batch_size, 4, 4].
    entries_by_position = tf.reshape(flat_entries, [4, 4, -1])
    return tf.transpose(entries_by_position, [2, 0, 1])

def look_at(eye, center, world_up):
    """Computes camera viewing (world-to-eye) matrices.

    Mimics gluLookAt (third_party/GL/glu/include/GLU/glu.h).

    Args:
      eye: 2-D float32 tensor with shape [batch_size, 3] containing the XYZ
        world space position of the camera.
      center: 2-D float32 tensor with shape [batch_size, 3] containing a
        position along the center of the camera's gaze (the look-at target).
      world_up: 2-D float32 tensor with shape [batch_size, 3] specifying the
        world's up direction; the output camera will have no tilt with respect
        to this direction.

    Returns:
      A [batch_size, 4, 4] float tensor containing a right-handed camera
      extrinsics matrix that maps points from world space to points in eye
      space.
    """
    batch_size = center.shape[0].value
    # Norms at or below this threshold are treated as degenerate.
    vector_degeneracy_cutoff = 1e-6

    forward = center - eye
    forward_norm = tf.norm(forward, ord='euclidean', axis=1, keep_dims=True)
    forward_check = tf.assert_greater(
        forward_norm,
        vector_degeneracy_cutoff,
        message='Camera matrix is degenerate because eye and center are close.')
    # The assertion only runs in graph mode if something depends on it, so the
    # normalization is made to depend on it explicitly.
    with tf.control_dependencies([forward_check]):
        forward = tf.divide(forward, forward_norm)

    to_side = tf.cross(forward, world_up)
    to_side_norm = tf.norm(to_side, ord='euclidean', axis=1, keep_dims=True)
    to_side_check = tf.assert_greater(
        to_side_norm,
        vector_degeneracy_cutoff,
        message='Camera matrix is degenerate because up and gaze are close or '
        'because up is degenerate.')
    with tf.control_dependencies([to_side_check]):
        to_side = tf.divide(to_side, to_side_norm)
    cam_up = tf.cross(to_side, forward)

    # Homogeneous [0, 0, 0, 1] vector, reused as the last column of the
    # rotation and (reshaped) as the last row of the translation.
    w_column = tf.constant(
        batch_size * [[0., 0., 0., 1.]], dtype=tf.float32)  # [batch_size, 4]
    w_column = tf.reshape(w_column, [batch_size, 4, 1])

    # Rotation part (definition as in http://52zju.cn/?p=582). The third row
    # is -forward because the camera looks along the negative Z axis.
    view_rotation = tf.stack(
        [to_side, cam_up, -forward,
         tf.zeros_like(to_side, dtype=tf.float32)],
        axis=1)  # [batch_size, 4, 3]
    view_rotation = tf.concat(
        [view_rotation, w_column], axis=2)  # [batch_size, 4, 4]

    # Translation part: moves the eye position to the origin.
    identity_batch = tf.tile(tf.expand_dims(tf.eye(3), 0), [batch_size, 1, 1])
    view_translation = tf.concat(
        [identity_batch, tf.expand_dims(-eye, 2)], 2)  # [batch_size, 3, 4]
    view_translation = tf.concat(
        [view_translation,
         tf.reshape(w_column, [batch_size, 1, 4])], 1)  # [batch_size, 4, 4]

    # Translate first, then rotate.
    camera_matrices = tf.matmul(view_rotation, view_translation)
    return camera_matrices

def euler_matrices(angles):
    """Builds XYZ Tait-Bryan (improper Euler angle) rotation matrices.

    The result is 4x4 so it can be multiplied directly with other homogeneous
    transformations.

    Args:
      angles: a [batch_size, 3] tensor containing X, Y, and Z angles in radians.

    Returns:
      a [batch_size, 4, 4] tensor of matrices.
    """
    cosines = tf.cos(angles)
    sines = tf.sin(angles)
    # Per-axis shorthands; each one is a [batch_size] vector.
    cx, cy, cz = cosines[:, 0], cosines[:, 1], cosines[:, 2]
    sx, sy, sz = sines[:, 0], sines[:, 1], sines[:, 2]

    zero = tf.zeros_like(sx)
    one = tf.ones_like(sx)

    # Matrix entries in row-major order.
    rows = [
        [cz * cy, cz * sy * sx - cx * sz, sz * sx + cz * cx * sy, zero],
        [cy * sz, cz * cx + sz * sy * sx, cx * sz * sy - cz * sx, zero],
        [-sy, cy * sx, cy * cx, zero],
        [zero, zero, zero, one],
    ]
    flat_entries = tf.concat(
        [entry for row in rows for entry in row], axis=0)

    # [16 * batch_size] -> [4, 4, batch_size] -> [batch_size, 4, 4].
    entries_by_position = tf.reshape(flat_entries, [4, 4, -1])
    return tf.transpose(entries_by_position, [2, 0, 1])
```

# 3. rasterize_triangles.py

```
"""Differentiable triangle rasterizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

# Load the custom rasterization op kernel (a compiled shared library located
# relative to the TEST_SRCDIR environment variable).
rasterize_triangles_module = tf.load_op_library(
    os.path.join(
        os.environ['TEST_SRCDIR'],
        'tf_mesh_renderer/mesh_renderer_dir/kernels/rasterize_triangles_kernel.so'))

# This epsilon should be smaller than any valid barycentric reweighting factor
# (i.e. the per-pixel reweighting factor used to correct for the effects of
# perspective-incorrect barycentric interpolation). It is necessary primarily
# because the reweighting factor will be 0 for factors outside the mesh, and we
# need to ensure the image color and gradient outside the region of the mesh are
# 0.
_MINIMUM_REWEIGHTING_THRESHOLD = 1e-6

# This epsilon is the minimum absolute value of a homogeneous coordinate before
# it is clipped. It should be sufficiently large such that the output of
# the perspective divide step with this denominator still has good working
# precision with 32 bit arithmetic, and sufficiently small so that in practice
# vertices are almost never close enough to a clipping plane to be thresholded.
_MINIMUM_PERSPECTIVE_DIVIDE_THRESHOLD = 1e-6

def rasterize_triangles(vertices, attributes, triangles, projection_matrices,
                        image_width, image_height, background_value):
    """Rasterizes the input scene and computes interpolated vertex attributes.

    NOTE: the rasterizer does no triangle clipping. Triangles that lie outside
    the viewing frustum (esp. behind the camera) may be drawn incorrectly.

    Args:
      vertices: 3-D float32 tensor with shape [batch_size, vertex_count, 3].
        Each triplet is an xyz position in model space.
      attributes: 3-D float32 tensor with shape [batch_size, vertex_count,
        attribute_count]. Each vertex attribute is interpolated across the
        triangle using barycentric interpolation.
      triangles: 2-D int32 tensor with shape [triangle_count, 3]. Each triplet
        should contain vertex indices describing a triangle such that the
        triangle's normal points toward the viewer if the forward order of the
        triplet defines a clockwise winding of the vertices. Gradients with
        respect to this tensor are not available.
      projection_matrices: 3-D float tensor with shape [batch_size, 4, 4]
        containing model-view-perspective projection matrices.
      image_width: int specifying desired output image width in pixels.
      image_height: int specifying desired output image height in pixels.
      background_value: a 1-D float32 tensor with shape [attribute_count].
        Pixels that lie outside all triangles take this value.

    Returns:
      A 4-D float32 tensor with shape [batch_size, image_height, image_width,
      attribute_count], containing the interpolated vertex attributes at
      each pixel.

    Raises:
      ValueError: An invalid argument to the method is detected.
    """
    if not image_width > 0:
        raise ValueError('Image width must be > 0.')
    if not image_height > 0:
        raise ValueError('Image height must be > 0.')
    if len(vertices.shape) != 3:
        raise ValueError('The vertex buffer must be 3D.')
    batch_size = vertices.shape[0].value
    vertex_count = vertices.shape[1].value

    # We map the coordinates to normalized device coordinates before passing
    # the scene to the rendering kernel to keep as many ops in tensorflow as
    # possible.

    # Append w = 1 to every vertex to obtain homogeneous coordinates.
    homogeneous_coord = tf.ones([batch_size, vertex_count, 1], dtype=tf.float32)
    vertices_homogeneous = tf.concat([vertices, homogeneous_coord], 2)

    # Vertices are given in row-major order, but the transformation pipeline is
    # column major:
    clip_space_points = tf.matmul(
        vertices_homogeneous, projection_matrices, transpose_b=True)

    # Perspective divide, first thresholding the homogeneous coordinate to avoid
    # the possibility of NaNs:
    clip_space_points_w = tf.maximum(
        tf.abs(clip_space_points[:, :, 3:4]),
        _MINIMUM_PERSPECTIVE_DIVIDE_THRESHOLD) * tf.sign(
            clip_space_points[:, :, 3:4])
    normalized_device_coordinates = (
        clip_space_points[:, :, 0:3] / clip_space_points_w)

    per_image_uncorrected_barycentric_coordinates = []
    per_image_vertex_ids = []
    # The kernel rasterizes one image at a time, so loop over the batch.
    for im in range(batch_size):
        # The third output (the z-buffer) is not used here.
        barycentric_coords, triangle_ids, _ = (
            rasterize_triangles_module.rasterize_triangles(
                normalized_device_coordinates[im, :, :], triangles,
                image_width, image_height))
        per_image_uncorrected_barycentric_coordinates.append(
            tf.reshape(barycentric_coords, [-1, 3]))

        # Gathers the vertex indices now because the indices don't contain a
        # batch identifier, and reindexes the vertex ids to point to a
        # (batch, vertex_id) entry of the flattened vertex buffer.
        vertex_ids = tf.gather(triangles, tf.reshape(triangle_ids, [-1]))
        reindexed_ids = tf.add(vertex_ids, im * vertex_count)
        per_image_vertex_ids.append(reindexed_ids)

    uncorrected_barycentric_coordinates = tf.concat(
        per_image_uncorrected_barycentric_coordinates, axis=0)
    vertex_ids = tf.concat(per_image_vertex_ids, axis=0)

    # Indexes with each pixel's clip-space triangle's extrema (the pixel's
    # 'corner points') ids to get the relevant properties for deferred shading.
    flattened_vertex_attributes = tf.reshape(attributes,
                                             [batch_size * vertex_count, -1])
    corner_attributes = tf.gather(flattened_vertex_attributes, vertex_ids)

    # Barycentric interpolation is linear in the reciprocal of the homogeneous
    # W coordinate, so we use these weights to correct for the effects of
    # perspective distortion after rasterization.
    perspective_distortion_weights = tf.reciprocal(
        tf.reshape(clip_space_points_w, [-1]))
    corner_distortion_weights = tf.gather(perspective_distortion_weights,
                                          vertex_ids)

    # Apply perspective correction to the barycentric coordinates. This step is
    # required since the rasterizer receives normalized-device coordinates
    # (i.e., after perspective division), so it can't apply perspective
    # correction to the interpolated values.
    weighted_barycentric_coordinates = tf.multiply(
        uncorrected_barycentric_coordinates, corner_distortion_weights)
    barycentric_reweighting_factor = tf.reduce_sum(
        weighted_barycentric_coordinates, axis=1)

    corrected_barycentric_coordinates = tf.divide(
        weighted_barycentric_coordinates,
        tf.expand_dims(
            tf.maximum(barycentric_reweighting_factor,
                       _MINIMUM_REWEIGHTING_THRESHOLD),
            axis=1))

    # Computes the pixel attributes by interpolating the known attributes at
    # the corner points of the triangle interpolated with the barycentric
    # coordinates.
    weighted_vertex_attributes = tf.multiply(
        corner_attributes,
        tf.expand_dims(corrected_barycentric_coordinates, axis=2))
    summed_attributes = tf.reduce_sum(weighted_vertex_attributes, axis=1)
    attribute_images = tf.reshape(summed_attributes,
                                  [batch_size, image_height, image_width, -1])

    # Barycentric coordinates should approximately sum to one where there is
    # rendered geometry, but be exactly zero where there is not.
    alphas = tf.clip_by_value(
        tf.reduce_sum(2.0 * corrected_barycentric_coordinates, axis=1),
        0.0, 1.0)
    alphas = tf.reshape(alphas, [batch_size, image_height, image_width, 1])

    # Blend the interpolated attributes over the background value.
    attributes_with_background = (
        alphas * attribute_images + (1.0 - alphas) * background_value)

    return attributes_with_background

# NOTE(review): the gradient registration, the name of the gradient op and the
# input/output indices were reconstructed from a truncated listing -- confirm
# them against the op registration in the compiled kernel library.
@tf.RegisterGradient('RasterizeTriangles')
def _rasterize_triangles_grad(op, df_dbarys, df_dids, df_dz):
    """Gradient function for the custom triangle rasterization op.

    Args:
      op: the forward rasterization op whose gradient is requested.
      df_dbarys: incoming gradient with respect to the barycentric coordinates.
      df_dids: incoming gradient with respect to the triangle ids (ignored).
      df_dz: incoming gradient with respect to the z-buffer (ignored).

    Returns:
      A pair: the result of the kernel's gradient op for the first input, and
      None for the second input.
    """
    # Gradients are only supported for barycentric coordinates. Gradients for
    # the z-buffer are possible as well but not currently implemented.
    del df_dids, df_dz
    return rasterize_triangles_module.rasterize_triangles_grad(
        op.inputs[0], op.inputs[1], op.outputs[0], op.outputs[1], df_dbarys,
        op.get_attr('image_width'), op.get_attr('image_height')), None

```

# 4. mesh_renderer.py

```
"""Differentiable 3-D rendering of a triangle mesh."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import camera_utils
import rasterize_triangles

def phong_shader(normals,
                 alphas,
                 pixel_positions,
                 light_positions,
                 light_intensities,
                 diffuse_colors=None,
                 camera_position=None,
                 specular_colors=None,
                 shininess_coefficients=None,
                 ambient_color=None):
    """Computes pixelwise lighting from rasterized buffers with the Phong model.

    Args:
      normals: a 4D float32 tensor with shape [batch_size, image_height,
        image_width, 3]. The inner dimension is the world space XYZ normal for
        the corresponding pixel. Should be already normalized.
      alphas: a 3D float32 tensor with shape [batch_size, image_height,
        image_width]. The inner dimension is the alpha value (transparency)
        for the corresponding pixel.
      pixel_positions: a 4D float32 tensor with shape [batch_size,
        image_height, image_width, 3]. The inner dimension is the world space
        XYZ position for the corresponding pixel.
      light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The
        XYZ position of each light in the scene. In the same coordinate space
        as pixel_positions.
      light_intensities: a 3D tensor with shape [batch_size, light_count, 3].
        The RGB intensity values for each light. Intensities may be above one.
      diffuse_colors: a 4D float32 tensor with shape [batch_size, image_height,
        image_width, 3]. The inner dimension is the diffuse RGB coefficients at
        a pixel in the range [0, 1].
      camera_position: a 1D tensor with shape [batch_size, 3]. The XYZ camera
        position in the scene. If supplied, specular reflections will be
        computed. If not supplied, specular_colors and shininess_coefficients
        are expected to be None. In the same coordinate space as
        pixel_positions.
      specular_colors: a 4D float32 tensor with shape [batch_size,
        image_height, image_width, 3]. The inner dimension is the specular RGB
        coefficients at a pixel in the range [0, 1]. If None, assumed to be
        tf.zeros().
      shininess_coefficients: A 3D float32 tensor that is broadcasted to shape
        [batch_size, image_height, image_width]. The inner dimension is the
        shininess coefficient for the object at a pixel. Dimensions that are
        constant can be given length 1, so [batch_size, 1, 1] and [1, 1, 1] are
        also valid input shapes.
      ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
        color, which is added to each pixel before tone mapping. If None, it is
        assumed to be tf.zeros().

    Returns:
      A 4D float32 tensor of shape [batch_size, image_height, image_width, 4]
      containing the lit RGBA color values for each image at each pixel. Colors
      are in the range [0,1].

    Raises:
      ValueError: An invalid argument to the method is detected.
    """
    batch_size, image_height, image_width = [
        s.value for s in normals.shape[:-1]]
    light_count = light_positions.shape[1].value
    pixel_count = image_height * image_width
    # Reshape all values to easily do pixelwise computations:
    normals = tf.reshape(normals, [batch_size, -1, 3])
    alphas = tf.reshape(alphas, [batch_size, -1, 1])
    diffuse_colors = tf.reshape(diffuse_colors, [batch_size, -1, 3])
    if camera_position is not None:
        specular_colors = tf.reshape(specular_colors, [batch_size, -1, 3])

    # Ambient component
    output_colors = tf.zeros([batch_size, image_height * image_width, 3])
    if ambient_color is not None:
        ambient_reshaped = tf.expand_dims(ambient_color, axis=1)
        output_colors = tf.add(output_colors,
                               ambient_reshaped * diffuse_colors)

    # Diffuse component
    pixel_positions = tf.reshape(pixel_positions, [batch_size, -1, 3])
    per_light_pixel_positions = tf.stack(
        [pixel_positions] * light_count,
        axis=1)  # [batch_size, light_count, pixel_count, 3]
    directions_to_lights = tf.nn.l2_normalize(
        tf.expand_dims(light_positions, axis=2) - per_light_pixel_positions,
        dim=3)  # [batch_size, light_count, pixel_count, 3]
    # The diffuse component should only contribute when the light and normal
    # face one another (i.e. the dot product is nonnegative):
    normals_dot_lights = tf.clip_by_value(
        tf.reduce_sum(
            tf.expand_dims(normals, axis=1) * directions_to_lights, axis=3),
        0.0, 1.0)  # [batch_size, light_count, pixel_count]
    diffuse_output = tf.expand_dims(
        diffuse_colors, axis=1) * tf.expand_dims(
            normals_dot_lights, axis=3) * tf.expand_dims(
                light_intensities, axis=2)
    diffuse_output = tf.reduce_sum(
        diffuse_output, axis=1)  # [batch_size, pixel_count, 3]
    output_colors = tf.add(output_colors, diffuse_output)

    # Specular component
    if camera_position is not None:
        camera_position = tf.reshape(camera_position, [batch_size, 1, 3])
        mirror_reflection_direction = tf.nn.l2_normalize(
            2.0 * tf.expand_dims(normals_dot_lights, axis=3) * tf.expand_dims(
                normals, axis=1) - directions_to_lights,
            dim=3)
        direction_to_camera = tf.nn.l2_normalize(
            camera_position - pixel_positions, dim=2)
        reflection_direction_dot_camera_direction = tf.reduce_sum(
            tf.expand_dims(direction_to_camera, axis=1) *
            mirror_reflection_direction,
            axis=3)
        # The specular component should only contribute when the reflection is
        # external:
        # NOTE(review): the l2_normalize below rescales the dot products along
        # the pixel axis; kept unchanged -- confirm that this is intended.
        reflection_direction_dot_camera_direction = tf.clip_by_value(
            tf.nn.l2_normalize(
                reflection_direction_dot_camera_direction, dim=2),
            0.0, 1.0)
        # The specular component should also only contribute when the diffuse
        # component contributes. tf.not_equal is used instead of `!=` so the
        # comparison is element-wise regardless of whether the Tensor class
        # overloads the inequality operator.
        reflection_direction_dot_camera_direction = tf.where(
            tf.not_equal(normals_dot_lights, 0.0),
            reflection_direction_dot_camera_direction,
            tf.zeros_like(
                reflection_direction_dot_camera_direction, dtype=tf.float32))
        # Reshape to support broadcasting the shininess coefficient, which
        # rarely varies per-vertex:
        reflection_direction_dot_camera_direction = tf.reshape(
            reflection_direction_dot_camera_direction,
            [batch_size, light_count, image_height, image_width])
        shininess_coefficients = tf.expand_dims(shininess_coefficients, axis=1)
        specularity = tf.reshape(
            tf.pow(reflection_direction_dot_camera_direction,
                   shininess_coefficients),
            [batch_size, light_count, pixel_count, 1])
        specular_output = tf.expand_dims(
            specular_colors, axis=1) * specularity * tf.expand_dims(
                light_intensities, axis=2)
        specular_output = tf.reduce_sum(specular_output, axis=1)
        output_colors = tf.add(output_colors, specular_output)

    rgb_images = tf.reshape(output_colors,
                            [batch_size, image_height, image_width, 3])
    alpha_images = tf.reshape(
        alphas, [batch_size, image_height, image_width, 1])
    # Zero out the color of every pixel whose alpha is not above 0.5.
    valid_rgb_values = tf.concat(3 * [alpha_images > 0.5], axis=3)
    rgb_images = tf.where(valid_rgb_values, rgb_images,
                          tf.zeros_like(rgb_images, dtype=tf.float32))
    # Reverse along the image-height axis (flips the image vertically).
    return tf.reverse(tf.concat([rgb_images, alpha_images], axis=3), axis=[1])

def tone_mapper(image, gamma):
    """Applies gamma correction to the input image.

    Tone maps the input image batch in order to make scenes with a high dynamic
    range viewable. The gamma correction factor is computed separately per
    image, but is shared between all provided channels. The exact function
    computed is:

    image_out = A*image_in^gamma, where A is an image-wide constant computed so
    that the maximum image value is approximately 1. The correction is applied
    to all channels.

    Args:
      image: 4-D float32 tensor with shape [batch_size, image_height,
        image_width, channel_count]. The batch of images to tone map.
      gamma: 0-D float32 nonnegative tensor. Values of gamma below one compress
        relative contrast in the image, and values above one increase it. A
        value of 1 is equivalent to scaling the image to have a maximum value
        of 1.

    Returns:
      4-D float32 tensor with shape [batch_size, image_height, image_width,
      channel_count]. Contains the gamma-corrected images, clipped to the range
      [0, 1].
    """
    batch_size = image.shape[0].value
    corrected_image = tf.pow(image, gamma)
    # Per-image maximum over all pixels and channels; used as the scale 1 / A.
    image_max = tf.reduce_max(
        tf.reshape(corrected_image, [batch_size, -1]), axis=1)
    scaled_image = tf.divide(corrected_image,
                             tf.reshape(image_max, [batch_size, 1, 1, 1]))
    return tf.clip_by_value(scaled_image, 0.0, 1.0)

def _broadcast_camera_vector(vector, batch_size, label):
    """Returns `vector` as a [batch_size, 3] tensor, tiling a [3] input."""
    shape = vector.get_shape().as_list()
    if shape == [3]:
        return tf.tile(tf.expand_dims(vector, axis=0), [batch_size, 1])
    if shape != [batch_size, 3]:
        raise ValueError('%s must have shape [batch_size, 3]' % label)
    return vector


def _broadcast_batch_scalar(value, batch_size, label):
    """Returns `value` (number, 0D or 1D tensor) as a [batch_size] tensor."""
    if isinstance(value, (int, float)):
        return tf.constant(batch_size * [value], dtype=tf.float32)
    shape = value.get_shape().as_list()
    if not shape:
        return tf.tile(tf.expand_dims(value, 0), [batch_size])
    if shape != [batch_size]:
        raise ValueError('%s must be a float, a 0D tensor, or a 1D tensor '
                         'with shape [batch_size]' % label)
    return value


def mesh_renderer(vertices,
                  triangles,
                  normals,
                  diffuse_colors,
                  camera_position,
                  camera_lookat,
                  camera_up,
                  light_positions,
                  light_intensities,
                  image_width,
                  image_height,
                  specular_colors=None,
                  shininess_coefficients=None,
                  ambient_color=None,
                  fov_y=40.0,
                  near_clip=0.01,
                  far_clip=10.0):
    """Renders an input scene using phong shading, and returns an output image.

    Args:
      vertices: 3-D float32 tensor with shape [batch_size, vertex_count, 3].
        Each triplet is an xyz position in world space.
      triangles: 2-D int32 tensor with shape [triangle_count, 3]. Each triplet
        should contain vertex indices describing a triangle such that the
        triangle's normal points toward the viewer if the forward order of the
        triplet defines a clockwise winding of the vertices. Gradients with
        respect to this tensor are not available.
      normals: 3-D float32 tensor with shape [batch_size, vertex_count, 3].
        Each triplet is the xyz vertex normal for its corresponding vertex.
        Each vector is assumed to be already normalized.
      diffuse_colors: 3-D float32 tensor with shape [batch_size,
        vertex_count, 3]. The RGB diffuse reflection in the range [0,1] for
        each vertex.
      camera_position: 2-D tensor with shape [batch_size, 3] or 1-D tensor with
        shape [3] specifying the XYZ world space camera position.
      camera_lookat: 2-D tensor with shape [batch_size, 3] or 1-D tensor with
        shape [3] containing an XYZ point along the center of the camera's
        gaze.
      camera_up: 2-D tensor with shape [batch_size, 3] or 1-D tensor with shape
        [3] containing the up direction for the camera. The camera will have
        no tilt with respect to this direction.
      light_positions: a 3-D tensor with shape [batch_size, light_count, 3].
        The XYZ position of each light in the scene. In the same coordinate
        space as pixel_positions.
      light_intensities: a 3-D tensor with shape [batch_size, light_count, 3].
        The RGB intensity values for each light. Intensities may be above one.
      image_width: int specifying desired output image width in pixels.
      image_height: int specifying desired output image height in pixels.
      specular_colors: 3-D float32 tensor with shape [batch_size,
        vertex_count, 3]. The RGB specular reflection in the range [0, 1] for
        each vertex.  If supplied, specular reflections will be computed, and
        both specular_colors and shininess_coefficients are expected.
      shininess_coefficients: a 0D-2D float32 tensor with maximum shape
        [batch_size, vertex_count]. The phong shininess coefficient of each
        vertex. A 0D tensor or float gives a constant shininess coefficient
        across all batches and images. A 1D tensor must have shape
        [batch_size], and a single shininess coefficient per image is used.
      ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
        color, which is added to each pixel in the scene. If None, it is
        assumed to be black.
      fov_y: float, 0D tensor, or 1D tensor with shape [batch_size] specifying
        desired output image y field of view in degrees.
      near_clip: float, 0D tensor, or 1D tensor with shape [batch_size]
        specifying near clipping plane distance.
      far_clip: float, 0D tensor, or 1D tensor with shape [batch_size]
        specifying far clipping plane distance.

    Returns:
      A 4-D float32 tensor of shape [batch_size, image_height, image_width, 4]
      containing the lit RGBA color values for each image at each pixel. RGB
      colors are the intensity values before tonemapping and can be in the
      range [0, infinity]. Clipping to the range [0,1] with tf.clip_by_value is
      likely reasonable for both viewing and training most scenes. More complex
      scenes with multiple lights should tone map color values for display
      only. One simple tonemapping approach is to rescale color values as
      x/(1+x); gamma compression is another common technique. Alpha values are
      zero for background pixels and near one for mesh pixels.

    Raises:
      ValueError: An invalid argument to the method is detected.
    """
    if len(vertices.shape) != 3:
        raise ValueError(
            'Vertices must have shape [batch_size, vertex_count, 3].')
    batch_size = vertices.shape[0].value
    if len(normals.shape) != 3:
        raise ValueError(
            'Normals must have shape [batch_size, vertex_count, 3].')
    if len(light_positions.shape) != 3:
        raise ValueError(
            'Light_positions must have shape [batch_size, light_count, 3].')
    if len(light_intensities.shape) != 3:
        raise ValueError(
            'Light_intensities must have shape [batch_size, light_count, 3].')
    if len(diffuse_colors.shape) != 3:
        raise ValueError(
            'vertex_diffuse_colors must have shape [batch_size, vertex_count, '
            '3].')
    if (ambient_color is not None and
            ambient_color.get_shape().as_list() != [batch_size, 3]):
        raise ValueError('Ambient_color must have shape [batch_size, 3].')

    # Camera vectors may be shared across the batch ([3]) or given per image.
    camera_position = _broadcast_camera_vector(
        camera_position, batch_size, 'Camera_position')
    camera_lookat = _broadcast_camera_vector(
        camera_lookat, batch_size, 'Camera_lookat')
    camera_up = _broadcast_camera_vector(camera_up, batch_size, 'Camera_up')

    # Projection parameters may be numbers, 0D tensors, or [batch_size] tensors.
    fov_y = _broadcast_batch_scalar(fov_y, batch_size, 'Fov_y')
    near_clip = _broadcast_batch_scalar(near_clip, batch_size, 'Near_clip')
    far_clip = _broadcast_batch_scalar(far_clip, batch_size, 'Far_clip')

    if specular_colors is not None and shininess_coefficients is None:
        raise ValueError(
            'Specular colors were supplied without shininess coefficients.')
    if shininess_coefficients is not None and specular_colors is None:
        raise ValueError(
            'Shininess coefficients were supplied without specular colors.')
    if specular_colors is not None:
        # Since a 0-D float32 tensor is accepted, also accept a plain number.
        if isinstance(shininess_coefficients, (int, float)):
            shininess_coefficients = tf.constant(
                shininess_coefficients, dtype=tf.float32)
        if len(specular_colors.shape) != 3:
            raise ValueError('The specular colors must have shape [batch_size, '
                             'vertex_count, 3].')
        if len(shininess_coefficients.shape) > 2:
            raise ValueError('The shininess coefficients must have shape at '
                             'most [batch_size, vertex_count].')
        # If we don't have per-vertex coefficients, we can just reshape the
        # input shininess to broadcast later, rather than interpolating an
        # additional vertex attribute:
        if len(shininess_coefficients.shape) < 2:
            vertex_attributes = tf.concat(
                [normals, vertices, diffuse_colors, specular_colors], axis=2)
        else:
            vertex_attributes = tf.concat(
                [
                    normals, vertices, diffuse_colors, specular_colors,
                    tf.expand_dims(shininess_coefficients, axis=2)
                ],
                axis=2)
    else:
        vertex_attributes = tf.concat(
            [normals, vertices, diffuse_colors], axis=2)

    camera_matrices = camera_utils.look_at(camera_position, camera_lookat,
                                           camera_up)
    perspective_transforms = camera_utils.perspective(
        image_width / image_height, fov_y, near_clip, far_clip)
    clip_space_transforms = tf.matmul(perspective_transforms, camera_matrices)

    # The background value is -1 for every attribute; the pixel mask below
    # relies on rendered diffuse colors being nonnegative.
    pixel_attributes = rasterize_triangles.rasterize_triangles(
        vertices, vertex_attributes, triangles, clip_space_transforms,
        image_width, image_height, [-1] * vertex_attributes.shape[2].value)

    # Extract the interpolated vertex attributes from the pixel buffer and
    # supply them to the shader:
    pixel_normals = tf.nn.l2_normalize(pixel_attributes[:, :, :, 0:3], dim=3)
    pixel_positions = pixel_attributes[:, :, :, 3:6]
    diffuse_colors = pixel_attributes[:, :, :, 6:9]
    if specular_colors is not None:
        specular_colors = pixel_attributes[:, :, :, 9:12]
        # Retrieve the interpolated shininess coefficients if necessary, or
        # just reshape our input for broadcasting:
        if len(shininess_coefficients.shape) == 2:
            shininess_coefficients = pixel_attributes[:, :, :, 12]
        else:
            shininess_coefficients = tf.reshape(shininess_coefficients,
                                                [-1, 1, 1])

    # 1.0 where any diffuse channel is nonnegative (rendered geometry), else 0.
    pixel_mask = tf.cast(
        tf.reduce_any(diffuse_colors >= 0, axis=3), tf.float32)

    renders = phong_shader(
        normals=pixel_normals,
        alphas=pixel_mask,
        pixel_positions=pixel_positions,
        light_positions=light_positions,
        light_intensities=light_intensities,
        diffuse_colors=diffuse_colors,
        camera_position=(
            camera_position if specular_colors is not None else None),
        specular_colors=specular_colors,
        shininess_coefficients=shininess_coefficients,
        ambient_color=ambient_color)
    return renders
``` 