Source code
Revision control
Copy as Markdown
Other Tools
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
use std::ops::Range;
use crate::{
api::JxlOutputBuffer,
error::Result,
image::{DataTypeTag, Rect},
render::{
internal::{ChannelInfo, Stage},
low_memory_pipeline::{helpers::get_distinct_indices, run_stage::ExtraInfo},
},
util::{ShiftRightCeil, SmallVec, mirror, tracing_wrappers::*},
};
use super::{LowMemoryRenderPipeline, row_buffers::RowBuffer};
// Vector type used for per-channel collections in this pipeline (for example the list of
// row buffers handed to a stage).
// Most images have at most 7 channels (RGBA + noise extra channels).
// 8 gives a bit extra leeway and makes the size a power of two.
pub(super) type ChannelVec<T> = SmallVec<T, 8>;
// Fills the pixels of `row` listed in `to_pad` with copies of pixels taken from
// `valid_pixels`, choosing the source pixel with the crate's `mirror` helper.
//
// - `input_type`: pixel data type; only types of 1, 2 or 4 bytes are supported.
// - `row`: raw bytes of one row. Pixel coordinate 0 lives at byte offset
//   `RowBuffer::x0_byte_offset()`, so negative coordinates address bytes before it.
// - `to_pad`: pixel coordinates to overwrite.
// - `valid_pixels`: pixel coordinates that already hold data and may be used as a source.
//
// Panics if the data type has an unsupported size, or if a computed byte range falls
// outside `row`.
fn apply_x_padding(
    input_type: DataTypeTag,
    row: &mut [u8],
    to_pad: Range<isize>,
    valid_pixels: Range<isize>,
) {
    // Dispatch on the pixel size so that every copy has a compile-time constant length.
    match input_type.size() {
        1 => mirror_pad_pixels::<1>(row, to_pad, valid_pixels),
        2 => mirror_pad_pixels::<2>(row, to_pad, valid_pixels),
        4 => mirror_pad_pixels::<4>(row, to_pad, valid_pixels),
        _ => {
            unimplemented!("only 1, 2 or 4 byte data types supported");
        }
    }
}

// Shared implementation of `apply_x_padding` for pixels that are `BYTES` bytes wide.
fn mirror_pad_pixels<const BYTES: usize>(
    row: &mut [u8],
    to_pad: Range<isize>,
    valid_pixels: Range<isize>,
) {
    let x0_offset = RowBuffer::x0_byte_offset() as isize;
    let first_valid = valid_pixels.start;
    let num_valid = valid_pixels.count();
    for x in to_pad {
        // `mirror` works on coordinates relative to the start of the valid range, so shift
        // into that frame and back again.
        let sx = mirror(x - first_valid, num_valid) as isize + first_valid;
        let from = (x0_offset + sx * BYTES as isize) as usize;
        let to = (x0_offset + x * BYTES as isize) as usize;
        row.copy_within(from..from + BYTES, to);
    }
}
impl LowMemoryRenderPipeline {
// Copies one row of channel `c` from the per-group input buffers into the first-stage row
// buffer (`row_buffers[0][c]`), together with the horizontal border pixels it needs.
//
// - `c`: channel index. Nothing happens if the channel is not marked as used.
// - `y`: row to fill, in the (possibly downsampled) coordinates of channel `c`.
// - `(x0, xsize)`: first column and width of the area being rendered, in the same
//   coordinates.
// - `(gx, gy)`: coordinates of the group being rendered.
//
// Rows above or below the group are read from the `topbottom` buffers of the vertical
// neighbour; columns left or right of the group are read from the horizontal neighbours
// (`leftright` for rows inside the group, `topbottom` otherwise).
fn fill_initial_buffers(
    &mut self,
    c: usize,
    y: usize,
    (x0, xsize): (usize, usize),
    (gx, gy): (usize, usize),
) {
    if !self.shared.channel_is_used[c] {
        return;
    }

    let ChannelInfo {
        ty,
        downsample: (dx, dy),
    } = self.shared.channel_info[0][c];
    let ty = ty.expect("Channel info should be populated at this point");
    let px_bytes = ty.size();

    // Group geometry in channel coordinates.
    let group_ysize = 1 << (self.shared.log_group_size - dy as usize);
    let x_shift = self.shared.log_group_size - dx as usize;
    let group_xsize = 1 << x_shift;
    let (bx, by) = self.border_size;
    let group_y0 = gy * group_ysize;
    let group_y1 = group_y0 + group_ysize;
    let group_x0 = gx << x_shift;
    let group_x1 = group_x0 + group_xsize;

    // Decide which group row holds `y`, and which row of its buffer to read.
    let (input_y, igy, is_topbottom) = if y < group_y0 {
        // Row belongs to the group above: indexed from the end of its `topbottom` buffer.
        (y + (by >> dy) * 4 - group_y0, gy - 1, true)
    } else if y < group_y1 {
        (y - group_y0, gy, false)
    } else {
        // Row belongs to the group below: indexed from the start of its `topbottom` buffer.
        (y - group_y1, gy + 1, true)
    };

    let output_row = self.row_buffers[0][c].get_row_mut::<u8>(y);

    // Horizontal extent to copy: the requested area widened by the input border, clamped
    // to the image.
    let x_border = self.input_border_pixels[c].0;
    let copy_x0 = x0.saturating_sub(x_border);
    let copy_x1 = (x0 + xsize + x_border).min(self.shared.input_size.0.shrc(dx));
    debug_assert!(copy_x1 >= group_x0);

    // Byte position in `output_row` where the next chunk is written.
    let mut dst = RowBuffer::x0_byte_offset() - (x0 - copy_x0) * px_bytes;
    let base_gid = igy * self.shared.group_count.0 + gx;

    // Previous group horizontally, if needed.
    if copy_x0 < group_x0 {
        let prev = &self.input_buffers[base_gid - 1];
        let (src_buf, src_xsize) = if is_topbottom {
            (prev.topbottom[c].as_ref().unwrap(), group_xsize)
        } else {
            (prev.leftright[c].as_ref().unwrap(), 4 * (bx >> dx))
        };
        let src_row = src_buf.row(input_y);
        let nbytes = (group_x0 - copy_x0) * px_bytes;
        // The wanted pixels are the last ones of the source row.
        let src_start = src_xsize * px_bytes - nbytes;
        output_row[dst..dst + nbytes].copy_from_slice(&src_row[src_start..src_start + nbytes]);
        dst += nbytes;
    }

    // The group that contains the row itself.
    let current = &self.input_buffers[base_gid];
    let src_buf = if is_topbottom {
        current.topbottom[c].as_ref().unwrap()
    } else {
        current.data[c].as_ref().unwrap()
    };
    let src_row = src_buf.row(input_y);
    let src_start = copy_x0.saturating_sub(group_x0) * px_bytes;
    let src_end = (copy_x1.min(group_x1) - group_x0) * px_bytes;
    let nbytes = src_end - src_start;
    output_row[dst..dst + nbytes].copy_from_slice(&src_row[src_start..src_end]);
    dst += nbytes;

    // Next group horizontally, if any.
    if copy_x1 > group_x1 {
        let next = &self.input_buffers[base_gid + 1];
        let src_buf = if is_topbottom {
            next.topbottom[c].as_ref().unwrap()
        } else {
            next.leftright[c].as_ref().unwrap()
        };
        let src_row = src_buf.row(input_y);
        let gid = gy * self.shared.group_count.0 + gx;
        let next_group_xsize = self.shared.group_size(gid + 1).0.shrc(dx);
        // The next group may be narrower than the border that is needed.
        let border_x = (copy_x1 - group_x1).min(next_group_xsize);
        let border_bytes = border_x * px_bytes;
        output_row[dst..dst + border_bytes].copy_from_slice(&src_row[..border_bytes]);
        if border_x + group_x1 < copy_x1 {
            // Not enough pixels were available: mirror-pad the remainder.
            let pad_from = (xsize + border_x) as isize;
            let pad_to = (xsize + copy_x1 - group_x1) as isize;
            apply_x_padding(ty, output_row, pad_from..pad_to, 0..pad_from);
        }
    }
}
// Renders *part* of a group's worth of data.
// In particular, renders the sub-rectangle given in `image_area`, where (1, 1) refers to
// the center of the group, and 0 and 2 include data from the neighbouring group (if any).
//
// Arguments:
// - `(gx, gy)`: coordinates of the group being rendered.
// - `image_area`: rectangle to render. Its origin and end are compared against 0 and
//   `self.shared.input_size`, so it is expressed in input-image pixel coordinates.
// - `buffers`: output buffers handed to every `Stage::Save` stage.
//
// Returns the first error reported by a save stage, or `Ok(())` once every row has been
// processed.
#[instrument(skip(self, buffers))]
pub(super) fn render_group(
&mut self,
(gx, gy): (usize, usize),
image_area: Rect,
buffers: &mut [Option<JxlOutputBuffer>],
) -> Result<()> {
// Whether the area touches the left / right edge of the image; forwarded to the stages.
let start_of_row = image_area.origin.0 == 0;
let end_of_row = image_area.end().0 == self.shared.input_size.0;
let Rect {
origin: (x0, y0),
size: (xsize, num_rows),
} = image_area;
let num_channels = self.shared.num_channels();
let num_extra_rows = self.border_size.1;
// This follows the same implementation strategy as the C++ code in libjxl.
// We pretend that every stage has a vertical shift of 0, i.e. it is as tall
// as the final image.
// We call each such row a "virtual" row, because it may or may not correspond
// to an actual row of the current processing stage; actual processing happens
// when vy % (1<<vshift) == 0.
// The virtual row range is the requested area extended by `num_extra_rows` on both
// sides (clamped at 0 at the top).
let vy0 = y0.saturating_sub(num_extra_rows);
let vy1 = image_area.end().1 + num_extra_rows;
for vy in vy0..vy1 {
// Origin and size passed to save stages; replaced by the values of an `Extend` stage
// once one is encountered in the stage list.
let mut current_origin = (0, 0);
let mut current_size = self.shared.input_size;
// Step 1: read input channels.
for c in 0..num_channels {
// Same logic as below, but adapted to the input stage.
let (dx, dy) = self.shared.channel_info[0][c].downsample;
let scaled_y_border = self.input_border_pixels[c].1 << dy;
let stage_vy = vy as isize - num_extra_rows as isize + scaled_y_border as isize;
// Only virtual rows that map to an actual row of this channel are processed.
if stage_vy % (1 << dy) != 0 {
continue;
}
// Skip rows that lie further above the area than this channel's border.
if stage_vy - (y0 as isize) < -(scaled_y_border as isize) {
continue;
}
let y = stage_vy >> dy;
// Do not produce rows in out-of-bounds areas.
if y < 0 || y >= self.shared.input_size.1.shrc(dy) as isize {
continue;
}
let y = y as usize;
// Horizontal coordinates are converted to the channel's resolution.
self.fill_initial_buffers(c, y, (x0 >> dx, xsize >> dx), (gx, gy));
}
// Step 2: go through stages one by one.
for (i, stage) in self.shared.stages.iter().enumerate() {
let (dx, dy) = self.downsampling_for_stage[i];
// The logic below uses *virtual* y coordinates, so we need to convert the border
// amount appropriately.
let scaled_y_border = self.stage_output_border_pixels[i].1 << dy;
// I knew the reason behind this formula at some point, but now I don't.
let stage_vy = vy as isize - num_extra_rows as isize + scaled_y_border as isize;
// Only virtual rows that map to an actual row of this stage are processed.
if stage_vy % (1 << dy) != 0 {
continue;
}
// Skip rows that lie further above the area than this stage's output border.
if stage_vy - (y0 as isize) < -(scaled_y_border as isize) {
continue;
}
let y = stage_vy >> dy;
let shifted_ysize = self.shared.input_size.1.shrc(dy);
// Do not produce rows in out-of-bounds areas.
if y < 0 || y >= shifted_ysize as isize {
continue;
}
let y = y as usize;
let out_extra_x = self.stage_output_border_pixels[i].0;
// Width of the area at this stage's resolution (rounded up).
let shifted_xsize = xsize.shrc(dx);
match stage {
Stage::InPlace(s) => {
// Fetch the row buffers this stage operates on, selected by
// `sorted_buffer_indices[i]`.
let mut buffers = get_distinct_indices(
&mut self.row_buffers,
&self.sorted_buffer_indices[i],
);
s.run_stage_on(
ExtraInfo {
xsize: shifted_xsize,
current_row: y,
group_x0: x0 >> dx,
out_extra_x,
start_of_row,
end_of_row,
image_height: shifted_ysize,
},
&mut buffers,
self.local_states[i].as_deref_mut(),
);
}
Stage::Save(s) => {
// Find buffers for channels that will be saved.
// Channel ordering is handled in stage_input_buffer_index construction.
let mut input_data: ChannelVec<_> = self.stage_input_buffer_index[i]
.iter()
.map(|(si, ci)| &self.row_buffers[*si][*ci])
.collect();
// Append opaque alpha buffer if fill_opaque_alpha is set
if let Some(ref alpha_buf) = self.opaque_alpha_buffers[i] {
input_data.push(alpha_buf);
}
// Sizes and origins are converted to this stage's resolution; any error from
// the save stage aborts rendering of the group.
s.save_lowmem(
&input_data,
&mut *buffers,
(xsize >> dx, num_rows >> dy),
y,
(x0 >> dx, y0 >> dy),
current_size,
current_origin,
)?;
}
Stage::Extend(s) => {
// No pixel work here: only record the extended image geometry for the save
// stages that follow.
current_size = s.image_size;
current_origin = s.frame_origin;
}
Stage::InOut(s) => {
let borderx = s.border().0 as usize;
let bordery = s.border().1 as isize;
// Apply x padding.
// First group of a row: fill the `borderx` pixels to the left of every input row
// this stage reads (rows y-bordery..=y+bordery, mirrored at the image's top
// and bottom).
if gx == 0 && borderx != 0 {
for (si, ci) in self.stage_input_buffer_index[i].iter() {
for iy in -bordery..=bordery {
let y = mirror(y as isize + iy, shifted_ysize);
apply_x_padding(
s.input_type(),
self.row_buffers[*si][*ci].get_row_mut::<u8>(y),
-(borderx as isize)..0,
// Either xsize is the actual size of the image, or it is
// much larger than borderx, so this works out either way.
0..shifted_xsize as isize,
);
}
}
}
// Last group of a row: fill the `borderx` pixels to the right in the same way.
if gx + 1 == self.shared.group_count.0 && borderx != 0 {
for (si, ci) in self.stage_input_buffer_index[i].iter() {
for iy in -bordery..=bordery {
let y = mirror(y as isize + iy, shifted_ysize);
apply_x_padding(
s.input_type(),
self.row_buffers[*si][*ci].get_row_mut::<u8>(y),
shifted_xsize as isize..(shifted_xsize + borderx) as isize,
// borderx..0 is either data from the neighbouring group or
// data that was filled in by the iteration above.
-(borderx as isize)..shifted_xsize as isize,
);
}
}
}
// Split the buffers so inputs (indices <= i) can be borrowed immutably while the
// stage's output (`row_buffers[i + 1]`, i.e. `outb[0]`) is borrowed mutably.
let (inb, outb) = self.row_buffers.split_at_mut(i + 1);
// Prepare pointers to input and output buffers.
let input_data: ChannelVec<_> = self.stage_input_buffer_index[i]
.iter()
.map(|(si, ci)| &inb[*si][*ci])
.collect();
s.run_stage_on(
ExtraInfo {
xsize: shifted_xsize,
current_row: y,
group_x0: x0 >> dx,
out_extra_x,
start_of_row,
end_of_row,
image_height: shifted_ysize,
},
&input_data,
&mut outb[0][..],
self.local_states[i].as_deref_mut(),
);
}
}
}
}
Ok(())
}
// Renders a chunk of data outside the current frame.
//
// - `xrange` / `yrange`: columns and rows of the chunk to produce.
// - `buffers`: output buffers handed to every `Stage::Save` stage.
//
// For each row, the extend stage produces the pixels for every channel, then all stages
// after it are run on that row. Returns the first error reported by a save stage.
//
// Panics if the pipeline has no extend stage, if a later stage is downsampled, or if a
// later `InOut` stage needs a border.
#[instrument(skip(self, buffers))]
pub(super) fn render_outside_frame(
    &mut self,
    xrange: Range<usize>,
    yrange: Range<usize>,
    buffers: &mut [Option<JxlOutputBuffer>],
) -> Result<()> {
    let num_channels = self.shared.num_channels();
    let (x0, y0) = (xrange.start, yrange.start);
    let xsize = xrange.len();
    let ysize = yrange.len();
    let image_height = self.shared.input_size.1;

    // Significantly simplified version of render_group.
    for y in yrange.clone() {
        // Every stage invoked for this row receives the same description of the row.
        let row_info = || ExtraInfo {
            xsize,
            current_row: y,
            group_x0: x0,
            out_extra_x: 0,
            start_of_row: false,
            end_of_row: false,
            image_height,
        };

        let extend_idx = self.shared.extend_stage_index.unwrap();

        // Step 1: get padding from extend stage.
        for c in 0..num_channels {
            let (si, ci) = self.stage_input_buffer_index[extend_idx][c];
            let target = &mut self.row_buffers[si][ci];
            let Stage::Extend(extend_stage) = &self.shared.stages[extend_idx] else {
                unreachable!("extend stage is not an extend stage");
            };
            let row = &mut target.get_row_mut(y)[RowBuffer::x0_offset::<f32>()..];
            extend_stage.process_row_chunk((x0, y), xsize, c, row);
        }

        // Step 2: go through remaining stages one by one.
        let later_stages = self.shared.stages.iter().enumerate().skip(extend_idx + 1);
        for (i, stage) in later_stages {
            assert_eq!(self.downsampling_for_stage[i], (0, 0));
            match stage {
                Stage::Extend(_) => {
                    unreachable!("duplicate extend stage");
                }
                Stage::InPlace(s) => {
                    let mut stage_rows = get_distinct_indices(
                        &mut self.row_buffers,
                        &self.sorted_buffer_indices[i],
                    );
                    s.run_stage_on(
                        row_info(),
                        &mut stage_rows,
                        self.local_states[i].as_deref_mut(),
                    );
                }
                Stage::InOut(s) => {
                    assert_eq!(s.border(), (0, 0));
                    // Inputs come from buffers up to index `i`; the output is buffer `i + 1`.
                    let (inb, outb) = self.row_buffers.split_at_mut(i + 1);
                    // Prepare pointers to input and output buffers.
                    let input_data: ChannelVec<_> = self.stage_input_buffer_index[i]
                        .iter()
                        .map(|(si, ci)| &inb[*si][*ci])
                        .collect();
                    s.run_stage_on(
                        row_info(),
                        &input_data,
                        &mut outb[0][..],
                        self.local_states[i].as_deref_mut(),
                    );
                }
                Stage::Save(s) => {
                    // Find buffers for channels that will be saved.
                    // Channel ordering is handled in stage_input_buffer_index construction.
                    let mut input_data: ChannelVec<_> = self.stage_input_buffer_index[i]
                        .iter()
                        .map(|(si, ci)| &self.row_buffers[*si][*ci])
                        .collect();
                    // Append opaque alpha buffer if fill_opaque_alpha is set
                    if let Some(alpha_buf) = &self.opaque_alpha_buffers[i] {
                        input_data.push(alpha_buf);
                    }
                    s.save_lowmem(
                        &input_data,
                        &mut *buffers,
                        (xsize, ysize),
                        y,
                        (x0, y0),
                        (xrange.end, yrange.end), // this is not true, but works out correctly.
                        (0, 0),
                    )?;
                }
            }
        }
    }
    Ok(())
}
}