1 //! Word wrapping algorithms.
2 //!
3 //! After a text has been broken into words (or [`Fragment`]s), one
4 //! now has to decide how to break the fragments into lines. The
5 //! simplest algorithm for this is implemented by [`wrap_first_fit`]:
6 //! it uses no look-ahead and simply adds fragments to the line as
7 //! long as they fit. However, this can lead to poor line breaks if a
8 //! large fragment almost-but-not-quite fits on a line. When that
9 //! happens, the fragment is moved to the next line and it will leave
10 //! behind a large gap. A more advanced algorithm, implemented by
11 //! [`wrap_optimal_fit`], will take this into account. The optimal-fit
12 //! algorithm considers all possible line breaks and will attempt to
13 //! minimize the gaps left behind by overly short lines.
14 //!
15 //! While both algorithms run in linear time, the first-fit algorithm
16 //! is about 4 times faster than the optimal-fit algorithm.
17 
18 #[cfg(feature = "smawk")]
19 mod optimal_fit;
20 #[cfg(feature = "smawk")]
21 pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
22 
23 use crate::core::{Fragment, Word};
24 
25 /// Describes how to wrap words into lines.
26 ///
27 /// The simplest approach is to wrap words one word at a time and
28 /// accept the first way of wrapping which fit
29 /// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
30 /// enabled, a more complex algorithm is available which will look at
31 /// an entire paragraph at a time in order to find optimal line breaks
32 /// ([`WrapAlgorithm::OptimalFit`]).
33 #[derive(Clone, Copy)]
34 pub enum WrapAlgorithm {
35     /// Wrap words using a fast and simple algorithm.
36     ///
37     /// This algorithm uses no look-ahead when finding line breaks.
38     /// Implemented by [`wrap_first_fit`], please see that function for
39     /// details and examples.
40     FirstFit,
41 
42     /// Wrap words using an advanced algorithm with look-ahead.
43     ///
44     /// This wrapping algorithm considers the entire paragraph to find
45     /// optimal line breaks. When wrapping text, "penalties" are
46     /// assigned to line breaks based on the gaps left at the end of
47     /// lines. See [`Penalties`] for details.
48     ///
49     /// The underlying wrapping algorithm is implemented by
50     /// [`wrap_optimal_fit`], please see that function for examples.
51     ///
52     /// **Note:** Only available when the `smawk` Cargo feature is
53     /// enabled.
54     #[cfg(feature = "smawk")]
55     OptimalFit(Penalties),
56 
57     /// Custom wrapping function.
58     ///
59     /// Use this if you want to implement your own wrapping algorithm.
60     /// The function can freely decide how to turn a slice of
61     /// [`Word`]s into lines.
62     ///
63     /// # Example
64     ///
65     /// ```
66     /// use textwrap::core::Word;
67     /// use textwrap::{wrap, Options, WrapAlgorithm};
68     ///
69     /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
70     ///     let mut lines = Vec::new();
71     ///     let mut step = 1;
72     ///     let mut start_idx = 0;
73     ///     while start_idx + step <= words.len() {
74     ///       lines.push(&words[start_idx .. start_idx+step]);
75     ///       start_idx += step;
76     ///       step += 1;
77     ///     }
78     ///     lines
79     /// }
80     ///
81     /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
82     /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
83     ///            vec!["First,",
84     ///                 "second, third,",
85     ///                 "fourth, fifth, sixth"]);
86     /// ```
87     Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
88 }
89 
90 impl PartialEq for WrapAlgorithm {
91     /// Compare two wrap algorithms.
92     ///
93     /// ```
94     /// use textwrap::WrapAlgorithm;
95     ///
96     /// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit);
97     /// #[cfg(feature = "smawk")] {
98     ///     assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit());
99     /// }
100     /// ```
101     ///
102     /// Note that `WrapAlgorithm::Custom1` values never compare equal:
103     ///
104     /// ```
105     /// use textwrap::WrapAlgorithm;
106     ///
107     /// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]),
108     ///            WrapAlgorithm::Custom(|words, line_widths| vec![words]));
109     /// ```
eq(&self, other: &Self) -> bool110     fn eq(&self, other: &Self) -> bool {
111         match (self, other) {
112             (WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true,
113             #[cfg(feature = "smawk")]
114             (WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b,
115             (_, _) => false,
116         }
117     }
118 }
119 
120 impl std::fmt::Debug for WrapAlgorithm {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result121     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122         match self {
123             WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
124             #[cfg(feature = "smawk")]
125             WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
126             WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
127         }
128     }
129 }
130 
131 impl WrapAlgorithm {
132     /// Create new wrap algorithm.
133     ///
134     /// The best wrapping algorithm is used by default, i.e.,
135     /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
136     /// [`WrapAlgorithm::FirstFit`].
new() -> Self137     pub const fn new() -> Self {
138         #[cfg(not(feature = "smawk"))]
139         {
140             WrapAlgorithm::FirstFit
141         }
142 
143         #[cfg(feature = "smawk")]
144         {
145             WrapAlgorithm::new_optimal_fit()
146         }
147     }
148 
149     /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
150     /// works well for monospace text.
151     ///
152     /// **Note:** Only available when the `smawk` Cargo feature is
153     /// enabled.
154     #[cfg(feature = "smawk")]
new_optimal_fit() -> Self155     pub const fn new_optimal_fit() -> Self {
156         WrapAlgorithm::OptimalFit(Penalties::new())
157     }
158 
159     /// Wrap words according to line widths.
160     ///
161     /// The `line_widths` slice gives the target line width for each
162     /// line (the last slice element is repeated as necessary). This
163     /// can be used to implement hanging indentation.
164     #[inline]
wrap<'a, 'b>( &self, words: &'b [Word<'a>], line_widths: &'b [usize], ) -> Vec<&'b [Word<'a>]>165     pub fn wrap<'a, 'b>(
166         &self,
167         words: &'b [Word<'a>],
168         line_widths: &'b [usize],
169     ) -> Vec<&'b [Word<'a>]> {
170         // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
171         // = 9_007_199_254_740_992 can be represented without loss by
172         // a f64. Larger line widths will be rounded to the nearest
173         // representable number.
174         let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
175 
176         match self {
177             WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
178 
179             #[cfg(feature = "smawk")]
180             WrapAlgorithm::OptimalFit(penalties) => {
181                 // The computation cannnot overflow when the line
182                 // widths are restricted to usize.
183                 wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
184             }
185 
186             WrapAlgorithm::Custom(func) => func(words, line_widths),
187         }
188     }
189 }
190 
191 impl Default for WrapAlgorithm {
default() -> Self192     fn default() -> Self {
193         WrapAlgorithm::new()
194     }
195 }
196 
197 /// Wrap abstract fragments into lines with a first-fit algorithm.
198 ///
199 /// The `line_widths` slice gives the target line width for each line
200 /// (the last slice element is repeated as necessary). This can be
201 /// used to implement hanging indentation.
202 ///
203 /// The fragments must already have been split into the desired
204 /// widths, this function will not (and cannot) attempt to split them
205 /// further when arranging them into lines.
206 ///
207 /// # First-Fit Algorithm
208 ///
209 /// This implements a simple “greedy” algorithm: accumulate fragments
210 /// one by one and when a fragment no longer fits, start a new line.
211 /// There is no look-ahead, we simply take first fit of the fragments
212 /// we find.
213 ///
214 /// While fast and predictable, this algorithm can produce poor line
215 /// breaks when a long fragment is moved to a new line, leaving behind
216 /// a large gap:
217 ///
218 /// ```
219 /// use textwrap::core::Word;
220 /// use textwrap::wrap_algorithms::wrap_first_fit;
221 /// use textwrap::WordSeparator;
222 ///
223 /// // Helper to convert wrapped lines to a Vec<String>.
224 /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
225 ///     lines.iter().map(|line| {
226 ///         line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
227 ///     }).collect::<Vec<_>>()
228 /// }
229 ///
230 /// let text = "These few words will unfortunately not wrap nicely.";
231 /// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
232 /// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
233 ///            vec!["These few words",
234 ///                 "will",  // <-- short line
235 ///                 "unfortunately",
236 ///                 "not wrap",
237 ///                 "nicely."]);
238 ///
239 /// // We can avoid the short line if we look ahead:
240 /// #[cfg(feature = "smawk")]
241 /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
242 /// #[cfg(feature = "smawk")]
243 /// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
244 ///            vec!["These few",
245 ///                 "words will",
246 ///                 "unfortunately",
247 ///                 "not wrap",
248 ///                 "nicely."]);
249 /// ```
250 ///
251 /// The [`wrap_optimal_fit`] function was used above to get better
252 /// line breaks. It uses an advanced algorithm which tries to avoid
253 /// short lines. This function is about 4 times faster than
254 /// [`wrap_optimal_fit`].
255 ///
256 /// # Examples
257 ///
258 /// Imagine you're building a house site and you have a number of
259 /// tasks you need to execute. Things like pour foundation, complete
260 /// framing, install plumbing, electric cabling, install insulation.
261 ///
262 /// The construction workers can only work during daytime, so they
263 /// need to pack up everything at night. Because they need to secure
264 /// their tools and move machines back to the garage, this process
265 /// takes much more time than the time it would take them to simply
266 /// switch to another task.
267 ///
268 /// You would like to make a list of tasks to execute every day based
269 /// on your estimates. You can model this with a program like this:
270 ///
271 /// ```
272 /// use textwrap::core::{Fragment, Word};
273 /// use textwrap::wrap_algorithms::wrap_first_fit;
274 ///
275 /// #[derive(Debug)]
276 /// struct Task<'a> {
277 ///     name: &'a str,
278 ///     hours: f64,   // Time needed to complete task.
279 ///     sweep: f64,   // Time needed for a quick sweep after task during the day.
280 ///     cleanup: f64, // Time needed for full cleanup if day ends with this task.
281 /// }
282 ///
283 /// impl Fragment for Task<'_> {
284 ///     fn width(&self) -> f64 { self.hours }
285 ///     fn whitespace_width(&self) -> f64 { self.sweep }
286 ///     fn penalty_width(&self) -> f64 { self.cleanup }
287 /// }
288 ///
289 /// // The morning tasks
290 /// let tasks = vec![
291 ///     Task { name: "Foundation",  hours: 4.0, sweep: 2.0, cleanup: 3.0 },
292 ///     Task { name: "Framing",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
293 ///     Task { name: "Plumbing",    hours: 2.0, sweep: 2.0, cleanup: 2.0 },
294 ///     Task { name: "Electrical",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
295 ///     Task { name: "Insulation",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
296 ///     Task { name: "Drywall",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
297 ///     Task { name: "Floors",      hours: 3.0, sweep: 1.0, cleanup: 2.0 },
298 ///     Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
299 ///     Task { name: "Bathrooms",   hours: 2.0, sweep: 1.0, cleanup: 2.0 },
300 /// ];
301 ///
302 /// // Fill tasks into days, taking `day_length` into account. The
303 /// // output shows the hours worked per day along with the names of
304 /// // the tasks for that day.
305 /// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
306 ///     let mut days = Vec::new();
307 ///     // Assign tasks to days. The assignment is a vector of slices,
308 ///     // with a slice per day.
309 ///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
310 ///     for day in assigned_days.iter() {
311 ///         let last = day.last().unwrap();
312 ///         let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
313 ///         let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
314 ///         days.push((work_hours - last.sweep + last.cleanup, names));
315 ///     }
316 ///     days
317 /// }
318 ///
319 /// // With a single crew working 8 hours a day:
320 /// assert_eq!(
321 ///     assign_days(&tasks, 8.0),
322 ///     [
323 ///         (7.0, vec!["Foundation"]),
324 ///         (8.0, vec!["Framing", "Plumbing"]),
325 ///         (7.0, vec!["Electrical", "Insulation"]),
326 ///         (5.0, vec!["Drywall"]),
327 ///         (7.0, vec!["Floors", "Countertops"]),
328 ///         (4.0, vec!["Bathrooms"]),
329 ///     ]
330 /// );
331 ///
332 /// // With two crews working in shifts, 16 hours a day:
333 /// assert_eq!(
334 ///     assign_days(&tasks, 16.0),
335 ///     [
336 ///         (14.0, vec!["Foundation", "Framing", "Plumbing"]),
337 ///         (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
338 ///         (6.0, vec!["Countertops", "Bathrooms"]),
339 ///     ]
340 /// );
341 /// ```
342 ///
343 /// Apologies to anyone who actually knows how to build a house and
344 /// knows how long each step takes :-)
wrap_first_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], ) -> Vec<&'a [T]>345 pub fn wrap_first_fit<'a, 'b, T: Fragment>(
346     fragments: &'a [T],
347     line_widths: &'b [f64],
348 ) -> Vec<&'a [T]> {
349     // The final line width is used for all remaining lines.
350     let default_line_width = line_widths.last().copied().unwrap_or(0.0);
351     let mut lines = Vec::new();
352     let mut start = 0;
353     let mut width = 0.0;
354 
355     for (idx, fragment) in fragments.iter().enumerate() {
356         let line_width = line_widths
357             .get(lines.len())
358             .copied()
359             .unwrap_or(default_line_width);
360         if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
361             lines.push(&fragments[start..idx]);
362             start = idx;
363             width = 0.0;
364         }
365         width += fragment.width() + fragment.whitespace_width();
366     }
367     lines.push(&fragments[start..]);
368     lines
369 }
370 
371 #[cfg(test)]
372 mod tests {
373     use super::*;
374 
375     #[derive(Debug, PartialEq)]
376     struct Word(f64);
377 
378     #[rustfmt::skip]
379     impl Fragment for Word {
width(&self) -> f64380         fn width(&self) -> f64 { self.0 }
whitespace_width(&self) -> f64381         fn whitespace_width(&self) -> f64 { 1.0 }
penalty_width(&self) -> f64382         fn penalty_width(&self) -> f64 { 0.0 }
383     }
384 
385     #[test]
wrap_string_longer_than_f64()386     fn wrap_string_longer_than_f64() {
387         let words = vec![
388             Word(1e307),
389             Word(2e307),
390             Word(3e307),
391             Word(4e307),
392             Word(5e307),
393             Word(6e307),
394         ];
395         // Wrap at just under f64::MAX (~19e307). The tiny
396         // whitespace_widths disappear because of loss of precision.
397         assert_eq!(
398             wrap_first_fit(&words, &[15e307]),
399             &[
400                 vec![
401                     Word(1e307),
402                     Word(2e307),
403                     Word(3e307),
404                     Word(4e307),
405                     Word(5e307)
406                 ],
407                 vec![Word(6e307)]
408             ]
409         );
410     }
411 }
412