xref: /aosp_15_r20/external/pdfium/xfa/fgas/layout/cfgas_txtbreak.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fgas/layout/cfgas_txtbreak.h"
8 
9 #include <algorithm>
10 
11 #include "build/build_config.h"
12 #include "core/fxcrt/fx_codepage.h"
13 #include "core/fxcrt/fx_extension.h"
14 #include "core/fxcrt/fx_safe_types.h"
15 #include "core/fxcrt/stl_util.h"
16 #include "core/fxge/text_char_pos.h"
17 #include "third_party/base/check.h"
18 #include "third_party/base/containers/adapters.h"
19 #include "third_party/base/numerics/safe_conversions.h"
20 #include "xfa/fgas/font/cfgas_gefont.h"
21 #include "xfa/fgas/layout/cfgas_char.h"
22 #include "xfa/fgas/layout/fgas_arabic.h"
23 #include "xfa/fgas/layout/fgas_linebreak.h"
24 
25 namespace {
26 
// One glyph slot used by GetDisplayPos() when a source character expands into
// one or more rendered forms. Members are zero-initialized so that slots which
// are only partially filled in never carry indeterminate values.
struct FX_FORMCHAR {
  uint16_t wch = 0;    // Source character code.
  uint16_t wForm = 0;  // Form (glyph character) that is actually rendered.
  int32_t iWidth = 0;  // Width associated with |wForm|.
};
32 
IsCtrlCode(wchar_t wch)33 bool IsCtrlCode(wchar_t wch) {
34   FX_CHARTYPE dwRet = pdfium::unicode::GetCharType(wch);
35   return dwRet == FX_CHARTYPE::kTab || dwRet == FX_CHARTYPE::kControl;
36 }
37 
38 }  // namespace
39 
CFGAS_TxtBreak()40 CFGAS_TxtBreak::CFGAS_TxtBreak() : CFGAS_Break(LayoutStyle::kNone) {}
41 
42 CFGAS_TxtBreak::~CFGAS_TxtBreak() = default;
43 
SetLineWidth(float fLineWidth)44 void CFGAS_TxtBreak::SetLineWidth(float fLineWidth) {
45   m_iLineWidth = FXSYS_roundf(fLineWidth * kConversionFactor);
46   DCHECK(m_iLineWidth >= 20000);
47 }
48 
SetAlignment(int32_t iAlignment)49 void CFGAS_TxtBreak::SetAlignment(int32_t iAlignment) {
50   DCHECK(iAlignment >= CFX_TxtLineAlignment_Left);
51   DCHECK(iAlignment <= CFX_TxtLineAlignment_Justified);
52   m_iAlignment = iAlignment;
53 }
54 
SetCombWidth(float fCombWidth)55 void CFGAS_TxtBreak::SetCombWidth(float fCombWidth) {
56   m_iCombWidth = FXSYS_roundf(fCombWidth * kConversionFactor);
57 }
58 
AppendChar_Combination(CFGAS_Char * pCurChar)59 void CFGAS_TxtBreak::AppendChar_Combination(CFGAS_Char* pCurChar) {
60   FX_SAFE_INT32 iCharWidth = m_iCombWidth;
61   pCurChar->m_iCharWidth = -1;
62   if (!m_bCombText) {
63     wchar_t wch = pCurChar->char_code();
64     CFGAS_Char* pLastChar = GetLastChar(0, false, false);
65     if (pLastChar &&
66         (pLastChar->m_dwCharStyles & FX_TXTCHARSTYLE_ArabicShadda) == 0) {
67       wchar_t wLast = pLastChar->char_code();
68       absl::optional<uint16_t> maybe_shadda;
69       if (wch == pdfium::arabic::kArabicShadda) {
70         maybe_shadda = pdfium::arabic::GetArabicFromShaddaTable(wLast);
71       } else if (wLast == pdfium::arabic::kArabicShadda) {
72         maybe_shadda = pdfium::arabic::GetArabicFromShaddaTable(wch);
73       }
74       if (maybe_shadda.has_value()) {
75         wch = maybe_shadda.value();
76         pCurChar->m_dwCharStyles |= FX_TXTCHARSTYLE_ArabicShadda;
77         pLastChar->m_dwCharStyles |= FX_TXTCHARSTYLE_ArabicShadda;
78         pLastChar->m_iCharWidth = 0;
79       }
80     }
81     absl::optional<uint16_t> iCharWidthRet;
82     if (m_pFont) {
83       iCharWidthRet = m_pFont->GetCharWidth(wch);
84     }
85     iCharWidth = iCharWidthRet.value_or(0);
86     iCharWidth *= m_iFontSize;
87     iCharWidth *= m_iHorizontalScale;
88     iCharWidth /= 100;
89   }
90   iCharWidth *= -1;
91   pCurChar->m_iCharWidth = iCharWidth.ValueOrDefault(0);
92 }
93 
AppendChar_Tab(CFGAS_Char * pCurChar)94 void CFGAS_TxtBreak::AppendChar_Tab(CFGAS_Char* pCurChar) {
95   m_eCharType = FX_CHARTYPE::kTab;
96 }
97 
AppendChar_Control(CFGAS_Char * pCurChar)98 CFGAS_Char::BreakType CFGAS_TxtBreak::AppendChar_Control(CFGAS_Char* pCurChar) {
99   m_eCharType = FX_CHARTYPE::kControl;
100   CFGAS_Char::BreakType dwRet = CFGAS_Char::BreakType::kNone;
101   if (!m_bSingleLine) {
102     wchar_t wch = pCurChar->char_code();
103     switch (wch) {
104       case L'\v':
105       case pdfium::unicode::kLineSeparator:
106         dwRet = CFGAS_Char::BreakType::kLine;
107         break;
108       case L'\f':
109         dwRet = CFGAS_Char::BreakType::kPage;
110         break;
111       case pdfium::unicode::kParagraphSeparator:
112         dwRet = CFGAS_Char::BreakType::kParagraph;
113         break;
114       default:
115         if (wch == m_wParagraphBreakChar)
116           dwRet = CFGAS_Char::BreakType::kParagraph;
117         break;
118     }
119     if (dwRet != CFGAS_Char::BreakType::kNone)
120       dwRet = EndBreak(dwRet);
121   }
122   return dwRet;
123 }
124 
AppendChar_Arabic(CFGAS_Char * pCurChar)125 CFGAS_Char::BreakType CFGAS_TxtBreak::AppendChar_Arabic(CFGAS_Char* pCurChar) {
126   FX_CHARTYPE chartype = pCurChar->GetCharType();
127   int32_t& iLineWidth = m_pCurLine->m_iWidth;
128   wchar_t wForm;
129   CFGAS_Char* pLastChar = nullptr;
130   bool bAlef = false;
131   if (!m_bCombText && m_eCharType >= FX_CHARTYPE::kArabicAlef &&
132       m_eCharType <= FX_CHARTYPE::kArabicDistortion) {
133     FX_SAFE_INT32 iCharWidth = 0;
134     pLastChar = GetLastChar(1, true, false);
135     if (pLastChar) {
136       if (pLastChar->m_iCharWidth > 0)
137         iLineWidth -= pLastChar->m_iCharWidth;
138       iCharWidth = pLastChar->m_iCharWidth;
139 
140       CFGAS_Char* pPrevChar = GetLastChar(2, true, false);
141       wForm = pdfium::arabic::GetFormChar(pLastChar, pPrevChar, pCurChar);
142       bAlef = (wForm == pdfium::unicode::kZeroWidthNoBreakSpace &&
143                pLastChar->GetCharType() == FX_CHARTYPE::kArabicAlef);
144       if (m_pFont) {
145         iCharWidth = m_pFont->GetCharWidth(wForm).value_or(0);
146       }
147       if (wForm == pdfium::unicode::kZeroWidthNoBreakSpace)
148         iCharWidth = 0;
149 
150       iCharWidth *= m_iFontSize;
151       iCharWidth *= m_iHorizontalScale;
152       iCharWidth /= 100;
153 
154       int32_t iCharWidthValid = iCharWidth.ValueOrDefault(0);
155       pLastChar->m_iCharWidth = iCharWidthValid;
156       iLineWidth += iCharWidthValid;
157     }
158   }
159 
160   m_eCharType = chartype;
161   wForm = pdfium::arabic::GetFormChar(pCurChar, bAlef ? nullptr : pLastChar,
162                                       nullptr);
163   FX_SAFE_INT32 iCharWidth = 0;
164   if (m_bCombText) {
165     iCharWidth = m_iCombWidth;
166   } else {
167     if (m_pFont && wForm != pdfium::unicode::kZeroWidthNoBreakSpace) {
168       iCharWidth = m_pFont->GetCharWidth(wForm).value_or(0);
169     }
170     iCharWidth *= m_iFontSize;
171     iCharWidth *= m_iHorizontalScale;
172     iCharWidth /= 100;
173   }
174 
175   int32_t iCharWidthValid = iCharWidth.ValueOrDefault(0);
176   pCurChar->m_iCharWidth = iCharWidthValid;
177   iLineWidth += iCharWidthValid;
178 
179   m_pCurLine->IncrementArabicCharCount();
180   if (!m_bSingleLine && IsGreaterThanLineWidth(iLineWidth))
181     return EndBreak(CFGAS_Char::BreakType::kLine);
182   return CFGAS_Char::BreakType::kNone;
183 }
184 
AppendChar_Others(CFGAS_Char * pCurChar)185 CFGAS_Char::BreakType CFGAS_TxtBreak::AppendChar_Others(CFGAS_Char* pCurChar) {
186   FX_CHARTYPE chartype = pCurChar->GetCharType();
187   int32_t& iLineWidth = m_pCurLine->m_iWidth;
188   m_eCharType = chartype;
189   wchar_t wch = pCurChar->char_code();
190   wchar_t wForm = wch;
191 
192   FX_SAFE_INT32 iCharWidth = 0;
193   if (m_bCombText) {
194     iCharWidth = m_iCombWidth;
195   } else if (m_pFont) {
196     iCharWidth = m_pFont->GetCharWidth(wForm).value_or(0);
197     iCharWidth *= m_iFontSize;
198     iCharWidth *= m_iHorizontalScale;
199     iCharWidth /= 100;
200   }
201   iCharWidth += m_iCharSpace;
202 
203   int32_t iValidCharWidth = iCharWidth.ValueOrDefault(0);
204   pCurChar->m_iCharWidth = iValidCharWidth;
205   iLineWidth += iValidCharWidth;
206   if (!m_bSingleLine && chartype != FX_CHARTYPE::kSpace &&
207       IsGreaterThanLineWidth(iLineWidth)) {
208     return EndBreak(CFGAS_Char::BreakType::kLine);
209   }
210 
211   return CFGAS_Char::BreakType::kNone;
212 }
213 
AppendChar(wchar_t wch)214 CFGAS_Char::BreakType CFGAS_TxtBreak::AppendChar(wchar_t wch) {
215   FX_CHARTYPE chartype = pdfium::unicode::GetCharType(wch);
216   m_pCurLine->m_LineChars.emplace_back(wch, m_iHorizontalScale,
217                                        m_iVerticalScale);
218   CFGAS_Char* pCurChar = &m_pCurLine->m_LineChars.back();
219   pCurChar->m_dwCharStyles = m_iAlignment | (1 << 8);
220 
221   CFGAS_Char::BreakType dwRet1 = CFGAS_Char::BreakType::kNone;
222   if (chartype != FX_CHARTYPE::kCombination &&
223       GetUnifiedCharType(m_eCharType) != GetUnifiedCharType(chartype) &&
224       m_eCharType != FX_CHARTYPE::kUnknown && !m_bSingleLine &&
225       IsGreaterThanLineWidth(m_pCurLine->m_iWidth) &&
226       (m_eCharType != FX_CHARTYPE::kSpace ||
227        chartype != FX_CHARTYPE::kControl)) {
228     dwRet1 = EndBreak(CFGAS_Char::BreakType::kLine);
229     if (!m_pCurLine->m_LineChars.empty())
230       pCurChar = &m_pCurLine->m_LineChars.back();
231   }
232 
233   CFGAS_Char::BreakType dwRet2 = CFGAS_Char::BreakType::kNone;
234   if (wch == m_wParagraphBreakChar) {
235     // This is handled in AppendChar_Control, but it seems like \n and \r
236     // don't get matched as control characters so we go into AppendChar_other
237     // and never detect the new paragraph ...
238     dwRet2 = CFGAS_Char::BreakType::kParagraph;
239     EndBreak(dwRet2);
240   } else {
241     switch (chartype) {
242       case FX_CHARTYPE::kTab:
243         AppendChar_Tab(pCurChar);
244         break;
245       case FX_CHARTYPE::kControl:
246         dwRet2 = AppendChar_Control(pCurChar);
247         break;
248       case FX_CHARTYPE::kCombination:
249         AppendChar_Combination(pCurChar);
250         break;
251       case FX_CHARTYPE::kArabicAlef:
252       case FX_CHARTYPE::kArabicSpecial:
253       case FX_CHARTYPE::kArabicDistortion:
254       case FX_CHARTYPE::kArabicNormal:
255       case FX_CHARTYPE::kArabicForm:
256       case FX_CHARTYPE::kArabic:
257         dwRet2 = AppendChar_Arabic(pCurChar);
258         break;
259       case FX_CHARTYPE::kUnknown:
260       case FX_CHARTYPE::kSpace:
261       case FX_CHARTYPE::kNumeric:
262       case FX_CHARTYPE::kNormal:
263         dwRet2 = AppendChar_Others(pCurChar);
264         break;
265     }
266   }
267   return std::max(dwRet1, dwRet2);
268 }
269 
EndBreakSplitLine(CFGAS_BreakLine * pNextLine,bool bAllChars)270 void CFGAS_TxtBreak::EndBreakSplitLine(CFGAS_BreakLine* pNextLine,
271                                        bool bAllChars) {
272   bool bDone = false;
273   CFGAS_Char* pTC;
274   if (!m_bSingleLine && IsGreaterThanLineWidth(m_pCurLine->m_iWidth)) {
275     pTC = m_pCurLine->LastChar();
276     switch (pTC->GetCharType()) {
277       case FX_CHARTYPE::kTab:
278       case FX_CHARTYPE::kControl:
279       case FX_CHARTYPE::kSpace:
280         break;
281       default:
282         SplitTextLine(m_pCurLine, pNextLine, bAllChars);
283         bDone = true;
284         break;
285     }
286   }
287   if (bAllChars && !bDone) {
288     int32_t iEndPos = m_pCurLine->m_iWidth;
289     GetBreakPos(&m_pCurLine->m_LineChars, bAllChars, true, &iEndPos);
290   }
291 }
292 
EndBreakBidiLine(CFGAS_Char::BreakType dwStatus)293 std::deque<CFGAS_Break::TPO> CFGAS_TxtBreak::EndBreakBidiLine(
294     CFGAS_Char::BreakType dwStatus) {
295   CFGAS_BreakPiece tp;
296   std::deque<TPO> tpos;
297   CFGAS_Char* pTC;
298   std::vector<CFGAS_Char>& chars = m_pCurLine->m_LineChars;
299   if (!m_pCurLine->HasArabicChar()) {
300     tp.SetStatus(dwStatus);
301     tp.SetStartPos(m_pCurLine->m_iStart);
302     tp.SetWidth(m_pCurLine->m_iWidth);
303     tp.SetStartChar(0);
304     tp.SetCharCount(fxcrt::CollectionSize<int32_t>(m_pCurLine->m_LineChars));
305     tp.SetChars(&m_pCurLine->m_LineChars);
306     pTC = &chars[0];
307     tp.SetCharStyles(pTC->m_dwCharStyles);
308     tp.SetHorizontalScale(pTC->horizonal_scale());
309     tp.SetVerticalScale(pTC->vertical_scale());
310     m_pCurLine->m_LinePieces.push_back(tp);
311     tpos.push_back({0, 0});
312     return tpos;
313   }
314 
315   size_t iBidiNum = 0;
316   for (size_t i = 0; i < m_pCurLine->m_LineChars.size(); ++i) {
317     pTC = &chars[i];
318     pTC->m_iBidiPos = static_cast<int32_t>(i);
319     if (pTC->GetCharType() != FX_CHARTYPE::kControl)
320       iBidiNum = i;
321     if (i == 0)
322       pTC->m_iBidiLevel = 1;
323   }
324   CFGAS_Char::BidiLine(&chars, iBidiNum + 1);
325 
326   tp.SetStatus(CFGAS_Char::BreakType::kPiece);
327   tp.SetStartPos(m_pCurLine->m_iStart);
328   tp.SetChars(&m_pCurLine->m_LineChars);
329   int32_t iBidiLevel = -1;
330   int32_t iCharWidth;
331   int32_t i = 0;
332   int32_t j = -1;
333   int32_t iCount = fxcrt::CollectionSize<int32_t>(m_pCurLine->m_LineChars);
334   while (i < iCount) {
335     pTC = &chars[i];
336     if (iBidiLevel < 0) {
337       iBidiLevel = pTC->m_iBidiLevel;
338       tp.SetWidth(0);
339       tp.SetBidiLevel(iBidiLevel);
340       tp.SetBidiPos(pTC->m_iBidiOrder);
341       tp.SetCharStyles(pTC->m_dwCharStyles);
342       tp.SetHorizontalScale(pTC->horizonal_scale());
343       tp.SetVerticalScale(pTC->vertical_scale());
344       tp.SetStatus(CFGAS_Char::BreakType::kPiece);
345     }
346     if (iBidiLevel != pTC->m_iBidiLevel ||
347         pTC->m_dwStatus != CFGAS_Char::BreakType::kNone) {
348       if (iBidiLevel == pTC->m_iBidiLevel) {
349         tp.SetStatus(pTC->m_dwStatus);
350         iCharWidth = pTC->m_iCharWidth;
351         if (iCharWidth > 0)
352           tp.IncrementWidth(iCharWidth);
353 
354         i++;
355       }
356       tp.SetCharCount(i - tp.GetStartChar());
357       m_pCurLine->m_LinePieces.push_back(tp);
358       tp.IncrementStartPos(tp.GetWidth());
359       tp.SetStartChar(i);
360       tpos.push_back({++j, tp.GetBidiPos()});
361       iBidiLevel = -1;
362     } else {
363       iCharWidth = pTC->m_iCharWidth;
364       if (iCharWidth > 0)
365         tp.IncrementWidth(iCharWidth);
366 
367       i++;
368     }
369   }
370   if (i > tp.GetStartChar()) {
371     tp.SetStatus(dwStatus);
372     tp.SetCharCount(i - tp.GetStartChar());
373     m_pCurLine->m_LinePieces.push_back(tp);
374     tpos.push_back({++j, tp.GetBidiPos()});
375   }
376   if (j > -1) {
377     if (j > 0) {
378       std::sort(tpos.begin(), tpos.end());
379       int32_t iStartPos = 0;
380       for (i = 0; i <= j; i++) {
381         CFGAS_BreakPiece& ttp = m_pCurLine->m_LinePieces[tpos[i].index];
382         ttp.SetStartPos(iStartPos);
383         iStartPos += ttp.GetWidth();
384       }
385     }
386     m_pCurLine->m_LinePieces[j].SetStatus(dwStatus);
387   }
388   return tpos;
389 }
390 
EndBreakAlignment(const std::deque<TPO> & tpos,bool bAllChars,CFGAS_Char::BreakType dwStatus)391 void CFGAS_TxtBreak::EndBreakAlignment(const std::deque<TPO>& tpos,
392                                        bool bAllChars,
393                                        CFGAS_Char::BreakType dwStatus) {
394   int32_t iNetWidth = m_pCurLine->m_iWidth;
395   int32_t iGapChars = 0;
396   bool bFind = false;
397   for (const TPO& pos : pdfium::base::Reversed(tpos)) {
398     const CFGAS_BreakPiece& ttp = m_pCurLine->m_LinePieces[pos.index];
399     if (!bFind)
400       iNetWidth = ttp.GetEndPos();
401 
402     bool bArabic = FX_IsOdd(ttp.GetBidiLevel());
403     int32_t j = bArabic ? 0 : ttp.GetCharCount() - 1;
404     while (j > -1 && j < ttp.GetCharCount()) {
405       const CFGAS_Char* pTC = ttp.GetChar(j);
406       if (pTC->m_eLineBreakType == FX_LINEBREAKTYPE::kDIRECT_BRK)
407         iGapChars++;
408       if (!bFind || !bAllChars) {
409         FX_CHARTYPE chartype = pTC->GetCharType();
410         if (chartype == FX_CHARTYPE::kSpace ||
411             chartype == FX_CHARTYPE::kControl) {
412           if (!bFind && bAllChars && pTC->m_iCharWidth > 0)
413             iNetWidth -= pTC->m_iCharWidth;
414         } else {
415           bFind = true;
416           if (!bAllChars)
417             break;
418         }
419       }
420       j += bArabic ? 1 : -1;
421     }
422     if (!bAllChars && bFind)
423       break;
424   }
425 
426   int32_t iOffset = m_iLineWidth - iNetWidth;
427   if (iGapChars > 0 && m_iAlignment & CFX_TxtLineAlignment_Justified &&
428       dwStatus != CFGAS_Char::BreakType::kParagraph) {
429     int32_t iStart = -1;
430     for (auto& tpo : tpos) {
431       CFGAS_BreakPiece& ttp = m_pCurLine->m_LinePieces[tpo.index];
432       if (iStart < -1)
433         iStart = ttp.GetStartPos();
434       else
435         ttp.SetStartPos(iStart);
436 
437       for (int32_t j = 0; j < ttp.GetCharCount() && iGapChars > 0;
438            j++, iGapChars--) {
439         CFGAS_Char* pTC = ttp.GetChar(j);
440         if (pTC->m_eLineBreakType != FX_LINEBREAKTYPE::kDIRECT_BRK ||
441             pTC->m_iCharWidth < 0) {
442           continue;
443         }
444         int32_t k = iOffset / iGapChars;
445         pTC->m_iCharWidth += k;
446         ttp.IncrementWidth(k);
447         iOffset -= k;
448       }
449       iStart += ttp.GetWidth();
450     }
451   } else if (m_iAlignment & CFX_TxtLineAlignment_Center ||
452              m_iAlignment & CFX_TxtLineAlignment_Right) {
453     if (m_iAlignment & CFX_TxtLineAlignment_Center &&
454         !(m_iAlignment & CFX_TxtLineAlignment_Right)) {
455       iOffset /= 2;
456     }
457     if (iOffset > 0) {
458       for (auto& ttp : m_pCurLine->m_LinePieces)
459         ttp.IncrementStartPos(iOffset);
460     }
461   }
462 }
463 
EndBreak(CFGAS_Char::BreakType dwStatus)464 CFGAS_Char::BreakType CFGAS_TxtBreak::EndBreak(CFGAS_Char::BreakType dwStatus) {
465   DCHECK(dwStatus != CFGAS_Char::BreakType::kNone);
466 
467   if (!m_pCurLine->m_LinePieces.empty()) {
468     if (dwStatus != CFGAS_Char::BreakType::kPiece)
469       m_pCurLine->m_LinePieces.back().SetStatus(dwStatus);
470     return m_pCurLine->m_LinePieces.back().GetStatus();
471   }
472 
473   if (HasLine()) {
474     if (m_Lines[m_iReadyLineIndex].m_LinePieces.empty())
475       return CFGAS_Char::BreakType::kNone;
476 
477     if (dwStatus != CFGAS_Char::BreakType::kPiece)
478       m_Lines[m_iReadyLineIndex].m_LinePieces.back().SetStatus(dwStatus);
479     return m_Lines[m_iReadyLineIndex].m_LinePieces.back().GetStatus();
480   }
481 
482   if (m_pCurLine->m_LineChars.empty())
483     return CFGAS_Char::BreakType::kNone;
484 
485   m_pCurLine->m_LineChars.back().m_dwStatus = dwStatus;
486   if (dwStatus == CFGAS_Char::BreakType::kPiece)
487     return dwStatus;
488 
489   m_iReadyLineIndex = m_pCurLine == &m_Lines[0] ? 0 : 1;
490   CFGAS_BreakLine* pNextLine = &m_Lines[1 - m_iReadyLineIndex];
491   const bool bAllChars = m_iAlignment > CFX_TxtLineAlignment_Right;
492   EndBreakSplitLine(pNextLine, bAllChars);
493 
494   std::deque<TPO> tpos = EndBreakBidiLine(dwStatus);
495   if (m_iAlignment > CFX_TxtLineAlignment_Left)
496     EndBreakAlignment(tpos, bAllChars, dwStatus);
497 
498   m_pCurLine = pNextLine;
499   CFGAS_Char* pTC = GetLastChar(0, false, false);
500   m_eCharType = pTC ? pTC->GetCharType() : FX_CHARTYPE::kUnknown;
501   return dwStatus;
502 }
503 
GetBreakPos(std::vector<CFGAS_Char> * pChars,bool bAllChars,bool bOnlyBrk,int32_t * pEndPos)504 int32_t CFGAS_TxtBreak::GetBreakPos(std::vector<CFGAS_Char>* pChars,
505                                     bool bAllChars,
506                                     bool bOnlyBrk,
507                                     int32_t* pEndPos) {
508   std::vector<CFGAS_Char>& chars = *pChars;
509   int32_t iLength = fxcrt::CollectionSize<int32_t>(chars) - 1;
510   if (iLength < 1)
511     return iLength;
512 
513   int32_t iBreak = -1;
514   int32_t iBreakPos = -1;
515   int32_t iIndirect = -1;
516   int32_t iIndirectPos = -1;
517   int32_t iLast = -1;
518   int32_t iLastPos = -1;
519   if (m_bSingleLine || *pEndPos <= m_iLineWidth) {
520     if (!bAllChars)
521       return iLength;
522 
523     iBreak = iLength;
524     iBreakPos = *pEndPos;
525   }
526 
527   FX_LINEBREAKTYPE eType;
528   FX_BREAKPROPERTY nCur;
529   FX_BREAKPROPERTY nNext;
530   CFGAS_Char* pCur = &chars[iLength--];
531   if (bAllChars)
532     pCur->m_eLineBreakType = FX_LINEBREAKTYPE::kUNKNOWN;
533 
534   nNext = pdfium::unicode::GetBreakProperty(pCur->char_code());
535   int32_t iCharWidth = pCur->m_iCharWidth;
536   if (iCharWidth > 0)
537     *pEndPos -= iCharWidth;
538 
539   while (iLength >= 0) {
540     pCur = &chars[iLength];
541     nCur = pdfium::unicode::GetBreakProperty(pCur->char_code());
542     if (nNext == FX_BREAKPROPERTY::kSP)
543       eType = FX_LINEBREAKTYPE::kPROHIBITED_BRK;
544     else
545       eType = GetLineBreakTypeFromPair(nCur, nNext);
546     if (bAllChars)
547       pCur->m_eLineBreakType = eType;
548     if (!bOnlyBrk) {
549       if (m_bSingleLine || *pEndPos <= m_iLineWidth ||
550           nCur == FX_BREAKPROPERTY::kSP) {
551         if (eType == FX_LINEBREAKTYPE::kDIRECT_BRK && iBreak < 0) {
552           iBreak = iLength;
553           iBreakPos = *pEndPos;
554           if (!bAllChars)
555             return iLength;
556         } else if (eType == FX_LINEBREAKTYPE::kINDIRECT_BRK && iIndirect < 0) {
557           iIndirect = iLength;
558           iIndirectPos = *pEndPos;
559         }
560         if (iLast < 0) {
561           iLast = iLength;
562           iLastPos = *pEndPos;
563         }
564       }
565       iCharWidth = pCur->m_iCharWidth;
566       if (iCharWidth > 0)
567         *pEndPos -= iCharWidth;
568     }
569     nNext = nCur;
570     iLength--;
571   }
572   if (bOnlyBrk)
573     return 0;
574   if (iBreak > -1) {
575     *pEndPos = iBreakPos;
576     return iBreak;
577   }
578   if (iIndirect > -1) {
579     *pEndPos = iIndirectPos;
580     return iIndirect;
581   }
582   if (iLast > -1) {
583     *pEndPos = iLastPos;
584     return iLast;
585   }
586   return 0;
587 }
588 
SplitTextLine(CFGAS_BreakLine * pCurLine,CFGAS_BreakLine * pNextLine,bool bAllChars)589 void CFGAS_TxtBreak::SplitTextLine(CFGAS_BreakLine* pCurLine,
590                                    CFGAS_BreakLine* pNextLine,
591                                    bool bAllChars) {
592   DCHECK(pCurLine);
593   DCHECK(pNextLine);
594 
595   if (pCurLine->m_LineChars.size() < 2)
596     return;
597 
598   int32_t iEndPos = pCurLine->m_iWidth;
599   std::vector<CFGAS_Char>& curChars = pCurLine->m_LineChars;
600   int32_t iCharPos = GetBreakPos(&curChars, bAllChars, false, &iEndPos);
601   if (iCharPos < 0)
602     iCharPos = 0;
603 
604   iCharPos++;
605   if (iCharPos >= fxcrt::CollectionSize<int32_t>(pCurLine->m_LineChars)) {
606     pNextLine->Clear();
607     CFGAS_Char* pTC = &curChars[iCharPos - 1];
608     pTC->m_eLineBreakType = FX_LINEBREAKTYPE::kUNKNOWN;
609     return;
610   }
611 
612   pNextLine->m_LineChars =
613       std::vector<CFGAS_Char>(curChars.begin() + iCharPos, curChars.end());
614   curChars.erase(curChars.begin() + iCharPos, curChars.end());
615   pCurLine->m_iWidth = iEndPos;
616   CFGAS_Char* pTC = &curChars[iCharPos - 1];
617   pTC->m_eLineBreakType = FX_LINEBREAKTYPE::kUNKNOWN;
618   int32_t iWidth = 0;
619   for (size_t i = 0; i < pNextLine->m_LineChars.size(); ++i) {
620     if (pNextLine->m_LineChars[i].GetCharType() >= FX_CHARTYPE::kArabicAlef) {
621       pCurLine->DecrementArabicCharCount();
622       pNextLine->IncrementArabicCharCount();
623     }
624     iWidth += std::max(0, pNextLine->m_LineChars[i].m_iCharWidth);
625     pNextLine->m_LineChars[i].m_dwStatus = CFGAS_Char::BreakType::kNone;
626   }
627   pNextLine->m_iWidth = iWidth;
628 }
629 
GetDisplayPos(const Run & run,TextCharPos * pCharPos) const630 size_t CFGAS_TxtBreak::GetDisplayPos(const Run& run,
631                                      TextCharPos* pCharPos) const {
632   if (run.iLength < 1)
633     return 0;
634 
635   Engine* pEngine = run.pEdtEngine;
636   const wchar_t* pStr = run.wsStr.c_str();
637   int32_t* pWidths = run.pWidths;
638   int32_t iLength = run.iLength - 1;
639   RetainPtr<CFGAS_GEFont> pFont = run.pFont;
640   Mask<LayoutStyle> dwStyles = run.dwStyles;
641   CFX_RectF rtText(*run.pRect);
642   const bool bRTLPiece = (run.dwCharStyles & FX_TXTCHARSTYLE_OddBidiLevel) != 0;
643   const float fFontSize = run.fFontSize;
644   const int32_t iFontSize = FXSYS_roundf(fFontSize * 20.0f);
645   const int32_t iAscent = pFont->GetAscent();
646   const int32_t iDescent = pFont->GetDescent();
647   const int32_t iMaxHeight = iAscent - iDescent;
648   const float fAscent = iMaxHeight ? fFontSize * iAscent / iMaxHeight : 0;
649   int32_t iHorScale = run.iHorizontalScale;
650   int32_t iVerScale = run.iVerticalScale;
651   bool bSkipSpace = run.bSkipSpace;
652 
653   const float fYBase = rtText.top + (rtText.height - fFontSize) / 2.0f;
654   float fX = bRTLPiece ? rtText.right() : rtText.left;
655   float fY = fYBase + fAscent;
656 
657   size_t szCount = 0;
658   int32_t iNext = 0;
659   wchar_t wPrev = pdfium::unicode::kZeroWidthNoBreakSpace;
660   wchar_t wNext = pdfium::unicode::kZeroWidthNoBreakSpace;
661   wchar_t wForm = pdfium::unicode::kZeroWidthNoBreakSpace;
662   wchar_t wLast = pdfium::unicode::kZeroWidthNoBreakSpace;
663   bool bShadda = false;
664   bool bLam = false;
665   for (int32_t i = 0; i <= iLength; i++) {
666     int32_t iAbsolute = i + run.iStart;
667     int32_t iWidth;
668     wchar_t wch;
669     if (pEngine) {
670       wch = pEngine->GetChar(iAbsolute);
671       iWidth = pEngine->GetWidthOfChar(iAbsolute);
672     } else {
673       wch = *pStr++;
674       iWidth = *pWidths++;
675     }
676 
677     FX_CHARTYPE chartype = pdfium::unicode::GetCharType(wch);
678     if (chartype == FX_CHARTYPE::kArabicAlef && iWidth == 0) {
679       wPrev = pdfium::unicode::kZeroWidthNoBreakSpace;
680       wLast = wch;
681       continue;
682     }
683 
684     if (chartype >= FX_CHARTYPE::kArabicAlef) {
685       if (i < iLength) {
686         if (pEngine) {
687           iNext = i + 1;
688           while (iNext <= iLength) {
689             int32_t iNextAbsolute = iNext + run.iStart;
690             wNext = pEngine->GetChar(iNextAbsolute);
691             if (pdfium::unicode::GetCharType(wNext) !=
692                 FX_CHARTYPE::kCombination) {
693               break;
694             }
695             iNext++;
696           }
697           if (iNext > iLength)
698             wNext = pdfium::unicode::kZeroWidthNoBreakSpace;
699         } else {
700           int32_t j = -1;
701           do {
702             j++;
703             if (i + j >= iLength)
704               break;
705 
706             wNext = pStr[j];
707           } while (pdfium::unicode::GetCharType(wNext) ==
708                    FX_CHARTYPE::kCombination);
709           if (i + j >= iLength)
710             wNext = pdfium::unicode::kZeroWidthNoBreakSpace;
711         }
712       } else {
713         wNext = pdfium::unicode::kZeroWidthNoBreakSpace;
714       }
715 
716       wForm = pdfium::arabic::GetFormChar(wch, wPrev, wNext);
717       bLam = (wPrev == pdfium::arabic::kArabicLetterLam &&
718               wch == pdfium::arabic::kArabicLetterLam &&
719               wNext == pdfium::arabic::kArabicLetterHeh);
720     } else if (chartype == FX_CHARTYPE::kCombination) {
721       wForm = wch;
722       if (wch >= 0x064C && wch <= 0x0651) {
723         if (bShadda) {
724           wForm = pdfium::unicode::kZeroWidthNoBreakSpace;
725           bShadda = false;
726         } else {
727           wNext = pdfium::unicode::kZeroWidthNoBreakSpace;
728           if (pEngine) {
729             iNext = i + 1;
730             if (iNext <= iLength) {
731               int32_t iNextAbsolute = iNext + run.iStart;
732               wNext = pEngine->GetChar(iNextAbsolute);
733             }
734           } else if (i < iLength) {
735             wNext = *pStr;
736           }
737           absl::optional<uint16_t> maybe_shadda;
738           if (wch == pdfium::arabic::kArabicShadda) {
739             maybe_shadda = pdfium::arabic::GetArabicFromShaddaTable(wNext);
740           } else if (wNext == pdfium::arabic::kArabicShadda) {
741             maybe_shadda = pdfium::arabic::GetArabicFromShaddaTable(wch);
742           }
743           if (maybe_shadda.has_value()) {
744             wForm = maybe_shadda.value();
745             bShadda = true;
746           }
747         }
748       } else {
749         bShadda = false;
750       }
751     } else if (chartype == FX_CHARTYPE::kNumeric) {
752       wForm = wch;
753     } else if (wch == L'.') {
754       wForm = wch;
755     } else if (wch == L',') {
756       wForm = wch;
757     } else if (bRTLPiece) {
758       wForm = pdfium::unicode::GetMirrorChar(wch);
759     } else {
760       wForm = wch;
761     }
762     if (chartype != FX_CHARTYPE::kCombination)
763       bShadda = false;
764     if (chartype < FX_CHARTYPE::kArabicAlef)
765       bLam = false;
766 
767     bool bEmptyChar =
768         (chartype >= FX_CHARTYPE::kTab && chartype <= FX_CHARTYPE::kControl);
769     if (wForm == pdfium::unicode::kZeroWidthNoBreakSpace)
770       bEmptyChar = true;
771 
772     int32_t iForms = bLam ? 3 : 1;
773     szCount += (bEmptyChar && bSkipSpace) ? 0 : iForms;
774     if (!pCharPos) {
775       if (iWidth > 0)
776         wPrev = wch;
777       wLast = wch;
778       continue;
779     }
780 
781     int32_t iCharWidth = iWidth;
782     if (iCharWidth < 0)
783       iCharWidth = -iCharWidth;
784 
785     iCharWidth /= iFontSize;
786     FX_FORMCHAR formChars[3];
787     formChars[0].wch = wch;
788     formChars[0].wForm = wForm;
789     formChars[0].iWidth = iCharWidth;
790     if (bLam) {
791       formChars[1].wForm = pdfium::arabic::kArabicShadda;
792       formChars[1].iWidth =
793           pFont->GetCharWidth(pdfium::arabic::kArabicShadda).value_or(0);
794       formChars[2].wForm = pdfium::arabic::kArabicLetterSuperscriptAlef;
795       formChars[2].iWidth =
796           pFont->GetCharWidth(pdfium::arabic::kArabicLetterSuperscriptAlef)
797               .value_or(0);
798     }
799 
800     for (int32_t j = 0; j < iForms; j++) {
801       wForm = (wchar_t)formChars[j].wForm;
802       iCharWidth = formChars[j].iWidth;
803       if (j > 0) {
804         chartype = FX_CHARTYPE::kCombination;
805         wch = wForm;
806         wLast = (wchar_t)formChars[j - 1].wForm;
807       }
808       if (!bEmptyChar || (bEmptyChar && !bSkipSpace)) {
809         pCharPos->m_GlyphIndex = pFont->GetGlyphIndex(wForm);
810 #if BUILDFLAG(IS_APPLE)
811         pCharPos->m_ExtGID = pCharPos->m_GlyphIndex;
812 #endif
813         pCharPos->m_FontCharWidth = iCharWidth;
814       }
815 
816       const float fCharWidth = fFontSize * iCharWidth / 1000.0f;
817       if (bRTLPiece && chartype != FX_CHARTYPE::kCombination)
818         fX -= fCharWidth;
819 
820       if (!bEmptyChar || (bEmptyChar && !bSkipSpace)) {
821         pCharPos->m_Origin = CFX_PointF(fX, fY);
822 
823         if (!!(dwStyles & LayoutStyle::kCombText)) {
824           int32_t iFormWidth = pFont->GetCharWidth(wForm).value_or(iCharWidth);
825           float fOffset = fFontSize * (iCharWidth - iFormWidth) / 2000.0f;
826           pCharPos->m_Origin.x += fOffset;
827         }
828         if (chartype == FX_CHARTYPE::kCombination) {
829           absl::optional<FX_RECT> rtBBox = pFont->GetCharBBox(wForm);
830           if (rtBBox.has_value()) {
831             pCharPos->m_Origin.y =
832                 fYBase + fFontSize -
833                 fFontSize * rtBBox.value().Height() / iMaxHeight;
834           }
835           if (wForm == wch &&
836               wLast != pdfium::unicode::kZeroWidthNoBreakSpace) {
837             if (pdfium::unicode::GetCharType(wLast) ==
838                 FX_CHARTYPE::kCombination) {
839               absl::optional<FX_RECT> rtOtherBox = pFont->GetCharBBox(wLast);
840               if (rtOtherBox.has_value()) {
841                 pCharPos->m_Origin.y -=
842                     fFontSize * rtOtherBox.value().Height() / iMaxHeight;
843               }
844             }
845           }
846         }
847       }
848       if (!bRTLPiece && chartype != FX_CHARTYPE::kCombination)
849         fX += fCharWidth;
850 
851       if (!bEmptyChar || (bEmptyChar && !bSkipSpace)) {
852         pCharPos->m_bGlyphAdjust = true;
853         pCharPos->m_AdjustMatrix[0] = -1;
854         pCharPos->m_AdjustMatrix[1] = 0;
855         pCharPos->m_AdjustMatrix[2] = 0;
856         pCharPos->m_AdjustMatrix[3] = 1;
857 
858         if (iHorScale != 100 || iVerScale != 100) {
859           pCharPos->m_AdjustMatrix[0] =
860               pCharPos->m_AdjustMatrix[0] * iHorScale / 100.0f;
861           pCharPos->m_AdjustMatrix[1] =
862               pCharPos->m_AdjustMatrix[1] * iHorScale / 100.0f;
863           pCharPos->m_AdjustMatrix[2] =
864               pCharPos->m_AdjustMatrix[2] * iVerScale / 100.0f;
865           pCharPos->m_AdjustMatrix[3] =
866               pCharPos->m_AdjustMatrix[3] * iVerScale / 100.0f;
867         }
868         pCharPos++;
869       }
870     }
871     if (iWidth > 0)
872       wPrev = static_cast<wchar_t>(formChars[0].wch);
873     wLast = wch;
874   }
875   return szCount;
876 }
877 
GetCharRects(const Run & run) const878 std::vector<CFX_RectF> CFGAS_TxtBreak::GetCharRects(const Run& run) const {
879   if (run.iLength < 1)
880     return std::vector<CFX_RectF>();
881 
882   Engine* pEngine = run.pEdtEngine;
883   const wchar_t* pStr = run.wsStr.c_str();
884   int32_t* pWidths = run.pWidths;
885   int32_t iLength = run.iLength;
886   CFX_RectF rect(*run.pRect);
887   float fFontSize = run.fFontSize;
888   bool bRTLPiece = !!(run.dwCharStyles & FX_TXTCHARSTYLE_OddBidiLevel);
889   bool bSingleLine = !!(run.dwStyles & LayoutStyle::kSingleLine);
890   float fStart = bRTLPiece ? rect.right() : rect.left;
891 
892   std::vector<CFX_RectF> rtArray(iLength);
893   for (int32_t i = 0; i < iLength; i++) {
894     wchar_t wch;
895     int32_t iCharSize;
896     if (pEngine) {
897       int32_t iAbsolute = i + run.iStart;
898       wch = pEngine->GetChar(iAbsolute);
899       iCharSize = pEngine->GetWidthOfChar(iAbsolute);
900     } else {
901       wch = *pStr++;
902       iCharSize = *pWidths++;
903     }
904     float fCharSize = static_cast<float>(iCharSize) / kConversionFactor;
905     bool bRet = (!bSingleLine && IsCtrlCode(wch));
906     if (!(wch == L'\v' || wch == L'\f' ||
907           wch == pdfium::unicode::kLineSeparator ||
908           wch == pdfium::unicode::kParagraphSeparator || wch == L'\n')) {
909       bRet = false;
910     }
911     if (bRet)
912       fCharSize = fFontSize / 2.0f;
913     rect.left = fStart;
914     if (bRTLPiece) {
915       rect.left -= fCharSize;
916       fStart -= fCharSize;
917     } else {
918       fStart += fCharSize;
919     }
920     rect.width = fCharSize;
921     rtArray[i] = rect;
922   }
923   return rtArray;
924 }
925 
926 CFGAS_TxtBreak::Engine::~Engine() = default;
927 
928 CFGAS_TxtBreak::Run::Run() = default;
929 
930 CFGAS_TxtBreak::Run::~Run() = default;
931 
932 CFGAS_TxtBreak::Run::Run(const CFGAS_TxtBreak::Run& other) = default;
933