1 // Scintilla source code edit control 2 /** @file Document.cxx 3 ** Text document that handles notifications, DBCS, styling, words and end of line. 4 **/ 5 // Copyright 1998-2011 by Neil Hodgson <[email protected]> 6 // The License.txt file describes the conditions under which this software may be distributed. 7 8 #include <cstddef> 9 #include <cstdlib> 10 #include <cassert> 11 #include <cstring> 12 #include <cstdio> 13 #include <cmath> 14 15 #include <stdexcept> 16 #include <string> 17 #include <string_view> 18 #include <vector> 19 #include <forward_list> 20 #include <algorithm> 21 #include <memory> 22 #include <chrono> 23 24 #ifndef NO_CXX11_REGEX 25 #include <regex> 26 #endif 27 28 #include "Platform.h" 29 30 #include "ILoader.h" 31 #include "ILexer.h" 32 #include "Scintilla.h" 33 34 #include "CharacterSet.h" 35 #include "CharacterCategory.h" 36 #include "Position.h" 37 #include "SplitVector.h" 38 #include "Partitioning.h" 39 #include "RunStyles.h" 40 #include "CellBuffer.h" 41 #include "PerLine.h" 42 #include "CharClassify.h" 43 #include "Decoration.h" 44 #include "CaseFolder.h" 45 #include "Document.h" 46 #include "RESearch.h" 47 #include "UniConversion.h" 48 #include "ElapsedPeriod.h" 49 50 using namespace Scintilla; 51 52 void LexInterface::Colourise(Sci::Position start, Sci::Position end) { 53 if (pdoc && instance && !performingStyle) { 54 // Protect against reentrance, which may occur, for example, when 55 // fold points are discovered while performing styling and the folding 56 // code looks for child lines which may trigger styling. 
57 performingStyle = true; 58 59 const Sci::Position lengthDoc = pdoc->Length(); 60 if (end == -1) 61 end = lengthDoc; 62 const Sci::Position len = end - start; 63 64 PLATFORM_ASSERT(len >= 0); 65 PLATFORM_ASSERT(start + len <= lengthDoc); 66 67 int styleStart = 0; 68 if (start > 0) 69 styleStart = pdoc->StyleAt(start - 1); 70 71 if (len > 0) { 72 instance->Lex(start, len, styleStart, pdoc); 73 instance->Fold(start, len, styleStart, pdoc); 74 } 75 76 performingStyle = false; 77 } 78 } 79 80 int LexInterface::LineEndTypesSupported() { 81 if (instance) { 82 return instance->LineEndTypesSupported(); 83 } 84 return 0; 85 } 86 87 ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : 88 duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { 89 } 90 91 void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { 92 // Only adjust for multiple actions to avoid instability 93 if (numberActions < 8) 94 return; 95 96 // Alpha value for exponential smoothing. 97 // Most recent value contributes 25% to smoothed value. 
98 constexpr double alpha = 0.25; 99 100 const double durationOne = durationOfActions / numberActions; 101 duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, 102 minDuration, maxDuration); 103 } 104 105 double ActionDuration::Duration() const noexcept { 106 return duration; 107 } 108 109 Document::Document(int options) : 110 cb((options & SC_DOCUMENTOPTION_STYLES_NONE) == 0, (options & SC_DOCUMENTOPTION_TEXT_LARGE) != 0), 111 durationStyleOneLine(0.00001, 0.000001, 0.0001) { 112 refCount = 0; 113 #ifdef _WIN32 114 eolMode = SC_EOL_CRLF; 115 #else 116 eolMode = SC_EOL_LF; 117 #endif 118 dbcsCodePage = SC_CP_UTF8; 119 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT; 120 endStyled = 0; 121 styleClock = 0; 122 enteredModification = 0; 123 enteredStyling = 0; 124 enteredReadOnlyCount = 0; 125 insertionSet = false; 126 tabInChars = 8; 127 indentInChars = 0; 128 actualIndentInChars = 8; 129 useTabs = true; 130 tabIndents = true; 131 backspaceUnindents = false; 132 133 matchesValid = false; 134 135 perLineData[ldMarkers] = std::make_unique<LineMarkers>(); 136 perLineData[ldLevels] = std::make_unique<LineLevels>(); 137 perLineData[ldState] = std::make_unique<LineState>(); 138 perLineData[ldMargin] = std::make_unique<LineAnnotation>(); 139 perLineData[ldAnnotation] = std::make_unique<LineAnnotation>(); 140 perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>(); 141 142 decorations = DecorationListCreate(IsLarge()); 143 144 cb.SetPerLine(this); 145 cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); 146 } 147 148 Document::~Document() { 149 for (const WatcherWithUserData &watcher : watchers) { 150 watcher.watcher->NotifyDeleted(this, watcher.userData); 151 } 152 } 153 154 // Increase reference count and return its previous value. 155 int Document::AddRef() { 156 return refCount++; 157 } 158 159 // Decrease reference count and return its previous value. 160 // Delete the document if reference count reaches zero. 
161 int SCI_METHOD Document::Release() { 162 const int curRefCount = --refCount; 163 if (curRefCount == 0) 164 delete this; 165 return curRefCount; 166 } 167 168 void Document::Init() { 169 for (const std::unique_ptr<PerLine> &pl : perLineData) { 170 if (pl) 171 pl->Init(); 172 } 173 } 174 175 void Document::InsertLine(Sci::Line line) { 176 for (const std::unique_ptr<PerLine> &pl : perLineData) { 177 if (pl) 178 pl->InsertLine(line); 179 } 180 } 181 182 void Document::InsertLines(Sci::Line line, Sci::Line lines) { 183 for (const auto &pl : perLineData) { 184 if (pl) 185 pl->InsertLines(line, lines); 186 } 187 } 188 189 void Document::RemoveLine(Sci::Line line) { 190 for (const std::unique_ptr<PerLine> &pl : perLineData) { 191 if (pl) 192 pl->RemoveLine(line); 193 } 194 } 195 196 LineMarkers *Document::Markers() const noexcept { 197 return dynamic_cast<LineMarkers *>(perLineData[ldMarkers].get()); 198 } 199 200 LineLevels *Document::Levels() const noexcept { 201 return dynamic_cast<LineLevels *>(perLineData[ldLevels].get()); 202 } 203 204 LineState *Document::States() const noexcept { 205 return dynamic_cast<LineState *>(perLineData[ldState].get()); 206 } 207 208 LineAnnotation *Document::Margins() const noexcept { 209 return dynamic_cast<LineAnnotation *>(perLineData[ldMargin].get()); 210 } 211 212 LineAnnotation *Document::Annotations() const noexcept { 213 return dynamic_cast<LineAnnotation *>(perLineData[ldAnnotation].get()); 214 } 215 216 LineAnnotation *Document::EOLAnnotations() const noexcept { 217 return dynamic_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get()); 218 } 219 220 int Document::LineEndTypesSupported() const { 221 if ((SC_CP_UTF8 == dbcsCodePage) && pli) 222 return pli->LineEndTypesSupported(); 223 else 224 return 0; 225 } 226 227 bool Document::SetDBCSCodePage(int dbcsCodePage_) { 228 if (dbcsCodePage != dbcsCodePage_) { 229 dbcsCodePage = dbcsCodePage_; 230 SetCaseFolder(nullptr); 231 cb.SetLineEndTypes(lineEndBitSet & 
LineEndTypesSupported()); 232 cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); 233 ModifiedAt(0); // Need to restyle whole document 234 return true; 235 } else { 236 return false; 237 } 238 } 239 240 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) { 241 if (lineEndBitSet != lineEndBitSet_) { 242 lineEndBitSet = lineEndBitSet_; 243 const int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); 244 if (lineEndBitSetActive != cb.GetLineEndTypes()) { 245 ModifiedAt(0); 246 cb.SetLineEndTypes(lineEndBitSetActive); 247 return true; 248 } else { 249 return false; 250 } 251 } else { 252 return false; 253 } 254 } 255 256 void Document::SetSavePoint() { 257 cb.SetSavePoint(); 258 NotifySavePoint(true); 259 } 260 261 void Document::TentativeUndo() { 262 if (!TentativeActive()) 263 return; 264 CheckReadOnly(); 265 if (enteredModification == 0) { 266 enteredModification++; 267 if (!cb.IsReadOnly()) { 268 const bool startSavePoint = cb.IsSavePoint(); 269 bool multiLine = false; 270 const int steps = cb.TentativeSteps(); 271 //Platform::DebugPrintf("Steps=%d\n", steps); 272 for (int step = 0; step < steps; step++) { 273 const Sci::Line prevLinesTotal = LinesTotal(); 274 const Action &action = cb.GetUndoStep(); 275 if (action.at == removeAction) { 276 NotifyModified(DocModification( 277 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action)); 278 } else if (action.at == containerAction) { 279 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO); 280 dm.token = action.position; 281 NotifyModified(dm); 282 } else { 283 NotifyModified(DocModification( 284 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action)); 285 } 286 cb.PerformUndoStep(); 287 if (action.at != containerAction) { 288 ModifiedAt(action.position); 289 } 290 291 int modFlags = SC_PERFORMED_UNDO; 292 // With undo, an insertion action becomes a deletion notification 293 if (action.at == removeAction) { 294 modFlags |= SC_MOD_INSERTTEXT; 295 } else if (action.at == insertAction) { 296 modFlags |= 
SC_MOD_DELETETEXT; 297 } 298 if (steps > 1) 299 modFlags |= SC_MULTISTEPUNDOREDO; 300 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; 301 if (linesAdded != 0) 302 multiLine = true; 303 if (step == steps - 1) { 304 modFlags |= SC_LASTSTEPINUNDOREDO; 305 if (multiLine) 306 modFlags |= SC_MULTILINEUNDOREDO; 307 } 308 NotifyModified(DocModification(modFlags, action.position, action.lenData, 309 linesAdded, action.data.get())); 310 } 311 312 const bool endSavePoint = cb.IsSavePoint(); 313 if (startSavePoint != endSavePoint) 314 NotifySavePoint(endSavePoint); 315 316 cb.TentativeCommit(); 317 } 318 enteredModification--; 319 } 320 } 321 322 int Document::GetMark(Sci::Line line) const noexcept { 323 return Markers()->MarkValue(line); 324 } 325 326 Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { 327 return Markers()->MarkerNext(lineStart, mask); 328 } 329 330 int Document::AddMark(Sci::Line line, int markerNum) { 331 if (line >= 0 && line <= LinesTotal()) { 332 const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); 333 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); 334 NotifyModified(mh); 335 return prev; 336 } else { 337 return -1; 338 } 339 } 340 341 void Document::AddMarkSet(Sci::Line line, int valueSet) { 342 if (line < 0 || line > LinesTotal()) { 343 return; 344 } 345 unsigned int m = valueSet; 346 for (int i = 0; m; i++, m >>= 1) { 347 if (m & 1) 348 Markers()->AddMark(line, i, LinesTotal()); 349 } 350 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); 351 NotifyModified(mh); 352 } 353 354 void Document::DeleteMark(Sci::Line line, int markerNum) { 355 Markers()->DeleteMark(line, markerNum, false); 356 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); 357 NotifyModified(mh); 358 } 359 360 void Document::DeleteMarkFromHandle(int markerHandle) { 361 Markers()->DeleteMarkFromHandle(markerHandle); 362 
DocModification mh(SC_MOD_CHANGEMARKER); 363 mh.line = -1; 364 NotifyModified(mh); 365 } 366 367 void Document::DeleteAllMarks(int markerNum) { 368 bool someChanges = false; 369 for (Sci::Line line = 0; line < LinesTotal(); line++) { 370 if (Markers()->DeleteMark(line, markerNum, true)) 371 someChanges = true; 372 } 373 if (someChanges) { 374 DocModification mh(SC_MOD_CHANGEMARKER); 375 mh.line = -1; 376 NotifyModified(mh); 377 } 378 } 379 380 Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { 381 return Markers()->LineFromHandle(markerHandle); 382 } 383 384 int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { 385 return Markers()->NumberFromLine(line, which); 386 } 387 388 int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { 389 return Markers()->HandleFromLine(line, which); 390 } 391 392 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const { 393 return cb.LineStart(line); 394 } 395 396 bool Document::IsLineStartPosition(Sci::Position position) const { 397 return LineStart(LineFromPosition(position)) == position; 398 } 399 400 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { 401 if (line >= LinesTotal() - 1) { 402 return LineStart(line + 1); 403 } else { 404 Sci::Position position = LineStart(line + 1); 405 if (SC_LINE_END_TYPE_UNICODE == cb.GetLineEndTypes()) { 406 const unsigned char bytes[] = { 407 cb.UCharAt(position-3), 408 cb.UCharAt(position-2), 409 cb.UCharAt(position-1), 410 }; 411 if (UTF8IsSeparator(bytes)) { 412 return position - UTF8SeparatorLength; 413 } 414 if (UTF8IsNEL(bytes+1)) { 415 return position - UTF8NELLength; 416 } 417 } 418 position--; // Back over CR or LF 419 // When line terminator is CR+LF, may need to go back one more 420 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) { 421 position--; 422 } 423 return position; 424 } 425 } 426 427 void SCI_METHOD Document::SetErrorStatus(int status) { 428 // Tell the 
watchers an error has occurred. 429 for (const WatcherWithUserData &watcher : watchers) { 430 watcher.watcher->NotifyErrorOccurred(this, watcher.userData, status); 431 } 432 } 433 434 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const { 435 return cb.LineFromPosition(pos); 436 } 437 438 Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept { 439 // Avoids casting in callers for this very common function 440 return cb.LineFromPosition(pos); 441 } 442 443 Sci::Position Document::LineEndPosition(Sci::Position position) const { 444 return LineEnd(LineFromPosition(position)); 445 } 446 447 bool Document::IsLineEndPosition(Sci::Position position) const { 448 return LineEnd(LineFromPosition(position)) == position; 449 } 450 451 bool Document::IsPositionInLineEnd(Sci::Position position) const { 452 return position >= LineEnd(LineFromPosition(position)); 453 } 454 455 Sci::Position Document::VCHomePosition(Sci::Position position) const { 456 const Sci::Line line = SciLineFromPosition(position); 457 const Sci::Position startPosition = LineStart(line); 458 const Sci::Position endLine = LineEnd(line); 459 Sci::Position startText = startPosition; 460 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t')) 461 startText++; 462 if (position == startText) 463 return startPosition; 464 else 465 return startText; 466 } 467 468 Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept { 469 return cb.IndexLineStart(line, lineCharacterIndex); 470 } 471 472 Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept { 473 return cb.LineFromPositionIndex(pos, lineCharacterIndex); 474 } 475 476 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { 477 const int prev = Levels()->SetLevel(line, level, LinesTotal()); 478 if (prev != level) { 479 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER, 480 LineStart(line), 
0, 0, nullptr, line); 481 mh.foldLevelNow = level; 482 mh.foldLevelPrev = prev; 483 NotifyModified(mh); 484 } 485 return prev; 486 } 487 488 int SCI_METHOD Document::GetLevel(Sci_Position line) const { 489 return Levels()->GetLevel(line); 490 } 491 492 void Document::ClearLevels() { 493 Levels()->ClearLevels(); 494 } 495 496 static bool IsSubordinate(int levelStart, int levelTry) noexcept { 497 if (levelTry & SC_FOLDLEVELWHITEFLAG) 498 return true; 499 else 500 return LevelNumber(levelStart) < LevelNumber(levelTry); 501 } 502 503 Sci::Line Document::GetLastChild(Sci::Line lineParent, int level, Sci::Line lastLine) { 504 if (level == -1) 505 level = LevelNumber(GetLevel(lineParent)); 506 const Sci::Line maxLine = LinesTotal(); 507 const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1; 508 Sci::Line lineMaxSubord = lineParent; 509 while (lineMaxSubord < maxLine - 1) { 510 EnsureStyledTo(LineStart(lineMaxSubord + 2)); 511 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1))) 512 break; 513 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG)) 514 break; 515 lineMaxSubord++; 516 } 517 if (lineMaxSubord > lineParent) { 518 if (level > LevelNumber(GetLevel(lineMaxSubord + 1))) { 519 // Have chewed up some whitespace that belongs to a parent so seek back 520 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) { 521 lineMaxSubord--; 522 } 523 } 524 } 525 return lineMaxSubord; 526 } 527 528 Sci::Line Document::GetFoldParent(Sci::Line line) const { 529 const int level = LevelNumber(GetLevel(line)); 530 Sci::Line lineLook = line - 1; 531 while ((lineLook > 0) && ( 532 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) || 533 (LevelNumber(GetLevel(lineLook)) >= level)) 534 ) { 535 lineLook--; 536 } 537 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) && 538 (LevelNumber(GetLevel(lineLook)) < level)) { 539 return lineLook; 540 } else { 541 return -1; 542 } 543 } 544 545 void 
Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) { 546 const int level = GetLevel(line); 547 const Sci::Line lookLastLine = std::max(line, lastLine) + 1; 548 549 Sci::Line lookLine = line; 550 int lookLineLevel = level; 551 int lookLineLevelNum = LevelNumber(lookLineLevel); 552 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || 553 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= LevelNumber(GetLevel(lookLine + 1)))))) { 554 lookLineLevel = GetLevel(--lookLine); 555 lookLineLevelNum = LevelNumber(lookLineLevel); 556 } 557 558 Sci::Line beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine); 559 if (beginFoldBlock == -1) { 560 highlightDelimiter.Clear(); 561 return; 562 } 563 564 Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine); 565 Sci::Line firstChangeableLineBefore = -1; 566 if (endFoldBlock < line) { 567 lookLine = beginFoldBlock - 1; 568 lookLineLevel = GetLevel(lookLine); 569 lookLineLevelNum = LevelNumber(lookLineLevel); 570 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) { 571 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) { 572 if (GetLastChild(lookLine, -1, lookLastLine) == line) { 573 beginFoldBlock = lookLine; 574 endFoldBlock = line; 575 firstChangeableLineBefore = line - 1; 576 } 577 } 578 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && (LevelNumber(GetLevel(lookLine - 1)) > lookLineLevelNum)) 579 break; 580 lookLineLevel = GetLevel(--lookLine); 581 lookLineLevelNum = LevelNumber(lookLineLevel); 582 } 583 } 584 if (firstChangeableLineBefore == -1) { 585 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel); 586 lookLine >= beginFoldBlock; 587 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) { 588 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > 
LevelNumber(level))) { 589 firstChangeableLineBefore = lookLine; 590 break; 591 } 592 } 593 } 594 if (firstChangeableLineBefore == -1) 595 firstChangeableLineBefore = beginFoldBlock - 1; 596 597 Sci::Line firstChangeableLineAfter = -1; 598 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel); 599 lookLine <= endFoldBlock; 600 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) { 601 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < LevelNumber(GetLevel(lookLine + 1)))) { 602 firstChangeableLineAfter = lookLine; 603 break; 604 } 605 } 606 if (firstChangeableLineAfter == -1) 607 firstChangeableLineAfter = endFoldBlock + 1; 608 609 highlightDelimiter.beginFoldBlock = beginFoldBlock; 610 highlightDelimiter.endFoldBlock = endFoldBlock; 611 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore; 612 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter; 613 } 614 615 Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { 616 return std::clamp<Sci::Position>(pos, 0, LengthNoExcept()); 617 } 618 619 bool Document::IsCrLf(Sci::Position pos) const noexcept { 620 if (pos < 0) 621 return false; 622 if (pos >= (LengthNoExcept() - 1)) 623 return false; 624 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); 625 } 626 627 int Document::LenChar(Sci::Position pos) const noexcept { 628 if (pos < 0 || pos >= LengthNoExcept()) { 629 // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. 
630 return 1; 631 } else if (IsCrLf(pos)) { 632 return 2; 633 } 634 635 const unsigned char leadByte = cb.UCharAt(pos); 636 if (!dbcsCodePage || UTF8IsAscii(leadByte)) { 637 // Common case: ASCII character 638 return 1; 639 } 640 if (SC_CP_UTF8 == dbcsCodePage) { 641 const int widthCharBytes = UTF8BytesOfLead[leadByte]; 642 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; 643 for (int b = 1; b < widthCharBytes; b++) { 644 charBytes[b] = cb.UCharAt(pos + b); 645 } 646 const int utf8status = UTF8Classify(charBytes, widthCharBytes); 647 if (utf8status & UTF8MaskInvalid) { 648 // Treat as invalid and use up just one byte 649 return 1; 650 } else { 651 return utf8status & UTF8MaskWidth; 652 } 653 } else { 654 if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) { 655 return 2; 656 } else { 657 return 1; 658 } 659 } 660 } 661 662 bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept { 663 Sci::Position trail = pos; 664 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1))) 665 trail--; 666 start = (trail > 0) ? trail-1 : trail; 667 668 const unsigned char leadByte = cb.UCharAt(start); 669 const int widthCharBytes = UTF8BytesOfLead[leadByte]; 670 if (widthCharBytes == 1) { 671 return false; 672 } else { 673 const int trailBytes = widthCharBytes - 1; 674 const Sci::Position len = pos - start; 675 if (len > trailBytes) 676 // pos too far from lead 677 return false; 678 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; 679 for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++) 680 charBytes[b] = cb.CharAt(start+b); 681 const int utf8status = UTF8Classify(charBytes, widthCharBytes); 682 if (utf8status & UTF8MaskInvalid) 683 return false; 684 end = start + widthCharBytes; 685 return true; 686 } 687 } 688 689 // Normalise a position so that it is not halfway through a two byte character. 
690 // This can occur in two situations - 691 // When lines are terminated with \r\n pairs which should be treated as one character. 692 // When displaying DBCS text such as Japanese. 693 // If moving, move the position in the indicated direction. 694 Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept { 695 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir); 696 // If out of range, just return minimum/maximum value. 697 if (pos <= 0) 698 return 0; 699 if (pos >= LengthNoExcept()) 700 return LengthNoExcept(); 701 702 // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept()); 703 if (checkLineEnd && IsCrLf(pos - 1)) { 704 if (moveDir > 0) 705 return pos + 1; 706 else 707 return pos - 1; 708 } 709 710 if (dbcsCodePage) { 711 if (SC_CP_UTF8 == dbcsCodePage) { 712 const unsigned char ch = cb.UCharAt(pos); 713 // If ch is not a trail byte then pos is valid intercharacter position 714 if (UTF8IsTrailByte(ch)) { 715 Sci::Position startUTF = pos; 716 Sci::Position endUTF = pos; 717 if (InGoodUTF8(pos, startUTF, endUTF)) { 718 // ch is a trail byte within a UTF-8 character 719 if (moveDir > 0) 720 pos = endUTF; 721 else 722 pos = startUTF; 723 } 724 // Else invalid UTF-8 so return position of isolated trail byte 725 } 726 } else { 727 // Anchor DBCS calculations at start of line because start of line can 728 // not be a DBCS trail byte. 729 const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); 730 if (pos == posStartLine) 731 return pos; 732 733 // Step back until a non-lead-byte is found. 734 Sci::Position posCheck = pos; 735 while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1))) 736 posCheck--; 737 738 // Check from known start of character. 739 while (posCheck < pos) { 740 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 
2 : 1; 741 if (posCheck + mbsize == pos) { 742 return pos; 743 } else if (posCheck + mbsize > pos) { 744 if (moveDir > 0) { 745 return posCheck + mbsize; 746 } else { 747 return posCheck; 748 } 749 } 750 posCheck += mbsize; 751 } 752 } 753 } 754 755 return pos; 756 } 757 758 // NextPosition moves between valid positions - it can not handle a position in the middle of a 759 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar. 760 // A \r\n pair is treated as two characters. 761 Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept { 762 // If out of range, just return minimum/maximum value. 763 const int increment = (moveDir > 0) ? 1 : -1; 764 if (pos + increment <= 0) 765 return 0; 766 if (pos + increment >= cb.Length()) 767 return cb.Length(); 768 769 if (dbcsCodePage) { 770 if (SC_CP_UTF8 == dbcsCodePage) { 771 if (increment == 1) { 772 // Simple forward movement case so can avoid some checks 773 const unsigned char leadByte = cb.UCharAt(pos); 774 if (UTF8IsAscii(leadByte)) { 775 // Single byte character or invalid 776 pos++; 777 } else { 778 const int widthCharBytes = UTF8BytesOfLead[leadByte]; 779 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; 780 for (int b=1; b<widthCharBytes; b++) 781 charBytes[b] = cb.CharAt(pos+b); 782 const int utf8status = UTF8Classify(charBytes, widthCharBytes); 783 if (utf8status & UTF8MaskInvalid) 784 pos++; 785 else 786 pos += utf8status & UTF8MaskWidth; 787 } 788 } else { 789 // Examine byte before position 790 pos--; 791 const unsigned char ch = cb.UCharAt(pos); 792 // If ch is not a trail byte then pos is valid intercharacter position 793 if (UTF8IsTrailByte(ch)) { 794 // If ch is a trail byte in a valid UTF-8 character then return start of character 795 Sci::Position startUTF = pos; 796 Sci::Position endUTF = pos; 797 if (InGoodUTF8(pos, startUTF, endUTF)) { 798 pos = startUTF; 799 } 800 // Else invalid UTF-8 so return position of 
isolated trail byte 801 } 802 } 803 } else { 804 if (moveDir > 0) { 805 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1; 806 pos += mbsize; 807 if (pos > cb.Length()) 808 pos = cb.Length(); 809 } else { 810 // Anchor DBCS calculations at start of line because start of line can 811 // not be a DBCS trail byte. 812 const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); 813 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx 814 // http://msdn.microsoft.com/en-us/library/cc194790.aspx 815 if ((pos - 1) <= posStartLine) { 816 return pos - 1; 817 } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) { 818 // Must actually be trail byte 819 return pos - 2; 820 } else { 821 // Otherwise, step back until a non-lead-byte is found. 822 Sci::Position posTemp = pos - 1; 823 while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp))) 824 ; 825 // Now posTemp+1 must point to the beginning of a character, 826 // so figure out whether we went back an even or an odd 827 // number of bytes and go back 1 or 2 bytes, respectively. 
828 return (pos - 1 - ((pos - posTemp) & 1)); 829 } 830 } 831 } 832 } else { 833 pos += increment; 834 } 835 836 return pos; 837 } 838 839 bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { 840 // Returns true if pos changed 841 Sci::Position posNext = NextPosition(pos, moveDir); 842 if (posNext == pos) { 843 return false; 844 } else { 845 pos = posNext; 846 return true; 847 } 848 } 849 850 Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { 851 if (position >= LengthNoExcept()) { 852 return CharacterExtracted(unicodeReplacementChar, 0); 853 } 854 const unsigned char leadByte = cb.UCharAt(position); 855 if (!dbcsCodePage || UTF8IsAscii(leadByte)) { 856 // Common case: ASCII character 857 return CharacterExtracted(leadByte, 1); 858 } 859 if (SC_CP_UTF8 == dbcsCodePage) { 860 const int widthCharBytes = UTF8BytesOfLead[leadByte]; 861 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; 862 for (int b = 1; b<widthCharBytes; b++) 863 charBytes[b] = cb.UCharAt(position + b); 864 const int utf8status = UTF8Classify(charBytes, widthCharBytes); 865 if (utf8status & UTF8MaskInvalid) { 866 // Treat as invalid and use up just one byte 867 return CharacterExtracted(unicodeReplacementChar, 1); 868 } else { 869 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); 870 } 871 } else { 872 if (IsDBCSLeadByteNoExcept(leadByte) && ((position + 1) < LengthNoExcept())) { 873 return CharacterExtracted::DBCS(leadByte, cb.UCharAt(position + 1)); 874 } else { 875 return CharacterExtracted(leadByte, 1); 876 } 877 } 878 } 879 880 Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept { 881 if (position <= 0) { 882 return CharacterExtracted(unicodeReplacementChar, 0); 883 } 884 const unsigned char previousByte = cb.UCharAt(position - 1); 885 if (0 == dbcsCodePage) { 886 return CharacterExtracted(previousByte, 1); 887 } 888 if (SC_CP_UTF8 == 
dbcsCodePage) { 889 if (UTF8IsAscii(previousByte)) { 890 return CharacterExtracted(previousByte, 1); 891 } 892 position--; 893 // If previousByte is not a trail byte then its invalid 894 if (UTF8IsTrailByte(previousByte)) { 895 // If previousByte is a trail byte in a valid UTF-8 character then find start of character 896 Sci::Position startUTF = position; 897 Sci::Position endUTF = position; 898 if (InGoodUTF8(position, startUTF, endUTF)) { 899 const Sci::Position widthCharBytes = endUTF - startUTF; 900 unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 }; 901 for (Sci::Position b = 0; b<widthCharBytes; b++) 902 charBytes[b] = cb.UCharAt(startUTF + b); 903 const int utf8status = UTF8Classify(charBytes, widthCharBytes); 904 if (utf8status & UTF8MaskInvalid) { 905 // Treat as invalid and use up just one byte 906 return CharacterExtracted(unicodeReplacementChar, 1); 907 } else { 908 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); 909 } 910 } 911 // Else invalid UTF-8 so return position of isolated trail byte 912 } 913 return CharacterExtracted(unicodeReplacementChar, 1); 914 } else { 915 // Moving backwards in DBCS is complex so use NextPosition 916 const Sci::Position posStartCharacter = NextPosition(position, -1); 917 return CharacterAfter(posStartCharacter); 918 } 919 } 920 921 // Return -1 on out-of-bounds 922 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const { 923 Sci::Position pos = positionStart; 924 if (dbcsCodePage) { 925 const int increment = (characterOffset > 0) ? 
1 : -1; 926 while (characterOffset != 0) { 927 const Sci::Position posNext = NextPosition(pos, increment); 928 if (posNext == pos) 929 return INVALID_POSITION; 930 pos = posNext; 931 characterOffset -= increment; 932 } 933 } else { 934 pos = positionStart + characterOffset; 935 if ((pos < 0) || (pos > Length())) 936 return INVALID_POSITION; 937 } 938 return pos; 939 } 940 941 Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { 942 Sci::Position pos = positionStart; 943 if (dbcsCodePage) { 944 const int increment = (characterOffset > 0) ? 1 : -1; 945 while (characterOffset != 0) { 946 const Sci::Position posNext = NextPosition(pos, increment); 947 if (posNext == pos) 948 return INVALID_POSITION; 949 if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. 950 characterOffset -= increment; 951 pos = posNext; 952 characterOffset -= increment; 953 } 954 } else { 955 pos = positionStart + characterOffset; 956 if ((pos < 0) || (pos > LengthNoExcept())) 957 return INVALID_POSITION; 958 } 959 return pos; 960 } 961 962 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { 963 int character; 964 int bytesInCharacter = 1; 965 const unsigned char leadByte = cb.UCharAt(position); 966 if (dbcsCodePage) { 967 if (SC_CP_UTF8 == dbcsCodePage) { 968 if (UTF8IsAscii(leadByte)) { 969 // Single byte character or invalid 970 character = leadByte; 971 } else { 972 const int widthCharBytes = UTF8BytesOfLead[leadByte]; 973 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; 974 for (int b=1; b<widthCharBytes; b++) 975 charBytes[b] = cb.UCharAt(position+b); 976 const int utf8status = UTF8Classify(charBytes, widthCharBytes); 977 if (utf8status & UTF8MaskInvalid) { 978 // Report as singleton surrogate values which are invalid Unicode 979 character = 0xDC80 + leadByte; 980 } else { 981 bytesInCharacter = utf8status & UTF8MaskWidth; 982 character = 
UnicodeFromUTF8(charBytes);
				}
			}
		} else {
			// Other multi-byte code pages: a lead byte starts a 2 byte character
			// reported as the lead byte in the high 8 bits and the next byte in the low 8 bits.
			if (IsDBCSLeadByteNoExcept(leadByte)) {
				bytesInCharacter = 2;
				character = (leadByte << 8) | cb.UCharAt(position+1);
			} else {
				character = leadByte;
			}
		}
	} else {
		// No multi-byte code page set: the byte is the character.
		character = leadByte;
	}
	// pWidth is optional and is only written when the caller supplied it.
	if (pWidth) {
		*pWidth = bytesInCharacter;
	}
	return character;
}

// Return the value of dbcsCodePage.
int SCI_METHOD Document::CodePage() const {
	return dbcsCodePage;
}

// Interface entry point that forwards to IsDBCSLeadByteNoExcept.
bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
	// Used by lexers so must match IDocument method exactly
	return IsDBCSLeadByteNoExcept(ch);
}

// Report whether ch, taken as an unsigned byte, is inside the lead byte range
// listed for the current value of dbcsCodePage.
// Code pages without a case in the switch produce false.
bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
	// Used inside core Scintilla
	// Byte ranges found in Wikipedia articles with relevant search strings in each case
	const unsigned char uch = ch;
	switch (dbcsCodePage) {
	case 932:
		// Shift_jis
		return ((uch >= 0x81) && (uch <= 0x9F)) ||
			((uch >= 0xE0) && (uch <= 0xFC));
		// Lead bytes F0 to FC may be a Microsoft addition.
1021 case 936: 1022 // GBK 1023 return (uch >= 0x81) && (uch <= 0xFE); 1024 case 949: 1025 // Korean Wansung KS C-5601-1987 1026 return (uch >= 0x81) && (uch <= 0xFE); 1027 case 950: 1028 // Big5 1029 return (uch >= 0x81) && (uch <= 0xFE); 1030 case 1361: 1031 // Korean Johab KS C-5601-1992 1032 return 1033 ((uch >= 0x84) && (uch <= 0xD3)) || 1034 ((uch >= 0xD8) && (uch <= 0xDE)) || 1035 ((uch >= 0xE0) && (uch <= 0xF9)); 1036 } 1037 return false; 1038 } 1039 1040 bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept { 1041 const unsigned char lead = ch; 1042 switch (dbcsCodePage) { 1043 case 932: 1044 // Shift_jis 1045 return 1046 (lead == 0x85) || 1047 (lead == 0x86) || 1048 (lead == 0xEB) || 1049 (lead == 0xEC) || 1050 (lead == 0xEF) || 1051 (lead == 0xFA) || 1052 (lead == 0xFB) || 1053 (lead == 0xFC); 1054 case 936: 1055 // GBK 1056 return (lead == 0x80) || (lead == 0xFF); 1057 case 949: 1058 // Korean Wansung KS C-5601-1987 1059 return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE); 1060 case 950: 1061 // Big5 1062 return 1063 ((lead >= 0x80) && (lead <= 0xA0)) || 1064 (lead == 0xC8) || 1065 (lead >= 0xFA); 1066 case 1361: 1067 // Korean Johab KS C-5601-1992 1068 return 1069 ((lead >= 0x80) && (lead <= 0x83)) || 1070 ((lead >= 0xD4) && (lead <= 0xD8)) || 1071 (lead == 0xDF) || 1072 (lead >= 0xFA); 1073 } 1074 return false; 1075 } 1076 1077 bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept { 1078 const unsigned char trail = ch; 1079 switch (dbcsCodePage) { 1080 case 932: 1081 // Shift_jis 1082 return 1083 (trail <= 0x3F) || 1084 (trail == 0x7F) || 1085 (trail >= 0xFD); 1086 case 936: 1087 // GBK 1088 return 1089 (trail <= 0x3F) || 1090 (trail == 0x7F) || 1091 (trail == 0xFF); 1092 case 949: 1093 // Korean Wansung KS C-5601-1987 1094 return 1095 (trail <= 0x40) || 1096 ((trail >= 0x5B) && (trail <= 0x60)) || 1097 ((trail >= 0x7B) && (trail <= 0x80)) || 1098 (trail == 0xFF); 1099 case 950: 1100 // Big5 1101 return 1102 (trail <= 0x3F) || 
1103 ((trail >= 0x7F) && (trail <= 0xA0)) || 1104 (trail == 0xFF); 1105 case 1361: 1106 // Korean Johab KS C-5601-1992 1107 return 1108 (trail <= 0x30) || 1109 (trail == 0x7F) || 1110 (trail == 0x80) || 1111 (trail == 0xFF); 1112 } 1113 return false; 1114 } 1115 1116 int Document::DBCSDrawBytes(std::string_view text) const noexcept { 1117 if (text.length() <= 1) { 1118 return static_cast<int>(text.length()); 1119 } 1120 if (IsDBCSLeadByteNoExcept(text[0])) { 1121 return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2; 1122 } else { 1123 return 1; 1124 } 1125 } 1126 1127 static constexpr bool IsSpaceOrTab(int ch) noexcept { 1128 return ch == ' ' || ch == '\t'; 1129 } 1130 1131 // Need to break text into segments near lengthSegment but taking into 1132 // account the encoding to not break inside a UTF-8 or DBCS character 1133 // and also trying to avoid breaking inside a pair of combining characters. 1134 // The segment length must always be long enough (more than 4 bytes) 1135 // so that there will be at least one whole character to make a segment. 1136 // For UTF-8, text must consist only of valid whole characters. 1137 // In preference order from best to worst: 1138 // 1) Break after space 1139 // 2) Break before punctuation 1140 // 3) Break after whole character 1141 1142 int Document::SafeSegment(const char *text, int length, int lengthSegment) const noexcept { 1143 if (length <= lengthSegment) 1144 return length; 1145 int lastSpaceBreak = -1; 1146 int lastPunctuationBreak = -1; 1147 int lastEncodingAllowedBreak = 0; 1148 for (int j=0; j < lengthSegment;) { 1149 const unsigned char ch = text[j]; 1150 if (j > 0) { 1151 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) { 1152 lastSpaceBreak = j; 1153 } 1154 if (ch < 'A') { 1155 lastPunctuationBreak = j; 1156 } 1157 } 1158 lastEncodingAllowedBreak = j; 1159 1160 if (dbcsCodePage == SC_CP_UTF8) { 1161 j += UTF8BytesOfLead[ch]; 1162 } else if (dbcsCodePage) { 1163 j += IsDBCSLeadByteNoExcept(ch) ? 
2 : 1; 1164 } else { 1165 j++; 1166 } 1167 } 1168 if (lastSpaceBreak >= 0) { 1169 return lastSpaceBreak; 1170 } else if (lastPunctuationBreak >= 0) { 1171 return lastPunctuationBreak; 1172 } 1173 return lastEncodingAllowedBreak; 1174 } 1175 1176 EncodingFamily Document::CodePageFamily() const noexcept { 1177 if (SC_CP_UTF8 == dbcsCodePage) 1178 return EncodingFamily::unicode; 1179 else if (dbcsCodePage) 1180 return EncodingFamily::dbcs; 1181 else 1182 return EncodingFamily::eightBit; 1183 } 1184 1185 void Document::ModifiedAt(Sci::Position pos) noexcept { 1186 if (endStyled > pos) 1187 endStyled = pos; 1188 } 1189 1190 void Document::CheckReadOnly() { 1191 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) { 1192 enteredReadOnlyCount++; 1193 NotifyModifyAttempt(); 1194 enteredReadOnlyCount--; 1195 } 1196 } 1197 1198 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt. 1199 // SetStyleAt does not change the persistent state of a document 1200 1201 bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { 1202 if (pos < 0) 1203 return false; 1204 if (len <= 0) 1205 return false; 1206 if ((pos + len) > LengthNoExcept()) 1207 return false; 1208 CheckReadOnly(); 1209 if (enteredModification != 0) { 1210 return false; 1211 } else { 1212 enteredModification++; 1213 if (!cb.IsReadOnly()) { 1214 NotifyModified( 1215 DocModification( 1216 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER, 1217 pos, len, 1218 0, 0)); 1219 const Sci::Line prevLinesTotal = LinesTotal(); 1220 const bool startSavePoint = cb.IsSavePoint(); 1221 bool startSequence = false; 1222 const char *text = cb.DeleteChars(pos, len, startSequence); 1223 if (startSavePoint && cb.IsCollectingUndo()) 1224 NotifySavePoint(false); 1225 if ((pos < LengthNoExcept()) || (pos == 0)) 1226 ModifiedAt(pos); 1227 else 1228 ModifiedAt(pos-1); 1229 NotifyModified( 1230 DocModification( 1231 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0), 1232 pos, len, 1233 
LinesTotal() - prevLinesTotal, text));
		}
		enteredModification--;
	}
	return !cb.IsReadOnly();
}

/**
 * Insert a string with a length.
 * Returns the number of bytes inserted, which is 0 when insertLength is not positive,
 * the buffer is read-only or a modification is already in progress.
 * A handler of the SC_MOD_INSERTCHECK notification may replace the text through
 * ChangeInsertion, in which case the replacement and its length are used.
 */
Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
	if (insertLength <= 0) {
		return 0;
	}
	CheckReadOnly();	// Application may change read only state here
	if (cb.IsReadOnly()) {
		return 0;
	}
	// Refuse to modify while another modification is being performed.
	if (enteredModification != 0) {
		return 0;
	}
	enteredModification++;
	// Reset the replacement state so that it only reflects this insertion's check notification.
	insertionSet = false;
	insertion.clear();
	NotifyModified(
	    DocModification(
	        SC_MOD_INSERTCHECK,
	        position, insertLength,
	        0, s));
	if (insertionSet) {
		// ChangeInsertion was called during the notification: insert the replacement instead.
		s = insertion.c_str();
		insertLength = insertion.length();
	}
	NotifyModified(
	    DocModification(
	        SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
	        position, insertLength,
	        0, s));
	// State sampled before the buffer changes so the differences can be reported afterwards.
	const Sci::Line prevLinesTotal = LinesTotal();
	const bool startSavePoint = cb.IsSavePoint();
	bool startSequence = false;
	const char *text = cb.InsertString(position, s, insertLength, startSequence);
	if (startSavePoint && cb.IsCollectingUndo())
		NotifySavePoint(false);
	ModifiedAt(position);
	NotifyModified(
	    DocModification(
	        SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
	        position, insertLength,
	        LinesTotal() - prevLinesTotal, text));
	if (insertionSet) {	// Free memory as could be large
		std::string().swap(insertion);
	}
	enteredModification--;
	return insertLength;
}

// Record replacement text for the insertion being checked.
// InsertString reads insertionSet and insertion after its SC_MOD_INSERTCHECK notification.
void Document::ChangeInsertion(const char *s, Sci::Position length) {
	insertionSet = true;
	insertion.assign(s, length);
}

// Append length bytes of data at the end of the document.
// Exceptions are converted to status codes; 0 is returned when nothing was thrown.
int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
	try {
		const Sci::Position position = Length();
		InsertString(position, data, length);
	} catch (std::bad_alloc &) {
		return
SC_STATUS_BADALLOC;
	} catch (...) {
		return SC_STATUS_FAILURE;
	}
	return 0;
}

// Return this object as an untyped pointer.
void * SCI_METHOD Document::ConvertToDocument() {
	return this;
}

// Undo the steps reported by cb.StartUndo(), sending a notification before and
// after each step.
// Returns newPos, which is -1 unless a non-container step was undone.
// Nothing is undone when a modification is already in progress, undo is not
// being collected or the buffer is read-only.
Sci::Position Document::Undo() {
	Sci::Position newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.StartUndo();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			// Track a run of undone removals that are adjacent to each other so that
			// newPos can be placed after the whole run of restored text.
			Sci::Position coalescedRemovePos = -1;
			Sci::Position coalescedRemoveLen = 0;
			Sci::Position prevRemoveActionPos = -1;
			Sci::Position prevRemoveActionLen = 0;
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// Undoing a removal puts text back so it is announced as an insertion;
				// anything that is not a removal or a container action is announced as a deletion.
				if (action.at == removeAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
				} else if (action.at == containerAction) {
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
					dm.token = action.position;
					NotifyModified(dm);
					if (!action.mayCoalesce) {
						// A container action that may not coalesce ends the current run.
						coalescedRemovePos = -1;
						coalescedRemoveLen = 0;
						prevRemoveActionPos = -1;
						prevRemoveActionLen = 0;
					}
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
				}
				cb.PerformUndoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				int modFlags = SC_PERFORMED_UNDO;
				// With undo, an insertion action becomes a deletion notification
				if (action.at == removeAction) {
					newPos += action.lenData;
					modFlags |= SC_MOD_INSERTTEXT;
					// The run continues when this removal is at the same position as the
					// previous one or starts where the previous one ended.
					if ((coalescedRemoveLen > 0) &&
						(action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
coalescedRemoveLen += action.lenData;
						// Place the position after all of the text restored by this run.
						newPos = coalescedRemovePos + coalescedRemoveLen;
					} else {
						// Start a new run at this removal.
						coalescedRemovePos = action.position;
						coalescedRemoveLen = action.lenData;
					}
					prevRemoveActionPos = action.position;
					prevRemoveActionLen = action.lenData;
				} else if (action.at == insertAction) {
					modFlags |= SC_MOD_DELETETEXT;
					// Undoing an insertion ends any run of restored removals.
					coalescedRemovePos = -1;
					coalescedRemoveLen = 0;
					prevRemoveActionPos = -1;
					prevRemoveActionLen = 0;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step carries the last-step flag and, when any step changed
				// the number of lines, the multi-line flag.
				if (step == steps - 1) {
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
											   linesAdded, action.data.get()));
			}

			// Only notify when the save point state differs from before the undo.
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}

// Redo the steps reported by cb.StartRedo(), sending a notification before and
// after each step.
// Returns newPos, which is -1 unless a non-container step was redone.
// Nothing is redone when a modification is already in progress, undo is not
// being collected or the buffer is read-only.
Sci::Position Document::Redo() {
	Sci::Position newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.StartRedo();
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetRedoStep();
				// Redoing an insertion is announced as an insertion; anything that is not
				// an insertion or a container action is announced as a deletion.
				if (action.at == insertAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
				} else if (action.at == containerAction) {
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE |
SC_PERFORMED_REDO, action)); 1418 } 1419 cb.PerformRedoStep(); 1420 if (action.at != containerAction) { 1421 ModifiedAt(action.position); 1422 newPos = action.position; 1423 } 1424 1425 int modFlags = SC_PERFORMED_REDO; 1426 if (action.at == insertAction) { 1427 newPos += action.lenData; 1428 modFlags |= SC_MOD_INSERTTEXT; 1429 } else if (action.at == removeAction) { 1430 modFlags |= SC_MOD_DELETETEXT; 1431 } 1432 if (steps > 1) 1433 modFlags |= SC_MULTISTEPUNDOREDO; 1434 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; 1435 if (linesAdded != 0) 1436 multiLine = true; 1437 if (step == steps - 1) { 1438 modFlags |= SC_LASTSTEPINUNDOREDO; 1439 if (multiLine) 1440 modFlags |= SC_MULTILINEUNDOREDO; 1441 } 1442 NotifyModified( 1443 DocModification(modFlags, action.position, action.lenData, 1444 linesAdded, action.data.get())); 1445 } 1446 1447 const bool endSavePoint = cb.IsSavePoint(); 1448 if (startSavePoint != endSavePoint) 1449 NotifySavePoint(endSavePoint); 1450 } 1451 enteredModification--; 1452 } 1453 return newPos; 1454 } 1455 1456 void Document::DelChar(Sci::Position pos) { 1457 DeleteChars(pos, LenChar(pos)); 1458 } 1459 1460 void Document::DelCharBack(Sci::Position pos) { 1461 if (pos <= 0) { 1462 return; 1463 } else if (IsCrLf(pos - 2)) { 1464 DeleteChars(pos - 2, 2); 1465 } else if (dbcsCodePage) { 1466 const Sci::Position startChar = NextPosition(pos, -1); 1467 DeleteChars(startChar, pos - startChar); 1468 } else { 1469 DeleteChars(pos - 1, 1); 1470 } 1471 } 1472 1473 static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept { 1474 return ((pos / tabSize) + 1) * tabSize; 1475 } 1476 1477 static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) { 1478 std::string indentation; 1479 if (!insertSpaces) { 1480 while (indent >= tabSize) { 1481 indentation += '\t'; 1482 indent -= tabSize; 1483 } 1484 } 1485 while (indent > 0) { 1486 indentation += ' '; 1487 indent--; 1488 } 1489 return 
indentation; 1490 } 1491 1492 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) { 1493 int indent = 0; 1494 if ((line >= 0) && (line < LinesTotal())) { 1495 const Sci::Position lineStart = LineStart(line); 1496 const Sci::Position length = Length(); 1497 for (Sci::Position i = lineStart; i < length; i++) { 1498 const char ch = cb.CharAt(i); 1499 if (ch == ' ') 1500 indent++; 1501 else if (ch == '\t') 1502 indent = static_cast<int>(NextTab(indent, tabInChars)); 1503 else 1504 return indent; 1505 } 1506 } 1507 return indent; 1508 } 1509 1510 Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) { 1511 const int indentOfLine = GetLineIndentation(line); 1512 if (indent < 0) 1513 indent = 0; 1514 if (indent != indentOfLine) { 1515 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs); 1516 const Sci::Position thisLineStart = LineStart(line); 1517 const Sci::Position indentPos = GetLineIndentPosition(line); 1518 UndoGroup ug(this); 1519 DeleteChars(thisLineStart, indentPos - thisLineStart); 1520 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(), 1521 linebuf.length()); 1522 } else { 1523 return GetLineIndentPosition(line); 1524 } 1525 } 1526 1527 Sci::Position Document::GetLineIndentPosition(Sci::Line line) const { 1528 if (line < 0) 1529 return 0; 1530 Sci::Position pos = LineStart(line); 1531 const Sci::Position length = Length(); 1532 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) { 1533 pos++; 1534 } 1535 return pos; 1536 } 1537 1538 Sci::Position Document::GetColumn(Sci::Position pos) { 1539 Sci::Position column = 0; 1540 const Sci::Line line = SciLineFromPosition(pos); 1541 if ((line >= 0) && (line < LinesTotal())) { 1542 for (Sci::Position i = LineStart(line); i < pos;) { 1543 const char ch = cb.CharAt(i); 1544 if (ch == '\t') { 1545 column = NextTab(column, tabInChars); 1546 i++; 1547 } else if (ch == '\r') { 1548 return column; 1549 } else if (ch == '\n') { 1550 return column; 1551 
} else if (i >= Length()) { 1552 return column; 1553 } else { 1554 column++; 1555 i = NextPosition(i, 1); 1556 } 1557 } 1558 } 1559 return column; 1560 } 1561 1562 Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept { 1563 startPos = MovePositionOutsideChar(startPos, 1, false); 1564 endPos = MovePositionOutsideChar(endPos, -1, false); 1565 Sci::Position count = 0; 1566 Sci::Position i = startPos; 1567 while (i < endPos) { 1568 count++; 1569 i = NextPosition(i, 1); 1570 } 1571 return count; 1572 } 1573 1574 Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept { 1575 startPos = MovePositionOutsideChar(startPos, 1, false); 1576 endPos = MovePositionOutsideChar(endPos, -1, false); 1577 Sci::Position count = 0; 1578 Sci::Position i = startPos; 1579 while (i < endPos) { 1580 count++; 1581 const Sci::Position next = NextPosition(i, 1); 1582 if ((next - i) > 3) 1583 count++; 1584 i = next; 1585 } 1586 return count; 1587 } 1588 1589 Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) { 1590 Sci::Position position = LineStart(line); 1591 if ((line >= 0) && (line < LinesTotal())) { 1592 Sci::Position columnCurrent = 0; 1593 while ((columnCurrent < column) && (position < Length())) { 1594 const char ch = cb.CharAt(position); 1595 if (ch == '\t') { 1596 columnCurrent = NextTab(columnCurrent, tabInChars); 1597 if (columnCurrent > column) 1598 return position; 1599 position++; 1600 } else if (ch == '\r') { 1601 return position; 1602 } else if (ch == '\n') { 1603 return position; 1604 } else { 1605 columnCurrent++; 1606 position = NextPosition(position, 1); 1607 } 1608 } 1609 } 1610 return position; 1611 } 1612 1613 void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) { 1614 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab 1615 for (Sci::Line line = lineBottom; line >= lineTop; line--) { 1616 const 
Sci::Position indentOfLine = GetLineIndentation(line); 1617 if (forwards) { 1618 if (LineStart(line) < LineEnd(line)) { 1619 SetLineIndentation(line, indentOfLine + IndentSize()); 1620 } 1621 } else { 1622 SetLineIndentation(line, indentOfLine - IndentSize()); 1623 } 1624 } 1625 } 1626 1627 // Convert line endings for a piece of text to a particular mode. 1628 // Stop at len or when a NUL is found. 1629 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) { 1630 std::string dest; 1631 for (size_t i = 0; (i < len) && (s[i]); i++) { 1632 if (s[i] == '\n' || s[i] == '\r') { 1633 if (eolModeWanted == SC_EOL_CR) { 1634 dest.push_back('\r'); 1635 } else if (eolModeWanted == SC_EOL_LF) { 1636 dest.push_back('\n'); 1637 } else { // eolModeWanted == SC_EOL_CRLF 1638 dest.push_back('\r'); 1639 dest.push_back('\n'); 1640 } 1641 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) { 1642 i++; 1643 } 1644 } else { 1645 dest.push_back(s[i]); 1646 } 1647 } 1648 return dest; 1649 } 1650 1651 void Document::ConvertLineEnds(int eolModeSet) { 1652 UndoGroup ug(this); 1653 1654 for (Sci::Position pos = 0; pos < Length(); pos++) { 1655 if (cb.CharAt(pos) == '\r') { 1656 if (cb.CharAt(pos + 1) == '\n') { 1657 // CRLF 1658 if (eolModeSet == SC_EOL_CR) { 1659 DeleteChars(pos + 1, 1); // Delete the LF 1660 } else if (eolModeSet == SC_EOL_LF) { 1661 DeleteChars(pos, 1); // Delete the CR 1662 } else { 1663 pos++; 1664 } 1665 } else { 1666 // CR 1667 if (eolModeSet == SC_EOL_CRLF) { 1668 pos += InsertString(pos + 1, "\n", 1); // Insert LF 1669 } else if (eolModeSet == SC_EOL_LF) { 1670 pos += InsertString(pos, "\n", 1); // Insert LF 1671 DeleteChars(pos, 1); // Delete CR 1672 pos--; 1673 } 1674 } 1675 } else if (cb.CharAt(pos) == '\n') { 1676 // LF 1677 if (eolModeSet == SC_EOL_CRLF) { 1678 pos += InsertString(pos, "\r", 1); // Insert CR 1679 } else if (eolModeSet == SC_EOL_CR) { 1680 pos += InsertString(pos, "\r", 1); // Insert CR 1681 DeleteChars(pos, 1); 
// Delete LF 1682 pos--; 1683 } 1684 } 1685 } 1686 1687 } 1688 1689 int Document::Options() const noexcept { 1690 return (IsLarge() ? SC_DOCUMENTOPTION_TEXT_LARGE : 0) | 1691 (cb.HasStyles() ? 0 : SC_DOCUMENTOPTION_STYLES_NONE); 1692 } 1693 1694 bool Document::IsWhiteLine(Sci::Line line) const { 1695 Sci::Position currentChar = LineStart(line); 1696 const Sci::Position endLine = LineEnd(line); 1697 while (currentChar < endLine) { 1698 if (!IsSpaceOrTab(cb.CharAt(currentChar))) { 1699 return false; 1700 } 1701 ++currentChar; 1702 } 1703 return true; 1704 } 1705 1706 Sci::Position Document::ParaUp(Sci::Position pos) const { 1707 Sci::Line line = SciLineFromPosition(pos); 1708 line--; 1709 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines 1710 line--; 1711 } 1712 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines 1713 line--; 1714 } 1715 line++; 1716 return LineStart(line); 1717 } 1718 1719 Sci::Position Document::ParaDown(Sci::Position pos) const { 1720 Sci::Line line = SciLineFromPosition(pos); 1721 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines 1722 line++; 1723 } 1724 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines 1725 line++; 1726 } 1727 if (line < LinesTotal()) 1728 return LineStart(line); 1729 else // end of a document 1730 return LineEnd(line-1); 1731 } 1732 1733 CharClassify::cc Document::WordCharacterClass(unsigned int ch) const { 1734 if (dbcsCodePage && (!UTF8IsAscii(ch))) { 1735 if (SC_CP_UTF8 == dbcsCodePage) { 1736 // Use hard coded Unicode class 1737 const CharacterCategory cc = charMap.CategoryFor(ch); 1738 switch (cc) { 1739 1740 // Separator, Line/Paragraph 1741 case ccZl: 1742 case ccZp: 1743 return CharClassify::ccNewLine; 1744 1745 // Separator, Space 1746 case ccZs: 1747 // Other 1748 case ccCc: 1749 case ccCf: 1750 case ccCs: 1751 case ccCo: 1752 case ccCn: 1753 return CharClassify::ccSpace; 1754 1755 // Letter 1756 case ccLu: 1757 case ccLl: 1758 case ccLt: 1759 
case ccLm: 1760 case ccLo: 1761 // Number 1762 case ccNd: 1763 case ccNl: 1764 case ccNo: 1765 // Mark - includes combining diacritics 1766 case ccMn: 1767 case ccMc: 1768 case ccMe: 1769 return CharClassify::ccWord; 1770 1771 // Punctuation 1772 case ccPc: 1773 case ccPd: 1774 case ccPs: 1775 case ccPe: 1776 case ccPi: 1777 case ccPf: 1778 case ccPo: 1779 // Symbol 1780 case ccSm: 1781 case ccSc: 1782 case ccSk: 1783 case ccSo: 1784 return CharClassify::ccPunctuation; 1785 1786 } 1787 } else { 1788 // Asian DBCS 1789 return CharClassify::ccWord; 1790 } 1791 } 1792 return charClass.GetClass(static_cast<unsigned char>(ch)); 1793 } 1794 1795 /** 1796 * Used by commands that want to select whole words. 1797 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0. 1798 */ 1799 Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const { 1800 CharClassify::cc ccStart = CharClassify::ccWord; 1801 if (delta < 0) { 1802 if (!onlyWordCharacters) { 1803 const CharacterExtracted ce = CharacterBefore(pos); 1804 ccStart = WordCharacterClass(ce.character); 1805 } 1806 while (pos > 0) { 1807 const CharacterExtracted ce = CharacterBefore(pos); 1808 if (WordCharacterClass(ce.character) != ccStart) 1809 break; 1810 pos -= ce.widthBytes; 1811 } 1812 } else { 1813 if (!onlyWordCharacters && pos < LengthNoExcept()) { 1814 const CharacterExtracted ce = CharacterAfter(pos); 1815 ccStart = WordCharacterClass(ce.character); 1816 } 1817 while (pos < LengthNoExcept()) { 1818 const CharacterExtracted ce = CharacterAfter(pos); 1819 if (WordCharacterClass(ce.character) != ccStart) 1820 break; 1821 pos += ce.widthBytes; 1822 } 1823 } 1824 return MovePositionOutsideChar(pos, delta, true); 1825 } 1826 1827 /** 1828 * Find the start of the next word in either a forward (delta >= 0) or backwards direction 1829 * (delta < 0). 
1830 * This is looking for a transition between character classes although there is also some 1831 * additional movement to transit white space. 1832 * Used by cursor movement by word commands. 1833 */ 1834 Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const { 1835 if (delta < 0) { 1836 while (pos > 0) { 1837 const CharacterExtracted ce = CharacterBefore(pos); 1838 if (WordCharacterClass(ce.character) != CharClassify::ccSpace) 1839 break; 1840 pos -= ce.widthBytes; 1841 } 1842 if (pos > 0) { 1843 CharacterExtracted ce = CharacterBefore(pos); 1844 const CharClassify::cc ccStart = WordCharacterClass(ce.character); 1845 while (pos > 0) { 1846 ce = CharacterBefore(pos); 1847 if (WordCharacterClass(ce.character) != ccStart) 1848 break; 1849 pos -= ce.widthBytes; 1850 } 1851 } 1852 } else { 1853 CharacterExtracted ce = CharacterAfter(pos); 1854 const CharClassify::cc ccStart = WordCharacterClass(ce.character); 1855 while (pos < LengthNoExcept()) { 1856 ce = CharacterAfter(pos); 1857 if (WordCharacterClass(ce.character) != ccStart) 1858 break; 1859 pos += ce.widthBytes; 1860 } 1861 while (pos < LengthNoExcept()) { 1862 ce = CharacterAfter(pos); 1863 if (WordCharacterClass(ce.character) != CharClassify::ccSpace) 1864 break; 1865 pos += ce.widthBytes; 1866 } 1867 } 1868 return pos; 1869 } 1870 1871 /** 1872 * Find the end of the next word in either a forward (delta >= 0) or backwards direction 1873 * (delta < 0). 1874 * This is looking for a transition between character classes although there is also some 1875 * additional movement to transit white space. 1876 * Used by cursor movement by word commands. 
1877 */ 1878 Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const { 1879 if (delta < 0) { 1880 if (pos > 0) { 1881 CharacterExtracted ce = CharacterBefore(pos); 1882 const CharClassify::cc ccStart = WordCharacterClass(ce.character); 1883 if (ccStart != CharClassify::ccSpace) { 1884 while (pos > 0) { 1885 ce = CharacterBefore(pos); 1886 if (WordCharacterClass(ce.character) != ccStart) 1887 break; 1888 pos -= ce.widthBytes; 1889 } 1890 } 1891 while (pos > 0) { 1892 ce = CharacterBefore(pos); 1893 if (WordCharacterClass(ce.character) != CharClassify::ccSpace) 1894 break; 1895 pos -= ce.widthBytes; 1896 } 1897 } 1898 } else { 1899 while (pos < LengthNoExcept()) { 1900 const CharacterExtracted ce = CharacterAfter(pos); 1901 if (WordCharacterClass(ce.character) != CharClassify::ccSpace) 1902 break; 1903 pos += ce.widthBytes; 1904 } 1905 if (pos < LengthNoExcept()) { 1906 CharacterExtracted ce = CharacterAfter(pos); 1907 const CharClassify::cc ccStart = WordCharacterClass(ce.character); 1908 while (pos < LengthNoExcept()) { 1909 ce = CharacterAfter(pos); 1910 if (WordCharacterClass(ce.character) != ccStart) 1911 break; 1912 pos += ce.widthBytes; 1913 } 1914 } 1915 } 1916 return pos; 1917 } 1918 1919 /** 1920 * Check that the character at the given position is a word or punctuation character and that 1921 * the previous character is of a different character class. 
1922 */ 1923 bool Document::IsWordStartAt(Sci::Position pos) const { 1924 if (pos >= LengthNoExcept()) 1925 return false; 1926 if (pos > 0) { 1927 const CharacterExtracted cePos = CharacterAfter(pos); 1928 const CharClassify::cc ccPos = WordCharacterClass(cePos.character); 1929 const CharacterExtracted cePrev = CharacterBefore(pos); 1930 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character); 1931 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) && 1932 (ccPos != ccPrev); 1933 } 1934 return true; 1935 } 1936 1937 /** 1938 * Check that the character at the given position is a word or punctuation character and that 1939 * the next character is of a different character class. 1940 */ 1941 bool Document::IsWordEndAt(Sci::Position pos) const { 1942 if (pos <= 0) 1943 return false; 1944 if (pos < LengthNoExcept()) { 1945 const CharacterExtracted cePos = CharacterAfter(pos); 1946 const CharClassify::cc ccPos = WordCharacterClass(cePos.character); 1947 const CharacterExtracted cePrev = CharacterBefore(pos); 1948 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character); 1949 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) && 1950 (ccPrev != ccPos); 1951 } 1952 return true; 1953 } 1954 1955 /** 1956 * Check that the given range is has transitions between character classes at both 1957 * ends and where the characters on the inside are word or punctuation characters. 
1958 */ 1959 bool Document::IsWordAt(Sci::Position start, Sci::Position end) const { 1960 return (start < end) && IsWordStartAt(start) && IsWordEndAt(end); 1961 } 1962 1963 bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const { 1964 return (!word && !wordStart) || 1965 (word && IsWordAt(pos, pos + length)) || 1966 (wordStart && IsWordStartAt(pos)); 1967 } 1968 1969 bool Document::HasCaseFolder() const noexcept { 1970 return pcf != nullptr; 1971 } 1972 1973 void Document::SetCaseFolder(CaseFolder *pcf_) noexcept { 1974 pcf.reset(pcf_); 1975 } 1976 1977 Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept { 1978 const unsigned char leadByte = cb.UCharAt(position); 1979 if (UTF8IsAscii(leadByte)) { 1980 // Common case: ASCII character 1981 return CharacterExtracted(leadByte, 1); 1982 } 1983 const int widthCharBytes = UTF8BytesOfLead[leadByte]; 1984 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; 1985 for (int b=1; b<widthCharBytes; b++) 1986 charBytes[b] = cb.UCharAt(position + b); 1987 const int utf8status = UTF8Classify(charBytes, widthCharBytes); 1988 if (utf8status & UTF8MaskInvalid) { 1989 // Treat as invalid and use up just one byte 1990 return CharacterExtracted(unicodeReplacementChar, 1); 1991 } else { 1992 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); 1993 } 1994 } 1995 1996 /** 1997 * Find text in document, supporting both forward and backward 1998 * searches (just pass minPos > maxPos to do a backward search) 1999 * Has not been tested with backwards DBCS searches yet. 
2000 */ 2001 Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search, 2002 int flags, Sci::Position *length) { 2003 if (*length <= 0) 2004 return minPos; 2005 const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0; 2006 const bool word = (flags & SCFIND_WHOLEWORD) != 0; 2007 const bool wordStart = (flags & SCFIND_WORDSTART) != 0; 2008 const bool regExp = (flags & SCFIND_REGEXP) != 0; 2009 if (regExp) { 2010 if (!regex) 2011 regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass)); 2012 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length); 2013 } else { 2014 2015 const bool forward = minPos <= maxPos; 2016 const int increment = forward ? 1 : -1; 2017 2018 // Range endpoints should not be inside DBCS characters, but just in case, move them. 2019 const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false); 2020 const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false); 2021 2022 // Compute actual search ranges needed 2023 const Sci::Position lengthFind = *length; 2024 2025 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind); 2026 const Sci::Position limitPos = std::max(startPos, endPos); 2027 Sci::Position pos = startPos; 2028 if (!forward) { 2029 // Back all of a character 2030 pos = NextPosition(pos, increment); 2031 } 2032 if (caseSensitive) { 2033 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos; 2034 const char charStartSearch = search[0]; 2035 while (forward ? 
(pos < endSearch) : (pos >= endSearch)) { 2036 if (CharAt(pos) == charStartSearch) { 2037 bool found = (pos + lengthFind) <= limitPos; 2038 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) { 2039 found = CharAt(pos + indexSearch) == search[indexSearch]; 2040 } 2041 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { 2042 return pos; 2043 } 2044 } 2045 if (!NextCharacter(pos, increment)) 2046 break; 2047 } 2048 } else if (SC_CP_UTF8 == dbcsCodePage) { 2049 constexpr size_t maxFoldingExpansion = 4; 2050 std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1); 2051 const size_t lenSearch = 2052 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); 2053 char bytes[UTF8MaxBytes + 1] = ""; 2054 char folded[UTF8MaxBytes * maxFoldingExpansion + 1] = ""; 2055 while (forward ? (pos < endPos) : (pos >= endPos)) { 2056 int widthFirstCharacter = 0; 2057 Sci::Position posIndexDocument = pos; 2058 size_t indexSearch = 0; 2059 bool characterMatches = true; 2060 for (;;) { 2061 const unsigned char leadByte = cb.UCharAt(posIndexDocument); 2062 bytes[0] = leadByte; 2063 int widthChar = 1; 2064 if (!UTF8IsAscii(leadByte)) { 2065 const int widthCharBytes = UTF8BytesOfLead[leadByte]; 2066 for (int b=1; b<widthCharBytes; b++) { 2067 bytes[b] = cb.CharAt(posIndexDocument+b); 2068 } 2069 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth; 2070 } 2071 if (!widthFirstCharacter) 2072 widthFirstCharacter = widthChar; 2073 if ((posIndexDocument + widthChar) > limitPos) 2074 break; 2075 const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); 2076 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing 2077 assert((indexSearch + lenFlat) <= searchThing.size()); 2078 // Does folded match the buffer 2079 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat); 2080 
if (!characterMatches) 2081 break; 2082 posIndexDocument += widthChar; 2083 indexSearch += lenFlat; 2084 if (indexSearch >= lenSearch) 2085 break; 2086 } 2087 if (characterMatches && (indexSearch == lenSearch)) { 2088 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) { 2089 *length = posIndexDocument - pos; 2090 return pos; 2091 } 2092 } 2093 if (forward) { 2094 pos += widthFirstCharacter; 2095 } else { 2096 if (!NextCharacter(pos, increment)) 2097 break; 2098 } 2099 } 2100 } else if (dbcsCodePage) { 2101 constexpr size_t maxBytesCharacter = 2; 2102 constexpr size_t maxFoldingExpansion = 4; 2103 std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1); 2104 const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); 2105 while (forward ? (pos < endPos) : (pos >= endPos)) { 2106 Sci::Position indexDocument = 0; 2107 size_t indexSearch = 0; 2108 bool characterMatches = true; 2109 while (characterMatches && 2110 ((pos + indexDocument) < limitPos) && 2111 (indexSearch < lenSearch)) { 2112 char bytes[maxBytesCharacter + 1]; 2113 bytes[0] = cb.CharAt(pos + indexDocument); 2114 const Sci::Position widthChar = IsDBCSLeadByteNoExcept(bytes[0]) ? 
2 : 1; 2115 if (widthChar == 2) 2116 bytes[1] = cb.CharAt(pos + indexDocument + 1); 2117 if ((pos + indexDocument + widthChar) > limitPos) 2118 break; 2119 char folded[maxBytesCharacter * maxFoldingExpansion + 1]; 2120 const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); 2121 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing 2122 assert((indexSearch + lenFlat) <= searchThing.size()); 2123 // Does folded match the buffer 2124 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat); 2125 indexDocument += widthChar; 2126 indexSearch += lenFlat; 2127 } 2128 if (characterMatches && (indexSearch == lenSearch)) { 2129 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) { 2130 *length = indexDocument; 2131 return pos; 2132 } 2133 } 2134 if (!NextCharacter(pos, increment)) 2135 break; 2136 } 2137 } else { 2138 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos; 2139 std::vector<char> searchThing(lengthFind + 1); 2140 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); 2141 while (forward ? 
(pos < endSearch) : (pos >= endSearch)) { 2142 bool found = (pos + lengthFind) <= limitPos; 2143 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) { 2144 const char ch = CharAt(pos + indexSearch); 2145 char folded[2]; 2146 pcf->Fold(folded, sizeof(folded), &ch, 1); 2147 found = folded[0] == searchThing[indexSearch]; 2148 } 2149 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { 2150 return pos; 2151 } 2152 if (!NextCharacter(pos, increment)) 2153 break; 2154 } 2155 } 2156 } 2157 //Platform::DebugPrintf("Not found\n"); 2158 return -1; 2159 } 2160 2161 const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) { 2162 if (regex) 2163 return regex->SubstituteByPosition(this, text, length); 2164 else 2165 return nullptr; 2166 } 2167 2168 int Document::LineCharacterIndex() const noexcept { 2169 return cb.LineCharacterIndex(); 2170 } 2171 2172 void Document::AllocateLineCharacterIndex(int lineCharacterIndex) { 2173 return cb.AllocateLineCharacterIndex(lineCharacterIndex); 2174 } 2175 2176 void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) { 2177 return cb.ReleaseLineCharacterIndex(lineCharacterIndex); 2178 } 2179 2180 Sci::Line Document::LinesTotal() const noexcept { 2181 return cb.Lines(); 2182 } 2183 2184 void Document::SetDefaultCharClasses(bool includeWordClass) { 2185 charClass.SetDefaultCharClasses(includeWordClass); 2186 } 2187 2188 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) { 2189 charClass.SetCharClasses(chars, newCharClass); 2190 } 2191 2192 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) const { 2193 return charClass.GetCharsOfClass(characterClass, buffer); 2194 } 2195 2196 void Document::SetCharacterCategoryOptimization(int countCharacters) { 2197 charMap.Optimize(countCharacters); 2198 } 2199 2200 int Document::CharacterCategoryOptimization() const noexcept { 2201 return charMap.Size(); 2202 } 
2203 2204 void SCI_METHOD Document::StartStyling(Sci_Position position) { 2205 endStyled = position; 2206 } 2207 2208 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) { 2209 if (enteredStyling != 0) { 2210 return false; 2211 } else { 2212 enteredStyling++; 2213 const Sci::Position prevEndStyled = endStyled; 2214 if (cb.SetStyleFor(endStyled, length, style)) { 2215 const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER, 2216 prevEndStyled, length); 2217 NotifyModified(mh); 2218 } 2219 endStyled += length; 2220 enteredStyling--; 2221 return true; 2222 } 2223 } 2224 2225 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) { 2226 if (enteredStyling != 0) { 2227 return false; 2228 } else { 2229 enteredStyling++; 2230 bool didChange = false; 2231 Sci::Position startMod = 0; 2232 Sci::Position endMod = 0; 2233 for (int iPos = 0; iPos < length; iPos++, endStyled++) { 2234 PLATFORM_ASSERT(endStyled < Length()); 2235 if (cb.SetStyleAt(endStyled, styles[iPos])) { 2236 if (!didChange) { 2237 startMod = endStyled; 2238 } 2239 didChange = true; 2240 endMod = endStyled; 2241 } 2242 } 2243 if (didChange) { 2244 const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER, 2245 startMod, endMod - startMod + 1); 2246 NotifyModified(mh); 2247 } 2248 enteredStyling--; 2249 return true; 2250 } 2251 } 2252 2253 void Document::EnsureStyledTo(Sci::Position pos) { 2254 if ((enteredStyling == 0) && (pos > GetEndStyled())) { 2255 IncrementStyleClock(); 2256 if (pli && !pli->UseContainerLexing()) { 2257 const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled()); 2258 const Sci::Position endStyledTo = LineStart(lineEndStyled); 2259 pli->Colourise(endStyledTo, pos); 2260 } else { 2261 // Ask the watchers to style, and stop as soon as one responds. 
2262 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); 2263 (pos > GetEndStyled()) && (it != watchers.end()); ++it) { 2264 it->watcher->NotifyStyleNeeded(this, it->userData, pos); 2265 } 2266 } 2267 } 2268 } 2269 2270 void Document::StyleToAdjustingLineDuration(Sci::Position pos) { 2271 const Sci::Line lineFirst = SciLineFromPosition(GetEndStyled()); 2272 ElapsedPeriod epStyling; 2273 EnsureStyledTo(pos); 2274 const Sci::Line lineLast = SciLineFromPosition(GetEndStyled()); 2275 durationStyleOneLine.AddSample(lineLast - lineFirst, epStyling.Duration()); 2276 } 2277 2278 void Document::LexerChanged() { 2279 // Tell the watchers the lexer has changed. 2280 for (const WatcherWithUserData &watcher : watchers) { 2281 watcher.watcher->NotifyLexerChanged(this, watcher.userData); 2282 } 2283 } 2284 2285 LexInterface *Document::GetLexInterface() const noexcept { 2286 return pli.get(); 2287 } 2288 2289 void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept { 2290 pli = std::move(pLexInterface); 2291 } 2292 2293 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) { 2294 const int statePrevious = States()->SetLineState(line, state); 2295 if (state != statePrevious) { 2296 const DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, nullptr, 2297 static_cast<Sci::Line>(line)); 2298 NotifyModified(mh); 2299 } 2300 return statePrevious; 2301 } 2302 2303 int SCI_METHOD Document::GetLineState(Sci_Position line) const { 2304 return States()->GetLineState(line); 2305 } 2306 2307 Sci::Line Document::GetMaxLineState() const noexcept { 2308 return States()->GetMaxLineState(); 2309 } 2310 2311 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) { 2312 const DocModification mh(SC_MOD_LEXERSTATE, start, 2313 end-start, 0, 0, 0); 2314 NotifyModified(mh); 2315 } 2316 2317 StyledText Document::MarginStyledText(Sci::Line line) const noexcept { 2318 const LineAnnotation *pla = Margins(); 
2319 return StyledText(pla->Length(line), pla->Text(line), 2320 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); 2321 } 2322 2323 void Document::MarginSetText(Sci::Line line, const char *text) { 2324 Margins()->SetText(line, text); 2325 const DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 2326 0, 0, 0, line); 2327 NotifyModified(mh); 2328 } 2329 2330 void Document::MarginSetStyle(Sci::Line line, int style) { 2331 Margins()->SetStyle(line, style); 2332 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 2333 0, 0, 0, line)); 2334 } 2335 2336 void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) { 2337 Margins()->SetStyles(line, styles); 2338 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 2339 0, 0, 0, line)); 2340 } 2341 2342 void Document::MarginClearAll() { 2343 const Sci::Line maxEditorLine = LinesTotal(); 2344 for (Sci::Line l=0; l<maxEditorLine; l++) 2345 MarginSetText(l, nullptr); 2346 // Free remaining data 2347 Margins()->ClearAll(); 2348 } 2349 2350 StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept { 2351 const LineAnnotation *pla = Annotations(); 2352 return StyledText(pla->Length(line), pla->Text(line), 2353 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); 2354 } 2355 2356 void Document::AnnotationSetText(Sci::Line line, const char *text) { 2357 if (line >= 0 && line < LinesTotal()) { 2358 const Sci::Line linesBefore = AnnotationLines(line); 2359 Annotations()->SetText(line, text); 2360 const int linesAfter = AnnotationLines(line); 2361 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 2362 0, 0, 0, line); 2363 mh.annotationLinesAdded = linesAfter - linesBefore; 2364 NotifyModified(mh); 2365 } 2366 } 2367 2368 void Document::AnnotationSetStyle(Sci::Line line, int style) { 2369 if (line >= 0 && line < LinesTotal()) { 2370 Annotations()->SetStyle(line, style); 2371 const DocModification mh(SC_MOD_CHANGEANNOTATION, 
LineStart(line), 2372 0, 0, 0, line); 2373 NotifyModified(mh); 2374 } 2375 } 2376 2377 void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) { 2378 if (line >= 0 && line < LinesTotal()) { 2379 Annotations()->SetStyles(line, styles); 2380 } 2381 } 2382 2383 int Document::AnnotationLines(Sci::Line line) const noexcept { 2384 return Annotations()->Lines(line); 2385 } 2386 2387 void Document::AnnotationClearAll() { 2388 const Sci::Line maxEditorLine = LinesTotal(); 2389 for (Sci::Line l=0; l<maxEditorLine; l++) 2390 AnnotationSetText(l, nullptr); 2391 // Free remaining data 2392 Annotations()->ClearAll(); 2393 } 2394 2395 StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept { 2396 const LineAnnotation *pla = EOLAnnotations(); 2397 return StyledText(pla->Length(line), pla->Text(line), 2398 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); 2399 } 2400 2401 void Document::EOLAnnotationSetText(Sci::Line line, const char *text) { 2402 if (line >= 0 && line < LinesTotal()) { 2403 EOLAnnotations()->SetText(line, text); 2404 const DocModification mh(SC_MOD_CHANGEEOLANNOTATION, LineStart(line), 2405 0, 0, 0, line); 2406 NotifyModified(mh); 2407 } 2408 } 2409 2410 void Document::EOLAnnotationSetStyle(Sci::Line line, int style) { 2411 if (line >= 0 && line < LinesTotal()) { 2412 EOLAnnotations()->SetStyle(line, style); 2413 const DocModification mh(SC_MOD_CHANGEEOLANNOTATION, LineStart(line), 2414 0, 0, 0, line); 2415 NotifyModified(mh); 2416 } 2417 } 2418 2419 void Document::EOLAnnotationClearAll() { 2420 const Sci::Line maxEditorLine = LinesTotal(); 2421 for (Sci::Line l=0; l<maxEditorLine; l++) 2422 EOLAnnotationSetText(l, nullptr); 2423 // Free remaining data 2424 EOLAnnotations()->ClearAll(); 2425 } 2426 2427 void Document::IncrementStyleClock() noexcept { 2428 styleClock = (styleClock + 1) % 0x100000; 2429 } 2430 2431 void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) { 2432 
decorations->SetCurrentIndicator(indicator); 2433 } 2434 2435 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) { 2436 const FillResult<Sci::Position> fr = decorations->FillRange( 2437 position, value, fillLength); 2438 if (fr.changed) { 2439 const DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER, 2440 fr.position, fr.fillLength); 2441 NotifyModified(mh); 2442 } 2443 } 2444 2445 bool Document::AddWatcher(DocWatcher *watcher, void *userData) { 2446 const WatcherWithUserData wwud(watcher, userData); 2447 std::vector<WatcherWithUserData>::iterator it = 2448 std::find(watchers.begin(), watchers.end(), wwud); 2449 if (it != watchers.end()) 2450 return false; 2451 watchers.push_back(wwud); 2452 return true; 2453 } 2454 2455 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) { 2456 std::vector<WatcherWithUserData>::iterator it = 2457 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData)); 2458 if (it != watchers.end()) { 2459 watchers.erase(it); 2460 return true; 2461 } 2462 return false; 2463 } 2464 2465 void Document::NotifyModifyAttempt() { 2466 for (const WatcherWithUserData &watcher : watchers) { 2467 watcher.watcher->NotifyModifyAttempt(this, watcher.userData); 2468 } 2469 } 2470 2471 void Document::NotifySavePoint(bool atSavePoint) { 2472 for (const WatcherWithUserData &watcher : watchers) { 2473 watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint); 2474 } 2475 } 2476 2477 void Document::NotifyModified(DocModification mh) { 2478 if (mh.modificationType & SC_MOD_INSERTTEXT) { 2479 decorations->InsertSpace(mh.position, mh.length); 2480 } else if (mh.modificationType & SC_MOD_DELETETEXT) { 2481 decorations->DeleteRange(mh.position, mh.length); 2482 } 2483 for (const WatcherWithUserData &watcher : watchers) { 2484 watcher.watcher->NotifyModified(this, mh, watcher.userData); 2485 } 2486 } 2487 2488 // Used for word part navigation. 
2489 static bool IsASCIIPunctuationCharacter(unsigned int ch) noexcept { 2490 switch (ch) { 2491 case '!': 2492 case '"': 2493 case '#': 2494 case '$': 2495 case '%': 2496 case '&': 2497 case '\'': 2498 case '(': 2499 case ')': 2500 case '*': 2501 case '+': 2502 case ',': 2503 case '-': 2504 case '.': 2505 case '/': 2506 case ':': 2507 case ';': 2508 case '<': 2509 case '=': 2510 case '>': 2511 case '?': 2512 case '@': 2513 case '[': 2514 case '\\': 2515 case ']': 2516 case '^': 2517 case '_': 2518 case '`': 2519 case '{': 2520 case '|': 2521 case '}': 2522 case '~': 2523 return true; 2524 default: 2525 return false; 2526 } 2527 } 2528 2529 bool Document::IsWordPartSeparator(unsigned int ch) const { 2530 return (WordCharacterClass(ch) == CharClassify::ccWord) && IsASCIIPunctuationCharacter(ch); 2531 } 2532 2533 Sci::Position Document::WordPartLeft(Sci::Position pos) const { 2534 if (pos > 0) { 2535 pos -= CharacterBefore(pos).widthBytes; 2536 CharacterExtracted ceStart = CharacterAfter(pos); 2537 if (IsWordPartSeparator(ceStart.character)) { 2538 while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) { 2539 pos -= CharacterBefore(pos).widthBytes; 2540 } 2541 } 2542 if (pos > 0) { 2543 ceStart = CharacterAfter(pos); 2544 pos -= CharacterBefore(pos).widthBytes; 2545 if (IsLowerCase(ceStart.character)) { 2546 while (pos > 0 && IsLowerCase(CharacterAfter(pos).character)) 2547 pos -= CharacterBefore(pos).widthBytes; 2548 if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character)) 2549 pos += CharacterAfter(pos).widthBytes; 2550 } else if (IsUpperCase(ceStart.character)) { 2551 while (pos > 0 && IsUpperCase(CharacterAfter(pos).character)) 2552 pos -= CharacterBefore(pos).widthBytes; 2553 if (!IsUpperCase(CharacterAfter(pos).character)) 2554 pos += CharacterAfter(pos).widthBytes; 2555 } else if (IsADigit(ceStart.character)) { 2556 while (pos > 0 && IsADigit(CharacterAfter(pos).character)) 2557 pos -= 
CharacterBefore(pos).widthBytes; 2558 if (!IsADigit(CharacterAfter(pos).character)) 2559 pos += CharacterAfter(pos).widthBytes; 2560 } else if (IsASCIIPunctuationCharacter(ceStart.character)) { 2561 while (pos > 0 && IsASCIIPunctuationCharacter(CharacterAfter(pos).character)) 2562 pos -= CharacterBefore(pos).widthBytes; 2563 if (!IsASCIIPunctuationCharacter(CharacterAfter(pos).character)) 2564 pos += CharacterAfter(pos).widthBytes; 2565 } else if (isspacechar(ceStart.character)) { 2566 while (pos > 0 && isspacechar(CharacterAfter(pos).character)) 2567 pos -= CharacterBefore(pos).widthBytes; 2568 if (!isspacechar(CharacterAfter(pos).character)) 2569 pos += CharacterAfter(pos).widthBytes; 2570 } else if (!IsASCII(ceStart.character)) { 2571 while (pos > 0 && !IsASCII(CharacterAfter(pos).character)) 2572 pos -= CharacterBefore(pos).widthBytes; 2573 if (IsASCII(CharacterAfter(pos).character)) 2574 pos += CharacterAfter(pos).widthBytes; 2575 } else { 2576 pos += CharacterAfter(pos).widthBytes; 2577 } 2578 } 2579 } 2580 return pos; 2581 } 2582 2583 Sci::Position Document::WordPartRight(Sci::Position pos) const { 2584 CharacterExtracted ceStart = CharacterAfter(pos); 2585 const Sci::Position length = LengthNoExcept(); 2586 if (IsWordPartSeparator(ceStart.character)) { 2587 while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character)) 2588 pos += CharacterAfter(pos).widthBytes; 2589 ceStart = CharacterAfter(pos); 2590 } 2591 if (!IsASCII(ceStart.character)) { 2592 while (pos < length && !IsASCII(CharacterAfter(pos).character)) 2593 pos += CharacterAfter(pos).widthBytes; 2594 } else if (IsLowerCase(ceStart.character)) { 2595 while (pos < length && IsLowerCase(CharacterAfter(pos).character)) 2596 pos += CharacterAfter(pos).widthBytes; 2597 } else if (IsUpperCase(ceStart.character)) { 2598 if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) { 2599 pos += CharacterAfter(pos).widthBytes; 2600 while (pos < length && 
IsLowerCase(CharacterAfter(pos).character)) 2601 pos += CharacterAfter(pos).widthBytes; 2602 } else { 2603 while (pos < length && IsUpperCase(CharacterAfter(pos).character)) 2604 pos += CharacterAfter(pos).widthBytes; 2605 } 2606 if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character)) 2607 pos -= CharacterBefore(pos).widthBytes; 2608 } else if (IsADigit(ceStart.character)) { 2609 while (pos < length && IsADigit(CharacterAfter(pos).character)) 2610 pos += CharacterAfter(pos).widthBytes; 2611 } else if (IsASCIIPunctuationCharacter(ceStart.character)) { 2612 while (pos < length && IsASCIIPunctuationCharacter(CharacterAfter(pos).character)) 2613 pos += CharacterAfter(pos).widthBytes; 2614 } else if (isspacechar(ceStart.character)) { 2615 while (pos < length && isspacechar(CharacterAfter(pos).character)) 2616 pos += CharacterAfter(pos).widthBytes; 2617 } else { 2618 pos += CharacterAfter(pos).widthBytes; 2619 } 2620 return pos; 2621 } 2622 2623 static constexpr bool IsLineEndChar(char c) noexcept { 2624 return (c == '\n' || c == '\r'); 2625 } 2626 2627 Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept { 2628 const int sStart = cb.StyleAt(pos); 2629 if (delta < 0) { 2630 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos)))) 2631 pos--; 2632 pos++; 2633 } else { 2634 while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos)))) 2635 pos++; 2636 } 2637 return pos; 2638 } 2639 2640 static char BraceOpposite(char ch) noexcept { 2641 switch (ch) { 2642 case '(': 2643 return ')'; 2644 case ')': 2645 return '('; 2646 case '[': 2647 return ']'; 2648 case ']': 2649 return '['; 2650 case '{': 2651 return '}'; 2652 case '}': 2653 return '{'; 2654 case '<': 2655 return '>'; 2656 case '>': 2657 return '<'; 2658 default: 2659 return '\0'; 2660 } 2661 } 2662 2663 // TODO: should be able to extend styled 
region to find matching brace 2664 Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept { 2665 const char chBrace = CharAt(position); 2666 const char chSeek = BraceOpposite(chBrace); 2667 if (chSeek == '\0') 2668 return - 1; 2669 const int styBrace = StyleIndexAt(position); 2670 int direction = -1; 2671 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<') 2672 direction = 1; 2673 int depth = 1; 2674 position = useStartPos ? startPos : NextPosition(position, direction); 2675 while ((position >= 0) && (position < LengthNoExcept())) { 2676 const char chAtPos = CharAt(position); 2677 const int styAtPos = StyleIndexAt(position); 2678 if ((position > GetEndStyled()) || (styAtPos == styBrace)) { 2679 if (chAtPos == chBrace) 2680 depth++; 2681 if (chAtPos == chSeek) 2682 depth--; 2683 if (depth == 0) 2684 return position; 2685 } 2686 const Sci::Position positionBeforeMove = position; 2687 position = NextPosition(position, direction); 2688 if (position == positionBeforeMove) 2689 break; 2690 } 2691 return - 1; 2692 } 2693 2694 /** 2695 * Implementation of RegexSearchBase for the default built-in regular expression engine 2696 */ 2697 class BuiltinRegex : public RegexSearchBase { 2698 public: 2699 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {} 2700 BuiltinRegex(const BuiltinRegex &) = delete; 2701 BuiltinRegex(BuiltinRegex &&) = delete; 2702 BuiltinRegex &operator=(const BuiltinRegex &) = delete; 2703 BuiltinRegex &operator=(BuiltinRegex &&) = delete; 2704 ~BuiltinRegex() override = default; 2705 2706 Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, 2707 bool caseSensitive, bool word, bool wordStart, int flags, 2708 Sci::Position *length) override; 2709 2710 const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override; 2711 2712 private: 2713 RESearch 
search; 2714 std::string substituted; 2715 }; 2716 2717 namespace { 2718 2719 /** 2720 * RESearchRange keeps track of search range. 2721 */ 2722 class RESearchRange { 2723 public: 2724 const Document *doc; 2725 int increment; 2726 Sci::Position startPos; 2727 Sci::Position endPos; 2728 Sci::Line lineRangeStart; 2729 Sci::Line lineRangeEnd; 2730 Sci::Line lineRangeBreak; 2731 RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) { 2732 increment = (minPos <= maxPos) ? 1 : -1; 2733 2734 // Range endpoints should not be inside DBCS characters or between a CR and LF, 2735 // but just in case, move them. 2736 startPos = doc->MovePositionOutsideChar(minPos, 1, true); 2737 endPos = doc->MovePositionOutsideChar(maxPos, 1, true); 2738 2739 lineRangeStart = doc->SciLineFromPosition(startPos); 2740 lineRangeEnd = doc->SciLineFromPosition(endPos); 2741 lineRangeBreak = lineRangeEnd + increment; 2742 } 2743 Range LineRange(Sci::Line line) const { 2744 Range range(doc->LineStart(line), doc->LineEnd(line)); 2745 if (increment == 1) { 2746 if (line == lineRangeStart) 2747 range.start = startPos; 2748 if (line == lineRangeEnd) 2749 range.end = endPos; 2750 } else { 2751 if (line == lineRangeEnd) 2752 range.start = endPos; 2753 if (line == lineRangeStart) 2754 range.end = startPos; 2755 } 2756 return range; 2757 } 2758 }; 2759 2760 // Define a way for the Regular Expression code to access the document 2761 class DocumentIndexer : public CharacterIndexer { 2762 Document *pdoc; 2763 Sci::Position end; 2764 public: 2765 DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept : 2766 pdoc(pdoc_), end(end_) { 2767 } 2768 2769 DocumentIndexer(const DocumentIndexer &) = delete; 2770 DocumentIndexer(DocumentIndexer &&) = delete; 2771 DocumentIndexer &operator=(const DocumentIndexer &) = delete; 2772 DocumentIndexer &operator=(DocumentIndexer &&) = delete; 2773 2774 ~DocumentIndexer() override = default; 2775 2776 char CharAt(Sci::Position 
index) const noexcept override { 2777 if (index < 0 || index >= end) 2778 return 0; 2779 else 2780 return pdoc->CharAt(index); 2781 } 2782 }; 2783 2784 #ifndef NO_CXX11_REGEX 2785 2786 class ByteIterator { 2787 public: 2788 typedef std::bidirectional_iterator_tag iterator_category; 2789 typedef char value_type; 2790 typedef ptrdiff_t difference_type; 2791 typedef char* pointer; 2792 typedef char& reference; 2793 2794 const Document *doc; 2795 Sci::Position position; 2796 2797 ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : 2798 doc(doc_), position(position_) { 2799 } 2800 ByteIterator(const ByteIterator &other) noexcept { 2801 doc = other.doc; 2802 position = other.position; 2803 } 2804 ByteIterator(ByteIterator &&other) noexcept { 2805 doc = other.doc; 2806 position = other.position; 2807 } 2808 ByteIterator &operator=(const ByteIterator &other) noexcept { 2809 if (this != &other) { 2810 doc = other.doc; 2811 position = other.position; 2812 } 2813 return *this; 2814 } 2815 ByteIterator &operator=(ByteIterator &&) noexcept = default; 2816 ~ByteIterator() = default; 2817 char operator*() const noexcept { 2818 return doc->CharAt(position); 2819 } 2820 ByteIterator &operator++() noexcept { 2821 position++; 2822 return *this; 2823 } 2824 ByteIterator operator++(int) noexcept { 2825 ByteIterator retVal(*this); 2826 position++; 2827 return retVal; 2828 } 2829 ByteIterator &operator--() noexcept { 2830 position--; 2831 return *this; 2832 } 2833 bool operator==(const ByteIterator &other) const noexcept { 2834 return doc == other.doc && position == other.position; 2835 } 2836 bool operator!=(const ByteIterator &other) const noexcept { 2837 return doc != other.doc || position != other.position; 2838 } 2839 Sci::Position Pos() const noexcept { 2840 return position; 2841 } 2842 Sci::Position PosRoundUp() const noexcept { 2843 return position; 2844 } 2845 }; 2846 2847 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide. 
2848 // Would be better to use sizeof(wchar_t) or similar to differentiate 2849 // but easier for now to hard-code platforms. 2850 // C++11 has char16_t and char32_t but neither Clang nor Visual C++ 2851 // appear to allow specializing basic_regex over these. 2852 2853 #ifdef _WIN32 2854 #define WCHAR_T_IS_16 1 2855 #else 2856 #define WCHAR_T_IS_16 0 2857 #endif 2858 2859 #if WCHAR_T_IS_16 2860 2861 // On Windows, report non-BMP characters as 2 separate surrogates as that 2862 // matches wregex since it is based on wchar_t. 2863 class UTF8Iterator { 2864 // These 3 fields determine the iterator position and are used for comparisons 2865 const Document *doc; 2866 Sci::Position position; 2867 size_t characterIndex; 2868 // Remaining fields are derived from the determining fields so are excluded in comparisons 2869 unsigned int lenBytes; 2870 size_t lenCharacters; 2871 wchar_t buffered[2]; 2872 public: 2873 typedef std::bidirectional_iterator_tag iterator_category; 2874 typedef wchar_t value_type; 2875 typedef ptrdiff_t difference_type; 2876 typedef wchar_t* pointer; 2877 typedef wchar_t& reference; 2878 2879 UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : 2880 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} { 2881 buffered[0] = 0; 2882 buffered[1] = 0; 2883 if (doc) { 2884 ReadCharacter(); 2885 } 2886 } 2887 UTF8Iterator(const UTF8Iterator &other) noexcept : buffered{} { 2888 doc = other.doc; 2889 position = other.position; 2890 characterIndex = other.characterIndex; 2891 lenBytes = other.lenBytes; 2892 lenCharacters = other.lenCharacters; 2893 buffered[0] = other.buffered[0]; 2894 buffered[1] = other.buffered[1]; 2895 } 2896 UTF8Iterator(UTF8Iterator &&other) noexcept = default; 2897 UTF8Iterator &operator=(const UTF8Iterator &other) noexcept { 2898 if (this != &other) { 2899 doc = other.doc; 2900 position = other.position; 2901 characterIndex = other.characterIndex; 2902 lenBytes = 
other.lenBytes; 2903 lenCharacters = other.lenCharacters; 2904 buffered[0] = other.buffered[0]; 2905 buffered[1] = other.buffered[1]; 2906 } 2907 return *this; 2908 } 2909 UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default; 2910 ~UTF8Iterator() = default; 2911 wchar_t operator*() const noexcept { 2912 assert(lenCharacters != 0); 2913 return buffered[characterIndex]; 2914 } 2915 UTF8Iterator &operator++() noexcept { 2916 if ((characterIndex + 1) < (lenCharacters)) { 2917 characterIndex++; 2918 } else { 2919 position += lenBytes; 2920 ReadCharacter(); 2921 characterIndex = 0; 2922 } 2923 return *this; 2924 } 2925 UTF8Iterator operator++(int) noexcept { 2926 UTF8Iterator retVal(*this); 2927 if ((characterIndex + 1) < (lenCharacters)) { 2928 characterIndex++; 2929 } else { 2930 position += lenBytes; 2931 ReadCharacter(); 2932 characterIndex = 0; 2933 } 2934 return retVal; 2935 } 2936 UTF8Iterator &operator--() noexcept { 2937 if (characterIndex) { 2938 characterIndex--; 2939 } else { 2940 position = doc->NextPosition(position, -1); 2941 ReadCharacter(); 2942 characterIndex = lenCharacters - 1; 2943 } 2944 return *this; 2945 } 2946 bool operator==(const UTF8Iterator &other) const noexcept { 2947 // Only test the determining fields, not the character widths and values derived from this 2948 return doc == other.doc && 2949 position == other.position && 2950 characterIndex == other.characterIndex; 2951 } 2952 bool operator!=(const UTF8Iterator &other) const noexcept { 2953 // Only test the determining fields, not the character widths and values derived from this 2954 return doc != other.doc || 2955 position != other.position || 2956 characterIndex != other.characterIndex; 2957 } 2958 Sci::Position Pos() const noexcept { 2959 return position; 2960 } 2961 Sci::Position PosRoundUp() const noexcept { 2962 if (characterIndex) 2963 return position + lenBytes; // Force to end of character 2964 else 2965 return position; 2966 } 2967 private: 2968 void ReadCharacter() 
noexcept { 2969 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position); 2970 lenBytes = charExtracted.widthBytes; 2971 if (charExtracted.character == unicodeReplacementChar) { 2972 lenCharacters = 1; 2973 buffered[0] = static_cast<wchar_t>(charExtracted.character); 2974 } else { 2975 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered); 2976 } 2977 } 2978 }; 2979 2980 #else 2981 2982 // On Unix, report non-BMP characters as single characters 2983 2984 class UTF8Iterator { 2985 const Document *doc; 2986 Sci::Position position; 2987 public: 2988 typedef std::bidirectional_iterator_tag iterator_category; 2989 typedef wchar_t value_type; 2990 typedef ptrdiff_t difference_type; 2991 typedef wchar_t* pointer; 2992 typedef wchar_t& reference; 2993 2994 UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : 2995 doc(doc_), position(position_) { 2996 } 2997 UTF8Iterator(const UTF8Iterator &other) noexcept { 2998 doc = other.doc; 2999 position = other.position; 3000 } 3001 UTF8Iterator(UTF8Iterator &&other) noexcept = default; 3002 UTF8Iterator &operator=(const UTF8Iterator &other) noexcept { 3003 if (this != &other) { 3004 doc = other.doc; 3005 position = other.position; 3006 } 3007 return *this; 3008 } 3009 UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default; 3010 ~UTF8Iterator() = default; 3011 wchar_t operator*() const noexcept { 3012 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position); 3013 return charExtracted.character; 3014 } 3015 UTF8Iterator &operator++() noexcept { 3016 position = doc->NextPosition(position, 1); 3017 return *this; 3018 } 3019 UTF8Iterator operator++(int) noexcept { 3020 UTF8Iterator retVal(*this); 3021 position = doc->NextPosition(position, 1); 3022 return retVal; 3023 } 3024 UTF8Iterator &operator--() noexcept { 3025 position = doc->NextPosition(position, -1); 3026 return *this; 3027 } 3028 bool operator==(const UTF8Iterator &other) 
const noexcept { 3029 return doc == other.doc && position == other.position; 3030 } 3031 bool operator!=(const UTF8Iterator &other) const noexcept { 3032 return doc != other.doc || position != other.position; 3033 } 3034 Sci::Position Pos() const noexcept { 3035 return position; 3036 } 3037 Sci::Position PosRoundUp() const noexcept { 3038 return position; 3039 } 3040 }; 3041 3042 #endif 3043 3044 std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) { 3045 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default; 3046 if (!doc->IsLineStartPosition(startPos)) 3047 flagsMatch |= std::regex_constants::match_not_bol; 3048 if (!doc->IsLineEndPosition(endPos)) 3049 flagsMatch |= std::regex_constants::match_not_eol; 3050 return flagsMatch; 3051 } 3052 3053 template<typename Iterator, typename Regex> 3054 bool MatchOnLines(const Document *doc, const Regex ®exp, const RESearchRange &resr, RESearch &search) { 3055 std::match_results<Iterator> match; 3056 3057 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range. 3058 // CRLF line ends are also a problem as ^ and $ only treat LF as a line end. 3059 // The std::regex::multiline option was added to C++17 to improve behaviour but 3060 // has not been implemented by compiler runtimes with MSVC always in multiline 3061 // mode and libc++ and libstdc++ always in single-line mode. 3062 // If multiline regex worked well then the line by line iteration could be removed 3063 // for the forwards case and replaced with the following 4 lines: 3064 #ifdef REGEX_MULTILINE 3065 Iterator itStart(doc, resr.startPos); 3066 Iterator itEnd(doc, resr.endPos); 3067 const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); 3068 const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); 3069 #else 3070 // Line by line. 
3071 bool matched = false; 3072 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { 3073 const Range lineRange = resr.LineRange(line); 3074 Iterator itStart(doc, lineRange.start); 3075 Iterator itEnd(doc, lineRange.end); 3076 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end); 3077 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); 3078 // Check for the last match on this line. 3079 if (matched) { 3080 if (resr.increment == -1) { 3081 while (matched) { 3082 Iterator itNext(doc, match[0].second.PosRoundUp()); 3083 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end); 3084 std::match_results<Iterator> matchNext; 3085 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch); 3086 if (matched) { 3087 if (match[0].first == match[0].second) { 3088 // Empty match means failure so exit 3089 return false; 3090 } 3091 match = matchNext; 3092 } 3093 } 3094 matched = true; 3095 } 3096 break; 3097 } 3098 } 3099 #endif 3100 if (matched) { 3101 for (size_t co = 0; co < match.size(); co++) { 3102 search.bopat[co] = match[co].first.Pos(); 3103 search.eopat[co] = match[co].second.PosRoundUp(); 3104 const Sci::Position lenMatch = search.eopat[co] - search.bopat[co]; 3105 search.pat[co].resize(lenMatch); 3106 for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) { 3107 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]); 3108 } 3109 } 3110 } 3111 return matched; 3112 } 3113 3114 Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, 3115 bool caseSensitive, Sci::Position *length, RESearch &search) { 3116 const RESearchRange resr(doc, minPos, maxPos); 3117 try { 3118 //ElapsedPeriod ep; 3119 std::regex::flag_type flagsRe = std::regex::ECMAScript; 3120 // Flags that appear to have no effect: 3121 // | std::regex::collate | std::regex::extended; 3122 if (!caseSensitive) 3123 flagsRe = 
flagsRe | std::regex::icase; 3124 3125 // Clear the RESearch so can fill in matches 3126 search.Clear(); 3127 3128 bool matched = false; 3129 if (SC_CP_UTF8 == doc->dbcsCodePage) { 3130 const std::wstring ws = WStringFromUTF8(s); 3131 std::wregex regexp; 3132 regexp.assign(ws, flagsRe); 3133 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search); 3134 3135 } else { 3136 std::regex regexp; 3137 regexp.assign(s, flagsRe); 3138 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search); 3139 } 3140 3141 Sci::Position posMatch = -1; 3142 if (matched) { 3143 posMatch = search.bopat[0]; 3144 *length = search.eopat[0] - search.bopat[0]; 3145 } 3146 // Example - search in doc/ScintillaHistory.html for 3147 // [[:upper:]]eta[[:space:]] 3148 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds. 3149 //const double durSearch = ep.Duration(true); 3150 //Platform::DebugPrintf("Search:%9.6g \n", durSearch); 3151 return posMatch; 3152 } catch (std::regex_error &) { 3153 // Failed to create regular expression 3154 throw RegexError(); 3155 } catch (...) { 3156 // Failed in some other way 3157 return -1; 3158 } 3159 } 3160 3161 #endif 3162 3163 } 3164 3165 Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, 3166 bool caseSensitive, bool, bool, int flags, 3167 Sci::Position *length) { 3168 3169 #ifndef NO_CXX11_REGEX 3170 if (flags & SCFIND_CXX11REGEX) { 3171 return Cxx11RegexFindText(doc, minPos, maxPos, s, 3172 caseSensitive, length, search); 3173 } 3174 #endif 3175 3176 const RESearchRange resr(doc, minPos, maxPos); 3177 3178 const bool posix = (flags & SCFIND_POSIX) != 0; 3179 3180 const char *errmsg = search.Compile(s, *length, caseSensitive, posix); 3181 if (errmsg) { 3182 return -1; 3183 } 3184 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) 3185 // Replace first '.' 
with '-' in each property file variable reference: 3186 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) 3187 // Replace: $(\1-\2) 3188 Sci::Position pos = -1; 3189 Sci::Position lenRet = 0; 3190 const bool searchforLineStart = s[0] == '^'; 3191 const char searchEnd = s[*length - 1]; 3192 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0'; 3193 const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\'); 3194 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { 3195 Sci::Position startOfLine = doc->LineStart(line); 3196 Sci::Position endOfLine = doc->LineEnd(line); 3197 if (resr.increment == 1) { 3198 if (line == resr.lineRangeStart) { 3199 if ((resr.startPos != startOfLine) && searchforLineStart) 3200 continue; // Can't match start of line if start position after start of line 3201 startOfLine = resr.startPos; 3202 } 3203 if (line == resr.lineRangeEnd) { 3204 if ((resr.endPos != endOfLine) && searchforLineEnd) 3205 continue; // Can't match end of line if end position before end of line 3206 endOfLine = resr.endPos; 3207 } 3208 } else { 3209 if (line == resr.lineRangeEnd) { 3210 if ((resr.endPos != startOfLine) && searchforLineStart) 3211 continue; // Can't match start of line if end position after start of line 3212 startOfLine = resr.endPos; 3213 } 3214 if (line == resr.lineRangeStart) { 3215 if ((resr.startPos != endOfLine) && searchforLineEnd) 3216 continue; // Can't match end of line if start position before end of line 3217 endOfLine = resr.startPos; 3218 } 3219 } 3220 3221 const DocumentIndexer di(doc, endOfLine); 3222 int success = search.Execute(di, startOfLine, endOfLine); 3223 if (success) { 3224 pos = search.bopat[0]; 3225 // Ensure only whole characters selected 3226 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false); 3227 lenRet = search.eopat[0] - search.bopat[0]; 3228 // There can be only one start of a line, so no need to look for last match in 
line 3229 if ((resr.increment == -1) && !searchforLineStart) { 3230 // Check for the last match on this line. 3231 int repetitions = 1000; // Break out of infinite loop 3232 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) { 3233 success = search.Execute(di, pos+1, endOfLine); 3234 if (success) { 3235 if (search.eopat[0] <= minPos) { 3236 pos = search.bopat[0]; 3237 lenRet = search.eopat[0] - search.bopat[0]; 3238 } else { 3239 success = 0; 3240 } 3241 } 3242 } 3243 } 3244 break; 3245 } 3246 } 3247 *length = lenRet; 3248 return pos; 3249 } 3250 3251 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) { 3252 substituted.clear(); 3253 const DocumentIndexer di(doc, doc->Length()); 3254 search.GrabMatches(di); 3255 for (Sci::Position j = 0; j < *length; j++) { 3256 if (text[j] == '\\') { 3257 if (text[j + 1] >= '0' && text[j + 1] <= '9') { 3258 const unsigned int patNum = text[j + 1] - '0'; 3259 const Sci::Position len = search.eopat[patNum] - search.bopat[patNum]; 3260 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur 3261 substituted.append(search.pat[patNum].c_str(), len); 3262 j++; 3263 } else { 3264 j++; 3265 switch (text[j]) { 3266 case 'a': 3267 substituted.push_back('\a'); 3268 break; 3269 case 'b': 3270 substituted.push_back('\b'); 3271 break; 3272 case 'f': 3273 substituted.push_back('\f'); 3274 break; 3275 case 'n': 3276 substituted.push_back('\n'); 3277 break; 3278 case 'r': 3279 substituted.push_back('\r'); 3280 break; 3281 case 't': 3282 substituted.push_back('\t'); 3283 break; 3284 case 'v': 3285 substituted.push_back('\v'); 3286 break; 3287 case '\\': 3288 substituted.push_back('\\'); 3289 break; 3290 default: 3291 substituted.push_back('\\'); 3292 j--; 3293 } 3294 } 3295 } else { 3296 substituted.push_back(text[j]); 3297 } 3298 } 3299 *length = substituted.length(); 3300 return substituted.c_str(); 3301 } 3302 3303 #ifndef SCI_OWNREGEX 3304 3305 
RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) { 3306 return new BuiltinRegex(charClassTable); 3307 } 3308 3309 #endif 3310