blob: bcac77227673987cac274f9a75df4e3865655c29 [file] [log] [blame]
/*
* Copyright (C) 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minikin/LineBreaker.h"
#include <algorithm>
#include <limits>
#include "minikin/Characters.h"
#include "minikin/Layout.h"
#include "minikin/Range.h"
#include "minikin/U16StringPiece.h"
#include "HyphenatorMap.h"
#include "LayoutUtils.h"
#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
#include "WordBreaker.h"
namespace minikin {
// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
// constants are larger than any reasonable actual width score.
const float SCORE_INFTY = std::numeric_limits<float>::max();
const float SCORE_OVERFULL = 1e12f;
const float SCORE_DESPERATE = 1e10f;
// Multiplier for hyphen penalty on last line.
const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
// Penalty assigned to each line break (to try to minimize number of lines)
// TODO: when we implement full justification (so spaces can shrink and stretch), this is
// probably not the most appropriate method.
const float LINE_PENALTY_MULTIPLIER = 2.0f;
// Penalty assigned to shrinking the whitepsace.
const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
// unreasonably long words. This is somewhat of a heuristic because extremely long words
// are possible in some languages. This does mean that very long real words can get
// broken by desperate breaks, with no hyphens.
const size_t LONGEST_HYPHENATED_WORD = 45;
// Maximum amount that spaces can shrink, in justified text.
const float SHRINKABILITY = 1.0 / 3.0;
constexpr size_t LAST_BREAK_OFFSET_NOWHERE = SIZE_MAX;
constexpr size_t LAST_BREAK_OFFSET_DESPERATE = LAST_BREAK_OFFSET_NOWHERE - 1;
inline static const LocaleList& getLocaleList(uint32_t localeListId) {
android::AutoMutex _l(gMinikinLock);
return LocaleListCache::getById(localeListId);
}
MeasuredText MeasuredText::generate(const U16StringPiece& text,
const std::vector<std::unique_ptr<Run>>& runs) {
MeasuredText result(text.size());
for (const auto& run : runs) {
const uint32_t runOffset = run->getRange().getStart();
run->getMetrics(text, result.widths.data() + runOffset,
result.extents.data() + runOffset, result.overhangs.data() + runOffset);
}
return result;
}
LineBreaker::LineBreaker(const U16StringPiece& textBuffer, const MeasuredText& measuredText,
BreakStrategy strategy, HyphenationFrequency frequency, bool justified)
: LineBreaker(std::make_unique<WordBreaker>(), textBuffer, measuredText, strategy,
frequency, justified) {}
LineBreaker::LineBreaker(std::unique_ptr<WordBreaker>&& breaker, const U16StringPiece& textBuffer,
const MeasuredText& measuredText, BreakStrategy strategy,
HyphenationFrequency frequency, bool justified)
: mCurrentLocaleListId(LocaleListCache::kInvalidListId),
mWordBreaker(std::move(breaker)),
mTextBuf(textBuffer),
mMeasuredText(measuredText),
mStrategy(strategy),
mHyphenationFrequency(frequency),
mJustified(justified),
mLastConsideredGreedyCandidate(LAST_BREAK_OFFSET_NOWHERE),
mSpaceCount(0) {
mWordBreaker->setText(mTextBuf.data(), mTextBuf.size());
// handle initial break here because addStyleRun may never be called
mCandidates.push_back({
0 /* offset */, 0.0 /* preBreak */, 0.0 /* postBreak */,
0.0 /* firstOverhang */, 0.0 /* secondOverhang */,
0.0 /* penalty */,
0 /* preSpaceCount */, 0 /* postSpaceCount */,
HyphenationType::DONT_BREAK /* hyphenType */,
false /* isRtl. TODO: may need to be based on input. */});
}
LineBreaker::~LineBreaker() {}
const LineBreaker::Candidate& LineBreaker::getLastBreakCandidate() const {
MINIKIN_ASSERT(mLastGreedyBreakIndex != LAST_BREAK_OFFSET_NOWHERE,
"Line break hasn't started.");
return mLastGreedyBreakIndex == LAST_BREAK_OFFSET_DESPERATE
? mFakeDesperateCandidate : mCandidates[mLastGreedyBreakIndex];
}
void LineBreaker::setLocaleList(uint32_t localeListId, size_t restartFrom) {
if (mCurrentLocaleListId == localeListId) {
return;
}
const LocaleList& newLocales = getLocaleList(localeListId);
const Locale newLocale = newLocales.empty() ? Locale() : newLocales[0];
const uint64_t newLocaleId = newLocale.getIdentifier();
const bool needUpdate =
// The first time setLocale is called.
mCurrentLocaleListId == LocaleListCache::kInvalidListId ||
// The effective locale is changed.
newLocaleId != mCurrentLocaleId;
// For now, we ignore all locales except the first valid one.
// TODO: Support selecting the locale based on the script of the text.
mCurrentLocaleListId = localeListId;
mCurrentLocaleId = newLocaleId;
if (needUpdate) {
mWordBreaker->followingWithLocale(newLocale, restartFrom);
mHyphenator = HyphenatorMap::lookup(newLocale);
}
}
// This function determines whether a character is a space that disappears at end of line.
// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
// plus '\n'.
// Note: all such characters are in the BMP, so it's ok to use code units for this.
static bool isLineEndSpace(uint16_t c) {
return c == '\n'
|| c == ' ' // SPACE
|| c == 0x1680 // OGHAM SPACE MARK
|| (0x2000 <= c && c <= 0x200A && c != 0x2007) // EN QUAD, EM QUAD, EN SPACE, EM SPACE,
// THREE-PER-EM SPACE, FOUR-PER-EM SPACE,
// SIX-PER-EM SPACE, PUNCTUATION SPACE,
// THIN SPACE, HAIR SPACE
|| c == 0x205F // MEDIUM MATHEMATICAL SPACE
|| c == 0x3000; // IDEOGRAPHIC SPACE
}
// Hyphenates a string potentially containing non-breaking spaces.
std::vector<HyphenationType> LineBreaker::hyphenate(const U16StringPiece& str) {
std::vector<HyphenationType> out;
const size_t len = str.size();
out.reserve(len);
// A word here is any consecutive string of non-NBSP characters.
bool inWord = false;
size_t wordStart = 0; // The initial value will never be accessed, but just in case.
for (size_t i = 0; i <= len; i++) {
if (i == len || str[i] == CHAR_NBSP) {
if (inWord) {
// A word just ended. Hyphenate it.
const size_t wordLen = i - wordStart;
if (wordLen <= LONGEST_HYPHENATED_WORD) {
if (wordStart == 0) {
// The string starts with a word. Use out directly.
mHyphenator->hyphenate(&out, str.data(), wordLen);
} else {
std::vector<HyphenationType> wordVec;
mHyphenator->hyphenate(&wordVec, str.data() + wordStart, wordLen);
out.insert(out.end(), wordVec.cbegin(), wordVec.cend());
}
} else { // Word is too long. Inefficient to hyphenate.
out.insert(out.end(), wordLen, HyphenationType::DONT_BREAK);
}
inWord = false;
}
if (i < len) {
// Insert one DONT_BREAK for the NBSP.
out.push_back(HyphenationType::DONT_BREAK);
}
} else if (!inWord) {
inWord = true;
wordStart = i;
}
}
return out;
}
// Compute the total overhang of text based on per-cluster advances and overhangs.
// The two input vectors are expected to be of the same size.
/* static */ LayoutOverhang LineBreaker::computeOverhang(float totalAdvance,
const std::vector<float>& advances, const std::vector<LayoutOverhang>& overhangs,
bool isRtl) {
ParaWidth left = 0.0;
ParaWidth right = 0.0;
ParaWidth seenAdvance = 0.0;
const size_t len = advances.size();
if (isRtl) {
for (size_t i = 0; i < len; i++) {
right = std::max(right, overhangs[i].right - seenAdvance);
seenAdvance += advances[i];
left = std::max(left, overhangs[i].left - (totalAdvance - seenAdvance));
}
} else {
for (size_t i = 0; i < len; i++) {
left = std::max(left, overhangs[i].left - seenAdvance);
seenAdvance += advances[i];
right = std::max(right, overhangs[i].right - (totalAdvance - seenAdvance));
}
}
return {static_cast<float>(left), static_cast<float>(right)};
}
// This adds all the hyphenation candidates for a given word by first finding all the hyphenation
// points and then calling addWordBreak for each.
//
// paint will be used for measuring the text and paint must not be null.
// wordRange is the range for the word.
// contextRange is the range from the last word breakpoint to the first code unit after the word.
// For example, if the word starts with the punctuations or ends with spaces, the contextRange
// contains both punctuations and trailing spaces but wordRange excludes them.
// lastBreakWidth is the width seen until the begining of context range.
// bidiFlags keep the bidi flags to determine the direction of text for layout and other
// calculations. It may only be Bidi::FORCE_RTL or Bidi::FORCE_LTR.
//
// The following parameters are needed to be passed to addWordBreak:
// postBreak is the width that would be seen if we decide to break at the end of the word (so it
// doesn't count any line-ending space after the word).
// postSpaceCount is the number of spaces that would be seen if we decide to break at the end of the
// word (and like postBreak it doesn't count any line-ending space after the word).
// hyphenPenalty is the amount of penalty for hyphenation.
void LineBreaker::addHyphenationCandidates(
const Run& run,
const Range& contextRange,
const Range& wordRange,
ParaWidth lastBreakWidth,
ParaWidth postBreak,
size_t postSpaceCount,
float hyphenPenalty) {
MINIKIN_ASSERT(contextRange.contains(wordRange), "Context must contain word range");
const bool isRtlWord = run.isRtl();
const std::vector<HyphenationType> hyphenResult = hyphenate(mTextBuf.substr(wordRange));
std::vector<float> advances;
std::vector<LayoutOverhang> overhangs;
advances.reserve(contextRange.getLength());
overhangs.reserve(contextRange.getLength());
// measure hyphenated substrings
for (size_t j : wordRange) {
HyphenationType hyph = hyphenResult[wordRange.toRangeOffset(j)];
if (hyph == HyphenationType::DONT_BREAK) {
continue;
}
auto hyphenPart = contextRange.split(j);
const Range& firstPart = hyphenPart.first;
const Range& secondPart = hyphenPart.second;
const size_t firstPartLen = firstPart.getLength();
advances.resize(firstPartLen);
overhangs.resize(firstPartLen);
const float firstPartWidth = run.measureHyphenPiece(
mTextBuf, firstPart, StartHyphenEdit::NO_EDIT, editForThisLine(hyph),
advances.data(), overhangs.data());
const ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
LayoutOverhang overhang = computeOverhang(firstPartWidth, advances, overhangs, isRtlWord);
// TODO: This ignores potential overhang from a previous word, e.g. in "R table" if the
// right overhang of the R is larger than the advance of " ta-". In such cases, we need to
// take the existing overhang into account.
const float firstOverhang = isRtlWord ? overhang.left : overhang.right;
const size_t secondPartLen = secondPart.getLength();
advances.resize(secondPartLen);
overhangs.resize(secondPartLen);
const float secondPartWidth = run.measureHyphenPiece(
mTextBuf, secondPart, editForNextLine(hyph), EndHyphenEdit::NO_EDIT,
advances.data(), overhangs.data());
// hyphPreBreak is calculated like this so that when the line width for a future line break
// is being calculated, the width of the whole word would be subtracted and the width of the
// second part would be added.
const ParaWidth hyphPreBreak = postBreak - secondPartWidth;
overhang = computeOverhang(secondPartWidth, advances, overhangs, isRtlWord);
const float secondOverhang = isRtlWord ? overhang.right : overhang.left;
addWordBreak(j, hyphPreBreak, hyphPostBreak, firstOverhang, secondOverhang,
postSpaceCount, postSpaceCount, hyphenPenalty, hyph, isRtlWord);
}
}
// This method finds the candidate word breaks (using the ICU break iterator) and sends them
// to addWordBreak.
void LineBreaker::addRun(const Run& run, const LineWidth& lineWidth, const TabStops& tabStops) {
const bool isRtl = run.isRtl();
const Range& range = run.getRange();
const bool canHyphenate = run.canHyphenate();
float hyphenPenalty = 0.0;
if (canHyphenate) {
const MinikinPaint* paint = run.getPaint();
// a heuristic that seems to perform well
hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0);
if (mHyphenationFrequency == HyphenationFrequency::Normal) {
hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
}
if (mJustified) {
// Make hyphenation more aggressive for fully justified text (so that "normal" in
// justified mode is the same as "full" in ragged-right).
hyphenPenalty *= 0.25;
} else {
// Line penalty is zero for justified text.
mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
}
}
setLocaleList(run.getLocaleListId(), range.getStart());
size_t current = (size_t) mWordBreaker->current();
// This will keep the index of last code unit seen that's not a line-ending space, plus one.
// In other words, the index of the first code unit after a word.
Range hyphenationContextRange(range.getStart(), range.getStart());
ParaWidth lastBreakWidth = mWidth; // The width of the text as of the previous break point.
ParaWidth postBreak = mWidth; // The width of text seen if we decide to break here
// postBreak plus potential forward overhang. Guaranteed to be >= postBreak.
ParaWidth postBreakWithOverhang = mWidth;
// The maximum amount of backward overhang seen since last word.
float maxBackwardOverhang = 0;
size_t postSpaceCount = mSpaceCount;
const bool hyphenate = canHyphenate && mHyphenationFrequency != HyphenationFrequency::None;
for (size_t i : range) {
const uint16_t c = mTextBuf[i];
if (c == CHAR_TAB) {
// Fall back to greedy; other modes don't know how to deal with tabs.
mStrategy = BreakStrategy::Greedy;
// In order to figure out the actual width of the tab, we need to run the greedy
// algorithm on all previous text and determine the last line break's preBreak.
const ParaWidth lastPreBreak = computeBreaksGreedyPartial(lineWidth);
mWidth = lastPreBreak + tabStops.nextTab(mWidth - lastPreBreak);
if (mFirstTabIndex == INT_MAX) {
mFirstTabIndex = static_cast<int>(i);
}
// No need to update afterWord since tab characters can not be an end of word character
// in WordBreaker. See the implementation of WordBreaker::wordEnd.
} else {
if (isWordSpace(c)) {
mSpaceCount += 1;
}
mWidth += mMeasuredText.widths[i];
if (isLineEndSpace(c)) {
// If we break a line on a line-ending space, that space goes away. So postBreak
// and postSpaceCount, which keep the width and number of spaces if we decide to
// break at this point, don't need to get adjusted.
//
// TODO: handle the rare case of line ending spaces having overhang (it can happen
// for U+1680 OGHAM SPACE MARK).
} else {
postBreak = mWidth;
postSpaceCount = mSpaceCount;
hyphenationContextRange.setEnd(i + 1);
// TODO: This doesn't work for very tight lines and large overhangs, where the
// overhang from a previous word that may end up on an earline line may be
// considered still in effect for a later word. But that's expected to be very rare,
// so we ignore it for now.
const float forwardOverhang =
isRtl ? mMeasuredText.overhangs[i].left : mMeasuredText.overhangs[i].right;
postBreakWithOverhang = std::max(postBreakWithOverhang,
postBreak + forwardOverhang);
float backwardOverhang =
isRtl ? mMeasuredText.overhangs[i].right : mMeasuredText.overhangs[i].left;
// Adjust backwardOverhang by the advance already seen from the last break.
backwardOverhang -= (mWidth - mMeasuredText.widths[i]) - lastBreakWidth;
maxBackwardOverhang = std::max(maxBackwardOverhang, backwardOverhang);
}
}
if (i + 1 == current) { // We are at the end of a word.
// We skip breaks for zero-width characters inside replacement spans.
const bool addBreak = canHyphenate || current == range.getEnd() ||
mMeasuredText.widths[current] > 0;
if (addBreak) {
// adjust second overhang for previous breaks
adjustSecondOverhang(maxBackwardOverhang);
}
if (hyphenate) {
const Range wordRange = mWordBreaker->wordRange();
if (!wordRange.isEmpty() && range.contains(wordRange)) {
addHyphenationCandidates(run, hyphenationContextRange, wordRange,
lastBreakWidth, postBreak, postSpaceCount, hyphenPenalty);
}
}
if (addBreak) {
const float penalty = hyphenPenalty * mWordBreaker->breakBadness();
// TODO: overhangs may need adjustment at bidi boundaries.
addWordBreak(current, mWidth /* preBreak */, postBreak,
postBreakWithOverhang - postBreak /* firstOverhang */,
0.0 /* secondOverhang, to be adjusted later */,
mSpaceCount, postSpaceCount,
penalty, HyphenationType::DONT_BREAK, isRtl);
}
hyphenationContextRange = Range(current, current);
lastBreakWidth = mWidth;
maxBackwardOverhang = 0;
current = (size_t)mWordBreaker->next();
}
}
}
// Add desperate breaks for the greedy algorithm.
// Note: these breaks are based on the shaping of the (non-broken) original text; they
// are imprecise especially in the presence of kerning, ligatures, overhangs, and Arabic shaping.
void LineBreaker::addDesperateBreaksGreedy(ParaWidth existingPreBreak, size_t start, size_t end,
const LineWidth& lineWidth) {
ParaWidth width = mMeasuredText.widths[start];
for (size_t i = start + 1; i < end; i++) {
const float w = mMeasuredText.widths[i];
if (w > 0) { // Add desperate breaks only before grapheme clusters.
const ParaWidth newWidth = width + w;
if (!fitsOnCurrentLine(newWidth, 0.0, 0.0, lineWidth)) {
const Candidate& lastGreedyBreak = getLastBreakCandidate();
constexpr HyphenationType hyphen = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
pushBreak(i, width, computeMaxExtent(lastGreedyBreak.offset, i),
packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType),
editForThisLine(hyphen)));
existingPreBreak += width;
// Only set the fields that will be later read.
mFakeDesperateCandidate.offset = i;
mFakeDesperateCandidate.preBreak = existingPreBreak;
mFakeDesperateCandidate.secondOverhang = 0.0;
mFakeDesperateCandidate.hyphenType = hyphen;
mLastGreedyBreakIndex = LAST_BREAK_OFFSET_DESPERATE;
width = w;
} else {
width = newWidth;
}
}
}
}
// Add a word break (possibly for a hyphenated fragment).
inline void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
float firstOverhang, float secondOverhang, size_t preSpaceCount, size_t postSpaceCount,
float penalty, HyphenationType hyph, bool isRtl) {
mCandidates.push_back({offset, preBreak, postBreak, firstOverhang, secondOverhang, penalty,
preSpaceCount, postSpaceCount, hyph, isRtl});
}
// Go back and adjust earlier line breaks if needed.
void LineBreaker::adjustSecondOverhang(float secondOverhang) {
const size_t lastCand = mCandidates.size() - 1;
const ParaWidth lastPreBreak = mCandidates[lastCand].preBreak;
for (ssize_t i = lastCand; i >= 0; i--) {
// "lastPreBreak - mCandidates[i].preBreak" is the amount of difference in mWidth when those
// breaks where added. So by subtracting that difference, we are subtracting the difference
// in advances in order to find out how much overhang still remains.
const float remainingOverhang = secondOverhang - (lastPreBreak - mCandidates[i].preBreak);
if (remainingOverhang <= 0.0) {
// No more remaining overhang. We don't need to adjust anything anymore.
return;
}
mCandidates[i].secondOverhang = std::max(mCandidates[i].secondOverhang,
remainingOverhang);
}
}
// Find the needed extent between the start and end ranges. start is inclusive and end is exclusive.
// Both are indices of the source string.
MinikinExtent LineBreaker::computeMaxExtent(size_t start, size_t end) const {
MinikinExtent res = {0.0, 0.0, 0.0};
for (size_t j = start; j < end; j++) {
res.extendBy(mMeasuredText.extents[j]);
}
return res;
}
void LineBreaker::addGreedyBreak(size_t breakIndex) {
const Candidate& candidate = mCandidates[breakIndex];
const Candidate& lastGreedyBreak = getLastBreakCandidate();
pushBreak(candidate.offset,
candidate.postBreak - lastGreedyBreak.preBreak,
computeMaxExtent(lastGreedyBreak.offset, candidate.offset),
packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType),
editForThisLine(candidate.hyphenType)));
mLastGreedyBreakIndex = breakIndex;
}
// Also add desperate breaks if needed (ie when word exceeds current line width).
void LineBreaker::considerGreedyBreakCandidate(size_t candIndex, const LineWidth& lineWidth) {
const Candidate* cand = &mCandidates[candIndex];
const Candidate* lastGreedyBreak = &getLastBreakCandidate();
float leftOverhang, rightOverhang;
// TODO: Only works correctly for unidirectional text. Needs changes for bidi text.
if (cand->isRtl) {
leftOverhang = cand->firstOverhang;
rightOverhang = lastGreedyBreak->secondOverhang;
} else {
leftOverhang = lastGreedyBreak->secondOverhang;
rightOverhang = cand->firstOverhang;
}
while (!fitsOnCurrentLine(cand->postBreak - lastGreedyBreak->preBreak,
leftOverhang, rightOverhang, lineWidth)) {
// This break would create an overfull line, pick the best break and break there (greedy).
// We do this in a loop, since there's no guarantee that after a break the remaining text
// would fit on the next line.
if (mBestGreedyBreaks.empty()) {
// If no break has been found since last break but we are inside this loop, the
// section between the last line break and this candidate doesn't fit in the available
// space. So we need to consider desperate breaks.
// Add desperate breaks starting immediately after the last break.
addDesperateBreaksGreedy(lastGreedyBreak->preBreak, lastGreedyBreak->offset,
cand->offset, lineWidth);
break;
} else {
// Break at the best known break.
addGreedyBreak(popBestGreedyBreak());
// addGreedyBreak updates the last break candidate.
lastGreedyBreak = &getLastBreakCandidate();
if (cand->isRtl) {
rightOverhang = lastGreedyBreak->secondOverhang;
} else {
leftOverhang = lastGreedyBreak->secondOverhang;
}
}
}
insertGreedyBreakCandidate(candIndex, cand->penalty);
}
void LineBreaker::pushBreak(int offset, float width, MinikinExtent extent, HyphenEdit hyphenEdit) {
mBreaks.push_back(offset);
mWidths.push_back(width);
mAscents.push_back(extent.ascent);
mDescents.push_back(extent.descent);
const int flags = ((mFirstTabIndex < mBreaks.back()) << kTab_Shift) | hyphenEdit;
mFlags.push_back(flags);
mFirstTabIndex = INT_MAX;
}
LineBreaker::ParaWidth LineBreaker::computeBreaksGreedyPartial(const LineWidth& lineWidth) {
size_t firstCandidate;
if (mLastConsideredGreedyCandidate == SIZE_MAX) {
// Clear results and reset greedy line breaker state if we are here for the first time.
clearResults();
mBestGreedyBreaks.clear();
mLastGreedyBreakIndex = 0;
mFirstTabIndex = INT_MAX;
firstCandidate = 1;
} else {
firstCandidate = mLastConsideredGreedyCandidate + 1;
}
const size_t lastCandidate = mCandidates.size() - 1;
for (size_t cand = firstCandidate; cand <= lastCandidate; cand++) {
considerGreedyBreakCandidate(cand, lineWidth);
}
mLastConsideredGreedyCandidate = lastCandidate;
return getLastBreakCandidate().preBreak;
}
// Get the width of a space. May return 0 if there are no spaces.
// Note: if there are multiple different widths for spaces (for example, because of mixing of
// fonts), it's only guaranteed to pick one.
float LineBreaker::getSpaceWidth() const {
for (size_t i = 0; i < mTextBuf.size(); i++) {
if (isWordSpace(mTextBuf[i])) {
return mMeasuredText.widths[i];
}
}
return 0.0f;
}
bool LineBreaker::fitsOnCurrentLine(float width, float leftOverhang, float rightOverhang,
const LineWidth& lineWidth) const {
const size_t lineNo = mBreaks.size();
const float availableWidth = lineWidth.getAt(lineNo);
const float availableLeftPadding = lineWidth.getLeftPaddingAt(lineNo);
const float availableRightPadding = lineWidth.getRightPaddingAt(lineNo);
const float remainingLeftOverhang = std::max(0.0f, leftOverhang - availableLeftPadding);
const float remainingRightOverhang = std::max(0.0f, rightOverhang - availableRightPadding);
return width + remainingLeftOverhang + remainingRightOverhang <= availableWidth;
}
void LineBreaker::computeBreaksGreedy(const LineWidth& lineWidth) {
computeBreaksGreedyPartial(lineWidth);
// All breaks but the last have been added by computeBreaksGreedyPartial() already.
const Candidate* lastCandidate = &mCandidates.back();
if (mCandidates.size() == 1 || mLastGreedyBreakIndex != (mCandidates.size() - 1)) {
const Candidate& lastGreedyBreak = getLastBreakCandidate();
pushBreak(lastCandidate->offset,
lastCandidate->postBreak - lastGreedyBreak.preBreak,
computeMaxExtent(lastGreedyBreak.offset, lastCandidate->offset),
packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType),
EndHyphenEdit::NO_EDIT));
// No need to update mLastGreedyBreakIndex because we're done.
}
}
// Add desperate breaks for the optimal algorithm.
// Note: these breaks are based on the shaping of the (non-broken) original text; they
// are imprecise especially in the presence of kerning, ligatures, overhangs, and Arabic shaping.
void LineBreaker::addDesperateBreaksOptimal(std::vector<Candidate>* out, ParaWidth existingPreBreak,
size_t postSpaceCount, bool isRtl, size_t start, size_t end) {
ParaWidth width = existingPreBreak + mMeasuredText.widths[start];
for (size_t i = start + 1; i < end; i++) {
const float w = mMeasuredText.widths[i];
if (w > 0) { // Add desperate breaks only before grapheme clusters.
out->push_back({i /* offset */, width /* preBreak */, width /* postBreak */,
0.0 /* firstOverhang */, 0.0 /* secondOverhang */,
SCORE_DESPERATE /* penalty */,
// postSpaceCount doesn't include trailing spaces.
postSpaceCount /* preSpaceCount */, postSpaceCount /* postSpaceCount */,
HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN /* hyphenType */,
isRtl /* isRtl */});
width += w;
}
}
}
void LineBreaker::addAllDesperateBreaksOptimal(const LineWidth& lineWidth) {
const ParaWidth minLineWidth = lineWidth.getMin();
size_t firstDesperateIndex = 0;
const size_t nCand = mCandidates.size();
for (size_t i = 1; i < nCand; i++) {
const ParaWidth requiredWidth = mCandidates[i].postBreak - mCandidates[i - 1].preBreak;
if (requiredWidth > minLineWidth) {
// A desperate break is needed.
firstDesperateIndex = i;
break;
}
}
if (firstDesperateIndex == 0) { // No desperate breaks needed.
return;
}
// This temporary holds an expanded list of candidates, which will be later copied back into
// mCandidates. The beginning of mCandidates, where there are no desparate breaks, is skipped.
std::vector<Candidate> expandedCandidates;
const size_t nRemainingCandidates = nCand - firstDesperateIndex;
expandedCandidates.reserve(nRemainingCandidates + 1); // At least one more is needed.
for (size_t i = firstDesperateIndex; i < nCand; i++) {
const Candidate& previousCand = mCandidates[i - 1];
const Candidate& thisCand = mCandidates[i];
const ParaWidth requiredWidth = thisCand.postBreak - previousCand.preBreak;
if (requiredWidth > minLineWidth) {
addDesperateBreaksOptimal(&expandedCandidates, previousCand.preBreak,
thisCand.postSpaceCount, thisCand.isRtl, previousCand.offset /* start */,
thisCand.offset /* end */);
}
expandedCandidates.push_back(thisCand);
}
mCandidates.reserve(firstDesperateIndex + expandedCandidates.size());
// Iterator to the first candidate to insert from expandedCandidates. The candidates before this
// would simply be copied.
auto firstToInsert = expandedCandidates.begin() + nRemainingCandidates;
// Copy until the end of mCandidates.
std::copy(expandedCandidates.begin(), firstToInsert, mCandidates.begin() + firstDesperateIndex);
// Insert the rest.
mCandidates.insert(mCandidates.end(), firstToInsert, expandedCandidates.end());
}
// Follow "prev" links in mCandidates array, and copy to result arrays.
void LineBreaker::finishBreaksOptimal(const std::vector<OptimalBreaksData>& breaksData) {
// clear output vectors.
clearResults();
const size_t nCand = mCandidates.size();
size_t prev;
for (size_t i = nCand - 1; i > 0; i = prev) {
prev = breaksData[i].prev;
mBreaks.push_back(mCandidates[i].offset);
mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
MinikinExtent extent = computeMaxExtent(mCandidates[prev].offset, mCandidates[i].offset);
mAscents.push_back(extent.ascent);
mDescents.push_back(extent.descent);
const HyphenEdit edit = packHyphenEdit(
prev == 0 ? StartHyphenEdit::NO_EDIT : editForNextLine(mCandidates[prev].hyphenType),
editForThisLine(mCandidates[i].hyphenType));
mFlags.push_back(static_cast<int>(edit));
}
std::reverse(mBreaks.begin(), mBreaks.end());
std::reverse(mWidths.begin(), mWidths.end());
std::reverse(mFlags.begin(), mFlags.end());
}
void LineBreaker::computeBreaksOptimal(const LineWidth& lineWidth) {
size_t active = 0;
const size_t nCand = mCandidates.size();
const float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
std::vector<OptimalBreaksData> breaksData;
breaksData.reserve(nCand);
breaksData.push_back({0.0, 0, 0}); // The first candidate is always at the first line.
// "i" iterates through candidates for the end of the line.
for (size_t i = 1; i < nCand; i++) {
const bool atEnd = i == nCand - 1;
float best = SCORE_INFTY;
size_t bestPrev = 0;
size_t lineNumberLast = breaksData[active].lineNumber;
float width = lineWidth.getAt(lineNumberLast);
ParaWidth leftEdge = mCandidates[i].postBreak - width;
float bestHope = 0;
// "j" iterates through candidates for the beginning of the line.
for (size_t j = active; j < i; j++) {
const size_t lineNumber = breaksData[j].lineNumber;
if (lineNumber != lineNumberLast) {
const float widthNew = lineWidth.getAt(lineNumber);
if (widthNew != width) {
leftEdge = mCandidates[i].postBreak - width;
bestHope = 0;
width = widthNew;
}
lineNumberLast = lineNumber;
}
const float jScore = breaksData[j].score;
if (jScore + bestHope >= best) continue;
const float delta = mCandidates[j].preBreak - leftEdge;
// compute width score for line
// Note: the "bestHope" optimization makes the assumption that, when delta is
// non-negative, widthScore will increase monotonically as successive candidate
// breaks are considered.
float widthScore = 0.0f;
float additionalPenalty = 0.0f;
if ((atEnd || !mJustified) && delta < 0) {
widthScore = SCORE_OVERFULL;
} else if (atEnd && mStrategy != BreakStrategy::Balanced) {
// increase penalty for hyphen on last line
additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
} else {
widthScore = delta * delta;
if (delta < 0) {
if (-delta < maxShrink *
(mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) {
widthScore *= SHRINK_PENALTY_MULTIPLIER;
} else {
widthScore = SCORE_OVERFULL;
}
}
}
if (delta < 0) {
active = j + 1;
} else {
bestHope = widthScore;
}
const float score = jScore + widthScore + additionalPenalty;
if (score <= best) {
best = score;
bestPrev = j;
}
}
breaksData.push_back({
best + mCandidates[i].penalty + mLinePenalty, // score
bestPrev, // prev
breaksData[bestPrev].lineNumber + 1}); // lineNumber
}
finishBreaksOptimal(breaksData);
}
LineBreakResult LineBreaker::computeBreaks(const std::vector<std::unique_ptr<Run>>& runs,
const LineWidth& lineWidth,
const TabStops& tabStops) {
for (const auto& run : runs) {
addRun(*run, lineWidth, tabStops);
}
if (mStrategy == BreakStrategy::Greedy) {
computeBreaksGreedy(lineWidth);
} else {
addAllDesperateBreaksOptimal(lineWidth);
computeBreaksOptimal(lineWidth);
}
LineBreakResult result;
result.breakPoints = std::move(mBreaks);
result.widths = std::move(mWidths);
result.ascents = std::move(mAscents);
result.descents = std::move(mDescents);
result.flags = std::move(mFlags);
return result;
}
size_t LineBreaker::popBestGreedyBreak() {
const size_t bestBreak = mBestGreedyBreaks.front().index;
mBestGreedyBreaks.pop_front();
return bestBreak;
}
void LineBreaker::insertGreedyBreakCandidate(size_t index, float penalty) {
GreedyBreak gBreak = {index, penalty};
if (!mBestGreedyBreaks.empty()) {
// Find the location in the queue where the penalty is <= the current penalty, and drop the
// elements from there to the end of the queue.
auto where = std::lower_bound(
mBestGreedyBreaks.begin(), mBestGreedyBreaks.end(), gBreak,
[](GreedyBreak first, GreedyBreak second) -> bool
{ return first.penalty < second.penalty; });
if (where != mBestGreedyBreaks.end()) {
mBestGreedyBreaks.erase(where, mBestGreedyBreaks.end());
}
}
mBestGreedyBreaks.push_back(gBreak);
}
} // namespace minikin