blob: 682b752a5c32fcec24800b9e3af1a1e48b7c88e9 [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/iterator/section-restrict-data.h"
#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
// An iterator that simply takes ownership of SectionRestrictData.
class SectionRestrictDataHolderIterator : public DocHitInfoIterator {
public:
explicit SectionRestrictDataHolderIterator(
std::unique_ptr<DocHitInfoIterator> delegate,
std::unique_ptr<SectionRestrictData> data)
: delegate_(std::move(delegate)), data_(std::move(data)) {}
libtextclassifier3::Status Advance() override {
auto result = delegate_->Advance();
doc_hit_info_ = delegate_->doc_hit_info();
return result;
}
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
std::move(*delegate_).TrimRightMostNode());
if (trimmed_delegate.iterator_ != nullptr) {
trimmed_delegate.iterator_ =
std::make_unique<SectionRestrictDataHolderIterator>(
std::move(trimmed_delegate.iterator_), std::move(data_));
}
return trimmed_delegate;
}
void MapChildren(const ChildrenMapper& mapper) override {
delegate_ = mapper(std::move(delegate_));
}
CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
std::string ToString() const override { return delegate_->ToString(); }
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
SectionIdMask filtering_section_mask) const override {
return delegate_->PopulateMatchedTermsStats(matched_terms_stats,
filtering_section_mask);
}
private:
std::unique_ptr<DocHitInfoIterator> delegate_;
std::unique_ptr<SectionRestrictData> data_;
};
DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data)
: delegate_(std::move(delegate)), data_(data) {}
std::unique_ptr<DocHitInfoIterator>
DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
std::set<std::string> target_sections, int64_t current_time_ms) {
std::unordered_map<std::string, std::set<std::string>> type_property_filters;
type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] =
std::move(target_sections);
auto data = std::make_unique<SectionRestrictData>(
document_store, schema_store, current_time_ms, type_property_filters);
std::unique_ptr<DocHitInfoIterator> result =
ApplyRestrictions(std::move(iterator), data.get());
return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
std::move(data));
}
std::unique_ptr<DocHitInfoIterator>
DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
const SearchSpecProto& search_spec, int64_t current_time_ms) {
std::unordered_map<std::string, std::set<std::string>> type_property_filters;
for (const SchemaStore::ExpandedTypePropertyMask& type_property_mask :
schema_store->ExpandTypePropertyMasks(
search_spec.type_property_filters())) {
type_property_filters[type_property_mask.schema_type] =
std::set<std::string>(type_property_mask.paths.begin(),
type_property_mask.paths.end());
}
auto data = std::make_unique<SectionRestrictData>(
document_store, schema_store, current_time_ms, type_property_filters);
std::unique_ptr<DocHitInfoIterator> result =
ApplyRestrictions(std::move(iterator), data.get());
return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
std::move(data));
}
std::unique_ptr<DocHitInfoIterator>
DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) {
ChildrenMapper mapper;
mapper = [&data, &mapper](std::unique_ptr<DocHitInfoIterator> iterator)
-> std::unique_ptr<DocHitInfoIterator> {
if (iterator->is_leaf()) {
return std::make_unique<DocHitInfoIteratorSectionRestrict>(
std::move(iterator), data);
} else {
iterator->MapChildren(mapper);
return iterator;
}
};
return mapper(std::move(iterator));
}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
while (delegate_->Advance().ok()) {
DocumentId document_id = delegate_->doc_hit_info().document_id();
auto data_optional = data_->document_store().GetAliveDocumentFilterData(
document_id, data_->current_time_ms());
if (!data_optional) {
// Ran into some error retrieving information on this hit, skip
continue;
}
// Guaranteed that the DocumentFilterData exists at this point
SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
auto schema_type_or = data_->schema_store().GetSchemaType(schema_type_id);
if (!schema_type_or.ok()) {
// Ran into error retrieving schema type, skip
continue;
}
const std::string* schema_type = std::move(schema_type_or).ValueOrDie();
SectionIdMask allowed_sections_mask =
data_->ComputeAllowedSectionsMask(*schema_type);
// A hit can be in multiple sections at once, need to check which of the
// section ids match the sections allowed by type_property_masks_. This can
// be done by doing a bitwise and of the section_id_mask in the doc hit and
// the allowed_sections_mask.
SectionIdMask section_id_mask =
delegate_->doc_hit_info().hit_section_ids_mask() &
allowed_sections_mask;
// Return this document if:
// - the sectionIdMask is not empty after applying property filters, or
// - no property filters apply for its schema type (allowed_sections_mask
// == kSectionIdMaskAll). This is needed to ensure that in case of empty
// query (which uses doc-hit-info-iterator-all-document-id), where
// section_id_mask is kSectionIdMaskNone, doc hits with no property
// restrictions don't get filtered out. Doc hits for schema types for
// whom property filters are specified will still get filtered out.
if (allowed_sections_mask == kSectionIdMaskAll ||
section_id_mask != kSectionIdMaskNone) {
doc_hit_info_ = delegate_->doc_hit_info();
doc_hit_info_.set_hit_section_ids_mask(section_id_mask);
return libtextclassifier3::Status::OK;
}
// Didn't find a matching section name for this hit. Continue.
}
// Didn't find anything on the delegate iterator.
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
std::move(*delegate_).TrimRightMostNode());
// TrimRightMostNode is only used by suggestion processor to process query
// expression, so an entry for wildcard should always be present in
// type_property_filters_ when code flow reaches here. If the InternalError
// below is returned, that means TrimRightMostNode hasn't been called in the
// right context.
const auto it = data_->type_property_filters().find("*");
if (it == data_->type_property_filters().end()) {
return absl_ports::InternalError(
"A wildcard entry should always be present in type property filters "
"whenever TrimRightMostNode() is called for "
"DocHitInfoIteratorSectionRestrict");
}
const std::set<std::string>& target_sections = it->second;
if (target_sections.empty()) {
return absl_ports::InternalError(
"Target sections should not be empty whenever TrimRightMostNode() is "
"called for DocHitInfoIteratorSectionRestrict");
}
if (trimmed_delegate.iterator_ == nullptr) {
// TODO(b/228240987): Update TrimmedNode and downstream code to handle
// multiple section restricts.
trimmed_delegate.target_section_ = std::move(*target_sections.begin());
return trimmed_delegate;
}
trimmed_delegate.iterator_ =
std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
new DocHitInfoIteratorSectionRestrict(
std::move(trimmed_delegate.iterator_), std::move(data_)));
return std::move(trimmed_delegate);
}
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
std::string output = "";
for (auto it = data_->type_property_filters().cbegin();
it != data_->type_property_filters().cend(); it++) {
std::string paths = absl_ports::StrJoin(it->second, ",");
output += (it->first) + ":" + (paths) + "; ";
}
std::string result = "{" + output.substr(0, output.size() - 2) + "}: ";
return absl_ports::StrCat(result, delegate_->ToString());
}
} // namespace lib
} // namespace icing