xref: /aosp_15_r20/external/pdfium/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
6 
7 #include <stdint.h>
8 
9 #include <map>
10 #include <numeric>
11 #include <set>
12 #include <sstream>
13 #include <utility>
14 #include <vector>
15 
16 #include "core/fpdfapi/page/cpdf_pageobject.h"
17 #include "core/fpdfapi/page/cpdf_pageobjectholder.h"
18 #include "core/fpdfapi/parser/cpdf_array.h"
19 #include "core/fpdfapi/parser/cpdf_dictionary.h"
20 #include "core/fpdfapi/parser/cpdf_document.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_stream.h"
23 #include "core/fpdfapi/parser/object_tree_traversal_util.h"
24 #include "third_party/abseil-cpp/absl/types/variant.h"
25 #include "third_party/base/check.h"
26 #include "third_party/base/containers/adapters.h"
27 #include "third_party/base/containers/contains.h"
28 #include "third_party/base/numerics/safe_conversions.h"
29 
CPDF_PageContentManager(CPDF_PageObjectHolder * page_obj_holder,CPDF_Document * document)30 CPDF_PageContentManager::CPDF_PageContentManager(
31     CPDF_PageObjectHolder* page_obj_holder,
32     CPDF_Document* document)
33     : page_obj_holder_(page_obj_holder),
34       document_(document),
35       objects_with_multi_refs_(GetObjectsWithMultipleReferences(document_)) {
36   RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
37   RetainPtr<CPDF_Object> contents_obj =
38       page_dict->GetMutableObjectFor("Contents");
39   RetainPtr<CPDF_Array> contents_array = ToArray(contents_obj);
40   if (contents_array) {
41     CHECK(contents_array->IsInline());
42     contents_ = std::move(contents_array);
43     return;
44   }
45 
46   RetainPtr<CPDF_Reference> contents_reference = ToReference(contents_obj);
47   if (contents_reference) {
48     RetainPtr<CPDF_Object> indirect_obj =
49         contents_reference->GetMutableDirect();
50     if (!indirect_obj)
51       return;
52 
53     contents_array.Reset(indirect_obj->AsMutableArray());
54     if (contents_array) {
55       if (pdfium::Contains(objects_with_multi_refs_,
56                            contents_array->GetObjNum())) {
57         RetainPtr<CPDF_Array> cloned_contents_array =
58             pdfium::WrapRetain(contents_array->Clone()->AsMutableArray());
59         page_dict->SetFor("Contents", cloned_contents_array);
60         contents_ = std::move(cloned_contents_array);
61       } else {
62         contents_ = std::move(contents_array);
63       }
64     } else if (indirect_obj->IsStream()) {
65       contents_ = pdfium::WrapRetain(indirect_obj->AsMutableStream());
66     }
67   }
68 }
69 
~CPDF_PageContentManager()70 CPDF_PageContentManager::~CPDF_PageContentManager() {
71   ExecuteScheduledRemovals();
72 }
73 
GetStreamByIndex(size_t stream_index)74 RetainPtr<CPDF_Stream> CPDF_PageContentManager::GetStreamByIndex(
75     size_t stream_index) {
76   RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
77   if (contents_stream) {
78     return stream_index == 0 ? contents_stream : nullptr;
79   }
80 
81   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
82   if (!contents_array) {
83     return nullptr;
84   }
85 
86   RetainPtr<CPDF_Reference> stream_reference =
87       ToReference(contents_array->GetMutableObjectAt(stream_index));
88   if (!stream_reference)
89     return nullptr;
90 
91   return ToStream(stream_reference->GetMutableDirect());
92 }
93 
AddStream(fxcrt::ostringstream * buf)94 size_t CPDF_PageContentManager::AddStream(fxcrt::ostringstream* buf) {
95   auto new_stream = document_->NewIndirect<CPDF_Stream>();
96   new_stream->SetDataFromStringstream(buf);
97 
98   // If there is one Content stream (not in an array), now there will be two, so
99   // create an array with the old and the new one. The new one's index is 1.
100   RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
101   if (contents_stream) {
102     auto new_contents_array = document_->NewIndirect<CPDF_Array>();
103     new_contents_array->AppendNew<CPDF_Reference>(document_,
104                                                   contents_stream->GetObjNum());
105     new_contents_array->AppendNew<CPDF_Reference>(document_,
106                                                   new_stream->GetObjNum());
107 
108     RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
109     page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
110                                          new_contents_array->GetObjNum());
111     contents_ = std::move(new_contents_array);
112     return 1;
113   }
114 
115   // If there is an array, just add the new stream to it, at the last position.
116   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
117   if (contents_array) {
118     contents_array->AppendNew<CPDF_Reference>(document_,
119                                               new_stream->GetObjNum());
120     return contents_array->size() - 1;
121   }
122 
123   // There were no Contents, so add the new stream as the single Content stream.
124   // Its index is 0.
125   RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
126   page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
127                                        new_stream->GetObjNum());
128   contents_ = std::move(new_stream);
129   return 0;
130 }
131 
UpdateStream(size_t stream_index,fxcrt::ostringstream * buf)132 void CPDF_PageContentManager::UpdateStream(size_t stream_index,
133                                            fxcrt::ostringstream* buf) {
134   // If `buf` is now empty, remove the stream instead of setting the data.
135   if (buf->tellp() <= 0) {
136     ScheduleRemoveStreamByIndex(stream_index);
137     return;
138   }
139 
140   RetainPtr<CPDF_Stream> existing_stream = GetStreamByIndex(stream_index);
141   CHECK(existing_stream);
142   if (!pdfium::Contains(objects_with_multi_refs_,
143                         existing_stream->GetObjNum())) {
144     existing_stream->SetDataFromStringstreamAndRemoveFilter(buf);
145     return;
146   }
147 
148   if (GetContentsStream()) {
149     auto new_stream = document_->NewIndirect<CPDF_Stream>();
150     new_stream->SetDataFromStringstream(buf);
151     RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
152     page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
153                                          new_stream->GetObjNum());
154   }
155 
156   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
157   if (!contents_array) {
158     return;
159   }
160 
161   RetainPtr<CPDF_Reference> stream_reference =
162       ToReference(contents_array->GetMutableObjectAt(stream_index));
163   if (!stream_reference) {
164     return;
165   }
166 
167   auto new_stream = document_->NewIndirect<CPDF_Stream>();
168   new_stream->SetDataFromStringstream(buf);
169   stream_reference->SetRef(document_, new_stream->GetObjNum());
170 }
171 
ScheduleRemoveStreamByIndex(size_t stream_index)172 void CPDF_PageContentManager::ScheduleRemoveStreamByIndex(size_t stream_index) {
173   streams_to_remove_.insert(stream_index);
174 }
175 
ExecuteScheduledRemovals()176 void CPDF_PageContentManager::ExecuteScheduledRemovals() {
177   // This method assumes there are no dirty streams in the
178   // CPDF_PageObjectHolder. If there were any, their indexes would need to be
179   // updated.
180   // Since CPDF_PageContentManager is only instantiated in
181   // CPDF_PageContentGenerator::GenerateContent(), which cleans up the dirty
182   // streams first, this should always be true.
183   DCHECK(!page_obj_holder_->HasDirtyStreams());
184 
185   if (streams_to_remove_.empty()) {
186     return;
187   }
188 
189   RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
190   if (contents_stream) {
191     // Only stream that can be removed is 0.
192     if (streams_to_remove_.find(0) != streams_to_remove_.end()) {
193       RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
194       page_dict->RemoveFor("Contents");
195     }
196     return;
197   }
198 
199   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
200   if (!contents_array) {
201     return;
202   }
203 
204   // Initialize a vector with the old stream indexes. This will be used to build
205   // a map from the old to the new indexes.
206   std::vector<size_t> streams_left(contents_array->size());
207   std::iota(streams_left.begin(), streams_left.end(), 0);
208 
209   // In reverse order so as to not change the indexes in the middle of the loop,
210   // remove the streams.
211   for (size_t stream_index : pdfium::base::Reversed(streams_to_remove_)) {
212     contents_array->RemoveAt(stream_index);
213     streams_left.erase(streams_left.begin() + stream_index);
214   }
215 
216   // Create a mapping from the old to the new stream indexes, shifted due to the
217   // deletion of the |streams_to_remove_|.
218   std::map<size_t, size_t> stream_index_mapping;
219   for (size_t i = 0; i < streams_left.size(); ++i) {
220     stream_index_mapping[streams_left[i]] = i;
221   }
222 
223   // Update the page objects' content stream indexes.
224   for (const auto& obj : *page_obj_holder_) {
225     int32_t old_stream_index = obj->GetContentStream();
226     int32_t new_stream_index = pdfium::base::checked_cast<int32_t>(
227         stream_index_mapping[old_stream_index]);
228     obj->SetContentStream(new_stream_index);
229   }
230 
231   // Even if there is a single content stream now, keep the array with a single
232   // element. It's valid, a second stream might be added in the near future, and
233   // the complexity of removing it is not worth it.
234 }
235 
GetContentsStream()236 RetainPtr<CPDF_Stream> CPDF_PageContentManager::GetContentsStream() {
237   if (absl::holds_alternative<RetainPtr<CPDF_Stream>>(contents_)) {
238     return absl::get<RetainPtr<CPDF_Stream>>(contents_);
239   }
240   return nullptr;
241 }
242 
GetContentsArray()243 RetainPtr<CPDF_Array> CPDF_PageContentManager::GetContentsArray() {
244   if (absl::holds_alternative<RetainPtr<CPDF_Array>>(contents_)) {
245     return absl::get<RetainPtr<CPDF_Array>>(contents_);
246   }
247   return nullptr;
248 }
249