xref: /aosp_15_r20/external/tensorflow/tensorflow/core/lib/io/buffered_inputstream.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_LIB_IO_BUFFERED_INPUTSTREAM_H_
17 #define TENSORFLOW_CORE_LIB_IO_BUFFERED_INPUTSTREAM_H_
18 
19 #include "tensorflow/core/lib/io/inputstream_interface.h"
20 #include "tensorflow/core/platform/file_system.h"
21 
22 namespace tensorflow {
23 namespace io {
24 
25 // Provides a buffer on top of an InputStreamInterface. A single instance of
26 // BufferedInputStream is NOT safe for concurrent use by multiple threads.
27 class BufferedInputStream : public InputStreamInterface {
28  public:
29   // Takes ownership of input_stream only if owns_input_stream is true;
30   // otherwise the caller keeps ownership and input_stream must outlive *this.
31   // TODO(rohanj): Remove owns_input_stream once the constructor below is
32   // removed.
33   BufferedInputStream(InputStreamInterface* input_stream, size_t buffer_bytes,
34                       bool owns_input_stream = false);
35 
36   // For backwards compatibility, expose an interface that is similar to what
37   // InputBuffer exposes. Does not take ownership of file. file must outlive
38   // *this. This will be removed once we migrate all uses of this class to the
39   // constructor above.
40   BufferedInputStream(RandomAccessFile* file, size_t buffer_bytes);
41 
42   ~BufferedInputStream() override;
43 
44   tensorflow::Status ReadNBytes(int64_t bytes_to_read,
45                                 tstring* result) override;
46 
47   tensorflow::Status SkipNBytes(int64_t bytes_to_skip) override;
48 
49   int64_t Tell() const override;
50 
51   // Seek to this offset within the file.
52   //
53   // If we seek to somewhere within our pre-buffered data, we will re-use what
54   // data we can.  Otherwise, Seek() throws out the current buffer and the next
55   // read will trigger an underlying read.
56   //
57   // Note: When seeking backwards in a stream, this implementation uses
58   // Reset() + SkipNBytes(), so its performance will be dependent
59   // largely on the performance of SkipNBytes().
60   tensorflow::Status Seek(int64_t position);
61 
62   // Read one text line of data into "*result" until end-of-file or a
63   // \n is read.  (The \n is not included in the result.)  Overwrites
64   // any existing data in *result.
65   //
66   // If successful, returns OK.  If we are already at the end of the
67   // file, we return an OUT_OF_RANGE error.  Otherwise, we return
68   // some other non-OK status.
69   tensorflow::Status ReadLine(std::string* result);
70   tensorflow::Status ReadLine(tstring* result);
71 
72   // Returns one text line of data until end-of-file or a '\n' is read. The '\n'
73   // is included in the result.
74   // This method is a substitute for ReadLine() when called from Python, where
75   // the Python File::readline() API expects the '\n' to be kept in the line.
76   // Also, '\0's are treated like any other character within the line and given
77   // no special treatment.
78   std::string ReadLineAsString();
79 
80   // Skip one text line of data.
81   //
82   // If successful, returns OK.  If we are already at the end of the
83   // file, we return an OUT_OF_RANGE error.  Otherwise, we return
84   // some other non-OK status.
85   tensorflow::Status SkipLine();
86 
87   // Reads the entire contents of the file into *result.
88   //
89   // Note: the amount of memory used by this function call is unbounded, so only
90   // use in ops that expect that behavior.
91   template <typename T>
92   tensorflow::Status ReadAll(T* result);
93 
94   tensorflow::Status Reset() override;
95 
96  private:
97   tensorflow::Status FillBuffer();
98   template <typename StringType>
99   tensorflow::Status ReadLineHelper(StringType* result, bool include_eol);
100 
101   InputStreamInterface* input_stream_;  // owned only if owns_input_stream_.
102   size_t size_;                         // buffer size.
103   tstring buf_;                         // the buffer itself.
104   // buf_[pos_, limit_) holds the valid "read ahead" data in the file.
105   size_t pos_ = 0;    // current position in buf_.
106   size_t limit_ = 0;  // just past the end of valid data in buf_.
107   bool owns_input_stream_ = false;
108   // When EoF is reached, file_status_ holds that status so that unnecessary
109   // buffer allocations can be skipped.
110   tensorflow::Status file_status_ = OkStatus();
111 
112   TF_DISALLOW_COPY_AND_ASSIGN(BufferedInputStream);
113 };
114 
115 // ReadAll<> explicit instantiations are defined in buffered_inputstream.cc.
116 #ifndef SWIG
117 extern template tensorflow::Status BufferedInputStream::ReadAll<std::string>(
118     std::string* result);
119 extern template tensorflow::Status BufferedInputStream::ReadAll<tstring>(
120     tstring* result);
121 #endif  // SWIG
122 
123 }  // namespace io
124 }  // namespace tensorflow
125 
126 #endif  // TENSORFLOW_CORE_LIB_IO_BUFFERED_INPUTSTREAM_H_
127