blob: e053c397345a07e69dfa8f72a3d5ebbede86a883 [file] [log] [blame]
Haibo Huangd8830302020-03-03 10:09:46 -08001"""Manage shelves of pickled objects.
2
3A "shelf" is a persistent, dictionary-like object. The difference
4with dbm databases is that the values (not the keys!) in a shelf can
5be essentially arbitrary Python objects -- anything that the "pickle"
6module can handle. This includes most class instances, recursive data
7types, and objects containing lots of shared sub-objects. The keys
8are ordinary strings.
9
10To summarize the interface (key is a string, data is an arbitrary
11object):
12
        import shelve
        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix

        d[key] = data   # store data at key (overwrites old data if
                        # using an existing key)
        data = d[key]   # retrieve a COPY of the data at key (raise
                        # KeyError if no such key) -- NOTE that this
                        # access returns a *copy* of the entry!
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = key in d # true if the key exists
        list = d.keys() # a list of all existing keys (slow!)

        d.close()       # close it
27
28Dependent on the implementation, closing a persistent dictionary may
29or may not be necessary to flush changes to disk.
30
31Normally, d[key] returns a COPY of the entry. This needs care when
32mutable entries are mutated: for example, if d[key] is a list,
33 d[key].append(anitem)
34does NOT modify the entry d[key] itself, as stored in the persistent
35mapping -- it only modifies the copy, which is then immediately
36discarded, so that the append has NO effect whatsoever. To append an
37item to d[key] in a way that will affect the persistent mapping, use:
38 data = d[key]
39 data.append(anitem)
40 d[key] = data
41
42To avoid the problem with mutable entries, you may pass the keyword
43argument writeback=True in the call to shelve.open. When you use:
44 d = shelve.open(filename, writeback=True)
45then d keeps a cache of all entries you access, and writes them all back
46to the persistent mapping when you call d.close(). This ensures that
47such usage as d[key].append(anitem) works as intended.
48
However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
57"""
58
Yi Kong71199322022-08-30 15:53:45 +080059from pickle import DEFAULT_PROTOCOL, Pickler, Unpickler
Haibo Huangd8830302020-03-03 10:09:46 -080060from io import BytesIO
61
62import collections.abc
63
64__all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"]
65
66class _ClosedDict(collections.abc.MutableMapping):
67 'Marker for a closed dict. Access attempts raise a ValueError.'
68
69 def closed(self, *args):
70 raise ValueError('invalid operation on closed shelf')
71 __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed
72
73 def __repr__(self):
74 return '<Closed Dictionary>'
75
76
class Shelf(collections.abc.MutableMapping):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Keys are strings; they are encoded with ``keyencoding`` before being
    used against the underlying mapping.  Values are pickled on store and
    unpickled on load.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        """Wrap *dict*, the backing mapping of encoded keys to pickled bytes.

        protocol    -- pickle protocol used when storing; None selects
                       pickle.DEFAULT_PROTOCOL.
        writeback   -- when true, every accessed or stored value is kept in
                       self.cache and written back by sync().
        keyencoding -- codec used to convert string keys to/from bytes.
        """
        self.dict = dict
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}
        self.keyencoding = keyencoding

    def __iter__(self):
        """Yield every key of the backing mapping, decoded to a string."""
        for k in self.dict.keys():
            yield k.decode(self.keyencoding)

    def __len__(self):
        """Return the number of entries in the backing mapping."""
        return len(self.dict)

    def __contains__(self, key):
        """Return whether the encoded form of *key* is in the backing mapping."""
        return key.encode(self.keyencoding) in self.dict

    def get(self, key, default=None):
        """Return self[key] if *key* is stored, otherwise *default*."""
        if key.encode(self.keyencoding) in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the value stored under *key*.

        A value already present in the writeback cache is returned as is;
        otherwise the stored bytes are unpickled (and remembered in the
        cache when writeback is enabled).  A missing key raises whatever
        the backing mapping raises for it.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = BytesIO(self.dict[key.encode(self.keyencoding)])
            # NOTE: unpickling can execute arbitrary code; a shelf must only
            # be opened on data from a trusted source.
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle *value* and store it under *key*.

        With writeback enabled the live object is also placed in the cache
        (before pickling is attempted).
        """
        if self.writeback:
            self.cache[key] = value
        f = BytesIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key.encode(self.keyencoding)] = f.getvalue()

    def __delitem__(self, key):
        """Delete *key* from the backing mapping and drop any cached value."""
        del self.dict[key.encode(self.keyencoding)]
        try:
            del self.cache[key]
        except KeyError:
            pass

    def __enter__(self):
        """Return the shelf itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the shelf when the ``with`` block ends."""
        self.close()

    def close(self):
        """Sync the shelf, close the backing mapping and mark the shelf closed.

        Does nothing if self.dict is already None.  A backing mapping
        without a close() method is tolerated.  Whatever happens, self.dict
        is replaced so later operations fail instead of touching the old
        backend.
        """
        if self.dict is None:
            return
        try:
            self.sync()
            try:
                self.dict.close()
            except AttributeError:
                pass
        finally:
            # Catch errors that may happen when close is called from __del__
            # because CPython is in interpreter shutdown.  The bare except
            # is deliberate: nothing may escape from here.
            try:
                self.dict = _ClosedDict()
            except:
                self.dict = None

    def __del__(self):
        """Close the shelf on garbage collection if __init__ completed."""
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            # see http://bugs.python.org/issue1339007 for details
            return
        self.close()

    def sync(self):
        """Write cached entries back and sync the backing mapping.

        When writeback is enabled and the cache is non-empty, every cached
        entry is re-stored and the cache is then emptied.  If storing an
        entry raises, the exception propagates, the cache is left intact
        and the writeback flag is restored, so the shelf keeps caching and
        a later sync() can retry.  Finally the backing mapping's own sync()
        is called when it has one.
        """
        if self.writeback and self.cache:
            # Disable writeback while flushing so that __setitem__ writes
            # straight through without touching the cache being iterated.
            self.writeback = False
            try:
                for key, entry in self.cache.items():
                    self[key] = entry
            finally:
                # Restore the flag even on failure; otherwise the shelf
                # would silently stop caching after one failed sync.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()
173
174
class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    This adds methods first(), next(), previous(), last() and
    set_location() that have no counterpart in [g]dbm databases.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    Each of the added methods returns a (key, value) pair with the key
    decoded to a string and the value unpickled.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        Shelf.__init__(self, dict, protocol, writeback, keyencoding)

    def _decode_record(self, record):
        """Turn a raw (key, pickled value) pair into (str key, object)."""
        raw_key, raw_value = record
        stream = BytesIO(raw_value)
        return (raw_key.decode(self.keyencoding), Unpickler(stream).load())

    def set_location(self, key):
        """Return the decoded record from the database's set_location(key)."""
        return self._decode_record(self.dict.set_location(key))

    def next(self):
        """Return the decoded record obtained by calling next() on the database."""
        return self._decode_record(next(self.dict))

    def previous(self):
        """Return the decoded record from the database's previous()."""
        return self._decode_record(self.dict.previous())

    def first(self):
        """Return the decoded record from the database's first()."""
        return self._decode_record(self.dict.first())

    def last(self):
        """Return the decoded record from the database's last()."""
        return self._decode_record(self.dict.last())
216
217
class DbfilenameShelf(Shelf):
    """Shelf implementation using the "dbm" generic dbm interface.

    The constructor takes the filename of the dbm database rather than an
    already-open mapping; the database is opened with dbm.open().
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        # dbm is imported only when a shelf is actually opened by filename.
        import dbm
        database = dbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback)
228
229
def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    filename is the base filename for the underlying database; as a
    side-effect, an extension may be added to it and more than one file
    may be created.  flag has the same interpretation as the flag
    parameter of dbm.open().  protocol specifies the version of the
    pickle protocol.  writeback is passed through to the shelf.

    Returns a DbfilenameShelf built from these arguments.

    See the module's __doc__ string for an overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback)
    return shelf
243 return DbfilenameShelf(filename, flag, protocol, writeback)