blob: 08aff32602c4be8bee15cb42ac7527a86a621e93 [file] [log] [blame]
The Android Open Source Projectcf31fe92008-10-21 07:00:00 -07001#
2# Copyright (C) 2008 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import stat
17import struct
18import zlib
19import cStringIO
20
21from import_ext import ImportExternal
22from error import ImportError
23
class ImportZip(ImportExternal):
    """Importer that streams a zip (or jar) archive from a URL and
    unpacks its entries into a Project's Git repository.
    """

    @classmethod
    def CanAccept(cls, url):
        """Return True if url names a .zip or .jar file, else False."""
        return url.endswith('.zip') or url.endswith('.jar')

    def _UnpackFiles(self):
        """Unpack every entry of the archive, then apply file modes.

        The stream is consumed strictly in order: local file records
        first, then the central directory (which carries the file
        modes), then the end-of-archive record.  The URL handle is
        always closed, even when unpacking fails.

        Raises:
          ImportError: the URL does not have a zip/jar extension.
        """
        url_fd, url = self._OpenUrl()
        try:
            if not self.__class__.CanAccept(url):
                raise ImportError('non-zip file extension: %s' % url)

            archive = _ZipFile(url_fd)
            for entry in archive.FileRecords():
                data = archive.Open(entry).read()

                # Normalize line endings only when the conversion is
                # reversible and the content does not look binary.
                if data and _SafeCRLF(data):
                    data = data.replace('\r\n', '\n')

                contents = cStringIO.StringIO(data)
                self._UnpackOneFile(entry.mode, len(data), entry.name, contents)
                archive.Close(entry)

            for entry in archive.CentralDirectory():
                self._SetFileMode(entry.name, entry.mode)

            archive.CheckTail()
        finally:
            url_fd.close()
61
62
def _SafeCRLF(data):
    """Report whether a CRLF->LF conversion of data is reasonably safe.

    A NUL byte anywhere except the final position marks the data as
    likely binary, so it is rejected.  A single trailing NUL is
    tolerated because at least one source ZIP file ships text files
    that end that way.

    Otherwise the data qualifies only if it contains at least one LF
    and every LF is immediately preceded by a CR.  In that case
    replacing each CRLF with LF can be undone by replacing each LF
    with CRLF.
    """
    first_nul = data.find('\0')
    if first_nul >= 0 and first_nul != len(data) - 1:
        return False

    # A CRLF pair cannot overlap itself, so the two counts agree exactly
    # when every LF has a CR directly in front of it.
    lf_total = data.count('\n')
    return lf_total > 0 and data.count('\r\n') == lf_total
93
class _ZipFile(object):
    """Streaming parser that walks a zip archive front to back.

    The underlying stream is only read forward (with push-back), so
    records must be consumed in the order they appear: the local file
    records (FileRecords, with Open/Close around each entry's data),
    then the central directory (CentralDirectory), then the end record
    (CheckTail).
    """

    def __init__(self, fd):
        # Every reader shares this wrapper so bytes read past the end of
        # one record can be pushed back for whoever reads next.
        self._fd = _UngetStream(fd)

    def FileRecords(self):
        """Return an iterator over the local file headers."""
        return _FileIter(self._fd)

    def CentralDirectory(self):
        """Return an iterator over the central directory headers."""
        return _CentIter(self._fd)

    def CheckTail(self):
        """Verify the next record is the end of central directory.

        Raises:
          ImportError: the stream ended early, or some other record
            type follows the central directory.
        """
        sig = struct.unpack('<I', self._ReadExact(4))[0]
        if sig != 0x06054b50:  # end of central directory
            raise ImportError('zip record %x unsupported' % sig)

    def Open(self, entry):
        """Return a file-like object yielding the entry's content.

        Raises:
          ImportError: the entry is stored uncompressed but its size is
            only given in a trailing data descriptor, so the end of its
            data cannot be located in a stream.
        """
        if entry.is_compressed:
            return _InflateStream(self._fd)
        if entry.has_trailer:
            raise ImportError('unable to extract streamed zip')
        return _FixedLengthStream(self._fd, entry.uncompressed_size)

    def Close(self, entry):
        """Skip the data descriptor that trails the entry's data, if any.

        Raises:
          ImportError: the stream ended inside the data descriptor.
        """
        if not entry.has_trailer:
            return

        sig = struct.unpack('<I', self._ReadExact(4))[0]
        if sig == 0x08074b50:
            # Not a formal type marker, but commonly seen in zips as
            # the data descriptor signature.  The three descriptor
            # fields follow it.
            self._ReadExact(12)
        else:
            # No signature: the four bytes just read were the first
            # descriptor field, so only two fields remain.
            self._ReadExact(8)

    def _ReadExact(self, size):
        """Read exactly size bytes or fail with ImportError."""
        buf = self._fd.read(size)
        if len(buf) != size:
            raise ImportError('unexpected end of zip stream')
        return buf
133
134
135class _FileIter(object):
136 def __init__(self, fd):
137 self._fd = fd
138
139 def __iter__(self):
140 return self
141
142 def next(self):
143 fd = self._fd
144
145 type_buf = fd.read(4)
146 type = struct.unpack('<I', type_buf)[0]
147
148 if type != 0x04034b50: # local file header
149 fd.unread(type_buf)
150 raise StopIteration()
151
152 rec = _FileHeader(fd.read(26))
153 rec.name = fd.read(rec.name_len)
154 fd.read(rec.extra_len)
155
156 if rec.name.endswith('/'):
157 rec.name = rec.name[:-1]
158 rec.mode = stat.S_IFDIR | 0777
159 return rec
160
161
class _FileHeader(object):
    """Fixed-size portion of a local file header (signature excluded).

    Field layout of the 26 byte record, little endian:

      0  version needed to extract  2 bytes
      1  general purpose bit flag   2 bytes
      2  compression method         2 bytes
      3  last mod file time         2 bytes
      4  last mod file date         2 bytes
      5  crc-32                     4 bytes
      6  compressed size            4 bytes
      7  uncompressed size          4 bytes
      8  file name length           2 bytes
      9  extra field length         2 bytes

    Attributes set: is_compressed, has_trailer, compressed_size,
    uncompressed_size, name_len, extra_len and mode.
    """

    def __init__(self, raw_bin):
        """Parse raw_bin, the 26 bytes following the record signature.

        Raises:
          ImportError: raw_bin is not exactly 26 bytes long (truncated
            archive), or the compression method is neither stored (0)
            nor deflated (8).
        """
        try:
            rec = struct.unpack('<5H3I2H', raw_bin)
        except struct.error:
            # Wrong length means the stream ended mid-header; report it
            # with the importer's own error type.
            raise ImportError('truncated zip local file header')

        method = rec[2]
        if method == 8:
            self.is_compressed = True
        elif method == 0:
            self.is_compressed = False
        else:
            raise ImportError('unrecognized compression format')

        # Flag bit 3: a data descriptor trails the entry's data.
        self.has_trailer = bool(rec[1] & (1 << 3))

        self.compressed_size = rec[6]
        self.uncompressed_size = rec[7]
        self.name_len = rec[8]
        self.extra_len = rec[9]
        # Default to a regular file with rw-r--r-- (0644) permissions.
        self.mode = (stat.S_IFREG
                     | stat.S_IRUSR | stat.S_IWUSR
                     | stat.S_IRGRP | stat.S_IROTH)
195
196
197class _CentIter(object):
198 def __init__(self, fd):
199 self._fd = fd
200
201 def __iter__(self):
202 return self
203
204 def next(self):
205 fd = self._fd
206
207 type_buf = fd.read(4)
208 type = struct.unpack('<I', type_buf)[0]
209
210 if type != 0x02014b50: # central directory
211 fd.unread(type_buf)
212 raise StopIteration()
213
214 rec = _CentHeader(fd.read(42))
215 rec.name = fd.read(rec.name_len)
216 fd.read(rec.extra_len)
217 fd.read(rec.comment_len)
218
219 if rec.name.endswith('/'):
220 rec.name = rec.name[:-1]
221 rec.mode = stat.S_IFDIR | 0777
222 return rec
223
224
class _CentHeader(object):
    """Fixed-size portion of a central directory header (signature
    excluded).

    Field layout of the 42 byte record, little endian:

      0  version made by                  2 bytes
      1  version needed to extract        2 bytes
      2  general purpose bit flag         2 bytes
      3  compression method               2 bytes
      4  last mod file time               2 bytes
      5  last mod file date               2 bytes
      6  crc-32                           4 bytes
      7  compressed size                  4 bytes
      8  uncompressed size                4 bytes
      9  file name length                 2 bytes
      10 extra field length               2 bytes
      11 file comment length              2 bytes
      12 disk number start                2 bytes
      13 internal file attributes         2 bytes
      14 external file attributes         4 bytes
      15 relative offset of local header  4 bytes

    Attributes set: name_len, extra_len, comment_len and mode.
    """

    def __init__(self, raw_bin):
        """Parse raw_bin, the 42 bytes following the record signature.

        Raises:
          ImportError: raw_bin is not exactly 42 bytes long (truncated
            archive).
        """
        try:
            rec = struct.unpack('<6H3I5H2I', raw_bin)
        except struct.error:
            # Wrong length means the stream ended mid-header; report it
            # with the importer's own error type.
            raise ImportError('truncated zip central directory header')

        self.name_len = rec[9]
        self.extra_len = rec[10]
        self.comment_len = rec[11]

        if (rec[0] & 0xff00) == 0x0300:  # UNIX
            # The file mode is taken from the upper 16 bits of the
            # external file attributes.
            self.mode = rec[14] >> 16
        else:
            # Fall back to a regular file with rw-r--r-- (0644).
            self.mode = (stat.S_IFREG
                         | stat.S_IRUSR | stat.S_IWUSR
                         | stat.S_IRGRP | stat.S_IROTH)
254
255
class _UngetStream(object):
    """File-like reader that supports pushing data back onto the stream.

    Data is pulled from the wrapped object in 2048 byte requests and
    passed through the _Inflate hook before being handed out.  The base
    hook returns its input unchanged.  A false result from the hook
    (empty or None) is treated as end of stream.
    """

    def __init__(self, fd):
        self._fd = fd
        self._buf = None  # data pushed back or fetched but not yet returned

    def read(self, size = -1):
        """Read up to size bytes, or everything left if size is negative.

        Fewer bytes than requested (possibly none) are returned once the
        stream is exhausted.
        """
        pieces = []
        try:
            if size < 0:
                # Keep draining until _ReadChunk signals end of stream.
                while True:
                    self._ReadChunk(pieces, 2048)
            else:
                self._ReadChunk(pieces, size)
        except EOFError:
            pass

        # Avoid a copy in the common single-piece case.
        if len(pieces) == 1:
            return pieces[0]
        return ''.join(pieces)

    def unread(self, buf):
        """Push buf back so it is returned ahead of any pending data."""
        pending = self._buf
        if pending:
            self._buf = buf + pending
        else:
            self._buf = buf

    def _ReadChunk(self, r, size):
        """Append up to size bytes to the list r.

        Raises:
          EOFError: the stream ran dry before size bytes were gathered;
            whatever was gathered has already been appended to r.
        """
        pending = self._buf
        try:
            while size > 0:
                if not pending:
                    pending = self._Inflate(self._fd.read(2048))
                    if not pending:
                        raise EOFError()

                take = min(size, len(pending))
                r.append(pending[:take])
                pending = pending[take:]
                size -= take
        finally:
            # Keep the unconsumed remainder even when leaving via EOFError.
            self._buf = pending

    def _Inflate(self, b):
        """Transform freshly read data; the base class is a pass-through."""
        return b
304
305
class _FixedLengthStream(_UngetStream):
    """File-like reader limited to a fixed number of bytes.

    Anything fetched from the wrapped stream beyond the limit is pushed
    back onto it, so the wrapped stream must support unread().
    """

    def __init__(self, fd, have):
        _UngetStream.__init__(self, fd)
        self._have = have  # bytes still owed to the reader

    def _Inflate(self, b):
        """Clip b to the remaining byte budget.

        Returns None, after returning b to the wrapped stream, once the
        budget is used up; the base class treats that as end of stream.
        """
        remaining = self._have
        if remaining == 0:
            self._fd.unread(b)
            return None

        if len(b) > remaining:
            # Hand the surplus back for whoever reads the stream next.
            self._fd.unread(b[remaining:])
            b = b[:remaining]
        self._have = remaining - len(b)
        return b
324
325
class _InflateStream(_UngetStream):
    """File-like reader that inflates data as it is read.

    Bytes that follow the end of the compressed data are pushed back
    onto the wrapped stream, so the wrapped stream must support
    unread().
    """

    def __init__(self, fd):
        _UngetStream.__init__(self, fd)
        # Negative window bits: raw deflate data with no zlib header.
        self._z = zlib.decompressobj(-zlib.MAX_WBITS)

    def _Inflate(self, b):
        """Decompress b, returning leftover input to the wrapped stream.

        Once the decompressor has been dropped, all input is handed
        straight back and None is returned, which the base class treats
        as end of stream.
        """
        inflater = self._z
        if not inflater:
            self._fd.unread(b)
            return None

        inflated = inflater.decompress(b)
        if inflater.unconsumed_tail != '':
            self._fd.unread(inflater.unconsumed_tail)
        elif inflater.unused_data != '':
            # Input beyond the end of the compressed data belongs to the
            # next record; return it and retire the decompressor.
            self._fd.unread(inflater.unused_data)
            self._z = None
        return inflated
345 return b