Scid  4.6.5
filter.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////
2 //
3 // FILE: filter.cpp
4 // Filter and CompressedFilter Classes
5 //
6 // Part of: Scid (Shane's Chess Information Database)
7 // Version: 1.9
8 //
9 // Notice: Copyright (c) 2000 Shane Hudson. All rights reserved.
10 //
11 // Author: Shane Hudson (sgh@users.sourceforge.net)
12 //
13 //////////////////////////////////////////////////////////////////////
14 
15 #include "filter.h"
16 #include <cstring>
17 
18 
19 //////////////////////////////////////////////////////////////////////
20 //
21 // CompressedFilter methods
22 
23 
24 static uint
25 packBytemap (const byte * inBuffer, byte * outBuffer, uint inLength);
26 
27 static errorT
28 unpackBytemap (const byte * inBuffer, byte * outBuffer,
29  uint inLength, uint outLength);
30 
31 // OVERFLOW_BYTES:
32 // The maximum length that the output buffer could exceed the input
33 // buffer by when compressing. Since a long run length can take six
34 // bytes and the control byte could be encoded in the same step,
35 // seven bytes is sufficient -- so use 8 for nice alignment.
36 //
37 const uint OVERFLOW_BYTES = 8;
38 
39 
40 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
41 // CompressedFilter::Verify():
42 // Return OK only if the compressed filter is identical to
43 // the regular filter passed as the parameter.
44 //
45 errorT
47 {
48  if (CFilterSize != filter->Size()) { return ERROR_Corrupt; }
49 
50  // Decompress the compressed block and compare with the original:
51  byte * tempBuffer = new byte [CFilterSize];
52  const byte * filterData = filter->Data;
53 
54  if (unpackBytemap (CompressedData, tempBuffer,
55  CompressedLength, CFilterSize) != OK) {
56  delete[] tempBuffer;
57  return ERROR_Corrupt;
58  }
59  for (uint i=0; i < CFilterSize; i++) {
60  if (tempBuffer[i] != filterData[i]) {
61  delete[] tempBuffer;
62  return ERROR_Corrupt;
63  }
64  }
65  delete[] tempBuffer;
66 
67  return OK;
68 }
69 
70 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
71 // CompressedFilter::CompressFrom():
72 // Sets the compressed filter to be the compressed representation
73 // of the supplied filter.
74 //
75 void
77 {
78  Clear();
79 
80  CFilterSize = filter->Size();
81  CFilterCount = filter->Count();
82  if(filter->Data == NULL) {
83  CompressedLength = 0;
84  CompressedData = NULL;
85  return;
86  }
87  byte * tempBuf = new byte [CFilterSize + OVERFLOW_BYTES];
88  CompressedLength = packBytemap (filter->Data, tempBuf, CFilterSize);
89  CompressedData = new byte [CompressedLength];
90  std::memcpy (CompressedData, tempBuf, CompressedLength);
91  delete[] tempBuf;
92 
93  // Assert that the compressed filter decompresses identical to the
94  // original, is assertions are being tested:
95  ASSERT (Verify (filter) == OK);
96 
97  return;
98 }
99 
100 
101 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
102 // CompressedFilter::UncompressTo():
103 // Sets the supplied filter to contain the uncompressed data
104 // stored in this compressed filter.
105 //
106 errorT
108 {
109  // The filter and compressed filter MUST be of the same size:
110  if (CFilterSize != filter->Size()) { return ERROR_Corrupt; }
111  if (CompressedLength == 0) {
112  filter->Init(CFilterSize);
113  return OK;
114  }
115 
116  byte * tempBuffer = new byte [CFilterSize];
117  if (unpackBytemap (CompressedData, tempBuffer,
118  CompressedLength, CFilterSize) != OK) {
119  delete[] tempBuffer;
120  return ERROR_Corrupt;
121  }
122  for (uint index=0; index < CFilterSize; index++) {
123  filter->Set (index, tempBuffer[index]);
124  }
125  delete[] tempBuffer;
126  return OK;
127 }
128 
129 
130 const byte FLAG_Packed = 1; // Indicates buffer is stored packed.
131 const byte FLAG_Copied = 0; // Indicates buffer is stored uncompressed.
132 
137 
139 
140 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
141 // packBytemap():
142 // Compresses the contents of inBuffer to outBuffer using a tailored
143 // run-length encoding and byte packing algorithm.
144 //
145 // The compression algorithm assumes:
146 // - that the byte value 0 is very common;
147 // - that the input buffer usually contains just two values,
148 // which is typically the case for tree filters.
149 //
150 // At each step through the algorithm, one of the following is coded:
151 // - a run length of 9 or more of the same value is coded in 18
152 // bits (or 50 bits if the length is >= 255); or
153 // - a byte with value zero is coded in two bits; or
154 // - a byte with nonzero value the same as the last nonzero byte
155 // (excluding run lengths) is coded in two bits; or
156 // - a byte with nonzero value different to the last nonzero byte
157 // is coded in 10 bits.
158 //
159 // The length of the output buffer is returned. It will never be
160 // larger than (inLength + 1), but up to (inLength + OVERFLOW_BYTES)
161 // bytes of outBuffer could be used temporarily before the length
162 // is checked, so outBuffer must be at least (inLength + OVERFLOW_BYTES)
163 // bytes long for safety.
164 //
165 static uint
166 packBytemap (const byte * inBuffer, byte * outBuffer, uint inLength)
167 {
168  ASSERT (inBuffer != NULL && outBuffer != NULL);
169 
170  byte prevLiteral = 0;
171  const byte * inPtr = inBuffer;
172  byte * outPtr = outBuffer + 2;
173  byte * controlPtr = outBuffer + 1;
174  const byte * endPtr = inBuffer + inLength;
175 
176  uint outBytes = 2;
177  uint controlData = 0;
178  uint controlBits = 8;
179 
180  uint stats[4] = {0, 0, 0, 0};
181 
182 #define ENCODE_CONTROL_BITS(bits) \
183  ASSERT (bits >= 0 && bits <= 3); \
184  controlData >>= 2; \
185  controlData |= (bits << 6); \
186  stats[bits]++; \
187  ASSERT (controlBits >= 2); \
188  controlBits -= 2; \
189  if (controlBits == 0) { \
190  *controlPtr = controlData; \
191  controlPtr = outPtr++; \
192  outBytes++; \
193  controlData = 0; \
194  controlBits = 8; \
195  }
196 
197  outBuffer[0] = FLAG_Packed;
198 
199  while (inPtr < endPtr && outBytes <= inLength) {
200  // Find the run length value:
201  uint rle = 1;
202  byte value = *inPtr;
203  const byte * pb = inPtr + 1;
204  while (pb < endPtr && *pb == value) {
205  rle++;
206  pb++;
207  }
208 
209  if (rle >= MIN_RLE_LENGTH) {
210  // Run length is long enough to be worth encoding as a run:
212  inPtr += rle;
213  *outPtr++ = value;
214  if (rle > 255) { // Longer run length:
215  *outPtr++ = 0;
216  *outPtr++ = (rle >> 24) & 255;
217  *outPtr++ = (rle >> 16) & 255;
218  *outPtr++ = (rle >> 8) & 255;
219  *outPtr++ = rle & 255;
220  outBytes += 6;
221  } else {
222  *outPtr++ = rle;
223  outBytes += 2;
224  }
225  } else if (value == 0) {
226  // Zero-valued literal: coded in two bits.
228  inPtr++;
229  } else if (value == prevLiteral) {
230  // Nonzero literal, same as previous: coded in two bits.
232  inPtr++;
233  } else {
234  // Nonzero literal, different to previous one: coded in 10 bits.
236  inPtr++;
237  prevLiteral = value;
238  *outPtr++ = value;
239  outBytes++;
240  }
241  }
242 
243  // Flush the control bits:
244  controlData >>= controlBits;
245  *controlPtr = controlData;
246 
247  // Switch to regular copying if necessary:
248  if (outBytes > inLength) {
249  outBuffer[0] = FLAG_Copied;
250  std::memcpy (outBuffer + 1, inBuffer, inLength);
251  return (inLength + 1);
252  }
253 
254 #ifdef COMPRESSION_STATS
255  printf ("Runs:%u ZeroLits:%u PrevLits:%u DiffLits: %u\n",
256  stats[CODE_RunLength], stats[CODE_ZeroLiteral],
257  stats[CODE_PrevLiteral], stats[CODE_NewLiteral]);
258  printf ("RLE: %u -> %u (%.2f%%)\n", inLength, outBytes,
259  (float)outBytes * 100.0 / inLength);
260 #endif
261 
262  return outBytes;
263 }
264 
265 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
266 // unpackBytemap():
267 // Decompresses the contents of inBuffer to outBuffer using the
268 // compression algorithm of packBytemap().
269 //
270 // The input AND output buffer lengths are provided, so the caller
271 // must know the lengths from a earlier call to packBytemap().
272 // The lengths are used to check for corruption.
273 //
274 // Returns OK on success, or ERROR_Corrupt if any sort of
275 // corruption in the compressed data is detected.
276 //
277 static errorT
278 unpackBytemap (const byte * inBuffer, byte * outBuffer, uint inLength,
279  uint outLength)
280 {
281  ASSERT (inBuffer != NULL && outBuffer != NULL);
282  if (inLength == 0) { return ERROR_Corrupt; }
283 
284  // Check if the buffer was copied without compression:
285 
286  if (inBuffer[0] == FLAG_Copied) {
287  // outLength MUST be one shorter than inLength:
288  if (outLength + 1 != inLength) { return ERROR_Corrupt; }
289  std::memcpy (outBuffer, inBuffer + 1, outLength);
290  return OK;
291  }
292  if (inBuffer[0] != FLAG_Packed) { return ERROR_Corrupt; }
293 
294  const byte * inPtr = inBuffer + 1;
295  int inBytesLeft = inLength - 1;
296  byte * outPtr = outBuffer;
297  int outBytesLeft = outLength;
298  uint controlData = *inPtr++;
299  uint controlBits = 8;
300  inBytesLeft--;
301  byte prevLiteral = 0;
302 
303  while (outBytesLeft > 0) {
304  byte value;
305  uint length;
306  // Read the two control bits for this literal or run length:
307  uint code = controlData & 3;
308  controlData >>= 2;
309  controlBits -= 2;
310  if (controlBits == 0) {
311  inBytesLeft--;
312  if (inBytesLeft < 0) { return ERROR_Corrupt; }
313  controlData = *inPtr++;
314  controlBits = 8;
315  }
316 
317  switch (code) {
318  case CODE_ZeroLiteral: // Literal value zero:
319  *outPtr++ = 0;
320  outBytesLeft--;
321  break;
322 
323  case CODE_PrevLiteral: // Nonzero literal same as previous:
324  *outPtr++ = prevLiteral;
325  outBytesLeft--;
326  break;
327 
328  case CODE_RunLength: // Run length encoding:
329  inBytesLeft -= 2;
330  if (inBytesLeft < 0) { return ERROR_Corrupt; }
331  value = *inPtr++;
332  length = *inPtr++;
333  if (length == 0) {
334  // Longer run length, coded in next 4 bytes:
335  inBytesLeft -= 4;
336  if (inBytesLeft < 0) { return ERROR_Corrupt; }
337  length = *inPtr++;
338  length <<= 8; length |= *inPtr++;
339  length <<= 8; length |= *inPtr++;
340  length <<= 8; length |= *inPtr++;
341  }
342  outBytesLeft -= length;
343  if (outBytesLeft < 0) { return ERROR_Corrupt; }
344  while (length--) {
345  *outPtr++ = value;
346  }
347  break;
348 
349  case CODE_NewLiteral: // Nonzero literal with different value:
350  prevLiteral = *inPtr++;
351  inBytesLeft--;
352  *outPtr++ = prevLiteral;
353  outBytesLeft--;
354  break;
355 
356  default: // UNREACHABLE!
357  ASSERT(0);
358  return ERROR_Corrupt;
359  }
360  }
361 
362  // Check the buffer lengths for corruption:
363  if (inBytesLeft != 0 || outBytesLeft != 0) {
364  return ERROR_Corrupt;
365  }
366  return OK;
367 }
368 
369 //////////////////////////////////////////////////////////////////////
370 // EOF: filter.cpp
371 //////////////////////////////////////////////////////////////////////
unsigned char byte
Definition: common.h:97
const byte FLAG_Packed
Definition: filter.cpp:130
const errorT OK
Definition: error.h:23
const uint MIN_RLE_LENGTH
Definition: filter.cpp:138
const byte FLAG_Copied
Definition: filter.cpp:131
#define ASSERT(f)
Definition: common.h:67
void Init(gamenumT size)
Definition: filter.h:101
errorT UncompressTo(Filter *filter) const
Definition: filter.cpp:107
uint Count() const
Definition: filter.h:53
const uint CODE_PrevLiteral
Definition: filter.cpp:134
const uint CODE_ZeroLiteral
Definition: filter.cpp:133
void CompressFrom(Filter *filter)
Definition: filter.cpp:76
const uint OVERFLOW_BYTES
Definition: filter.cpp:37
uint32_t uint
Definition: common.h:99
uint Size() const
Definition: filter.h:52
const uint CODE_RunLength
Definition: filter.cpp:135
statsfmt
Definition: optable.tcl:717
const uint CODE_NewLiteral
Definition: filter.cpp:136
unsigned short errorT
Definition: error.h:20
void Set(gamenumT index, byte value)
Definition: filter.h:126
errorT Verify(Filter *filter)
Definition: filter.cpp:46
const errorT ERROR_Corrupt
Definition: error.h:46
Definition: filter.h:32
#define ENCODE_CONTROL_BITS(bits)
void Clear()
Definition: filter.h:193