Scid  4.7.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
filter.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////
2 //
3 // FILE: filter.cpp
4 // Filter and CompressedFilter Classes
5 //
6 // Part of: Scid (Shane's Chess Information Database)
7 // Version: 1.9
8 //
9 // Notice: Copyright (c) 2000 Shane Hudson. All rights reserved.
10 //
11 // Author: Shane Hudson (sgh@users.sourceforge.net)
12 //
13 //////////////////////////////////////////////////////////////////////
14 
15 #include "hfilter.h"
16 #include "filter.h"
17 #include <cstring>
18 
19 
20 //////////////////////////////////////////////////////////////////////
21 //
22 // CompressedFilter methods
23 
24 
25 static uint
26 packBytemap (const byte * inBuffer, byte * outBuffer, uint inLength);
27 
28 static errorT
29 unpackBytemap (const byte * inBuffer, byte * outBuffer,
30  uint inLength, uint outLength);
31 
32 // OVERFLOW_BYTES:
33 // The maximum length that the output buffer could exceed the input
34 // buffer by when compressing. Since a long run length can take six
35 // bytes and the control byte could be encoded in the same step,
36 // seven bytes is sufficient -- so use 8 for nice alignment.
37 //
38 const uint OVERFLOW_BYTES = 8;
39 
40 
41 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
42 // CompressedFilter::Verify():
43 // Return OK only if the compressed filter is identical to
44 // the regular filter passed as the parameter.
45 //
46 errorT
48 {
49  if (CFilterSize != filter->Size()) { return ERROR_Corrupt; }
50 
51  // Decompress the compressed block and compare with the original:
52  byte * tempBuffer = new byte [CFilterSize];
53  const byte * filterData = filter->data();
54 
55  if (unpackBytemap (CompressedData, tempBuffer,
56  CompressedLength, CFilterSize) != OK) {
57  delete[] tempBuffer;
58  return ERROR_Corrupt;
59  }
60  for (uint i=0; i < CFilterSize; i++) {
61  if (tempBuffer[i] != filterData[i]) {
62  delete[] tempBuffer;
63  return ERROR_Corrupt;
64  }
65  }
66  delete[] tempBuffer;
67 
68  return OK;
69 }
70 
71 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
72 // CompressedFilter::CompressFrom():
73 // Sets the compressed filter to be the compressed representation
74 // of the supplied filter.
75 //
76 void
78 {
79  Clear();
80 
81  CFilterSize = filter->Size();
82  CFilterCount = filter->Count();
83  if(filter->data() == NULL) {
84  CompressedLength = 0;
85  CompressedData = NULL;
86  return;
87  }
88  byte * tempBuf = new byte [CFilterSize + OVERFLOW_BYTES];
89  CompressedLength = packBytemap (filter->data(), tempBuf, CFilterSize);
90  CompressedData = new byte [CompressedLength];
91  std::memcpy (CompressedData, tempBuf, CompressedLength);
92  delete[] tempBuf;
93 
94  // Assert that the compressed filter decompresses identical to the
95  // original, is assertions are being tested:
96  ASSERT (Verify (filter) == OK);
97 
98  return;
99 }
100 
101 
102 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103 // CompressedFilter::UncompressTo():
104 // Sets the supplied filter to contain the uncompressed data
105 // stored in this compressed filter.
106 //
107 errorT
109 {
110  // The filter and compressed filter MUST be of the same size:
111  if (CFilterSize != filter->Size()) { return ERROR_Corrupt; }
112  if (CompressedLength == 0) {
113  filter->Init(CFilterSize);
114  return OK;
115  }
116 
117  byte * tempBuffer = new byte [CFilterSize];
118  if (unpackBytemap (CompressedData, tempBuffer,
119  CompressedLength, CFilterSize) != OK) {
120  delete[] tempBuffer;
121  return ERROR_Corrupt;
122  }
123  for (uint index=0; index < CFilterSize; index++) {
124  filter->Set (index, tempBuffer[index]);
125  }
126  delete[] tempBuffer;
127  return OK;
128 }
129 
130 
131 const byte FLAG_Packed = 1; // Indicates buffer is stored packed.
132 const byte FLAG_Copied = 0; // Indicates buffer is stored uncompressed.
133 
138 
140 
141 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
142 // packBytemap():
143 // Compresses the contents of inBuffer to outBuffer using a tailored
144 // run-length encoding and byte packing algorithm.
145 //
146 // The compression algorithm assumes:
147 // - that the byte value 0 is very common;
148 // - that the input buffer usually contains just two values,
149 // which is typically the case for tree filters.
150 //
151 // At each step through the algorithm, one of the following is coded:
152 // - a run length of 9 or more of the same value is coded in 18
153 // bits (or 50 bits if the length is >= 255); or
154 // - a byte with value zero is coded in two bits; or
155 // - a byte with nonzero value the same as the last nonzero byte
156 // (excluding run lengths) is coded in two bits; or
157 // - a byte with nonzero value different to the last nonzero byte
158 // is coded in 10 bits.
159 //
160 // The length of the output buffer is returned. It will never be
161 // larger than (inLength + 1), but up to (inLength + OVERFLOW_BYTES)
162 // bytes of outBuffer could be used temporarily before the length
163 // is checked, so outBuffer must be at least (inLength + OVERFLOW_BYTES)
164 // bytes long for safety.
165 //
166 static uint
167 packBytemap (const byte * inBuffer, byte * outBuffer, uint inLength)
168 {
169  ASSERT (inBuffer != NULL && outBuffer != NULL);
170 
171  byte prevLiteral = 0;
172  const byte * inPtr = inBuffer;
173  byte * outPtr = outBuffer + 2;
174  byte * controlPtr = outBuffer + 1;
175  const byte * endPtr = inBuffer + inLength;
176 
177  uint outBytes = 2;
178  uint controlData = 0;
179  uint controlBits = 8;
180 
181  uint stats[4] = {0, 0, 0, 0};
182 
183 #define ENCODE_CONTROL_BITS(bits) \
184  ASSERT (bits >= 0 && bits <= 3); \
185  controlData >>= 2; \
186  controlData |= (bits << 6); \
187  stats[bits]++; \
188  ASSERT (controlBits >= 2); \
189  controlBits -= 2; \
190  if (controlBits == 0) { \
191  *controlPtr = controlData; \
192  controlPtr = outPtr++; \
193  outBytes++; \
194  controlData = 0; \
195  controlBits = 8; \
196  }
197 
198  outBuffer[0] = FLAG_Packed;
199 
200  while (inPtr < endPtr && outBytes <= inLength) {
201  // Find the run length value:
202  uint rle = 1;
203  byte value = *inPtr;
204  const byte * pb = inPtr + 1;
205  while (pb < endPtr && *pb == value) {
206  rle++;
207  pb++;
208  }
209 
210  if (rle >= MIN_RLE_LENGTH) {
211  // Run length is long enough to be worth encoding as a run:
213  inPtr += rle;
214  *outPtr++ = value;
215  if (rle > 255) { // Longer run length:
216  *outPtr++ = 0;
217  *outPtr++ = (rle >> 24) & 255;
218  *outPtr++ = (rle >> 16) & 255;
219  *outPtr++ = (rle >> 8) & 255;
220  *outPtr++ = rle & 255;
221  outBytes += 6;
222  } else {
223  *outPtr++ = rle;
224  outBytes += 2;
225  }
226  } else if (value == 0) {
227  // Zero-valued literal: coded in two bits.
229  inPtr++;
230  } else if (value == prevLiteral) {
231  // Nonzero literal, same as previous: coded in two bits.
233  inPtr++;
234  } else {
235  // Nonzero literal, different to previous one: coded in 10 bits.
237  inPtr++;
238  prevLiteral = value;
239  *outPtr++ = value;
240  outBytes++;
241  }
242  }
243 
244  // Flush the control bits:
245  controlData >>= controlBits;
246  *controlPtr = controlData;
247 
248  // Switch to regular copying if necessary:
249  if (outBytes > inLength) {
250  outBuffer[0] = FLAG_Copied;
251  std::memcpy (outBuffer + 1, inBuffer, inLength);
252  return (inLength + 1);
253  }
254 
255 #ifdef COMPRESSION_STATS
256  printf ("Runs:%u ZeroLits:%u PrevLits:%u DiffLits: %u\n",
257  stats[CODE_RunLength], stats[CODE_ZeroLiteral],
258  stats[CODE_PrevLiteral], stats[CODE_NewLiteral]);
259  printf ("RLE: %u -> %u (%.2f%%)\n", inLength, outBytes,
260  (float)outBytes * 100.0 / inLength);
261 #endif
262 
263  return outBytes;
264 }
265 
266 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
267 // unpackBytemap():
268 // Decompresses the contents of inBuffer to outBuffer using the
269 // compression algorithm of packBytemap().
270 //
271 // The input AND output buffer lengths are provided, so the caller
272 // must know the lengths from a earlier call to packBytemap().
273 // The lengths are used to check for corruption.
274 //
275 // Returns OK on success, or ERROR_Corrupt if any sort of
276 // corruption in the compressed data is detected.
277 //
278 static errorT
279 unpackBytemap (const byte * inBuffer, byte * outBuffer, uint inLength,
280  uint outLength)
281 {
282  ASSERT (inBuffer != NULL && outBuffer != NULL);
283  if (inLength == 0) { return ERROR_Corrupt; }
284 
285  // Check if the buffer was copied without compression:
286 
287  if (inBuffer[0] == FLAG_Copied) {
288  // outLength MUST be one shorter than inLength:
289  if (outLength + 1 != inLength) { return ERROR_Corrupt; }
290  std::memcpy (outBuffer, inBuffer + 1, outLength);
291  return OK;
292  }
293  if (inBuffer[0] != FLAG_Packed) { return ERROR_Corrupt; }
294 
295  const byte * inPtr = inBuffer + 1;
296  int inBytesLeft = inLength - 1;
297  byte * outPtr = outBuffer;
298  int outBytesLeft = outLength;
299  uint controlData = *inPtr++;
300  uint controlBits = 8;
301  inBytesLeft--;
302  byte prevLiteral = 0;
303 
304  while (outBytesLeft > 0) {
305  byte value;
306  uint length;
307  // Read the two control bits for this literal or run length:
308  uint code = controlData & 3;
309  controlData >>= 2;
310  controlBits -= 2;
311  if (controlBits == 0) {
312  inBytesLeft--;
313  if (inBytesLeft < 0) { return ERROR_Corrupt; }
314  controlData = *inPtr++;
315  controlBits = 8;
316  }
317 
318  switch (code) {
319  case CODE_ZeroLiteral: // Literal value zero:
320  *outPtr++ = 0;
321  outBytesLeft--;
322  break;
323 
324  case CODE_PrevLiteral: // Nonzero literal same as previous:
325  *outPtr++ = prevLiteral;
326  outBytesLeft--;
327  break;
328 
329  case CODE_RunLength: // Run length encoding:
330  inBytesLeft -= 2;
331  if (inBytesLeft < 0) { return ERROR_Corrupt; }
332  value = *inPtr++;
333  length = *inPtr++;
334  if (length == 0) {
335  // Longer run length, coded in next 4 bytes:
336  inBytesLeft -= 4;
337  if (inBytesLeft < 0) { return ERROR_Corrupt; }
338  length = *inPtr++;
339  length <<= 8; length |= *inPtr++;
340  length <<= 8; length |= *inPtr++;
341  length <<= 8; length |= *inPtr++;
342  }
343  outBytesLeft -= length;
344  if (outBytesLeft < 0) { return ERROR_Corrupt; }
345  while (length--) {
346  *outPtr++ = value;
347  }
348  break;
349 
350  case CODE_NewLiteral: // Nonzero literal with different value:
351  prevLiteral = *inPtr++;
352  inBytesLeft--;
353  *outPtr++ = prevLiteral;
354  outBytesLeft--;
355  break;
356 
357  default: // UNREACHABLE!
358  ASSERT(0);
359  return ERROR_Corrupt;
360  }
361  }
362 
363  // Check the buffer lengths for corruption:
364  if (inBytesLeft != 0 || outBytesLeft != 0) {
365  return ERROR_Corrupt;
366  }
367  return OK;
368 }
369 
370 //////////////////////////////////////////////////////////////////////
371 // EOF: filter.cpp
372 //////////////////////////////////////////////////////////////////////
unsigned char byte
Definition: common.h:89
const byte FLAG_Packed
Definition: filter.cpp:131
const errorT OK
Definition: error.h:23
const uint MIN_RLE_LENGTH
Definition: filter.cpp:139
const byte FLAG_Copied
Definition: filter.cpp:132
#define ASSERT(f)
Definition: common.h:59
void Init(gamenumT size)
Definition: hfilter.h:45
byte * data()
Return a pointer to the allocated data.
Definition: hfilter.h:51
gamenumT Count() const
Return the number of nonzero values in filter.
Definition: hfilter.h:54
gamenumT Size() const
Return the number of elements in filter.
Definition: hfilter.h:57
const uint CODE_PrevLiteral
Definition: filter.cpp:135
const uint CODE_ZeroLiteral
Definition: filter.cpp:134
void CompressFrom(Filter *filter)
Definition: filter.cpp:77
const uint OVERFLOW_BYTES
Definition: filter.cpp:38
uint32_t uint
Definition: common.h:91
const uint CODE_RunLength
Definition: filter.cpp:136
statsfmt
Definition: optable.tcl:704
const uint CODE_NewLiteral
Definition: filter.cpp:137
unsigned short errorT
Definition: error.h:20
void Set(gamenumT index, byte value)
Sets the value at index.
Definition: hfilter.h:89
errorT Verify(Filter *filter)
Definition: filter.cpp:47
const errorT ERROR_Corrupt
Definition: error.h:46
Definition: hfilter.h:35
#define ENCODE_CONTROL_BITS(bits)
errorT UncompressTo(Filter *filter) const
Definition: filter.cpp:108
void Clear()
Definition: filter.h:71