Scid  4.6.5
namebase.cpp
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2001 Shane Hudson.
3 * Copyright (C) 2014-2016 Fulvio Benini
4 
5 * This file is part of Scid (Shane's Chess Information Database).
6 *
7 * Scid is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation.
10 *
11 * Scid is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Scid. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include "common.h"
21 #include "namebase.h"
22 #include "misc.h"
23 #include "filebuf.h"
24 #include "index.h"
25 
26 // NameBase file signature, used to identify the file format
27 const char* NameBase::NAMEBASE_MAGIC = "Scid.sn";
28 
29 // NameBase file extension
30 const char* NameBase::NAMEBASE_SUFFIX = ".sn4";
31 
32 
33 /**
34 * NameBase::clear() - clears file associations and frees memory
35 *
36 * Clears file associations and frees memory, leaving the object empty.
37 */
39 {
40  if (modified_) {
41  ASSERT(filename_.empty());
42  flush(0);
43  }
44 
45  filename_.clear();
46  for (nameT n = NAME_PLAYER; n < NUM_NAME_TYPES; n++) {
47  for (size_t i=0; i < names_[n].size(); i++) delete [] names_[n][i];
48  names_[n].resize(0);
49  idx_[n].clear();
50  }
51  eloV_.resize(0);
52 }
53 
54 /**
55  * NameBase::setFileName() - Sets the name of the associated file
56  * @filename: the filename (without extension)
57  *
58  * Sets the name of the file associated with the NameBase object.
59  * The object must be empty and not associated with another file.
60  * Return true if successful.
61  */
62 bool NameBase::setFileName(const char* filename)
63 {
64  ASSERT(filename != 0);
65 
66  if (!filename_.empty()) return false;
67  for (nameT n = NAME_PLAYER; n < NUM_NAME_TYPES; n++) {
68  if (names_[n].size() != 0 || idx_[n].size() != 0) return false;
69  }
70 
71  filename_ = filename;
72  filename_ += NAMEBASE_SUFFIX;
73  return true;
74 }
75 
76 /**
77  * NameBase::Create() - Create an empty NameBase file
78  * @filename: the filename (without extension)
79  *
80  * Create a NameBase file that contains only the header and no names.
81  * Return OK if successful.
82  */
83 errorT NameBase::Create(const char* filename)
84 {
85  if (!setFileName(filename)) return ERROR_FileInUse;
86  return WriteNameFile(0);
87 }
88 
89 /**
90  * NameBase::ReadNameFile() - Reads a NameBase file into memory.
91  * @filename: the filename (without extension) of the file to be read
92  *
93  * A NameBase file starts with an header containing:
94  * - header_magic (8 bytes): identify the file format
95  * - unused (4 bytes): obsolete timeStamp
96  * - number of NAME_PLAYER names stored in the file (3 bytes)
97  * - number of NAME_EVENT names stored in the file (3 bytes)
98  * - number of NAME_SITE names stored in the file (3 bytes)
99  * - number of NAME_ROUND names stored in the file (3 bytes)
100  * - unused (12 bytes): obsolete max frequency
101  * Names are stored using front-coding and each record is composed by:
102  * - name_id (2-3 bytes): the idx (idNumberT) stored in the Index (.si4) file
103  * - unused (1-3 bytes): obsolete frequency
104  * - length (1 byte): the total number of bytes of the name (max 255)
105  * - prefix (1 byte): the number of bytes in common with the previous name
106  * - name (0-255 bytes): the part of the name that differs from the previous one.
107  * Return OK if successful.
108  */
109 errorT
110 NameBase::ReadEntireFile (const char* filename)
111 {
112  if (!setFileName(filename)) return ERROR_FileInUse;
113  Filebuf file;
114  if (file.Open(filename_.c_str(), FMODE_ReadOnly) != OK) return ERROR_FileOpen;
115 
116  char Header_magic[9] = {0}; // magic identifier must be "Scid.sn"
117  file.sgetn(Header_magic, 8);
118  if (strcmp (Header_magic, NAMEBASE_MAGIC) != 0) return ERROR_BadMagic;
119 
120  // *** Compatibility ***
121  // Even if timeStamp is not used we still need to read the bytes
122  file.ReadFourBytes();
123  // ***
124 
125  idNumberT Header_numNames[NUM_NAME_TYPES];
126  Header_numNames[NAME_PLAYER] = file.ReadThreeBytes();
127  Header_numNames[NAME_EVENT] = file.ReadThreeBytes();
128  Header_numNames[NAME_SITE] = file.ReadThreeBytes();
129  Header_numNames[NAME_ROUND] = file.ReadThreeBytes();
130 
131  // *** Compatibility ***
132  // Even if frequency is no longer used we still need to read the bytes
133  uint obsolete_maxFreq[NUM_NAME_TYPES];
134  obsolete_maxFreq[NAME_PLAYER] = file.ReadThreeBytes();
135  obsolete_maxFreq[NAME_EVENT] = file.ReadThreeBytes();
136  obsolete_maxFreq[NAME_SITE] = file.ReadThreeBytes();
137  obsolete_maxFreq[NAME_ROUND] = file.ReadThreeBytes();
138  // ***
139 
140  eloV_.resize(Header_numNames[NAME_PLAYER], 0);
141  for (nameT nt = NAME_PLAYER; nt < NUM_NAME_TYPES; nt++) {
142  names_[nt].resize(Header_numNames[nt], 0);
143  idNumberT id;
144  std::string prevName;
145  for (idNumberT i = 0; i < Header_numNames[nt]; i++) {
146  if (Header_numNames[nt] >= 65536) {
147  id = file.ReadThreeBytes();
148  } else {
149  id = file.ReadTwoBytes();
150  }
151 
152  // *** Compatibility ***
153  // Even if frequency is no longer used we still need to read the bytes
154  // Frequencies can be stored in 1, 2 or 3 bytes:
155  if (obsolete_maxFreq[nt] >= 65536) {
156  file.ReadThreeBytes();
157  } else if (obsolete_maxFreq[nt] >= 256) {
158  file.ReadTwoBytes();
159  } else { // Frequencies all <= 255: fit in one byte
160  file.ReadOneByte();
161  }
162  // ***
163 
164  // Read the name string.
165  // All strings EXCEPT the first are front-coded.
166  uint length = file.ReadOneByte();
167  uint prefix = (i > 0) ? file.ReadOneByte() : 0;
168  char* name = new char[length +1];
169  if (prefix > length || prefix != prevName.copy(name, prefix)) {
170  delete[] name;
171  return ERROR_Corrupt;
172  }
173 
174  std::streamsize extra_chars = length - prefix;
175  if (extra_chars != file.sgetn(name + prefix, extra_chars)) {
176  delete[] name;
177  return ERROR_FileRead;
178  }
179  name[length] = 0;
180  prevName = name;
181 
182  if (id < Header_numNames[nt] && names_[nt][id] == 0) {
183  names_[nt][id] = name;
184  idx_[nt].insert(idx_[nt].end(), std::make_pair(name, id));
185  } else {
186  delete[] name;
187  return ERROR_Corrupt;
188  }
189  }
190 
191  if (idx_[nt].size() != names_[nt].size()) return ERROR_Corrupt;
192  }
193 
194  return OK;
195 }
196 
197 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
198 // NameBase::WriteNameFile(): Write the entire in-memory index to disk.
199 // For each nametype, names are written in alphabetical order and
200 // the strings are front-coded to save space.
201 //
202 errorT
203 NameBase::WriteNameFile(const Index* idx)
204 {
205  for (nameT nt = NAME_PLAYER; nt < NUM_NAME_TYPES; nt++) {
206  if (idx_[nt].size() != names_[nt].size()) return ERROR_Corrupt;
207  }
208 
209  std::vector<int> freq[NUM_NAME_TYPES];
210  if (idx != 0) idx->calcNameFreq(*this, freq);
211 
212  Filebuf file;
213  if (file.Open(filename_.c_str(), FMODE_WriteOnly) != OK) return ERROR_FileOpen;
214 
215  file.sputn(NAMEBASE_MAGIC, 8);
216 
217  // *** Compatibility ***
218  // Even if timeStamp is not used we still need to write the bytes
219  file.WriteFourBytes(0);
220  // ***
221 
222  file.WriteThreeBytes(names_[NAME_PLAYER].size());
223  file.WriteThreeBytes(names_[NAME_EVENT].size());
224  file.WriteThreeBytes(names_[NAME_SITE].size());
225  file.WriteThreeBytes(names_[NAME_ROUND].size());
226 
227  // *** Compatibility ***
228  // even if maxFrequency is no longer used we still need to write these bytes
229  int maxFreq[NUM_NAME_TYPES] = {0};
230  for (nameT n = NAME_PLAYER; n < NUM_NAME_TYPES; n++) {
231  for (size_t i=0; i < freq[n].size(); i++) {
232  if (freq[n][i] > maxFreq[n]) maxFreq[n] = freq[n][i];
233  }
234  file.WriteThreeBytes(maxFreq[n]);
235  }
236  // ***
237 
238  for (nameT nt = NAME_PLAYER; nt < NUM_NAME_TYPES; nt++) {
239  char prevName[1024] = {0};
240  size_t numNames = idx_[nt].size();
241  for (iterator it = idx_[nt].begin(); it != idx_[nt].end(); it++) {
242  const char* name = (*it).first;
243  idNumberT id = (*it).second;
244 
245  // write idNumber in 2 bytes if possible, otherwise 3.
246  if (numNames >= 65536) {
247  file.WriteThreeBytes(id);
248  } else {
249  file.WriteTwoBytes(id);
250  }
251 
252  // *** Compatibility ***
253  // even if frequency is no longer used we still need to write these bytes
254  if (maxFreq[nt] >= 65536) {
255  file.WriteThreeBytes(freq[nt][id]);
256  } else if (maxFreq[nt] >= 256) {
257  file.WriteTwoBytes(freq[nt][id]);
258  } else {
259  file.WriteOneByte(freq[nt][id]);
260  }
261  // ***
262 
263  ASSERT(strlen(name) < 256);
264  byte length = strlen(name);
265  file.WriteOneByte(length);
266  byte prefix = 0;
267  if (it != idx_[nt].begin()) {
268  prefix = (byte) strPrefix (name, prevName);
269  file.WriteOneByte(prefix);
270  }
271  file.sputn(name + prefix, (length - prefix));
272  strcpy(prevName, name);
273  }
274  }
275  return OK;
276 }
277 
278 /**
279  * NameBase::AddName() - Returns the idNumberT corresponding to @str
280  * @nt: a valid name type
281  * @str: the name to lookup/add
282  * @idPtr: valid pointer to the idNumberT object where the result will be stored
283  *
284  * This function ensure that a name is stored inside the NameBase object and return
285  * the corresponding idNumberT in @idPtr.
286  * Names are not duplicated inside a NameBase object, multiple calls to AddName()
287  * with equal @nt and @str will result in the same idNumberT.
288  * Return OK if successful.
289  */
290 errorT
291 NameBase::AddName (nameT nt, const char* str, idNumberT* idPtr)
292 {
293  ASSERT (IsValidNameType(nt) && str != NULL && idPtr != NULL);
294 
295  if (FindExactName(nt, str, idPtr) != OK) {
296  static const uint NAME_MAX_ID [NUM_NAME_TYPES] = {
297  1048575, /* Player names: Maximum of 2^20 -1 = 1,048,575 */
298  524287, /* Event names: Maximum of 2^19 -1 = 524,287 */
299  524287, /* Site names: Maximum of 2^19 -1 = 524,287 */
300  262143 /* Round names: Maximum of 2^18 -1 = 262,143 */
301  };
302  if (names_[nt].size() >= NAME_MAX_ID[nt]) return ERROR_Full; // Too many names already.
303 
304  const size_t strLen = strlen(str);
305  if (strLen > 255) return ERROR_NameTooLong;
306 
307  char* name = new char[strLen +1];
308  strcpy(name, str);
309  *idPtr = names_[nt].size();
310  if (!idx_[nt].insert(std::make_pair(name, *idPtr)).second) {
311  delete[] name;
312  return ERROR;
313  }
314  names_[nt].push_back(name);
315  if (nt == NAME_PLAYER) eloV_.push_back(0);
316 
317  modified_ = true;
318  }
319  return OK;
320 }
321 
322 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
323 // NameBase::FindExactName():
324 // Finds an exact full, case-sensitive name.
325 // Returns OK or ERROR_NotFound.
326 //
327 errorT
328 NameBase::FindExactName (nameT nt, const char* str, idNumberT* idPtr) const
329 {
330  ASSERT (IsValidNameType(nt) && str != NULL && idPtr != NULL);
331 
332  iterator it = idx_[nt].find(str);
333  if (it != idx_[nt].end()) {
334  *idPtr = (*it).second;
335  return OK;
336  }
337  return ERROR_NameNotFound;
338 }
339 
340 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
341 // NameBase::GetMatches(): Get the first few matches of a name prefix.
342 // The parameter maxMatches indicates the size of the idNumber array.
343 // The first maxMatches matching IDs are placed in the array.
344 // Returns: the number found, up which will be <= maxMatches.
345 //
346 uint
347 NameBase::GetFirstMatches (nameT nt, const char * str, uint maxMatches,
348  idNumberT * array) const
349 {
350  ASSERT (IsValidNameType(nt) && str != NULL);
351 
352  size_t len = strlen(str);
353  uint matches = 0;
354  iterator it = idx_[nt].lower_bound(str);
355  for (; matches < maxMatches && it != idx_[nt].end(); matches++) {
356  const char* s = (*it).first;
357  if (strlen(s) < len || strncmp(s, str, len) != 0) break;
358  array[matches] = (*it++).second;
359  }
360 
361  return matches;
362 }
363 
364 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~
365 // NameBase::NameTypeFromString
366 // Returns a valid nameT given a string, or NAME_INVALID.
367 // To match, the string should be a prefix of "player", "event",
368 // "site" or "round", or be a superstring of it, e.g. "player ...."
369 nameT
371 {
372  if (*str == '\0') { return NAME_INVALID; }
373  if (strIsAlphaPrefix (str, "player")) { return NAME_PLAYER; }
374  if (strIsAlphaPrefix (str, "event")) { return NAME_EVENT; }
375  if (strIsAlphaPrefix (str, "site")) { return NAME_SITE; }
376  if (strIsAlphaPrefix (str, "round")) { return NAME_ROUND; }
377  if (strIsAlphaPrefix ("player", str)) { return NAME_PLAYER; }
378  if (strIsAlphaPrefix ("event", str)) { return NAME_EVENT; }
379  if (strIsAlphaPrefix ("site", str)) { return NAME_SITE; }
380  if (strIsAlphaPrefix ("round", str)) { return NAME_ROUND; }
381  return NAME_INVALID;
382 }
383 
384 //////////////////////////////////////////////////////////////////////
385 // EOF: namebase.cpp
386 //////////////////////////////////////////////////////////////////////
unsigned char byte
Definition: common.h:97
const errorT ERROR_Full
Definition: error.h:49
errorT flush(const Index *idx)
Definition: namebase.h:89
int WriteFourBytes(uint32_t value)
Writes a 32-bit unsigned integer.
Definition: filebuf.h:161
void Clear()
NameBase::clear() - clears file associations and frees memory.
Definition: namebase.cpp:38
uint idNumberT
Definition: namebase.h:39
const errorT OK
Definition: error.h:23
const errorT ERROR_BadMagic
Definition: error.h:35
const errorT ERROR_FileInUse
Definition: error.h:37
errorT FindExactName(nameT nt, const char *str, idNumberT *idPtr) const
Definition: namebase.cpp:328
static nameT NameTypeFromString(const char *str)
Definition: namebase.cpp:370
int WriteOneByte(byte value)
Writes a 8-bit unsigned integer.
Definition: filebuf.h:135
#define ASSERT(f)
Definition: common.h:67
const errorT ERROR_NameTooLong
Definition: error.h:55
names
Definition: tablebase.tcl:260
const errorT ERROR_NameNotFound
Definition: error.h:50
uint16_t ReadTwoBytes()
Reads a 16-bit unsigned integer.
Definition: filebuf.h:117
int WriteThreeBytes(uint32_t value)
Writes a 24-bit unsigned integer.
Definition: filebuf.h:152
uint strPrefix(const char *s1, const char *s2)
Definition: misc.h:395
errorT ReadEntireFile(const char *filename)
NameBase::ReadNameFile() - Reads a NameBase file into memory.
Definition: namebase.cpp:110
errorT AddName(nameT nt, const char *str, idNumberT *idPtr)
NameBase::AddName() - Returns the idNumberT corresponding to : a valid name type : the name to looku...
Definition: namebase.cpp:291
int WriteTwoBytes(uint32_t value)
Writes a 16-bit unsigned integer.
Definition: filebuf.h:144
const errorT ERROR_FileRead
Definition: error.h:33
uint nameT
Definition: namebase.h:29
uint32_t ReadThreeBytes()
Reads a 24-bit unsigned integer.
Definition: filebuf.h:123
sizew
Definition: board.tcl:619
static bool IsValidNameType(nameT nt)
Definition: namebase.h:79
uint32_t uint
Definition: common.h:99
uint GetFirstMatches(nameT nt, const char *str, uint maxMatches, idNumberT *array) const
Definition: namebase.cpp:347
errorT Create(const char *filename)
NameBase::Create() - Create an empty NameBase file : the filename (without extension) ...
Definition: namebase.cpp:83
Definition: index.h:61
errorT Open(const char *filename, fileModeT fmode)
Opens a file.
Definition: filebuf.h:43
byte ReadOneByte()
Reads a 8-bit unsigned integer.
Definition: filebuf.h:111
unsigned short errorT
Definition: error.h:20
void calcNameFreq(const NameBase &nb, std::vector< int >(&resVec)[NUM_NAME_TYPES]) const
calcNameFreq() - calculate the usage of NameBase&#39;s names : the NameBase linked to this Index : an arr...
Definition: index.h:122
Adds some helper functions to std::filebuf:
Definition: filebuf.h:35
const errorT ERROR_Corrupt
Definition: error.h:46
Extends the std:filebuf class with performance improvements.
uint32_t ReadFourBytes()
Reads a 32-bit unsigned integer.
Definition: filebuf.h:129
const errorT ERROR
Definition: error.h:26
const errorT ERROR_FileOpen
Definition: error.h:31
bool strIsAlphaPrefix(const char *prefix, const char *longStr)
Definition: misc.h:447