30 SPELL_NEWNAME, SPELL_ALIAS, SPELL_PREFIX, SPELL_INFIX, SPELL_SUFFIX,
32 SPELL_EMPTY, SPELL_OLDBIO, SPELL_UNKNOWN
52 Parser::Parser(
char* line) {
55 extra = strchr(line,
'#');
93 type = SPELL_SECTIONSTART;
98 extra = strchr(
name,
'"');
100 char* end = strchr(++extra,
'"');
109 type = SPELL_NEWNAME;
135 SpellChecker::SpellChkValidate& validate_;
141 : sp_(sp), validate_(v), nt_(
NAME_INVALID), nameIdx_(-1) {
149 case SPELL_SECTIONSTART:
152 if (data.extra != NULL) {
153 sp_.excludeChars_[nt_] = data.extra;
155 sp_.excludeChars_[nt_].clear();
164 return nameSection(data, keepBuffer);
172 validate_.ignoredLine(data.name);
181 errorT nameSection(
const Parser&
data,
bool* keepBuffer) {
188 ASSERT(sp_.names_[nt_].size() < (1ULL << 31));
189 nameIdx_ =
static_cast<int32_t
>(sp_.names_[nt_].size());
190 sp_.names_[nt_].push_back(data.name);
192 sp_.pInfo_.push_back(data.extra);
196 if (nameIdx_ == -1) {
199 sp_.idx_[nt_].push_back(SpellChecker::Idx(
200 sp_.normalizeAndTransform(nt_, data.name),
206 return sp_.general_[nt_].
addPrefix(data.name);
208 return sp_.general_[nt_].
addInfix(data.name);
210 return sp_.general_[nt_].
addSuffix(data.name);
222 if (data.type == SPELL_BIO) {
224 sp_.pInfo_[nameIdx_].bio_.push_back(data.name);
226 ASSERT(data.type == SPELL_ELO);
228 sp_.pElo_.resize(nameIdx_ + 1);
229 sp_.pElo_[nameIdx_].AddEloData(data.name);
248 errorT SpellChecker::read(
const char* filename,
const Progress& progress)
251 ASSERT(staticStrings_ == NULL);
255 std::streamsize fileSize = -1;
256 if (file.open(filename, std::ios::in | std::ios::binary | std::ios::ate) != 0) {
257 fileSize = file.pubseekoff(0, std::ios::cur, std::ios::in);
258 file.pubseekoff(0, std::ios::beg, std::ios::in);
262 SpellChkValidate validate(filename, *
this);
265 staticStrings_ = (
char*) malloc(fileSize + 1);
266 char* bEnd = staticStrings_ + fileSize + 1;
267 char* line = staticStrings_;
270 std::streamsize report_done = 0;
272 while ((nRead = file.
getline(line, std::distance(line, bEnd))) != 0) {
273 report_done += nRead;
274 if ((++report_i % 10000) == 0) {
275 if (!progress.
report(report_done, fileSize))
280 errorT err = loader.
load(Parser(line), &keepBuffer);
281 if (err !=
OK)
return err;
283 if (keepBuffer) line += nRead;
285 if (report_done != fileSize || file.sgetc() != EOF)
return ERROR_FileRead;
288 if (pElo_.size() > 0) {
290 pElo_.resize(pInfo_.size());
291 validate.checkEloData();
295 char* shrink = (
char*) realloc(staticStrings_, 1 + std::distance(staticStrings_,line));
296 if (shrink != NULL && shrink != staticStrings_) {
298 const char* oldAddr = staticStrings_;
299 staticStrings_ = shrink;
301 for (
auto& e : (names_[i]))
302 e = staticStrings_ + std::distance(oldAddr, e);
304 for (
auto& e : pInfo_) {
305 e.comment_ = staticStrings_ + std::distance(oldAddr, e.comment_);
306 for (
auto& bio : e.bio_) {
307 bio = staticStrings_ + std::distance(oldAddr, bio);
314 std::sort(idx_[i].begin(), idx_[i].end());
315 validate.idxDuplicates(i);
353 if (! isdigit(static_cast<unsigned char>(*str))) {
break; }
356 if (*str !=
':') {
break; }
363 if (isdigit(static_cast<unsigned char>(*str))) {
366 }
else if (*str ==
'?') {
369 }
else if (*str ==
' ') {
376 elo_.push_back(std::make_pair(year, elo));
378 if (*str ==
',') { str++; }
390 static const char * titles[] = {
392 "wgm",
"wim",
"wfm",
"w",
396 const char ** titlePtr = titles;
398 const char* comment = GetComment();
399 if (*comment == 0) {
return ""; }
401 while (*titlePtr != NULL) {
402 if (
strIsPrefix (*titlePtr, comment)) {
return *titlePtr; }
417 static char country[4];
420 const char*
start = GetComment();
421 if (*start == 0) {
return ""; }
424 while (*start !=
' ' && *start != 0) { start++; }
425 while (*start ==
' ') { start++; }
427 const char * end =
start;
429 while (*end !=
' ' && *end != 0) { end++; length++; }
432 for (
int i=0; i < 3; i++) { country[i] = start[length-3 + i]; }
446 const char* s = GetComment();
447 if (*s == 0) {
return 0; }
449 while (*s !=
'[' && *s != 0) { s++; }
450 if (*s !=
'[') {
return 0; }
463 const char* s = GetComment();
467 while (*s !=
']' && *s != 0) { s++; }
471 while (*s ==
' ') { s++; }
483 const char* s = GetComment();
487 while (*s !=
']' && *s != 0) { s++; }
491 while (*s ==
' ') { s++; }
493 while (*s != 0 && *s !=
'-') { s++; }
494 while (*s ==
'-') { s++; }
const char * strTrimLeft(const char *target, const char *trimChars)
const char * getTitle() const
bool strIsPrefix(const char *prefix, const char *longStr)
errorT addPrefix(const char *s)
addPrefix() / addInfix() / addSuffix() - add a general correction
SpellChkLoader(SpellChecker &sp, SpellChecker::SpellChkValidate &v)
class SpellChecker - name spelling
errorT addInfix(const char *s)
const errorT ERROR_FileRead
const errorT ERROR_UserCancel
void strTrimRight(char *target, const char *trimChars, size_t nTrimCh)
static bool IsValidNameType(nameT nt)
Validate a nameT type.
eloT getPeakRating() const
const errorT ERROR_CorruptData
uint32_t strGetUnsigned(const char *str)
bool report(size_t done, size_t total) const
dateT getBirthdate() const
Adds some helper functions to std::filebuf:
class SpellChkLoader - load data into a SpellChecker object
Extends the std::filebuf class with performance improvements.
dateT getDeathdate() const
errorT load(const Parser &data, bool *keepBuffer)
errorT addSuffix(const char *s)
const char * getLastCountry() const
static nameT NameTypeFromString(const char *str)
Match a string to a nameT.
const errorT ERROR_FileOpen
size_t getline(char *str, size_t count)
Equivalent to std::fstream::getline, but faster (no sentry [27.7.2.1.3]).
void AddEloData(const char *str)
dateT date_EncodeFromString(const char *str)