// Cont is the rule container used below: each rule is matched on `first`
// and substituted with `second`.
28 #ifdef SPELLCHKVALIDATE 54 typedef std::vector< std::pair<std::string,std::string> > Cont;
// Applies the prefix, infix and suffix rules to *name in place.
// NOTE(review): several numbered lines (closing braces, any updates of
// `corrections`, the return) are missing from this listing, so only the
// visible statements are described.
67 size_t corrections = 0;
68 Cont::const_iterator it;
// Prefix rules: when *name begins with the rule text, the leading part is
// swapped for the replacement.
70 for (it = prefix_.begin(); it != prefix_.end(); it++) {
71 const std::string& s = it->first;
72 if (name->compare(0, s.length(), s) == 0) {
74 name->replace(0, s.length(), it->second);
// Infix rules: every occurrence of the rule text is replaced.
79 for (it = infix_.begin(); it != infix_.end(); it++) {
80 const std::string& s = it->first;
81 size_t pos = name->find(s);
82 while (pos != std::string::npos) {
84 name->replace(pos, s.length(), it->second);
// Resume searching just past the inserted replacement so the new text is
// not scanned again.
85 pos = name->find(s, pos + it->second.length());
// Suffix rules: when *name ends with the rule text, the tail is replaced.
89 for (it = suffix_.begin(); it != suffix_.end(); it++) {
90 const std::string& s = it->first;
// A rule longer than the name cannot match; skipping it also keeps the
// unsigned subtraction below from wrapping around.
91 if (name->length() < s.length())
continue;
92 size_t pos = name->length() - s.length();
93 if (name->compare(pos, s.length(), s) == 0) {
95 name->replace(pos, s.length(), it->second);
// Parses one rule from the text s and appends it to v as a pair of strings.
// The two strings are located through the positions of the '"' characters
// found in s.
// NOTE(review): validation of the number of quotes, other index adjustments
// and the return statements are on lines missing from this listing.
116 errorT add(Cont& v,
const char* s) {
118 std::vector<size_t> parse;
// Record the offset of every '"' character up to the terminating NUL.
119 for (
size_t i=0; *(s+i) != 0; i++) {
120 if (*(s+i) ==
'"') parse.push_back(i);
// Turn the 2nd and 4th offsets into distances from the 1st and 3rd; they
// are used as character counts when the strings are built below.
124 parse[1] -= parse[0];
127 parse[3] -= parse[2];
128 v.push_back(std::make_pair(
129 std::string(s + parse[0], parse[1]),
130 std::string(s + parse[2], parse[3])
// Rating entries as pairs: getElo() compares `first` with a year and returns
// `second` as the rating. Several entries may share the same year.
144 std::vector< std::pair<uint16_t, eloT> > elo_;
// Declaration only; the definition is outside this listing.
147 void AddEloData(
const char* str);
// Body fragment of the rating lookup for a given date.
// NOTE(review): the declarations of `year`, `month` and `idx` and some
// branch lines are missing from this listing.
//
// [itBegin, itEnd) is the run of consecutive entries whose year equals
// `year`: itBegin is the first such entry, itEnd the first later entry with
// a different year.
151 auto itBegin = std::find_if(elo_.begin(), elo_.end(),
152 [&](
const std::pair<uint16_t, eloT>& e) {
153 return e.first == year;
155 auto itEnd = std::find_if(itBegin, elo_.end(),
156 [&](
const std::pair<uint16_t, eloT>& e) {
157 return e.first != year;
// n = number of entries stored for that year; none means "no rating" (0).
160 size_t n = std::distance(itBegin, itEnd);
161 if (n == 0)
return 0;
// A month value outside 1..12 is mapped to 0.
// NOTE(review): how a valid month is adjusted is on a line not shown here.
164 if (month == 0 || month > 12) month = 0;
// Two years have an irregular number of entries and get a dedicated
// month-to-slot mapping: 2009 with 5 entries and 2012 with 9 entries.
// For years after 2012 a month index that is not below n yields 0.
168 if (year == 2009 && n == 5) {
170 idx = (month < 6) ? month / 3 : (month - 2)/2;
172 }
else if (year == 2012 && n == 9) {
174 idx = (month < 6) ? month / 2 : month - 3;
176 }
else if (year > 2012) {
178 if (month >= n)
return 0;
// Spreads the n entries evenly over twelve months.
// NOTE(review): the branch this statement belongs to is not visible.
182 idx = month * n / 12;
// Rating stored in the selected slot of the year's run.
185 return (itBegin + idx)->second;
// Consistency check of elo_, guarded by SPELLCHKVALIDATE.
// Returns an empty string when no problem is found, otherwise a short
// description of the first problem.
188 #ifdef SPELLCHKVALIDATE 189 std::string isValid()
const {
// Entries must be in non-decreasing order of year.
190 for (
size_t i=1, n=elo_.size(); i < n; i++) {
191 if (elo_[i].first < elo_[i -1].first)
return "unsorted";
// Number of entries stored for a given year.
194 auto count = [
this](
uint year) {
195 return std::count_if(this->elo_.begin(), this->elo_.end(),
196 [&](
const std::pair<uint16_t, eloT>& e) {
return e.first == year; });
// Number of entries a year is expected to have: 1 before 1990, 2 before
// 2001, 4 before 2009, 5 for 2009, 6 for 2010-2011, 9 for 2012.
// NOTE(review): the value for 2013 and later is on a line not shown here.
199 auto expected = [](
uint year) {
200 if (year < 1990)
return 1;
201 if (year < 2001)
return 2;
202 if (year < 2009)
return 4;
203 if (year < 2010)
return 5;
204 if (year < 2012)
return 6;
205 if (year < 2013)
return 9;
// Check the years 1970..2014. Years without any entry are skipped; a
// mismatch is reported as "<year>: <found>(<expected>)".
// NOTE(review): `n` is assigned on a line missing from this listing,
// presumably from count(y) - confirm.
209 for (
uint y=1970; y<2015; y++) {
211 if (n == 0)
continue;
212 if (n != expected(y))
213 return std::to_string(y) +
": " + std::to_string(n) +
"(" +
214 std::to_string(expected(y)) +
")";
// No inconsistency found.
217 return std::string();
// May be null; the comment accessor further down substitutes "" in that case.
234 const char* comment_;
// Handed out (copied) through the optional `bio` argument of
// getPlayerInfo(). Ownership of the pointed-to text is not visible here.
235 std::vector<const char*> bio_;
// Accessors are only declared here; their definitions are outside this
// listing.
242 const char* getTitle()
const;
243 const char* getLastCountry()
const;
244 dateT getBirthdate()
const;
245 dateT getDeathdate()
const;
246 eloT getPeakRating()
const;
// Never yields a null pointer: a missing comment is returned as "".
248 return (comment_ != 0) ? comment_ :
"";
// Pairs a lookup key (alias) with an index; the index is used elsewhere in
// this file to address names_ and the player arrays.
265 Idx(
const std::string& a, int32_t i) : alias(a), idx(i) {}
// Ordering is by alias only; idx does not take part in the comparison.
266 bool operator<(
const Idx& b)
const {
return alias < b.alias; }
// Compares an entry directly with a plain string key, which is what
// std::lower_bound needs when it is called with a std::string value.
267 bool operator<(
const std::string& b)
const {
return alias < b; }
// Read-only iterator over a vector of index entries.
269 typedef std::vector<Idx>::const_iterator IdxIt;
// Both arrays are addressed with the idx of a player lookup result.
275 std::vector<PlayerInfo> pInfo_;
276 std::vector<PlayerElo> pElo_;
// Released with free() below.
// NOTE(review): the allocation site is not visible - presumably a
// malloc-family call; confirm before changing how it is allocated.
277 char* staticStrings_;
283 free(staticStrings_);
// Factory: reads `filename` into a SpellChecker and returns the status of
// read() together with the object pointer.
// NOTE(review): how `res` is allocated and what happens to it when read()
// fails are on lines missing from this listing - check the errorT before
// using the pointer, and confirm who is expected to release the object.
295 static std::pair<errorT, SpellChecker*>
Create(
const char* filename,
298 errorT err = res->read(filename, progress);
303 return std::make_pair(err, res);
// Body fragment: collects the correct names that belong to the index
// entries matching `name`.
// NOTE(review): the condition in front of the `else` (lookup for the other
// name types) and the return are on lines missing from this listing.
321 std::vector<const char*> res;
322 std::pair<IdxIt, IdxIt> it;
324 else it = idxFindPlayer(name);
// Stop at the end of the matching range or once nMaxRes results were
// collected.
325 for (; it.first != it.second && res.size() < nMaxRes; it.first++) {
326 const char* corrected = names_[nt][it.first->idx];
// std::find on const char* compares pointer values, so a name is treated as
// already present when the very same stored string was added before.
327 if (
std::find(res.begin(), res.end(), corrected) == res.end()) {
328 res.push_back(corrected);
// (Tail of the parameter list.) `bio` is optional: when it is non-null it
// receives a copy of the player's bio_ vector.
352 std::vector<const char*>* bio = 0)
const {
// NOTE(review): the handling of a failed lookup sits on lines missing from
// this listing; idxFindPlayerUnambiguous() can return an end iterator.
354 IdxIt it = idxFindPlayerUnambiguous(name);
357 if (bio != 0) *bio = pInfo_[it->idx].bio_;
// The returned pointer refers to an element stored in pInfo_.
358 return &(pInfo_[it->idx]);
// Rating lookup by player name: without rating data the result is null.
363 if (!hasEloData())
return 0;
// NOTE(review): the handling of a failed lookup is on a line not shown here.
364 IdxIt it = idxFindPlayerUnambiguous(name);
// The returned pointer refers to an element stored in pElo_.
366 return &(pElo_[it->idx]);
// True when at least one PlayerElo record is stored (presumably the body of
// hasEloData(); its header is not part of this listing).
370 return pElo_.size() != 0;
// Number of names stored for name type nt (header not part of this
// listing).
375 return names_[nt].size();
// Builds the string used as lookup key for the NUL-terminated text s.
// Characters contained in excludeChars_[nt] are skipped.
// NOTE(review): what is done with the remaining characters and the return
// statement are on lines missing from this listing.
385 std::string normalizeAndTransform(
const nameT& nt,
const char* s)
const {
387 for (
const char* i = s; *i != 0; i++) {
388 if (excludeChars_[nt].
find(*i) != std::string::npos)
continue;
// Looks up the index entries of name type nt whose alias starts with the
// normalized form of `prefix`.
// An exact alias match short-circuits and returns a one-element range.
// NOTE(review): the final return is on a line missing from this listing.
395 std::pair<IdxIt, IdxIt> idxFind(
const nameT& nt,
const char* prefix)
const {
396 std::pair<IdxIt, IdxIt> res;
397 std::string s = normalizeAndTransform(nt, prefix);
// First entry whose alias is not less than s; std::lower_bound relies on
// idx_[nt] being ordered by alias.
398 res.first = std::lower_bound(idx_[nt].begin(), idx_[nt].end(), s);
// Advance the end of the range while the alias still begins with s.
399 for (res.second = res.first; res.second != idx_[nt].end(); res.second++) {
400 if (res.second->alias.compare(0, s.length(), s) != 0)
break;
401 if (res.second->alias == s)
return std::make_pair(res.second, res.second +1);
// Player-name lookup: first tries `prefix` as given.
// NOTE(review): how the reordered name built below is used and the return
// are on lines missing from this listing.
406 std::pair<IdxIt, IdxIt> idxFindPlayer(
const char* prefix)
const {
407 std::pair<IdxIt, IdxIt> res = idxFind(
NAME_PLAYER, prefix);
// Nothing found: build a reordered form of the name.
408 if (res.first == res.second) {
413 std::string s = prefix;
414 size_t pos = s.rfind(
' ');
415 if (pos != std::string::npos) {
// `inv` is the part from the last space onward (space included) followed
// by everything that preceded that space.
416 std::string inv = s.substr(pos);
417 inv.append(s, 0, pos);
// Player lookup that yields a single index entry.
// Returns idx_[NAME_PLAYER].end() when nothing matches.
// NOTE(review): the result of the comparison in the loop and the final
// return are on lines missing from this listing.
424 IdxIt idxFindPlayerUnambiguous(
const char*
name)
const {
425 std::pair<IdxIt, IdxIt> it = idxFindPlayer(name);
426 if (it.first == it.second)
return idx_[
NAME_PLAYER].end();
// Compare the idx of every match with the idx of the first match.
428 for (IdxIt i = it.first; i != it.second; i++) {
429 if (i->idx != it.first->idx)
// Variant built when SPELLCHKVALIDATE is not defined: every hook below has
// an empty body.
436 #ifndef SPELLCHKVALIDATE 437 class SpellChkValidate {
440 void ignoredLine(
const char*) {}
441 void idxDuplicates(
const nameT&) {}
442 void checkEloData() {}
// Validating variant: writes its findings to the stream f_.
// NOTE(review): member declarations, closing braces and some statements are
// on lines missing from this listing.
445 class SpellChkValidate {
// Opens the report file, named after the spelling file with ".validate"
// appended, and keeps a reference to the checker being inspected.
450 SpellChkValidate(
const char* spellfile,
const SpellChecker& sp) : spell_(sp) {
451 f_.open(spellfile + std::string(
".validate"));
// Writes a heading followed by the line itself.
453 void ignoredLine(
const char* line) {
454 f_ <<
"Ignored line:" << std::endl;
455 f_ << line << std::endl;
// Predicate for std::adjacent_find: two entries carry the same alias.
458 static bool cmpIdxAlias(
const Idx& a,
const Idx& b) {
459 return a.alias == b.alias;
// Reports index entries of name type nt that share an alias.
461 void idxDuplicates(
const nameT& nt) {
462 IdxIt it = spell_.idx_[nt].begin();
463 IdxIt it_end = spell_.idx_[nt].end();
// Next pair of neighbouring entries with equal alias; none left means done.
// NOTE(review): the enclosing loop statement, if any, is not visible.
465 it = std::adjacent_find(it, it_end, cmpIdxAlias);
466 if (it == it_end)
return;
// End of the run of entries that do not compare greater than *it
// (comparison by alias via Idx::operator<).
468 IdxIt it_endDuplicates = std::upper_bound(it, it_end, *it);
469 f_ <<
"Duplicate hash: " << it->alias << std::endl;
// One report line per entry of the run: the stored name and its index.
470 for(; it != it_endDuplicates; it++) {
471 f_ << spell_.names_[nt][it->idx];
472 f_ <<
" - Idx:" << it->idx << std::endl;
// Runs isValid() on every PlayerElo record and writes the returned text.
// NOTE(review): the condition guarding the output and the rest of the
// report line are on lines missing from this listing.
477 void checkEloData() {
478 for (
size_t i=0, n = spell_.pElo_.size(); i < n; i++) {
479 std::string s = spell_.pElo_[i].isValid();
481 f_ <<
"Elo error: " << s <<
" --- ";
uint date_GetYear(dateT date)
size_t normalize(std::string *name) const
errorT addPrefix(const char *s)
add*fix() - add a general correction
class PlayerElo - elo ratings of a player
class SpellChecker - name spelling
int find(const char *filename)
find() - search for a database.
errorT addInfix(const char *s)
eloT getElo(dateT date) const
size_t numCorrectNames(const nameT &nt) const
const NameNormalizer & getGeneralCorrections(const nameT &nt) const
const errorT ERROR_CorruptData
const PlayerElo * getPlayerElo(const char *name) const
std::vector< const char * > find(const nameT &nt, const char *name, uint nMaxRes=10) const
class NameNormalizer - apply general corrections to a name
const PlayerInfo * getPlayerInfo(const char *name, std::vector< const char *> *bio=0) const
class SpellChkLoader - load data into a SpellChecker object
const char * GetComment() const
PlayerInfo(const char *s)
errorT addSuffix(const char *s)
uint date_GetMonth(dateT date)
static std::pair< errorT, SpellChecker * > Create(const char *filename, const Progress &progress)
Create() - Create a new SpellChecker object.
class PlayerInfo - player information