Scid  4.7.0
pgnparse.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2018 Fulvio Benini
3  *
4  * This file is part of SCID (Shane's Chess Information Database).
5  *
6  * SCID is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation.
9  *
10  * SCID is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with SCID. If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /** @file
21  * Implements a parser that converts PGN text into SCID's Game objects.
22  */
23 
24 #ifndef SCID_PGNPARSE_H
25 #define SCID_PGNPARSE_H
26 
27 #include "common.h"
28 #include "game.h"
29 #include "pgn_lexer.h"
30 #include <string>
31 #include <vector>
32 
33 /**
34  * This class implements a PGN "visitor" that invokes the appropriate member
35  * functions of the associated Game object for each type of PGN token.
36  * Errors are stored as <line number, error message> pairs; if the maximum
37  * number of errors is reached tokens are ignored until the end of the Game.
38  */
39 class PgnVisitor {
40  Game& game;
41  std::vector<std::pair<size_t, std::string>> errors_;
42  size_t linenum_ = 0;
43  int nErrorsAllowed_ = 2;
44 
45  using TView = std::pair<const char*, const char*>;
46  friend struct PgnParseLog;
47 
48 public:
49  explicit PgnVisitor(Game& g) : game(g) {}
50 
52  if (nErrorsAllowed_ < 0) // Skip until the end of the game
53  return true;
54 
55  return logErr("Unexpected end of input (result missing ?).");
56  }
57 
59  if (nErrorsAllowed_ < 0) // Skip until the end of the game
60  return true;
61 
62  return logErr("Unexpected end of game: PGN header '[' seen "
63  "inside game (result missing ?).");
64  }
65 
66  bool visitPGN_Comment(TView comment) {
67  if (nErrorsAllowed_ < 0) { // Skip until the end of the game
68  linenum_ += std::count(comment.first, comment.second, '\n');
69  return true;
70  }
71 
72  linenum_ += pgn::trim(comment);
73  auto& str = game.accessMoveComment();
74  auto prevSz = str.size();
75  str.append(comment.first, comment.second);
76  linenum_ += pgn::normalize(str, prevSz);
77  return true;
78  }
79 
81  ++linenum_;
82  return true;
83  }
84 
85  bool visitPGN_EPD(TView line) {
86  ASSERT(nErrorsAllowed_ >= 0);
87  std::string tmp(line.first, line.second);
88  if (game.SetStartFen(tmp.c_str()) != OK)
89  return logErr("Failed to parse EPD record: ", line);
90 
91  int spaces = 0;
92  auto opcode = std::find_if(line.first, line.second, [&](char ch) {
93  return (ch == ' ') ? spaces++ == 4 : spaces == 4;
94  });
95  return visitPGN_Comment(std::make_pair(opcode, line.second));
96  }
97 
98  bool visitPGN_Escape(TView) { return true; }
99 
100  bool visitPGN_MoveNum(TView) { return true; }
101 
102  bool visitPGN_NAG(TView token) {
103  if (nErrorsAllowed_ < 0) // Skip until the end of the game
104  return true;
105 
106  auto nag_code = game_parseNag(token);
107  if (nag_code == 0 || game.AddNag(nag_code) != OK)
108  return logErr("Invalid annotation symbol: ", token);
109 
110  return true;
111  }
112 
113  bool visitPGN_ResultFinal(char resultCh) {
114  auto result = RESULT_None;
115  switch (resultCh) {
116  case '0':
117  result = RESULT_Black;
118  break;
119  case '1':
120  result = RESULT_White;
121  break;
122  case '/':
123  result = RESULT_Draw;
124  break;
125  default:
126  ASSERT(resultCh == '*');
127  }
128 
129  auto prev_result = game.GetResult();
130  if (result != prev_result) {
131  // Use the end-of-game result instead of the header tag result
132  game.SetResult(result);
133  if (prev_result != RESULT_None && nErrorsAllowed_ >= 0)
134  logErr("Final result did not match the header tag.");
135  }
136  return false;
137  }
138 
139  bool visitPGN_SANMove(TView tok) {
140  if (nErrorsAllowed_ < 0) // Skip until the end of the game
141  return true;
142 
143  simpleMoveT sm;
144  auto err = game.GetCurrentPos()->ParseMove(&sm, tok.first, tok.second);
145  if (err != OK) {
146  if (err == ERROR_CastlingAvailability) {
147  logWarning("Warning: illegal castling ", tok);
148  } else {
149  if (game_parseNag(tok)) // may be 'D', 'N' or a weird suffix
150  return visitPGN_NAG(tok);
151 
152  return logFatalErr("Failed to parse the move: ", tok);
153  }
154  }
155  return (game.AddMove(&sm) == OK)
156  ? true
157  : logFatalErr("Failed to add the move: ", tok);
158  }
159 
160  bool visitPGN_Suffix(TView token) { return visitPGN_NAG(token); }
161 
162  bool visitPGN_TagPair(TView tag, TView value) {
163  linenum_ += std::count(value.first, value.second, '\n');
164  if (nErrorsAllowed_ < 0) // Skip until the end of the game
165  return true;
166 
167  auto tagLen = std::distance(tag.first, tag.second);
168  auto valueLen = std::distance(value.first, value.second);
169  if (tagLen == 0 || tagLen + valueLen > 240 ||
170  !parseTagPair(tag.first, tagLen, value)) // Failure
171  {
172  std::string err(tag.first, tag.second);
173  err.append(" \"");
174  err.append(value.first, value.second);
175  err.push_back('"');
176  logErr("Error parsing the tag pair: ",
177  {err.c_str(), err.c_str() + err.size()});
178  }
179  return true;
180  }
181 
182  bool visitPGN_Unknown(TView token) {
183  if (nErrorsAllowed_ < 0) // Skip until the end of the game
184  return true;
185 
186  // Accept misspelled castling moves
187  std::string tmp(token.first, token.second);
188  if (tmp == "0-0" || tmp == "00") {
189  tmp = "O-O";
190  return visitPGN_SANMove({tmp.c_str(), tmp.c_str() + 3});
191  }
192  if (tmp == "0-0-0" || tmp == "000") {
193  tmp = "O-O-O";
194  return visitPGN_SANMove({tmp.c_str(), tmp.c_str() + 5});
195  }
196 
197  return logErr("Unknown token: ", token);
198  }
199 
201  if (nErrorsAllowed_ < 0) // Skip until the end of the game
202  return true;
203 
204  if (game.AddVariation() != OK)
205  return logFatalErr("Failed to add a new variation.");
206 
207  return true;
208  }
209 
211  if (nErrorsAllowed_ < 0) // Skip until the end of the game
212  return true;
213 
214  if (game.MoveExitVariation() != OK || game.MoveForward() != OK)
215  return logFatalErr("Failed to exit from variation.");
216 
217  return true;
218  }
219 
220 private:
221  bool logWarning(const char* str1, TView str2 = {nullptr, nullptr}) {
222  errors_.emplace_back(linenum_, str1);
223  if (std::distance(str2.first, str2.second) > 200) {
224  errors_.back().second.append(str2.first, 200);
225  errors_.back().second.append("...");
226  } else {
227  errors_.back().second.append(str2.first, str2.second);
228  }
229  return true;
230  }
231 
232  bool logErr(const char* str1, TView str2 = {nullptr, nullptr}) {
233  --nErrorsAllowed_;
234  return logWarning(str1, str2);
235  }
236 
237  bool logFatalErr(const char* str1, TView str2 = {nullptr, nullptr}) {
238  nErrorsAllowed_ = 0;
239  return logErr(str1, str2);
240  }
241 
242  bool parseTagResult(TView str) {
243  auto len = std::distance(str.first, str.second);
244  if (len > 0 && *str.first == '*') {
245  game.SetResult(RESULT_None);
246  return true;
247  }
248  if (len >= 3) {
249  if (std::equal(str.first, str.first + 3, "1-0")) {
250  game.SetResult(RESULT_White);
251  return true;
252  }
253  if (std::equal(str.first, str.first + 3, "0-1")) {
254  game.SetResult(RESULT_Black);
255  return true;
256  }
257  if (std::equal(str.first, str.first + 3, "1/2")) {
258  game.SetResult(RESULT_Draw);
259  return true;
260  }
261  }
262  return logErr("Invalid Result tag: ", str);
263  }
264 
265  bool parseTagPair(const char* tag, size_t tagLen, TView value) {
266  switch (tagLen) {
267  case 3:
268  if (std::equal(tag, tag + 3, "ECO")) {
269  std::string tmp{value.first, value.second};
270  game.SetEco(eco_FromString(tmp.c_str()));
271  return true;
272  }
273  if (std::equal(tag, tag + 3, "FEN")) {
274  std::string tmp{value.first, value.second};
275  return game.SetStartFen(tmp.c_str()) == OK;
276  }
277  break;
278  case 4:
279  if (std::equal(tag, tag + 4, "Date")) {
280  game.SetDate(date_parsePGNTag(value));
281  return true;
282  }
283  break;
284  case 6:
285  if (std::equal(tag, tag + 6, "Result"))
286  return parseTagResult(value);
287  break;
288  case 7:
289  if (std::equal(tag, tag + 7, "UTCDate") &&
290  game.GetDate() == ZERO_DATE) {
291  // Add two tags: "UTCDate" and the standard "Date".
292  game.SetDate(date_parsePGNTag(value));
293  }
294  break;
295  case 9:
296  if (std::equal(tag, tag + 9, "EventDate")) {
297  game.SetEventDate(date_parsePGNTag(value));
298  return true;
299  }
300  if (std::equal(tag, tag + 9, "ScidFlags")) {
301  game.SetScidFlags(value.first,
302  std::distance(value.first, value.second));
303  return true;
304  }
305  break;
306  }
307  if (tagLen >= 8) {
308  // Look for Rating Types: only the first Rating type found for
309  // each player is added as the rating. Any extra ratings are
310  // just added as normal tags.
311  if (std::equal(tag, tag + 5, "White") && game.GetWhiteElo() == 0) {
312  auto res = game.setRating(WHITE, tag + 5, tagLen - 5, value);
313  if (res >= 0)
314  return res;
315  } else if (std::equal(tag, tag + 5, "Black") &&
316  game.GetBlackElo() == 0) {
317  auto res = game.setRating(BLACK, tag + 5, tagLen - 5, value);
318  if (res >= 0)
319  return res;
320  }
321  }
322  auto& str = game.accessTagValue(tag, tagLen);
323  str.assign(value.first, value.second);
324  linenum_ += pgn::normalize<true>(str, 0);
325  return true;
326  }
327 };
328 
329 /**
330  * Format and store errors.
331  */
332 struct PgnParseLog {
333  std::string log;
334  unsigned long long n_bytes = 0;
335  unsigned long long n_lines = 0;
336  unsigned long long n_games = 0;
337 
338  /**
339  * Format and store errors occurred while parsing a Game.
340  * It also updates the byte, line, and game counters.
341  * @returns false if part of the game was ignored, true otherwise.
342  */
343  bool logGame(size_t nBytes, const PgnVisitor& visitor) {
344  ++n_games;
345  for (auto& e : visitor.errors_) {
346  log += "(game " + std::to_string(n_games);
347  log += ", line " + std::to_string(n_lines + e.first) + ") ";
348  log += e.second;
349  log += "\n";
350  }
351  n_lines += visitor.linenum_;
352  n_bytes += nBytes;
353  if (visitor.nErrorsAllowed_ < 0) {
354  log += "(game " + std::to_string(n_games);
355  log += ", line " + std::to_string(n_lines) + ") ";
356  log += "End of game, ignored the part after the last error.\n";
357  return false;
358  }
359  return true;
360  }
361 };
362 
363 /**
364  * Convert PGN text into a SCID's Game object.
365  * @param input: the memory containing the PGN text.
366  * @param inputLen: the number of chars in @e input.
367  * @param game: the Game object where the game will be stored.
368  * The object is not automatically cleared so that moves can
369  * be added to an already existing one.
370  * @param log: stores eventual parsing error.
371  * @returns true if a game was parsed successfully (maybe with errors, but
372  * without ignoring any part), false otherwise.
373  */
374 inline bool pgnParseGame(const char* input, size_t inputLen, Game& game,
375  PgnParseLog& log) {
376  struct VisitorNoEOF : public PgnVisitor {
377  explicit VisitorNoEOF(Game& g) : PgnVisitor(g) {}
378  bool visitPGN_inputEOF() { return true; }
379  } visitor(game);
380 
381  auto parse = pgn::parse_game({input, input + inputLen}, visitor);
382  if (!log.logGame(parse.first, visitor))
383  return false;
384 
385  if (parse.first == inputLen && !parse.second &&
386  *game.GetMoveComment() == '\0')
387  return false;
388 
389  return true;
390 }
391 
392 #endif // ifndef SCID_PGNPARSE_H
bool visitPGN_TagPair(TView tag, TView value)
Definition: pgnparse.h:162
int setRating(colorT col, const char *ratingType, size_t ratingTypeLen, std::pair< const char *, const char *> rating)
Definition: game.cpp:699
bool pgnParseGame(const char *input, size_t inputLen, Game &game, PgnParseLog &log)
Convert PGN text into a SCID's Game object.
Definition: pgnparse.h:374
std::pair< std::size_t, bool > parse_game(pgn_impl::InputMemory input, TVisitor &&parser)
Read a PGN game from memory, grouping characters in tokens and dispatching them to a PGN parser...
Definition: pgn_lexer.h:371
const colorT WHITE
Definition: common.h:207
std::string & accessMoveComment()
Definition: game.h:360
bool visitPGN_EndOfLine()
Definition: pgnparse.h:80
std::string & accessTagValue(const char *tag, size_t tagLen)
Definition: game.cpp:657
const errorT OK
Definition: error.h:23
Format and store errors.
Definition: pgnparse.h:332
errorT AddVariation()
Definition: game.cpp:912
errorT SetStartFen(const char *fenStr)
Setup the start position from a FEN string and remove all the moves.
Definition: game.cpp:603
#define ASSERT(f)
Definition: common.h:59
bool visitPGN_inputEOF()
Definition: pgnparse.h:51
This class implements a PGN "visitor" that invokes the appropriate member functions of the associated...
Definition: pgnparse.h:39
const char * GetMoveComment() const
Definition: game.h:357
Split input into PGN tokens and dispatch them to a "visiting" parser.
const resultT RESULT_Black
Definition: common.h:191
void SetEventDate(dateT date)
Definition: game.h:387
const errorT ERROR_CastlingAvailability
Definition: error.h:75
bool visitPGN_VariationStart()
Definition: pgnparse.h:200
bool visitPGN_inputUnexpectedPGNHeader()
Definition: pgnparse.h:58
const colorT BLACK
Definition: common.h:208
bool visitPGN_Suffix(TView token)
Definition: pgnparse.h:160
dateT date_parsePGNTag(const char *str, size_t len)
Creates a dateT object from a PGN tag value string.
Definition: date.h:166
dateT GetDate() const
Definition: game.h:401
void SetResult(resultT res)
Definition: game.h:388
const resultT RESULT_Draw
Definition: common.h:192
bool visitPGN_Escape(TView)
Definition: pgnparse.h:98
const dateT ZERO_DATE
Definition: date.h:35
bool logGame(size_t nBytes, const PgnVisitor &visitor)
Format and store errors occurred while parsing a Game.
Definition: pgnparse.h:343
byte game_parseNag(std::pair< const char *, const char *> strview)
Definition: game.cpp:140
resultT GetResult() const
Definition: game.h:403
PgnVisitor(Game &g)
Definition: pgnparse.h:49
const resultT RESULT_White
Definition: common.h:190
void SetDate(dateT date)
Definition: game.h:386
bool visitPGN_VariationEnd()
Definition: pgnparse.h:210
bool visitPGN_SANMove(TView tok)
Definition: pgnparse.h:139
Position * GetCurrentPos()
Definition: game.h:292
errorT ParseMove(simpleMoveT *sm, const char *str)
Definition: position.cpp:2373
eloT GetWhiteElo() const
Definition: game.h:404
errorT MoveForward()
Definition: game.cpp:742
eloT GetBlackElo() const
Definition: game.h:405
bool visitPGN_MoveNum(TView)
Definition: pgnparse.h:100
errorT MoveExitVariation()
Definition: game.cpp:795
Definition: game.h:167
std::size_t trim(TView &str)
Trim leading and trailing white spaces.
Definition: pgn_lexer.h:470
errorT AddNag(byte nag)
Definition: game.cpp:367
bool visitPGN_Comment(TView comment)
Definition: pgnparse.h:66
ecoT eco_FromString(const char *ecoStr)
Definition: misc.cpp:36
void SetEco(ecoT eco)
Definition: game.h:395
bool visitPGN_EPD(TView line)
Definition: pgnparse.h:85
bool visitPGN_ResultFinal(char resultCh)
Definition: pgnparse.h:113
errorT AddMove(const simpleMoveT *sm)
Definition: game.cpp:889
const resultT RESULT_None
Definition: common.h:189
std::size_t normalize(TString &str, std::size_t pos)
Normalize white spaces and converts Latin-1 chars to UTF-8 sequences.
Definition: pgn_lexer.h:402
bool visitPGN_NAG(TView token)
Definition: pgnparse.h:102
void SetScidFlags(const char *s, size_t len)
Definition: game.h:243
bool visitPGN_Unknown(TView token)
Definition: pgnparse.h:182
std::string log
Definition: pgnparse.h:333