Line data Source code
1 : /*
2 : * Copyright (C) 2018 Fulvio Benini
3 : *
4 : * This file is part of SCID (Shane's Chess Information Database).
5 : *
6 : * SCID is free software: you can redistribute it and/or modify
7 : * it under the terms of the GNU General Public License as published by
8 : * the Free Software Foundation.
9 : *
10 : * SCID is distributed in the hope that it will be useful,
11 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : * GNU General Public License for more details.
14 : *
15 : * You should have received a copy of the GNU General Public License
16 : * along with SCID. If not, see <http://www.gnu.org/licenses/>.
17 : *
18 : */
19 :
20 : /** @file
21 : * Implements a parser that converts PGN text into SCID's Game objects.
22 : */
23 :
24 : #ifndef SCID_PGNPARSE_H
25 : #define SCID_PGNPARSE_H
26 :
27 : #include "common.h"
28 : #include "game.h"
29 : #include "pgn_lexer.h"
30 : #include <string>
31 : #include <vector>
32 :
33 : /**
34 : * This class implements a PGN "visitor" that invokes the appropriate member
35 : * functions of the associated Game object for each type of PGN token.
36 : * Errors are stored as <line number, error message> pairs; if the maximum
37 : * number of errors is reached tokens are ignored until the end of the Game.
38 : */
39 2070 : class PgnVisitor {
40 : Game& game;
41 : std::vector<std::pair<size_t, std::string>> errors_;
42 : size_t linenum_ = 0;
43 : int nErrorsAllowed_ = 2;
44 :
45 : using TView = std::pair<const char*, const char*>;
46 : friend struct PgnParseLog;
47 :
48 : public:
49 2070 : explicit PgnVisitor(Game& g) : game(g) {}
50 :
51 2 : bool visitPGN_inputEOF() {
52 2 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
53 1 : return true;
54 :
55 1 : return logErr("Unexpected end of input (result missing ?).");
56 : }
57 :
58 2 : bool visitPGN_inputUnexpectedPGNHeader() {
59 2 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
60 1 : return true;
61 :
62 1 : return logErr("Unexpected end of game: PGN header '[' seen "
63 2 : "inside game (result missing ?).");
64 : }
65 :
66 224036 : bool visitPGN_Comment(TView comment) {
67 224036 : if (nErrorsAllowed_ < 0) { // Skip until the end of the game
68 0 : linenum_ += std::count(comment.first, comment.second, '\n');
69 0 : return true;
70 : }
71 :
72 224036 : linenum_ += pgn::trim(comment);
73 224036 : auto& str = game.accessMoveComment();
74 224036 : auto prevSz = str.size();
75 224036 : str.append(comment.first, comment.second);
76 224036 : linenum_ += pgn::normalize(str, prevSz);
77 224036 : return true;
78 : }
79 :
80 129965 : bool visitPGN_EndOfLine() {
81 129965 : ++linenum_;
82 129965 : return true;
83 : }
84 :
85 4 : bool visitPGN_EPD(TView line) {
86 4 : ASSERT(nErrorsAllowed_ >= 0);
87 8 : std::string tmp(line.first, line.second);
88 4 : if (game.SetStartFen(tmp.c_str()) != OK)
89 0 : return logErr("Failed to parse EPD record: ", line);
90 :
91 4 : int spaces = 0;
92 199 : auto opcode = std::find_if(line.first, line.second, [&](char ch) {
93 195 : return (ch == ' ') ? spaces++ == 4 : spaces == 4;
94 199 : });
95 4 : return visitPGN_Comment(std::make_pair(opcode, line.second));
96 : }
97 :
98 1 : bool visitPGN_Escape(TView) { return true; }
99 :
100 1158559 : bool visitPGN_MoveNum(TView) { return true; }
101 :
102 384 : bool visitPGN_NAG(TView token) {
103 384 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
104 0 : return true;
105 :
106 384 : auto nag_code = game_parseNag(token);
107 384 : if (nag_code == 0 || game.AddNag(nag_code) != OK)
108 0 : return logErr("Invalid annotation symbol: ", token);
109 :
110 384 : return true;
111 : }
112 :
113 2034 : bool visitPGN_ResultFinal(char resultCh) {
114 2034 : auto result = RESULT_None;
115 2034 : switch (resultCh) {
116 24 : case '0':
117 24 : result = RESULT_Black;
118 24 : break;
119 9 : case '1':
120 9 : result = RESULT_White;
121 9 : break;
122 0 : case '/':
123 0 : result = RESULT_Draw;
124 0 : break;
125 2001 : default:
126 2001 : ASSERT(resultCh == '*');
127 : }
128 :
129 2034 : auto prev_result = game.GetResult();
130 2034 : if (result != prev_result) {
131 : // Use the end-of-game result instead of the header tag result
132 1 : game.SetResult(result);
133 1 : if (prev_result != RESULT_None && nErrorsAllowed_ >= 0)
134 0 : logErr("Final result did not match the header tag.");
135 : }
136 2034 : return false;
137 : }
138 :
139 1565565 : bool visitPGN_SANMove(TView tok) {
140 1565565 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
141 2 : return true;
142 :
143 : simpleMoveT sm;
144 1565563 : auto err = game.GetCurrentPos()->ParseMove(&sm, tok.first, tok.second);
145 1565563 : if (err != OK) {
146 2 : if (err == ERROR_CastlingAvailability) {
147 0 : logWarning("Warning: illegal castling ", tok);
148 : } else {
149 2 : if (game_parseNag(tok)) // may be 'D', 'N' or a weird suffix
150 0 : return visitPGN_NAG(tok);
151 :
152 2 : return logFatalErr("Failed to parse the move: ", tok);
153 : }
154 : }
155 1565561 : return (game.AddMove(&sm) == OK)
156 1565561 : ? true
157 1565561 : : logFatalErr("Failed to add the move: ", tok);
158 : }
159 :
160 0 : bool visitPGN_Suffix(TView token) { return visitPGN_NAG(token); }
161 :
162 15144 : bool visitPGN_TagPair(TView tag, TView value) {
163 15144 : linenum_ += std::count(value.first, value.second, '\n');
164 15144 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
165 0 : return true;
166 :
167 15144 : auto tagLen = std::distance(tag.first, tag.second);
168 15144 : auto valueLen = std::distance(value.first, value.second);
169 30284 : if (tagLen == 0 || tagLen + valueLen > 240 ||
170 15140 : !parseTagPair(tag.first, tagLen, value)) // Failure
171 : {
172 8 : std::string err(tag.first, tag.second);
173 4 : err.append(" \"");
174 4 : err.append(value.first, value.second);
175 4 : err.push_back('"');
176 4 : logErr("Error parsing the tag pair: ",
177 8 : {err.c_str(), err.c_str() + err.size()});
178 : }
179 15144 : return true;
180 : }
181 :
182 0 : bool visitPGN_Unknown(TView token) {
183 0 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
184 0 : return true;
185 :
186 : // Accept misspelled castling moves
187 0 : std::string tmp(token.first, token.second);
188 0 : if (tmp == "0-0" || tmp == "00") {
189 0 : tmp = "O-O";
190 0 : return visitPGN_SANMove({tmp.c_str(), tmp.c_str() + 3});
191 : }
192 0 : if (tmp == "0-0-0" || tmp == "000") {
193 0 : tmp = "O-O-O";
194 0 : return visitPGN_SANMove({tmp.c_str(), tmp.c_str() + 5});
195 : }
196 :
197 0 : return logErr("Unknown token: ", token);
198 : }
199 :
200 309108 : bool visitPGN_VariationStart() {
201 309108 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
202 0 : return true;
203 :
204 309108 : if (game.AddVariation() != OK)
205 0 : return logFatalErr("Failed to add a new variation.");
206 :
207 309108 : return true;
208 : }
209 :
210 309108 : bool visitPGN_VariationEnd() {
211 309108 : if (nErrorsAllowed_ < 0) // Skip until the end of the game
212 0 : return true;
213 :
214 309108 : if (game.MoveExitVariation() != OK || game.MoveForward() != OK)
215 0 : return logFatalErr("Failed to exit from variation.");
216 :
217 309108 : return true;
218 : }
219 :
220 : private:
221 10 : bool logWarning(const char* str1, TView str2 = {nullptr, nullptr}) {
222 10 : errors_.emplace_back(linenum_, str1);
223 10 : if (std::distance(str2.first, str2.second) > 200) {
224 3 : errors_.back().second.append(str2.first, 200);
225 3 : errors_.back().second.append("...");
226 : } else {
227 7 : errors_.back().second.append(str2.first, str2.second);
228 : }
229 10 : return true;
230 : }
231 :
232 10 : bool logErr(const char* str1, TView str2 = {nullptr, nullptr}) {
233 10 : --nErrorsAllowed_;
234 10 : return logWarning(str1, str2);
235 : }
236 :
237 2 : bool logFatalErr(const char* str1, TView str2 = {nullptr, nullptr}) {
238 2 : nErrorsAllowed_ = 0;
239 2 : return logErr(str1, str2);
240 : }
241 :
242 2037 : bool parseTagResult(TView str) {
243 2037 : auto len = std::distance(str.first, str.second);
244 2037 : if (len > 0 && *str.first == '*') {
245 2003 : game.SetResult(RESULT_None);
246 2003 : return true;
247 : }
248 34 : if (len >= 3) {
249 33 : if (std::equal(str.first, str.first + 3, "1-0")) {
250 8 : game.SetResult(RESULT_White);
251 8 : return true;
252 : }
253 25 : if (std::equal(str.first, str.first + 3, "0-1")) {
254 24 : game.SetResult(RESULT_Black);
255 24 : return true;
256 : }
257 1 : if (std::equal(str.first, str.first + 3, "1/2")) {
258 0 : game.SetResult(RESULT_Draw);
259 0 : return true;
260 : }
261 : }
262 2 : return logErr("Invalid Result tag: ", str);
263 : }
264 :
265 15140 : bool parseTagPair(const char* tag, size_t tagLen, TView value) {
266 15140 : switch (tagLen) {
267 41 : case 3:
268 41 : if (std::equal(tag, tag + 3, "ECO")) {
269 72 : std::string tmp{value.first, value.second};
270 36 : game.SetEco(eco_FromString(tmp.c_str()));
271 36 : return true;
272 5 : }
273 5 : if (std::equal(tag, tag + 3, "FEN")) {
274 0 : std::string tmp{value.first, value.second};
275 0 : return game.SetStartFen(tmp.c_str()) == OK;
276 5 : }
277 5 : break;
278 4078 : case 4:
279 4078 : if (std::equal(tag, tag + 4, "Date")) {
280 2038 : game.SetDate(date_parsePGNTag(value));
281 2038 : return true;
282 : }
283 2040 : break;
284 2065 : case 6:
285 2065 : if (std::equal(tag, tag + 6, "Result"))
286 2037 : return parseTagResult(value);
287 28 : break;
288 84 : case 7:
289 112 : if (std::equal(tag, tag + 7, "UTCDate") &&
290 28 : game.GetDate() == ZERO_DATE) {
291 : // Add two tags: "UTCDate" and the standard "Date".
292 1 : game.SetDate(date_parsePGNTag(value));
293 : }
294 84 : break;
295 156 : case 9:
296 156 : if (std::equal(tag, tag + 9, "EventDate")) {
297 36 : game.SetEventDate(date_parsePGNTag(value));
298 36 : return true;
299 : }
300 120 : if (std::equal(tag, tag + 9, "ScidFlags")) {
301 0 : game.SetScidFlags(value.first,
302 0 : std::distance(value.first, value.second));
303 0 : return true;
304 : }
305 120 : break;
306 : }
307 10993 : if (tagLen >= 8) {
308 : // Look for Rating Types: only the first Rating type found for
309 : // each player is added as the rating. Any extra ratings are
310 : // just added as normal tags.
311 622 : if (std::equal(tag, tag + 5, "White") && game.GetWhiteElo() == 0) {
312 36 : auto res = game.setRating(WHITE, tag + 5, tagLen - 5, value);
313 36 : if (res >= 0)
314 36 : return res;
315 818 : } else if (std::equal(tag, tag + 5, "Black") &&
316 232 : game.GetBlackElo() == 0) {
317 36 : auto res = game.setRating(BLACK, tag + 5, tagLen - 5, value);
318 36 : if (res >= 0)
319 36 : return res;
320 : }
321 : }
322 10921 : auto& str = game.accessTagValue(tag, tagLen);
323 10921 : str.assign(value.first, value.second);
324 10921 : linenum_ += pgn::normalize<true>(str, 0);
325 10921 : return true;
326 : }
327 : };
328 :
329 : /**
330 : * Format and store errors.
331 : */
332 110 : struct PgnParseLog {
333 : std::string log;
334 : unsigned long long n_bytes = 0;
335 : unsigned long long n_lines = 0;
336 : unsigned long long n_games = 0;
337 :
338 : /**
339 : * Format and store errors occurred while parsing a Game.
340 : * It also updates the byte, line, and game counters.
341 : * @returns false if part of the game was ignored, true otherwise.
342 : */
343 2068 : bool logGame(size_t nBytes, const PgnVisitor& visitor) {
344 2068 : ++n_games;
345 2076 : for (auto& e : visitor.errors_) {
346 8 : log += "(game " + std::to_string(n_games);
347 8 : log += ", line " + std::to_string(n_lines + e.first) + ") ";
348 8 : log += e.second;
349 8 : log += "\n";
350 : }
351 2068 : n_lines += visitor.linenum_;
352 2068 : n_bytes += nBytes;
353 2068 : if (visitor.nErrorsAllowed_ < 0) {
354 1 : log += "(game " + std::to_string(n_games);
355 1 : log += ", line " + std::to_string(n_lines) + ") ";
356 1 : log += "End of game, ignored the part after the last error.\n";
357 1 : return false;
358 : }
359 2067 : return true;
360 : }
361 : };
362 :
363 : /**
364 : * Convert PGN text into a SCID's Game object.
365 : * @param input: the memory containing the PGN text.
366 : * @param inputLen: the number of chars in @e input.
367 : * @param game: the Game object where the game will be stored.
368 : * The object is not automatically cleared so that moves can
369 : * be added to an already existing one.
370 : * @param log: stores eventual parsing error.
371 : * @returns true if a game was parsed successfully (maybe with errors, but
372 : * without ignoring any part), false otherwise.
373 : */
374 45 : inline bool pgnParseGame(const char* input, size_t inputLen, Game& game,
375 : PgnParseLog& log) {
376 45 : struct VisitorNoEOF : public PgnVisitor {
377 45 : explicit VisitorNoEOF(Game& g) : PgnVisitor(g) {}
378 10 : bool visitPGN_inputEOF() { return true; }
379 90 : } visitor(game);
380 :
381 45 : auto parse = pgn::parse_game({input, input + inputLen}, visitor);
382 45 : if (!log.logGame(parse.first, visitor))
383 1 : return false;
384 :
385 45 : if (parse.first == inputLen && !parse.second &&
386 1 : *game.GetMoveComment() == '\0')
387 1 : return false;
388 :
389 43 : return true;
390 : }
391 :
392 : #endif // ifndef SCID_PGNPARSE_H
|