36
37:- module(csv,
38 [ csv//1, 39 csv//2, 40
41 csv_read_file/2, 42 csv_read_file/3, 43 csv_read_stream/3, 44
45 csv_read_file_row/3, 46 csv_read_row/3, 47 csv_options/2, 48
49 csv_write_file/2, 50 csv_write_file/3, 51 csv_write_stream/3 52 ]). 53:- use_module(library(record),[(record)/1, op(_,_,record)]). 54
55:- autoload(library(apply),[maplist/2]). 56:- use_module(library(debug),[debug/3]). 57:- autoload(library(error),[must_be/2,domain_error/2]). 58:- autoload(library(lists),[append/3]). 59:- autoload(library(option),[option/2,select_option/4]). 60:- autoload(library(pure_input),
61 [phrase_from_file/3,phrase_from_stream/2]). 62:- autoload(library(readutil),[read_line_to_codes/2]). 63:- autoload(library(dcg/basics),[string//1,eos//0]). 64
65
78
79:- predicate_options(csv//2, 2,
80 [ separator(code), 81 strip(boolean),
82 ignore_quotes(boolean),
83 convert(boolean),
84 case(oneof([down,preserve,up])),
85 functor(atom),
86 arity(-nonneg), 87 match_arity(boolean)
88 ]). 89:- predicate_options(csv_read_file/3, 3,
90 [ pass_to(csv//2, 2),
91 pass_to(phrase_from_file/3, 3)
92 ]). 93:- predicate_options(csv_read_file_row/3, 3,
94 [ line(-integer),
95 pass_to(csv//2, 2),
96 pass_to(open/4, 4)
97 ]). 98:- predicate_options(csv_write_file/3, 3,
99 [ pass_to(csv//2, 2),
100 pass_to(open/4, 4)
101 ]). 102:- predicate_options(csv_write_stream/3, 3,
103 [ pass_to(csv//2, 2)
104 ]). 105
106
107:- record
108 csv_options(separator:integer=0',,
109 strip:boolean=false,
110 ignore_quotes:boolean=false,
111 convert:boolean=true,
112 case:oneof([down,preserve,up])=preserve,
113 functor:atom=row,
114 arity:integer,
115 match_arity:boolean=true,
116 skip_header:atom). 117
118
138
139
140csv_read_file(File, Rows) :-
141 csv_read_file(File, Rows, []).
142
143csv_read_file(File, Rows, Options) :-
144 default_separator(File, Options, Options1),
145 make_csv_options(Options1, Record, RestOptions),
146 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions).
147
148
149default_separator(File, Options0, Options) :-
150 ( option(separator(_), Options0)
151 -> Options = Options0
152 ; file_name_extension(_, Ext0, File),
153 downcase_atom(Ext0, Ext),
154 ext_separator(Ext, Sep)
155 -> Options = [separator(Sep)|Options0]
156 ; Options = Options0
157 ).
158
159ext_separator(csv, 0',).
160ext_separator(tsv, 0'\t).
161
162
166
167csv_read_stream(Stream, Rows, Options) :-
168 make_csv_options(Options, Record, _),
169 phrase_from_stream(csv_roptions(Rows, Record), Stream).
170
171
220
221csv(Rows) -->
222 csv(Rows, []).
223
224csv(Rows, Options) -->
225 { make_csv_options(Options, Record, _) },
226 csv_roptions(Rows, Record).
227
228csv_roptions(Rows, Record) -->
229 { ground(Rows) },
230 !,
231 emit_csv(Rows, Record).
232csv_roptions(Rows, Record) -->
233 skip_header(Record),
234 csv_data(Rows, Record).
235
(Options) -->
237 { csv_options_skip_header(Options, CommentStart),
238 nonvar(CommentStart),
239 atom_codes(CommentStart, Codes)
240 },
241 !,
242 skip_header_lines(Codes),
243 skip_blank_lines.
244skip_header(_) -->
245 [].
246
(CommentStart) -->
248 string(CommentStart),
249 !,
250 ( string(_Comment),
251 end_of_record
252 -> skip_header_lines(CommentStart)
253 ).
254skip_header_lines(_) -->
255 [].
256
257skip_blank_lines -->
258 eos,
259 !.
260skip_blank_lines -->
261 end_of_record,
262 !,
263 skip_blank_lines.
264skip_blank_lines -->
265 [].
266
267csv_data([], _) -->
268 eos,
269 !.
270csv_data([Row|More], Options) -->
271 row(Row, Options),
272 !,
273 { debug(csv, 'Row: ~p', [Row]) },
274 csv_data(More, Options).
275
276
277row(Row, Options) -->
278 fields(Fields, Options),
279 { csv_options_functor(Options, Functor),
280 Row =.. [Functor|Fields],
281 functor(Row, _, Arity),
282 check_arity(Options, Arity)
283 }.
284
285check_arity(Options, Arity) :-
286 csv_options_arity(Options, Arity),
287 !.
288check_arity(Options, _) :-
289 csv_options_match_arity(Options, false),
290 !.
291check_arity(Options, Arity) :-
292 csv_options_arity(Options, Expected),
293 domain_error(row_arity(Expected), Arity).
294
295fields([F|T], Options) -->
296 field(F, Options),
297 ( separator(Options)
298 -> fields(T, Options)
299 ; end_of_record
300 -> { T = [] }
301 ).
302
303field(Value, Options) -->
304 "\"",
305 { csv_options_ignore_quotes(Options, false) },
306 !,
307 string_codes(Codes),
308 { make_value(Codes, Value, Options) }.
309field(Value, Options) -->
310 { csv_options_strip(Options, true) },
311 !,
312 stripped_field(Value, Options).
313field(Value, Options) -->
314 { csv_options_separator(Options, Sep) },
315 field_codes(Codes, Sep),
316 { make_value(Codes, Value, Options) }.
317
318
319stripped_field(Value, Options) -->
320 ws,
321 ( "\"",
322 { csv_options_strip(Options, false) }
323 -> string_codes(Codes),
324 ws
325 ; { csv_options_separator(Options, Sep) },
326 field_codes(Codes0, Sep),
327 { strip_trailing_ws(Codes0, Codes) }
328 ),
329 { make_value(Codes, Value, Options) }.
330
331ws --> " ", !, ws.
332ws --> "\t", !, ws.
333ws --> "".
334
335strip_trailing_ws(List, Stripped) :-
336 append(Stripped, WS, List),
337 all_ws(WS).
338
339all_ws([]).
340all_ws([32|T]) :- all_ws(T).
341all_ws([9|T]) :- all_ws(T).
342
343
348
349string_codes(List) -->
350 [H],
351 ( { H == 0'" }
352 -> ( "\""
353 -> { List = [H|T] },
354 string_codes(T)
355 ; { List = [] }
356 )
357 ; { List = [H|T] },
358 string_codes(T)
359 ).
360
361field_codes([], Sep), [Sep] --> [Sep], !.
362field_codes([], _), "\n" --> "\r\n", !.
363field_codes([], _), "\n" --> "\n", !.
364field_codes([], _), "\n" --> "\r", !.
365field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep).
366field_codes([], _) --> []. 367
372
373make_value(Codes, Value, Options) :-
374 csv_options_convert(Options, Convert),
375 csv_options_case(Options, Case),
376 make_value(Convert, Case, Codes, Value).
377
378make_value(true, preserve, Codes, Value) :-
379 !,
380 name(Value, Codes).
381make_value(true, Case, Codes, Value) :-
382 !,
383 ( number_string(Value, Codes)
384 -> true
385 ; make_value(false, Case, Codes, Value)
386 ).
387make_value(false, preserve, Codes, Value) :-
388 !,
389 atom_codes(Value, Codes).
390make_value(false, down, Codes, Value) :-
391 !,
392 string_codes(String, Codes),
393 downcase_atom(String, Value).
394make_value(false, up, Codes, Value) :-
395 string_codes(String, Codes),
396 upcase_atom(String, Value).
397
398separator(Options) -->
399 { csv_options_separator(Options, Sep) },
400 [Sep].
401
402end_of_record --> "\n". 403end_of_record --> "\r\n". 404end_of_record --> "\r". 405end_of_record --> eos. 406
407
423
424csv_read_file_row(File, Row, Options) :-
425 default_separator(File, Options, Options1),
426 make_csv_options(Options1, RecordOptions, Options2),
427 select_option(line(Line), Options2, RestOptions, _),
428 setup_call_cleanup(
429 open(File, read, Stream, RestOptions),
430 csv_read_stream_row(Stream, Row, Line, RecordOptions),
431 close(Stream)).
432
433csv_read_stream_row(Stream, Row, Line, Options) :-
434 between(1, infinite, Line),
435 ( csv_read_row(Stream, Row0, Options),
436 Row0 \== end_of_file
437 -> Row = Row0
438 ; !,
439 fail
440 ).
441
442
449
450csv_read_row(Stream, Row, _Record) :-
451 at_end_of_stream(Stream),
452 !,
453 Row = end_of_file.
454csv_read_row(Stream, Row, Record) :-
455 read_lines_to_codes(Stream, Codes, Record, even),
456 phrase(row(Row0, Record), Codes),
457 !,
458 Row = Row0.
459
460read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :-
461 read_line_to_codes(Stream, Codes0),
462 Codes0 \== end_of_file,
463 ( ( csv_options_ignore_quotes(Options, true)
464 ; check_quotes(Codes0, QuoteQuantity, even)
465 )
466 -> Codes = Codes0
467 ; append(Codes0, [0'\n|Tail], Codes),
468 read_lines_to_codes(Stream, Tail, Options, odd)
469 ).
470
471check_quotes(Codes, QuoteQuantity0, QuoteQuantity) :-
472 memberchk(0'", Codes),
473 !,
474 check_quotes_(Codes, QuoteQuantity0, QuoteQuantity).
475check_quotes(_, QuoteQuantity, QuoteQuantity).
476
477check_quotes_([], QuoteQuantity, QuoteQuantity).
478check_quotes_([C|T], QuoteQuantity0, QuoteQuantity) :-
479 ( C == 0'"
480 -> odd_even(QuoteQuantity0, QuoteQuantity1),
481 check_quotes_(T, QuoteQuantity1, QuoteQuantity)
482 ; check_quotes_(T, QuoteQuantity0, QuoteQuantity)
483 ).
484
485odd_even(odd, even).
486odd_even(even, odd).
487
494
495csv_options(Compiled, Options) :-
496 make_csv_options(Options, Compiled, _Ignored).
497
498
499 502
510
511csv_write_file(File, Data) :-
512 csv_write_file(File, Data, []).
513
514csv_write_file(File, Data, Options) :-
515 must_be(list, Data),
516 default_separator(File, Options, Options1),
517 make_csv_options(Options1, OptionsRecord, RestOptions),
518 setup_call_cleanup(
519 open(File, write, Out, RestOptions),
520 maplist(csv_write_row(Out, OptionsRecord), Data),
521 close(Out)).
522
523csv_write_row(Out, OptionsRecord, Row) :-
524 phrase(emit_row(Row, OptionsRecord), String),
525 format(Out, '~s', [String]).
526
527emit_csv([], _) --> [].
528emit_csv([H|T], Options) -->
529 emit_row(H, Options),
530 emit_csv(T, Options).
531
532emit_row(Row, Options) -->
533 { Row =.. [_|Fields] },
534 emit_fields(Fields, Options),
535 "\r\n". 536
537emit_fields([], _) -->
538 "".
539emit_fields([H|T], Options) -->
540 emit_field(H, Options),
541 ( { T == [] }
542 -> []
543 ; { csv_options_separator(Options, Sep) },
544 [Sep],
545 emit_fields(T, Options)
546 ).
547
548emit_field(H, Options) -->
549 { ( atom(H)
550 -> atom_codes(H, Codes)
551 ; string(H)
552 -> string_codes(H, Codes)
553 )
554 },
555 !,
556 ( { needs_quotes(H, Options) }
557 -> "\"", emit_string(Codes), "\""
558 ; emit_codes(Codes)
559 ).
560emit_field([], _) -->
561 !,
562 { atom_codes('[]', Codes) },
563 emit_codes(Codes).
564emit_field(H, _) -->
565 { number_codes(H,Codes) },
566 emit_codes(Codes).
567
568needs_quotes(Atom, _) :-
569 sub_atom(Atom, _, _, _, '"'),
570 !.
571needs_quotes(Atom, _) :-
572 sub_atom(Atom, _, _, _, '\n'),
573 !.
574needs_quotes(Atom, _) :-
575 sub_atom(Atom, _, _, _, '\r'),
576 !.
577needs_quotes(Atom, Options) :-
578 csv_options_separator(Options, Sep),
579 char_code(Char, Sep),
580 sub_atom(Atom, _, _, _, Char),
581 !.
582
583emit_string([]) --> "".
584emit_string([0'"|T]) --> !, "\"\"", emit_string(T).
585emit_string([H|T]) --> [H], emit_string(T).
586
587emit_codes([]) --> "".
588emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T).
589emit_codes([H|T]) --> [H], emit_codes(T).
590
591
607
608csv_write_stream(Stream, Data, Options) :-
609 must_be(list, Data),
610 make_csv_options(Options, OptionsRecord, _),
611 maplist(csv_write_row(Stream, OptionsRecord), Data)