1/*  Part of SWI-Prolog odf-sheet pack
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org/pack/list?p=odf-sheet
    6
    7    Copyright (c) 2012-2014, VU University of Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions are
   12    met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15    notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18    notice, this list of conditions and the following disclaimer in the
   19    documentation and/or other materials provided with the distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
   22    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   23    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   24    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   27    TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   28    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   29    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   30    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   31    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   32*/
   33
   34:- module(ods_table,
   35	  [ assert_tables/2,		% ?Sheet, ?Type
   36	    data_blocks/3,		% :Sheet, ?Type, ?Blocks
   37	    assert_blocks/2,		% ?Sheet, ?Type
   38	    block_union_new_non_empty/3,% +Blocks, -Union, -NewNonEmpty
   39	    tables/3,			% ?Sheet, +Type, -Tables
   40	    (table)/2,			% +Data, -Support
   41
   42	    adjacent_objects/5,		% :Sheet, +Type, ?Obj1, ?Obj2, ?Rel
   43	    intersecting_objects/5,	% :Sheet, +Type, ?Tab1, ?Tab2, -Intersection
   44	    color_sheets/2,		% :Sheet, ?What
   45
   46	    cells_outside_tables/3	% +Sheet, +Table, -Cells
   47	  ]).   48:- use_module(recognise).   49:- use_module(datasource).   50:- use_module(sheet).   51:- use_module(data).   52:- use_module(library(lists)).   53:- use_module(library(pairs)).   54:- use_module(library(apply)).   55:- use_module(library(error)).   56:- use_module(library(clpfd), except([transpose/2])).   57:- use_module(library(ugraphs)).   58:- use_module(library(trace/pprint)).   59
   60:- meta_predicate
   61	tables(:, ?, -),
   62	assert_tables(:, ?),
   63	data_blocks(:, +, -),
   64	assert_blocks(:, ?),
   65	adjacent_objects(:, +, ?, ?, ?),
   66	intersecting_objects(:, +, ?, ?, ?),
   67	color_sheets(:, ?).

Detect tables in spreadsheets

This module provides an unstable prototype for recognising tables on sheets. A table consists of one rectangle that describes the content and zero or more adjacent rectangles that provide information about the content (i.e., headers) */

   77		 /*******************************
   78		 *	       TABLES		*
   79		 *******************************/
 assert_tables(:Sheet, ?Type) is det
Infer and assert identified tables. Creates the following facts:
   88assert_tables(Sheet, Type) :-
   89	Sheet = M:_,
   90	tables(Sheet, Type, Tables),
   91	forall(member(T, Tables),
   92	       assert_table(M:T)),
   93	(   Tables == []
   94	->  true
   95	;   assert_tables(Sheet, Type)
   96	).
 tables(?Sheet, +Type, -Tables) is det
Make an initial guess at all tables. Table is a list of table(Data, Headers,Union).
  103tables(Sheet, Type, Tables) :-
  104	findall(SheetTables,
  105		( setof(Table,
  106			Type^table_in_sheet(Sheet, Type, Table),
  107			SheetTables0),
  108		  remove_inside(SheetTables0, SheetTables)
  109		),
  110		NestedTables),
  111	append(NestedTables, Tables).
  112
  113table_in_sheet(M:Sheet, Type, table(Id,Type,DS,Headers,Union)) :-
  114	ds_sheet(DS, Sheet),
  115	cell_class(Type),
  116	unassigned_anchor(DS, Type),
  117	once((block(M:DS, Type),
  118	      table(M:DS, Headers))),
  119	ds_union([DS|Headers], Union),
  120	ds_id(DS, Id, table).
  121
  122
  123		 /*******************************
  124		 *	    SUPER BLOCKS	*
  125		 *******************************/
 data_blocks(:Sheet, +Type, -Blocks) is nondet
True when Blocks is a list of non-verlapping datasources that contains all detected blocks. This implies that we need to
Arguments:
Type- is the cell-type (string, float, ...)
  139data_blocks(Sheet, Type, Blocks) :-
  140	findall(Block,
  141		(   sheet_object(Sheet, block, Block),
  142		    object_data_type(Block, Type)
  143		),
  144		Blocks0),
  145	resolve_intersections(Blocks0, Blocks).
  146
  147resolve_intersections(Blocks0, Blocks) :-
  148	findall(B1-B2, block_intersection(Blocks0, B1, B2), Pairs),
  149	partition_graph(Pairs, Sets),
  150	maplist(block_union, Sets, Blocks).
  151resolve_intersections(Blocks0, Blocks) :-
  152	findall(i(B1,B2,Resolutions),
  153		( block_intersection(Blocks0, B1, B2),
  154		  intersection_resolutions(B1, B2, Resolutions)
  155		),
  156		Intersections),
  157	(   Intersections == []
  158	->  Blocks = Blocks0
  159	;   print_term(Intersections, [])
  160	).
 block_intersection(+Blocks:list, -Intersection) is nondet
True when Intersection describes an intersection between two datablocks and a list of possible ways to resolve this intersection.
Arguments:
Intersection- is a term i(B1,B2,Resolutions)
  170block_intersection(Blocks, B1, B2) :-
  171	member(B1, Blocks),
  172	member(B2, Blocks),
  173	B1 \== B2,
  174	object_union(B1, Union1),
  175	object_union(B2, Union2),
  176	ds_intersection(Union1, Union2, _).
  177
  178intersection_resolutions(B1, B2, Resolutions) :-
  179	findall(Resolve, resolve_intersection(B1, B2, Resolve),
  180		Resolutions).
 resolve_intersection(+B1, +B2, -Resolution) is nondet
Resolve an intersection between B1 and B2. Resolutions:
union(B1, B2, Problems)
Create a union. Problems are datasources that are included in the new union and nor part of B1, neither of B2 and are not empty.
  191resolve_intersection(B1, B2, union(B1,B2,Problems)) :-
  192	object_union(B1, Union1),
  193	object_union(B2, Union2),
  194	ds_union(Union1, Union2, Union),
  195	ds_subtract(Union1, Union, LocRest0),
  196	pairs_values(LocRest0, Rest0),
  197	maplist(ds_subtract(Union2), Rest0, NestedRests),
  198	append(NestedRests, LocRests),
  199	pairs_values(LocRests, Rests),
  200	exclude(ds_empty_cells, Rests, Problems).
  201
  202ds_empty_cells(DS) :-
  203	ds_sheet(DS, Sheet),
  204	forall(ds_inside(DS, X, Y),
  205	       cell_class(Sheet,X,Y,empty)).
 block_union(+Blocks, -Block) is det
True when Block is the union of Blocks.
To be done
- What should we do with the new parts that are included?
  214block_union([H|T], Union) :-
  215	block_union_list(T, H, Union).
  216
  217block_union_list([], Union, Union).
  218block_union_list([H|T], Union0, Union) :-
  219	block_union(H, Union0, Union1),
  220	block_union_list(T, Union1, Union).
  221
  222block_union(block(_, Type1, DS1),
  223	    block(_, Type2, DS2),
  224	    block(Id, Type, DS)) :-
  225	ds_union(DS1, DS2, DS),
  226	ds_id(DS, Id),
  227	type_union(Type1, Type2, Type).
  228
  229type_union(Type1, Type2, Type) :-
  230	(   Type1 = Type2
  231	->  Type = Type1
  232	;   Type = hybrid
  233	).
 block_union_new_non_empty(+Blocks, -Union, -NonEmptyBlocks) is det
Determine the union of Blocks and unify NonEmptyDS with a list of additional blocks that were added to Union and are not part of any block in Blocks.
  241block_union_new_non_empty(Blocks, UnionBlock, NonEmptyDS) :-
  242	block_union(Blocks, UnionBlock),
  243	object_union(UnionBlock, Union),
  244	maplist(object_union, Blocks, Parts),
  245	ds_sheet(Union, Sheet),
  246	findall(cell_range(Sheet,X,Y,X,Y),
  247		( ds_inside(Union, X, Y),
  248		  \+ ( member(Part, Parts),
  249		       ds_inside(Part, X, Y)
  250		     ),
  251		  \+ cell_class(Sheet, X, Y, empty)
  252		),
  253		NonEmptyCells),
  254	ds_join(NonEmptyCells, NonEmptyDS).
 partition_graph(+Edges, -VerticeSets) is det
Partition a graph into a set of sets of connected vertices.
  260partition_graph(Edges, VerticeSets) :-
  261	vertices_edges_to_ugraph([], Edges, Graph),
  262	partition_graph2(Graph, VerticeSets).
  263
  264partition_graph2([], []).
  265partition_graph2(Graph, [Set1|Sets]) :-
  266	Graph = [V0-_|_],
  267	reachable(V0, Graph, Set1),
  268	del_vertices(Graph, Set1, Graph2),
  269	partition_graph2(Graph2, Sets).
  270
  271
  272		 /*******************************
  273		 *	      BLOCKS		*
  274		 *******************************/
 assert_blocks(:Sheet, ?Type) is det
Infer and assert identified blocks. Creates the following facts:
  283assert_blocks(Sheet, Type) :-
  284	Sheet = M:_,
  285	blocks(Sheet, Type, Blocks),
  286	forall(member(T, Blocks),
  287	       assert_block(M:T)),
  288	(   Blocks == []
  289	->  true
  290	;   assert_blocks(Sheet, Type)
  291	).
 blocks(?Sheet, +Type, -Blocks) is det
Make an initial guess at all blocks. Block is a list of block(Data, Headers,Union).
  298blocks(Sheet, Type, Blocks) :-
  299	findall(SheetBlocks,
  300		( setof(Block,
  301			Type^block_in_sheet(Sheet, Type, Block),
  302			SheetBlocks0),
  303		  remove_inside(SheetBlocks0, SheetBlocks)
  304		),
  305		NestedBlocks),
  306	append(NestedBlocks, Blocks).
  307
  308block_in_sheet(M:Sheet, Type, block(Id,Type,DS)) :-
  309	ds_sheet(DS, Sheet),
  310	cell_class(Type),
  311	unassigned_anchor(M:DS, Type),
  312	once(block(M:DS, Type)),
  313	ds_id(DS, Id, block).
 remove_inside(+Tables0, -Tables) is det
Remove all tables that are entirely enclosed into other tables.
  319remove_inside(Tables0, Tables) :-
  320	remove_inside(Tables0, Tables0, Tables).
  321
  322remove_inside([], _, []).
  323remove_inside([H|T0], All, T) :-
  324	arg(3, H, Union),
  325	member(T2, All),
  326	T2 \== H,
  327	arg(3, T2, U2),
  328	ds_intersection(Union, U2, Union), !,
  329	remove_inside(T0, All, T).
  330remove_inside([H|T0], All, [H|T]) :-
  331	remove_inside(T0, All, T).
 table(:DataDS, ?SupportDS) is nondet
True when there is a table with DataDS and a list of support datasources.
  339table(QDataDS, TitleDS) :-
  340	QDataDS = _:DataDS,
  341	ds_size(DataDS, Cols, Rows),
  342	top_rows(QDataDS, -1, TitleDS, Left),
  343	left_columns(QDataDS, -1, Left, Right),
  344	right_columns(QDataDS, Cols, Right, Bottom),
  345	bottom_rows(QDataDS, Rows, Bottom, []).
 top_rows(:DS, +StartIndex, -Rows, ?Tail) is nondet
 bottom_rows(:DS, +StartIndex, -Rows, ?Tail) is nondet
 left_columns(:DS, +StartIndex, -Rows, ?Tail) is nondet
 right_columns(:DS, +StartIndex, -Rows, ?Tail) is nondet
  352top_rows(QDataDS, Index, [Row|Rows], Tail) :-
  353	QDataDS = M:DataDS,
  354	ds_unbounded_row_slice(DataDS, Index, Row),
  355	row(M:Row, string),
  356	Up is Index - 1,
  357	top_rows(QDataDS, Up, Rows, Tail).
  358top_rows(_, _, Tail, Tail).
  359
  360
  361bottom_rows(QDataDS, Index, [Row|Rows], Tail) :-
  362	QDataDS = M:DataDS,
  363	ds_unbounded_row_slice(DataDS, Index, Row),
  364	row(M:Row, string),
  365	Down is Index + 1,
  366	bottom_rows(QDataDS, Down, Rows, Tail).
  367bottom_rows(_, _, Tail, Tail).
  368
  369
  370left_columns(QDataDS, Index, [Col|Cols], Tail) :-
  371	QDataDS = M:DataDS,
  372	ds_unbounded_column_slice(DataDS, Index, Col),
  373	col(M:Col, string),
  374	Up is Index - 1,
  375	left_columns(QDataDS, Up, Cols, Tail).
  376left_columns(_, _, Tail, Tail).
  377
  378right_columns(QDataDS, Index, [Col|Cols], Tail) :-
  379	QDataDS = M:DataDS,
  380	ds_unbounded_column_slice(DataDS, Index, Col),
  381	col(M:Col, string),
  382	Right is Index + 1,
  383	right_columns(QDataDS, Right, Cols, Tail).
  384right_columns(_, _, Tail, Tail).
  385
  386
  387		 /*******************************
  388		 *	  TABLE RELATIONS	*
  389		 *******************************/
 adjacent_objects(:Sheet, +Type, ?Obj1, ?Obj2, ?Rel)
True when Obj1 and Obj2 are adjacent in Sheet. Rel is one of above, below left_of or right_of
  396adjacent_objects(Sheet, Type, Obj1, Obj2, Rel) :-
  397	must_be(oneof([table,block]), Type),
  398	sheet_object(Sheet, Type, Obj1),
  399	sheet_object(Sheet, Type, Obj2),
  400	object_union(Obj1, Union1),
  401	object_union(Obj2, Union2),
  402	ds_adjacent(Union1, Rel, Union2).
 intersecting_objects(:Sheet, +Type, ?Obj1, ?Obj2, -Intersection)
True when Obj1 and Obj2 intersect in Sheet. Intersection is the intersecting part.
  410intersecting_objects(Sheet, Type, Obj1, Obj2, Intersection) :-
  411	must_be(oneof([table,block]), Type),
  412	sheet_object(Sheet, Type, Obj1),
  413	sheet_object(Sheet, Type, Obj2),
  414	Obj1 \== Obj2,
  415	object_union(Obj1, Union1),
  416	object_union(Obj2, Union2),
  417	ds_intersection(Union1, Union2, Intersection).
 color_sheets(?Sheet, ?What) is det
Assign colours to objects in sheets. Colours are named 1,2,3,4.
  424color_sheets(Sheet, What) :-
  425	must_be(oneof([table,block]), What),
  426	Sheet = M:SheetName,
  427	forall(M:sheet(SheetName, _),
  428	       do_color_sheet(M:SheetName, What)).
  429
  430do_color_sheet(Sheet, What) :-
  431	Sheet = _:SheetName,
  432	debug(color, 'Colouring sheet ~q', [SheetName]),
  433	color_adjacent(Sheet, What),
  434	color_intersecting_cells(Sheet, What).
  435
  436color_adjacent(Sheet, What) :-
  437	Sheet = M:_,
  438	findall(color(T1,_)-color(T2,_),
  439		( (   adjacent_objects(Sheet, What, Tab1, Tab2, _)
  440		  ;   intersecting_objects(Sheet, What, Tab1, Tab2, _)
  441		  ),
  442		  object_id(Tab1, T1),
  443		  object_id(Tab2, T2)
  444		),
  445		Pairs),
  446	assign_vars(Pairs),
  447	maplist(color_constraint, Pairs),
  448	term_variables(Pairs, Colors),
  449	label(Colors), !,
  450	maplist(assign_color(M), Pairs).
 assign_vars(+Pairs)
Make sure each object id is associated with a unique variable.
  456assign_vars(List) :-
  457	empty_assoc(B0),
  458	assign_vars(List, B0).
  459
  460assign_vars([], _).
  461assign_vars([color(O1,C1)-color(O2,C2)|T], B0) :-
  462	assign_var(O1, C1, B0, B1),
  463	assign_var(O2, C2, B1, B2),
  464	assign_vars(T, B2).
  465
  466assign_var(Name, Var, B, B) :-
  467	get_assoc(Name, B, Var), !.
  468assign_var(Name, Var, B0, B) :-
  469	put_assoc(Name, B0, Var, B).
  470
  471max_colors(10).
  472
  473color_constraint(color(_,C1)-color(_,C2)) :-
  474	max_colors(Max),
  475	C1 in 1..Max,
  476	C2 in 1..Max,
  477	C1 #\= C2.
  478
  479assign_color(M, color(T1,C1)-color(T2,C2)) :-
  480	assert_object_property(M:T1, color(C1)),
  481	assert_object_property(M:T2, color(C2)).
  482
  483color_intersecting_cells(Sheet, What) :-
  484	forall(intersecting_objects(Sheet, What, Obj1, Obj2, Intersection),
  485	       ( object_id(Obj1, Id1),
  486		 object_id(Obj2, Id2),
  487		 forall(ds_inside(Intersection, X, Y),
  488			assert_cell_property(Sheet, X, Y, objects(Id1,Id2)))
  489	       )).
  490
  491
  492		 /*******************************
  493		 *	     LEFT-OVERS		*
  494		 *******************************/
 cells_outside_tables(+Sheet, +Tables, -Cells) is det
True when Cells is a list of cell(Sheet,X,Y) that is outside any table.
  501cells_outside_tables(Sheet, Tables, Cells) :-
  502	findall(cell(Sheet,X,Y),
  503		( sheet_bb(Sheet, SheetDS),
  504		  ds_inside(SheetDS, X, Y),
  505		  cell_value(Sheet, X, Y, _),
  506		  \+ ( member(table(_,_,DS), Tables),
  507		       ds_inside(DS,X,Y)
  508		     )
  509		),
  510		Cells)