2% Nov/99
    3% DNA data processing
    4%	1. remove duplicates
    5%	2. break up upper-case constants to lists of lower-case constants
    6
    7dna_proc(List, List3) :-
    8	remove_dups(List, List2),
    9	conv_to_lc_atoms(List2, List3),
   10	make_new_alpha(List3).
   11
   12% dna_proc2 done by unaligned processing. Pad examples with random
   13% alphabet. Don't filter alphabet either, like in dna_proc.
   14
   15dna_proc2(List, List4) :-  
   16	remove_dups(List, List2),
   17	conv_to_lc_atoms(List2, List3),
   18	unaligned_P(_, StrSize),
   19	List3 = [S|_],
   20	length(S, L),
   21	PadSize is integer((StrSize-L)/2),
   22	pad_randomly(List3, PadSize, List4).	
   23
   24remove_dups([], []) :- !.
   25remove_dups([A|R], S) :-
   26	member(A, R),
   27	!,
   28	remove_dups(R, S).
   29remove_dups([A|R], [A|S]) :-
   30	remove_dups(R, S).
   31
   32conv_to_lc_atoms([], []) :- !.
   33conv_to_lc_atoms([A|R], [B|S]) :-
   34	name(A, L),	
   35	conv_to_lc_atoms2(L, B),
   36	conv_to_lc_atoms(R, S).
   37
   38conv_to_lc_atoms2([], []) :- !.
   39conv_to_lc_atoms2([A|R], [B|S]) :-
   40	A2 is A+32,  % upper-case ascii to lower ascii
   41	name(B, [A2]),
   42	conv_to_lc_atoms2(R, S).
   43
   44sum_lengths([], 0) :- !.
   45sum_lengths([A|R], S) :-
   46	sum_lengths(R, T),
   47	length(A, L),
   48	S is T + L.
   49
   50% makes N random sequences of size =< A.
   51
   52make_random_strings(N, _, _, []) :-
   53	N =< 0, 
   54	!.
   55make_random_strings(N, L, Plist, [S|R]) :-
   56	M is N - 1,
   57	make_random_strings(M, L, Plist, R),
   58	repeat,
   59	make_randstring(L, S),
   60	\+ member(S, R),
   61	\+ member(S, Plist),
   62	!.
   63
   64make_randstring(N, []) :-
   65	(N =< 0 ; maybe),
   66	!.
   67make_randstring(N, [A|R]) :-
   68	alphabet_P(_, L),
   69	select_rand(L, A),
   70	M is N - 1,
   71	make_randstring(M, R).
   72
   73
   74make_new_alpha(List) :-
   75	append_all(List, [], All),
   76	remove_dups(All, All2),
   77	length(All2, L),
   78	retract(alphabet_P(_, _)),
   79	assert(alphabet_P(L, All2)),
   80	!.
   81
   82append_all([], A, A) :- !.
   83append_all([A|R], B, C) :-
   84	append(A, B, D),
   85	append_all(R, D, C),
   86	!.
   87
   88pad_randomly([], _, []) :- !.
   89pad_randomly([S|R], Size, [S2|R2]) :- 
   90	make_randstring3(Size, Left),
   91	make_randstring3(Size, Right),
   92	append(Left, S, T),
   93	append(T, Right, S2),
   94	!,
   95	pad_randomly(R, Size, R2).
   96
   97make_randstring3(N, []) :- N =< 0, !.
   98make_randstring3(N, [A|R]) :-
   99	alphabet_P(_, L),
  100	select_rand(L, A),
  101	M is N - 1,
  102	make_randstring3(M, R)