#!/usr/bin/perl -w

#
# Perl script to extract non-breaking regular expressions from SRX files
#
# Copyright 2010 Digital Silk Road
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;

use XML::Simple;

# Apply the :utf8 layer to standard output so extracted patterns containing
# non-ASCII characters are written as UTF-8.
binmode(STDOUT, ":utf8");

# Exactly two command-line arguments are required: the language to look up
# and the path of the SRX file to read.
die "Usage: perl $0 <Language> <SRX file>\n" unless @ARGV == 2;

my $language = shift @ARGV;
my $srxfile  = shift @ARGV;

# Stop early with a readable message when the path does not exist.
die "File $srxfile does not exist\n" unless -e $srxfile;

# Parse the SRX file into a nested hash/array structure.
#
# ForceArray: by default XML::Simple folds an element that occurs only once
# into a plain hash instead of a one-element array.  The code that follows
# dereferences languagemap, languagerule and rule as arrays, so an SRX file
# containing a single one of any of these would otherwise die with
# "Not an ARRAY reference".
#
# KeyAttr => []: switch off XML::Simple's default folding of repeated
# elements into a hash keyed on a 'name', 'key' or 'id' attribute, so the
# forced arrays always stay arrays.
my $segrules = XMLin(
    $srxfile,
    ForceArray => [ 'languagemap', 'languagerule', 'rule' ],
    KeyAttr    => [],
);

# Collect the names of the language rules that apply to the requested
# language.  Each <languagemap> entry carries a 'languagepattern' attribute,
# used here as a regular expression matched (unanchored) against $language,
# and the 'languagerulename' it maps to.
my @languageRules;
for my $mapEntry (@{ $segrules->{body}{maprules}{languagemap} }) {
    next unless $language =~ /$mapEntry->{languagepattern}/;
    push @languageRules, $mapEntry->{languagerulename};
}

# NOTE(review): @rules is never used in the visible part of the script; it is
# kept only in case code outside this view relies on the declaration.
my @rules;

# Turn the value XML::Simple produced for a <beforebreak>/<afterbreak>
# element into a printable string:
#   - a missing element (undef) becomes the empty string;
#   - an empty element, which XML::Simple represents as an empty hash,
#     becomes the empty string instead of printing as "HASH(0x...)";
#   - an element that carries attributes is a hash whose text is stored
#     under the 'content' key (XML::Simple's default ContentKey);
#   - plain text is returned unchanged.
my $ruleText = sub {
    my ($value) = @_;
    return '' unless defined $value;
    if (ref($value) eq 'HASH') {
        return defined $value->{content} ? $value->{content} : '';
    }
    return $value;
};

# Walk every <languagerule> in document order and, for those selected via
# @languageRules, print a header line followed by the before-break pattern
# and the "AB:"-prefixed after-break pattern of each non-breaking rule.
foreach my $langRuleRef (@{ $segrules->{body}->{languagerules}->{languagerule} }) {
    my $ruleName = $langRuleRef->{languagerulename};

    # A rule set without a name cannot have been selected by a language map.
    next unless defined $ruleName;
    next unless grep { defined $_ && $_ eq $ruleName } @languageRules;

    print $ruleName, "\t-------------------------------------------------------\n";

    foreach my $ruleRef (@{ $langRuleRef->{rule} }) {
        # Treat a rule without a 'break' attribute as a breaking rule
        # ("yes"), which also avoids an "uninitialized value" warning.
        my $break = defined $ruleRef->{break} ? $ruleRef->{break} : 'yes';
        next unless $break eq 'no';

        print $ruleText->($ruleRef->{beforebreak}), "\n";
        print "AB:", $ruleText->($ruleRef->{afterbreak}), "\n";
    }
}

