This is a patch to fix the mcu parser (mcu.rb).

Bye.
--- mcu.rb	2006-11-12 00:37:38.000000000 +0100
+++ mcu-fixed.rb	2006-11-12 03:20:46.000000000 +0100
@@ -97,17 +97,13 @@
 		    # Only add authors of appropiate length
        	       		    product['authors'] << author
 			    print "Authors are #{product['authors']}\n" if $DEBUG # for DEBUGing
+		    robotstate = 0
 		    end
-                elsif robotstate == 10 and line =~ /^([^<]+)/
-		    if product['name'].nil?  then
-	                    product['name'] = $1.strip
-	            else
-	                    product['name'] += $1.strip
-		    end
+                elsif robotstate == 2 and line =~ /^(.*)$/ # The title is on the line right after the title declaration and has no tags in the web page source
+	            product['name'] = $1.strip
 		    print "Name is #{product['name']}\n" if $DEBUG # for DEBUGing
-                elsif robotstate == 2 and line =/^<td class="tex1"/
-		    robotstate = 10
-                elsif robotstate == 3 and line =~ /^([0-9]+-[0-9]+-[0-9]+-[0-9X]+)/ 
+                    robotstate = 0
+		elsif robotstate == 3 and line =~ /^([0-9]+-[0-9]+-[0-9]+-[0-9X]).*/ 
                     product['isbn'] = $1
 		    print "ISBN is #{product['isbn']}\n" if $DEBUG # for DEBUGing
 		    robotstate = 0
@@ -119,15 +115,15 @@
                     product['media'] = $1.strip
 		    print "Media is #{product['media']}\n" if $DEBUG # for DEBUGing
 		    robotstate = 0 
-                elsif line =~ /^Autor:/
+                elsif line =~ /^.*>Autor:\s*</
 		    robotstate = 1
-                elsif line =~ /^T.tulo:/
+                elsif line =~ /^.*>T.tulo:\s*</
 		    robotstate = 2
-                elsif line =~ /^ISBN:/
+                elsif line =~ /^.*>ISBN:\s*</
 		    robotstate = 3
-                elsif line =~ /^Publicaci.n:/
+                elsif line =~ /^.*>Publicaci.n:\s*</
 		    robotstate = 4
-                elsif line =~ /^Encuadernaci.n:/
+                elsif line =~ /^.*>Encuadernaci.n:\s*</
 		    robotstate = 5 
                 end
             end

Reply via email to