To find all js/css/image pathnames in an HTML document, I used a regular
expression (in the last line of my code snippet below). Are there any
shorter regular expressions, or more efficient ways, to do this?
import re
def find_asset_paths(html, extensions=("css", "js", "png", "jpg")):
    """Return the asset paths referenced by ``href``/``src`` attributes.

    Scans *html* for ``href=...`` and ``src=...`` attributes whose quoted
    value ends in one of *extensions* and returns those values in document
    order (duplicates are kept).

    Compared with a bare ``(?:href|src)="([^"]+?\\.(?:css|js|png|jpg))"``:

    * matching is case-insensitive, so ``SRC="A.JPG"`` is found;
    * single-quoted values and whitespace around ``=`` are accepted;
    * ``\\b`` stops the pattern from matching inside a longer attribute
      name such as ``imgsrc=``;
    * the value is matched greedily -- ``[^"]`` can never run past the
      closing quote, so the lazy ``+?`` bought nothing.

    This is still a textual scan, not an HTML parser: it will also match
    attribute-looking text inside comments or scripts.

    Args:
        html: The HTML text to scan.
        extensions: File extensions (without the leading dot) to accept.

    Returns:
        A list of the matching attribute values, without their quotes.
    """
    if not extensions:
        return []
    suffix = r"\.(?:" + "|".join(re.escape(ext) for ext in extensions) + ")"
    pattern = re.compile(
        r"""\b(?:href|src)\s*=\s*(?:"([^"]+{ext})"|'([^']+{ext})')""".format(
            ext=suffix
        ),
        re.IGNORECASE,
    )
    # Exactly one of the two groups is non-empty per match, depending on
    # which quote style the attribute used.
    return [double or single for double, single in pattern.findall(html)]


# Sample dictionary entry used to exercise find_asset_paths().
translation = '''<link rel="stylesheet" type="text/css" href="O8C.css">
<span eid="beetle_e" id="beetle_e" level="0" alpha_id="000003072" name="beetle"
idm_id="000003072" backup-class="b" class="entry">
<span level="1" class="h-g">
<span level="2" class="top-g">
<span level="3" class="h">bee·tle™</span>
<span type="h_full_" level="3" class="ei-g">
<span class="z_ei-g">/</span>
<span file="{gb}/b/be/bee/beetle#_gb_1.spx" level="4" wd="beetle"
recdate="070514" class="phon-gb">ˈbiːtl</span>
<a type="sound" topic="b/bee/beetl/beetle__gb_1.spx" resource="uk_pron"
backup-class="Media" class="fayin" href="sound://uk/beetle__gb_1.spx"><img
src="uk_pron.png" class="fayin"/></a>
<span class="z">;
<span class="z_phon-us">NAmE</span>
</span>
<span file="{gb}/b/be/bee/beetle#_us_1.spx" sup="y" level="4"
wd="beetle" recdate="070514" class="phon-us">ˈbiːtl</span>
<a type="sound" topic="b/bee/beetl/beetle__us_1.spx" resource="us_pron"
backup-class="Media" class="fayin" href="sound://us/beetle__us_1.spx"><img
src="us_pron.png" class="fayin"/></a>
<span class="z_ei-g">/</span>
</span>
<span level="3" display="inline" class="pos-g">
<span topic="beetle_e" bookmark="beetle_pos_n" class="Ref">
<a href="entry://#beetle_pos_n" level="4" pos="n"
backup-class="pos">noun</a>
</span>
<span class="z">,</span>
<span topic="beetle_e" bookmark="beetle_pos_v" class="Ref">
<a href="entry://#beetle_pos_v" level="4" pos="v"
backup-class="pos">verb</a>
</span>
</span>
</span>
<span level="2" class="infl">
<span level="3" class="inflection">beetle</span>
<span level="3" class="inflection">beetles</span>
<span level="3" class="inflection">beetled</span>
<span level="3" class="inflection">beetling</span>
</span>
</span>
<a name="beetle_pos_n"></a>
<span eid="beetle_pos_n" id="beetle_pos_n" level="1" class="p-g">
<span level="2" class="block-g">
<span level="3" class="pos-g">
<span level="4" pos="n" class="pos">noun</span>
</span>
<img src="/pic/insects_comp.jpg" alt="/pic/insects_comp.jpg" height="620"
width="720" style="display:none;"
onclick="this.style.display='none';this.nextSibling.nextSibling.style.display='block';"/>
<img type="image" topic="insects_comp.htm" thumb_resource="thumb"
resource="pic" thumb="beetle.jpg" class="Media" backup-height="620"
backup-width="720" src="/thumb/beetle.jpg" alt="/thumb/beetle.jpg"
onclick="this.style.display='none';this.previousSibling.previousSibling.style.display='block';"/>
<span class="clear"></span>
</span>
<span eid="beetle_ng_1" id="beetle_ng_1" level="2" n="1" class="n-g">
<span class="z_n">1</span>
<span level="3" class="def-g">
<span status="6" level="4" tranidoupc="1" class="d">an insect, often
large and black, with a hard case on its back, covering its wings. There are
several types of
<span level="5" class="dh">beetle.</span>
<span localeuidoupc="201" status="6" level="5" class="chn">甲虫</span>
</span>
</span>
<span xt="see" level="3" class="xr-g">
<span class="symbols-xrsym">☞</span>see also
<span eid="beetle_xr_1" id="beetle_xr_1" xt="see"
href="deathwatchbeetle_e" level="4" pos="n" class="xr">
<span topic="deathwatchbeetle_e" fk="XXX" class="Ref">
<span level="5" class="xh">
<a href="entry://death-watch beetle">death-watch beetle</a>
</span>
</span>
</span>
</span>
</span>
<span eid="beetle_ng_2" id="beetle_ng_2" new="seven" level="2" n="2"
enc="y" class="n-g">
<span class="z_n">2</span>
<span level="3" class="alt">Beetle</span>
<span level="3" class="vs-g">
<span class="z">(</span>
<span level="4" brackets="n" display="inline" class="label-g">
<span level="5" g="amalso" class="g">NAmE also</span>
</span>
<span level="4" class="v">bug</span>
<span class="z">)</span>
</span>
<span level="3" class="def-g">
<span status="6" level="4" tranidoupc="7" class="d">the English names
for the original Volkswagen small car with a round shape at the front and the
back
<span localeuidoupc="201" status="6" level="5"
class="chn">“甲壳虫”(英国人用以指称一款圆头圆顶的原大众牌的小汽车)</span>
</span>
</span>
</span>
</span>
<a name="beetle_pos_v"></a>
<span eid="beetle_pos_v" id="beetle_pos_v" level="1" class="p-g">
<span level="2" class="block-g">
<span level="3" class="pos-g">
<span level="4" pos="v" class="pos">verb</span>
</span>
</span>
<span gr="i" level="2" class="gr">
<span class="z_gr_br">[</span>intransitive
<span class="z_gr_br">]</span>
</span>
<span eid="beetle_cf_1" id="beetle_cf_1" level="2" class="cf">+
adv./prep.</span>
<span level="2" class="def-g">
<span level="3" display="inline" class="label-g">(
<span level="4" g="br" class="g">BrE</span>) (
<span level="4" r="infml" class="r">informal</span>)
</span>
<span status="6" level="3" tranidoupc="3" class="d">to move somewhere
quickly
<span localeuidoupc="201" status="6" level="4" class="chn">快速移动</span>
</span>
</span>
<span xt="syn" level="2" class="xr-g">
<span class="symbols-synsym">SYN</span>
<span eid="beetle_xr_2" id="beetle_xr_2" xt="syn" href="scurry_e"
level="3" pos="v" class="xr">
<span topic="scurry_e" fk="XXX" class="Ref">
<span level="4" class="xh">
<a href="entry://scurry">scurry</a>
</span>
</span>
</span>
</span>
<span eid="beetle_xg_1" id="beetle_xg_1" level="2" class="x-g">
<span class="symbols-xsym">◆</span>
<span status="6" record="y" level="3" tranidoupc="4" class="x">I last saw
him beetling off down the road.</span>
<span localeuidoupc="201" status="6" level="3"
class="tx">我上次见到他时,他正快步沿路而去。</span>
</span>
</span>
<span class="pracpron">
<span class="pron-g">
<span type="h" class="wd">bee·tle™</span>
<span type="h_full_" level="3" class="ei-g">
<span class="z_ei-g">/</span>
<span file="{gb}/b/be/bee/beetle#_gb_1.spx" level="4" wd="beetle"
recdate="070514" class="phon-gb">ˈbiːtl</span>
<a type="sound" topic="b/bee/beetl/beetle__gb_1.spx" resource="uk_pron"
backup-class="Media" class="fayin" href="sound://uk/beetle__gb_1.spx"><img
src="uk_pron.png" class="fayin"/></a>
<span class="z">;
<span class="z_phon-us">NAmE</span>
</span>
<span file="{gb}/b/be/bee/beetle#_us_1.spx" sup="y" level="4"
wd="beetle" recdate="070514" class="phon-us">ˈbiːtl</span>
<a type="sound" topic="b/bee/beetl/beetle__us_1.spx" resource="us_pron"
backup-class="Media" class="fayin" href="sound://us/beetle__us_1.spx"><img
src="us_pron.png" class="fayin"/></a>
<span class="z_ei-g">/</span>
</span>
</span>
</span>
</span>
'''
print(find_asset_paths(translation))
--
https://mail.python.org/mailman/listinfo/python-list