Add Word and Excel detection (#1818)

* Add a new FrontPage pattern and remove the overly broad one

The removed rule would match all MS Office generated web pages (such as
those generated by Word and Excel), not just FrontPage.

* Add Microsoft Word

* Add Microsoft Excel
main
q-- 7 years ago committed by Elbert Alias
parent 9c1e5ecd75
commit 405a7b740a

@ -2854,10 +2854,10 @@
"cats": [
"20"
],
"html": "<html[^>]+urn:schemas-microsoft-com:office:office",
"icon": "FrontPage.png",
"meta": {
"generator": "Microsoft FrontPage(?:\\s((?:Express )?[\\d.]+))?\\;version:\\1"
"generator": "Microsoft FrontPage(?:\\s((?:Express )?[\\d.]+))?\\;version:\\1",
"ProgId": "^FrontPage\\."
},
"website": "http://office.microsoft.com/frontpage"
},
@ -5229,6 +5229,18 @@
"icon": "Microsoft.svg",
"website": "http://microsoft.com"
},
"Microsoft Excel": {
"cats": [
"20"
],
"icon": "Microsoft Excel.svg",
"html": "(?:<html [^>]*xmlns:w=\"urn:schemas-microsoft-com:office:excel\"|<!--\\s*(?:START|END) OF OUTPUT FROM EXCEL PUBLISH AS WEB PAGE WIZARD\\s*-->|<div [^>]*x:publishsource=\"?Excel\"?)",
"meta": {
"generator": "Microsoft Excel( [\\d.]+)?\\;version:\\1",
"ProgId": "^Excel\\."
},
"website": "https://office.microsoft.com/excel"
},
"Microsoft SharePoint": {
"cats": [
"1"
@ -5246,6 +5258,18 @@
},
"website": "http://sharepoint.microsoft.com"
},
"Microsoft Word": {
"cats": [
"20"
],
"icon": "Microsoft Word.svg",
"html": "(?:<html [^>]*xmlns:w=\"urn:schemas-microsoft-com:office:word\"|<w:WordDocument>|<div [^>]*class=\"?WordSection1[\" >]|<style[^>]*>[^>]*@page WordSection1)",
"meta": {
"generator": "Microsoft Word( [\\d.]+)?\\;version:\\1",
"ProgId": "^Word\\."
},
"website": "https://office.microsoft.com/word"
},
"Mietshop": {
"cats": [
"6"

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Icon for the "Microsoft Excel" fingerprint entry (assumed from the green
  #207245 palette and the five cell rectangles; confirm against the file name).
  Drawn in an 88.030066 x 86 viewBox and exported from Inkscape. The dc:title
  and sodipodi:docname values are descriptive metadata only and do not affect
  rendering.
-->
<svg xmlns:dc="http://purl.org/dc/elements/1.1/"
     xmlns:cc="http://creativecommons.org/ns#"
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:svg="http://www.w3.org/2000/svg"
     xmlns="http://www.w3.org/2000/svg"
     xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
     xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
     width="110.03758"
     height="107.5"
     viewBox="0 0 88.030066 86"
     version="1.1"
     id="svg2"
     inkscape:version="0.48.3.1 r9886"
     sodipodi:docname="Microsoft Excel.svg">
  <metadata id="metadata126">
    <rdf:RDF>
      <cc:Work rdf:about="">
        <dc:format>image/svg+xml</dc:format>
        <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
        <dc:title>Microsoft Excel</dc:title>
      </cc:Work>
    </rdf:RDF>
  </metadata>
  <defs id="defs124"/>
  <!-- Inkscape editor view state; ignored by SVG renderers. -->
  <sodipodi:namedview pagecolor="#ffffff"
                      bordercolor="#666666"
                      borderopacity="1"
                      objecttolerance="10"
                      gridtolerance="10"
                      guidetolerance="10"
                      inkscape:pageopacity="0"
                      inkscape:pageshadow="2"
                      inkscape:window-width="1280"
                      inkscape:window-height="960"
                      id="namedview122"
                      showgrid="false"
                      inkscape:zoom="1.0905983"
                      inkscape:cx="344.89442"
                      inkscape:cy="53.75"
                      inkscape:window-x="0"
                      inkscape:window-y="0"
                      inkscape:window-maximized="1"
                      inkscape:current-layer="svg2"
                      fit-margin-top="0"
                      fit-margin-left="0"
                      fit-margin-right="0"
                      fit-margin-bottom="0"/>
  <!-- Green background shape. -->
  <path d="m 46.04,0 5.94,0 c 0,2.67 0,5.33 0,8 10.01,0 20.02,0.02 30.03,-0.03 1.69,0.07 3.55,-0.05 5.02,0.96 1.03,1.48 0.91,3.36 0.98,5.06 -0.05,17.36 -0.03,34.71 -0.02,52.06 -0.05,2.91 0.27,5.88 -0.34,8.75 -0.4,2.08 -2.9,2.13 -4.57,2.2 -10.36,0.03 -20.73,-0.02 -31.1,0 0,3 0,6 0,9 l -6.21,0 C 30.53,83.23 15.26,80.67 0,78 0,54.67 0,31.34 0,8.01 15.35,5.34 30.7,2.71 46.04,0 z"
        id="path10"
        inkscape:connector-curvature="0"
        style="fill:#207245"/>
  <!-- White panel on the right-hand side, notched along its left edge. -->
  <path d="m 51.98,11 c 11,0 22,0 33,0 0,21 0,42 0,63 -11,0 -22,0 -33,0 0,-2 0,-4 0,-6 2.67,0 5.33,0 8,0 0,-2.33 0,-4.67 0,-7 -2.67,0 -5.33,0 -8,0 0,-1.33 0,-2.67 0,-4 2.67,0 5.33,0 8,0 0,-2.33 0,-4.67 0,-7 -2.67,0 -5.33,0 -8,0 0,-1.33 0,-2.67 0,-4 2.67,0 5.33,0 8,0 0,-2.33 0,-4.67 0,-7 -2.67,0 -5.33,0 -8,0 0,-1.33 0,-2.67 0,-4 2.67,0 5.33,0 8,0 0,-2.33 0,-4.67 0,-7 -2.67,0 -5.33,0 -8,0 0,-1.33 0,-2.67 0,-4 2.67,0 5.33,0 8,0 0,-2.33 0,-4.67 0,-7 -2.67,0 -5.33,0 -8,0 0,-2 0,-4 0,-6 z"
        id="path48"
        inkscape:connector-curvature="0"
        style="fill:#ffffff"/>
  <!-- Green cell, row 1. -->
  <path d="m 63.98,17 c 4.67,0 9.33,0 14,0 0,2.33 0,4.67 0,7 -4.67,0 -9.33,0 -14,0 0,-2.33 0,-4.67 0,-7 z"
        id="path58"
        inkscape:connector-curvature="0"
        style="fill:#207245"/>
  <!-- White glyph drawn over the left part of the background. -->
  <path d="m 29.62,26.37 c 2.26,-0.16 4.53,-0.3 6.8,-0.41 -2.67,5.47 -5.35,10.94 -8.07,16.39 2.75,5.6 5.56,11.16 8.32,16.76 -2.41,-0.14 -4.81,-0.29 -7.22,-0.46 -1.7,-4.17 -3.77,-8.2 -4.99,-12.56 -1.36,4.06 -3.3,7.89 -4.86,11.87 -2.19,-0.03 -4.38,-0.12 -6.57,-0.21 2.57,-5.03 5.05,-10.1 7.7,-15.1 -2.25,-5.15 -4.72,-10.2 -7.04,-15.32 2.2,-0.13 4.4,-0.26 6.6,-0.38 1.49,3.91 3.12,7.77 4.35,11.78 1.32,-4.25 3.29,-8.25 4.98,-12.36 z"
        id="path72"
        inkscape:connector-curvature="0"
        style="fill:#ffffff"/>
  <!-- Green cells, rows 2 to 5. -->
  <path d="m 63.98,28 c 4.67,0 9.33,0 14,0 0,2.33 0,4.67 0,7 -4.67,0 -9.33,0 -14,0 0,-2.33 0,-4.67 0,-7 z"
        id="path90"
        inkscape:connector-curvature="0"
        style="fill:#207245"/>
  <path d="m 63.98,39 c 4.67,0 9.33,0 14,0 0,2.33 0,4.67 0,7 -4.67,0 -9.33,0 -14,0 0,-2.33 0,-4.67 0,-7 z"
        id="path108"
        inkscape:connector-curvature="0"
        style="fill:#207245"/>
  <path d="m 63.98,50 c 4.67,0 9.33,0 14,0 0,2.33 0,4.67 0,7 -4.67,0 -9.33,0 -14,0 0,-2.33 0,-4.67 0,-7 z"
        id="path114"
        inkscape:connector-curvature="0"
        style="fill:#207245"/>
  <path d="m 63.98,61 c 4.67,0 9.33,0 14,0 0,2.33 0,4.67 0,7 -4.67,0 -9.33,0 -14,0 0,-2.33 0,-4.67 0,-7 z"
        id="path120"
        inkscape:connector-curvature="0"
        style="fill:#207245"/>
</svg>

After

Width:  |  Height:  |  Size: 3.7 KiB

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Inkscape-exported icon drawn in an 88.024445 x 86 viewBox: one blue
  (#2a5699) background path and two white paths, all inside a group that is
  shifted left by 21.015556 user units.
-->
<svg
    xmlns="http://www.w3.org/2000/svg"
    xmlns:svg="http://www.w3.org/2000/svg"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:cc="http://creativecommons.org/ns#"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
    xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
    id="svg2"
    version="1.1"
    width="110.03056"
    height="107.5"
    viewBox="0 0 88.024445 86"
    inkscape:version="0.48.3.1 r9886"
    sodipodi:docname="Microsoft-Office-2013-lineup.svg">
  <metadata id="metadata126">
    <rdf:RDF>
      <cc:Work rdf:about="">
        <dc:format>image/svg+xml</dc:format>
        <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
        <dc:title/>
      </cc:Work>
    </rdf:RDF>
  </metadata>
  <defs id="defs124"/>
  <!-- Inkscape editor view state; ignored by SVG renderers. -->
  <sodipodi:namedview
      id="namedview122"
      pagecolor="#ffffff"
      bordercolor="#666666"
      borderopacity="1"
      objecttolerance="10"
      gridtolerance="10"
      guidetolerance="10"
      showgrid="false"
      fit-margin-top="0"
      fit-margin-left="0"
      fit-margin-right="0"
      fit-margin-bottom="0"
      inkscape:pageopacity="0"
      inkscape:pageshadow="2"
      inkscape:window-width="1280"
      inkscape:window-height="960"
      inkscape:window-x="0"
      inkscape:window-y="0"
      inkscape:window-maximized="1"
      inkscape:zoom="1.0905983"
      inkscape:cx="321.51598"
      inkscape:cy="53.75"
      inkscape:current-layer="svg2"/>
  <g id="g3103" transform="translate(-21.015556,0)">
    <!-- Blue background shape. -->
    <path
        id="path6"
        style="fill:#2a5699"
        inkscape:connector-curvature="0"
        d="M 67.02,0 73,0 c 0,2.67 0,5.33 0,8 10.7,0.06 21.39,-0.11 32.08,0.06 2.28,-0.23 4.07,1.54 3.86,3.82 0.17,19.7 -0.04,39.41 0.1,59.11 -0.1,2.02 0.2,4.26 -0.97,6.05 -1.46,1.04 -3.35,0.91 -5.05,0.99 C 93.01,77.98 83.01,78 73,78 c 0,2.67 0,5.33 0,8 l -6.25,0 C 51.53,83.22 36.27,80.67 21.02,78 21.01,54.67 21.02,31.34 21.02,8.02 36.35,5.34 51.69,2.73 67.02,0 z"/>
    <!-- White panel on the right-hand side, notched along its left edge. -->
    <path
        id="path46"
        style="fill:#ffffff"
        inkscape:connector-curvature="0"
        d="m 73,11 c 11,0 22,0 33,0 0,21.33 0,42.67 0,64 -11,0 -22,0 -33,0 0,-2.67 0,-5.33 0,-8 8.67,0 17.33,0 26,0 0,-1.33 0,-2.67 0,-4 -8.67,0 -17.33,0 -26,0 0,-1.67 0,-3.33 0,-5 8.67,0 17.33,0 26,0 0,-1.33 0,-2.67 0,-4 -8.67,0 -17.33,0 -26,0 0,-1.67 0,-3.33 0,-5 8.67,0 17.33,0 26,0 0,-1.33 0,-2.67 0,-4 -8.67,0 -17.33,0 -26,0 0,-1.67 0,-3.33 0,-5 8.67,0 17.33,0 26,0 0,-1.33 0,-2.67 0,-4 -8.67,0 -17.33,0 -26,0 0,-1.67 0,-3.33 0,-5 8.67,0 17.33,0 26,0 0,-1.33 0,-2.67 0,-4 -8.67,0 -17.33,0 -26,0 0,-1.67 0,-3.33 0,-5 8.67,0 17.33,0 26,0 0,-1.33 0,-2.67 0,-4 -8.67,0 -17.33,0 -26,0 0,-2.33 0,-4.67 0,-7 z"/>
    <!-- White glyph drawn over the left part of the background. -->
    <path
        id="path88"
        style="fill:#ffffff"
        inkscape:connector-curvature="0"
        d="m 41.68,28.67 c 1.9,-0.11 3.8,-0.19 5.7,-0.29 1.33,6.75 2.69,13.49 4.13,20.21 1.13,-6.94 2.38,-13.86 3.59,-20.79 2,-0.07 4,-0.18 5.99,-0.3 -2.26,9.69 -4.24,19.46 -6.71,29.09 -1.67,0.87 -4.17,-0.04 -6.15,0.1 -1.33,-6.62 -2.88,-13.2 -4.07,-19.85 -1.17,6.46 -2.69,12.86 -4.03,19.28 -1.92,-0.1 -3.85,-0.22 -5.78,-0.35 -1.66,-8.8 -3.61,-17.54 -5.16,-26.36 1.71,-0.08 3.43,-0.15 5.14,-0.21 1.03,6.37 2.2,12.71 3.1,19.09 1.41,-6.54 2.85,-13.08 4.25,-19.62 z"/>
  </g>
</svg>

After

Width:  |  Height:  |  Size: 3.0 KiB