XQuery/獲取壓縮的 XML 檔案
您想處理來自網路的 XML 文件,這些文件包含在一個 zip 檔案中。
此指令碼使用 eXist 壓縮模組中的解壓縮函式。該函式使用高階函式來過濾壓縮檔案的所需元件並處理每個元件。
解壓縮函式有五個輸入引數,其中兩個是傳遞給解壓縮函式的 XQuery 函式。這兩個函式反過來都有引數。
以下是解壓縮函式的一般形式:
compression:unzip( $zip-data as xs:base64Binary, $entry-filter as function, $entry-filter-param as xs:anyType*, $entry-data as function, $entry-data-param as xs:anyType*) as item()*
解壓縮所提供資料中的所有資源/資料夾,並透過呼叫使用者定義的函式來決定要處理哪些資源/資料夾以及如何儲存它們。
- $zip-data zip 檔案資料
- $entry-filter 用於從 zip 檔案中過濾資源的使用者定義函式。該函式接受 3 個引數,例如 user:unzip-entry-filter($path as xs:string, $data-type as xs:string, $param as item()*) as xs:boolean。$data-type 的值可以是 'resource'(資源)或 'folder'(資料夾)。$param 是一個包含任何其他引數的序列,例如已提取檔案的列表。如果返回值為 true(),則表示應處理該條目並將其傳遞給 entry-data 函式,否則跳過該資源。
- $entry-filter-param 用於過濾函式的包含額外引數的序列。
- $entry-data 用於儲存從 zip 檔案中提取的資源的使用者定義函式。該函式接受 4 個引數,例如 user:unzip-entry-data($path as xs:string, $data-type as xs:string, $data as item()?, $param as item()*)。$data-type 的值可以是 'resource'(資源)或 'folder'(資料夾)。$param 是一個包含任何其他引數的序列。
- $entry-data-param 用於儲存函式的包含額外引數的序列。
在第一個示例中,我們知道只有一個 XML 檔案,並且我們打算在指令碼中處理該 XML 檔案。後面的示例將儲存該檔案或這些檔案以供以後處理。
declare namespace fw = "http://www.cems.uwe.ac.uk/xmlwiki/fw";
(:~
 : Entry-filter callback handed to compression:unzip().
 : Accepts every entry unconditionally; none of the arguments is inspected.
 :
 : @param $path  path of the entry inside the archive (unused)
 : @param $type  entry type as reported by the unzip function (unused)
 : @param $param extra filter parameters (unused)
 : @return always true()
 :)
declare function fw:filter(
    $path as xs:string,
    $type as xs:string,
    $param as item()*
) as xs:boolean {
    true()
};
(:~
 : Entry-data callback handed to compression:unzip().
 : Hands the extracted content straight back to the caller, so the result of
 : the unzip call is the sequence of extracted items.
 :
 : @param $path  path of the entry inside the archive (unused)
 : @param $type  entry type as reported by the unzip function (unused)
 : @param $data  extracted content of the entry, possibly empty
 : @param $param extra processing parameters (unused)
 : @return $data unchanged
 :)
declare function fw:process(
    $path as xs:string,
    $type as xs:string,
    $data as item()?,
    $param as item()*
) {
    $data
};
(: Main query: download the zip named by the "uri" request parameter (falling
   back to the ISO 3166-1 list) and return whatever fw:process yields for each
   entry that fw:filter accepts. :)
let $source-uri := request:get-parameter("uri","http://www.iso.org/iso/iso_3166-1_list_en.zip")
let $response := httpclient:get(xs:anyURI($source-uri), true(), ())
(: the text content of the response body is passed on as the zip data :)
let $archive := $response/httpclient:body/text()
let $fw-ns := "http://www.cems.uwe.ac.uk/xmlwiki/fw"
(: function references for the two callbacks, looked up by QName and arity :)
let $accept := util:function(QName($fw-ns,"fw:filter"),3)
let $handle := util:function(QName($fw-ns,"fw:process"),4)
return
    compression:unzip($archive,$accept,(),$handle,())
<ISO_3166-1_List_en xml:lang="en">
<ISO_3166-1_Entry>
<ISO_3166-1_Country_name>AFGHANISTAN</ISO_3166-1_Country_name>
<ISO_3166-1_Alpha-2_Code_element>AF</ISO_3166-1_Alpha-2_Code_element>
</ISO_3166-1_Entry>
<ISO_3166-1_Entry>
<ISO_3166-1_Country_name>ÅLAND ISLANDS</ISO_3166-1_Country_name>
<ISO_3166-1_Alpha-2_Code_element>AX</ISO_3166-1_Alpha-2_Code_element>
</ISO_3166-1_Entry>
...
</ISO_3166-1_List_en>
compression:unzip() 函式為它找到的 zip 存檔中的每個元件呼叫 process 函式。這被稱為回撥函式。您可以在 process 函式中放置任何有效的 XQuery 程式碼來執行您對每個輸入檔案想要執行的操作,例如列出或儲存它。
例如,以下 process 函式將列出 zip 檔案中的所有專案及其路徑和型別;如果該專案是 XML 檔案,還會列出其根元素的名稱。
(:~
 : Entry-data callback that describes one zip entry instead of storing it.
 :
 : @param $path  path of the entry inside the archive
 : @param $type  entry type as reported by the unzip function
 : @param $data  extracted content of the entry, possibly empty
 : @param $param extra processing parameters (unused)
 : @return an <item> element carrying the path and type; its text is the name
 :         of the root element when the entry was delivered as XML, and empty
 :         otherwise
 :)
declare function t:process($path as xs:string, $type as xs:string, $data as item()? , $param as item()*) {
    <item path="{$path}" type="{$type}">{
        (: a path step on a non-node item (e.g. binary content of an image)
           raises a type error, so only look for a root element on nodes :)
        if ($data instance of node()) then name($data/*) else ()
    }</item>
};
在 Office Open XML 檔案上執行此命令將返回以下內容
<item path="[Content_Types].xml" type="resource">Types</item>
<item path="_rels/.rels" type="resource">Relationships</item>
<item path="word/_rels/document.xml.rels" type="resource">Relationships</item>
<item path="word/document.xml" type="resource">w:document</item>
<item path="word/theme/theme1.xml" type="resource">a:theme</item>
<item path="word/settings.xml" type="resource">w:settings</item>
<item path="word/fontTable.xml" type="resource">w:fonts</item>
<item path="word/webSettings.xml" type="resource">w:webSettings</item>
<item path="docProps/app.xml" type="resource">Properties</item>
<item path="docProps/core.xml" type="resource">cp:coreProperties</item>
<item path="word/styles.xml" type="resource">w:styles</item>
您可能希望將解壓縮的文件儲存在資料庫中。我們可以修改 process 函式來執行此操作。我們可以使用 compression:unzip() 的第五個引數($entry-data-param)傳遞要儲存每個檔案的集合,它會作為 $param 傳給 process 函式。此外,我們需要建立一個集合來儲存解壓縮的檔案。
declare namespace fw = "http://www.cems.uwe.ac.uk/xmlwiki/fw";
(:~
 : Entry filter for compression:unzip(): lets every archive entry through.
 : The three arguments are required by the callback signature but are not read.
 :
 : @return always true()
 :)
declare function fw:filter(
    $path as xs:string,
    $type as xs:string,
    $param as item()*
) as xs:boolean {
    true()
};
(:~
 : Entry-data callback that stores an extracted entry in the database.
 :
 : @param $path  path of the entry inside the archive; used as the resource name
 : @param $type  entry type as reported by the unzip function (unused)
 : @param $data  extracted content to store
 : @param $param element whose @directory attribute names the target collection
 : @return the result of xmldb:store()
 :)
declare function fw:process(
    $path as xs:string,
    $type as xs:string,
    $data as item()?,
    $param as item()*
) {
    let $target := $param/@directory
    return
        xmldb:store($target, $path, $data)
};
(: Main query: download the zip named by the "uri" request parameter, make sure
   the collection <root><dir> exists, then unzip every entry into it. :)
let $root := "/db/apps/zip/data/"
let $source-uri := request:get-parameter("uri","http://www.iso.org/iso/iso_3166-1_list_en.zip")
let $subdir := request:get-parameter("dir","temp")
let $response := httpclient:get(xs:anyURI($source-uri), true(), ())
let $archive := $response/httpclient:body/text()
let $fw-ns := "http://www.cems.uwe.ac.uk/xmlwiki/fw"
let $accept := util:function(QName($fw-ns,"fw:filter"),3)
let $handle := util:function(QName($fw-ns,"fw:process"),4)
(: NOTE(review): hard-coded admin credentials; replace before real use :)
let $session := xmldb:login("/db","admin","password")
let $target := concat($root, $subdir)
(: create the target collection only when it is not there yet :)
let $ensure-target :=
    if (not(xmldb:collection-available($target)))
    then xmldb:create-collection($root, $subdir)
    else ()
let $stored := compression:unzip($archive,$accept,(),$handle,<param directory="{$target}"/>)
return
    $stored
zip 檔案通常包含多個檔案。特別是 Microsoft Word .docx 和 Excel .xlsx 檔案是 xml 檔案的壓縮集合,它們共同定義文件或電子表格。
當文件儲存在 eXist 資料庫中時,MIME 型別(媒體型別)是使用 mime-types.xml 檔案從檔案字尾推斷出來的。或者,可以在儲存文件時顯式設定 MIME 型別。
我們在此假設 zip 檔案中的檔名很簡單。如果存在目錄結構,則需要進行額外的編碼。
declare namespace fw = "http://www.cems.uwe.ac.uk/xmlwiki/fw";
(:~
 : Pass-all entry filter for compression:unzip().
 : Returns true() for every entry regardless of path, type or extra parameters.
 :)
declare function fw:filter(
    $path as xs:string,
    $type as xs:string,
    $param as item()*
) as xs:boolean {
    true()
};
(:~
 : Entry-data callback that stores an extracted entry under an encoded name.
 :
 : The entry path is run through xmldb:encode() so that names containing
 : characters such as the brackets in [Content_Types].xml can be stored.
 : Entries whose encoded name ends in '.rels' are stored with an explicit
 : 'application/xml' mime type; alternatively that mapping could be added to
 : the mime-types.xml configuration file.
 :
 : @param $path  path of the entry inside the archive
 : @param $type  entry type as reported by the unzip function (unused)
 : @param $data  extracted content to store
 : @param $param element whose @directory attribute names the target collection
 : @return the result of xmldb:store()
 :)
declare function fw:process(
    $path as xs:string,
    $type as xs:string,
    $data as item()?,
    $param as item()*
) {
    let $directory := $param/@directory
    let $resource-name := xmldb:encode($path)
    return
        if (not(ends-with($resource-name, '.rels'))) then
            xmldb:store($directory, $resource-name, $data)
        else
            xmldb:store($directory, $resource-name, $data, 'application/xml')
};
(: Main query: download the zip named by the "uri" request parameter, make sure
   the collection <root><dir> exists, unzip every entry into it and report
   each value returned by the store callback in a <file> element. :)
let $root := "/db/apps/zip/data/"
let $source-uri := request:get-parameter("uri","http://www.iso.org/iso/iso_3166-1_list_en.zip")
let $subdir := request:get-parameter("dir","temp")
let $response := httpclient:get(xs:anyURI($source-uri), true(), ())
let $archive := $response/httpclient:body/text()
let $fw-ns := "http://www.cems.uwe.ac.uk/xmlwiki/fw"
let $accept := util:function(QName($fw-ns,"fw:filter"),3)
let $handle := util:function(QName($fw-ns,"fw:process"),4)
(: NOTE(review): hard-coded admin credentials; replace before real use :)
let $session := xmldb:login("/db","admin","password")
let $target := concat($root, $subdir)
(: create the target collection only when it is not there yet :)
let $ensure-target :=
    if (not(xmldb:collection-available($target)))
    then xmldb:create-collection($root, $subdir)
    else ()
let $stored := compression:unzip($archive,$accept,(),$handle,<param directory="{$target}"/>)
return
<result>
{for $entry in $stored
return
<file>{$entry}</file>
}
</result>
大多數 zip 檔案包含一個由檔案組成的目錄樹。在解壓縮檔案時,需要在資料庫中重新建立此目錄結構。我們可以修改 process 函式來根據需要建立資料庫集合,前提是上級目錄在其子目錄之前被引用。
declare namespace fw = "http://www.cems.uwe.ac.uk/xmlwiki/fw";
(:~
 : Entry filter for compression:unzip() that drops unwanted files.
 :
 : @param $path  path of the entry inside the archive
 : @param $type  entry type as reported by the unzip function (unused)
 : @param $param extra filter parameters (unused)
 : @return false() for paths ending in ".bin", true() for everything else
 :)
declare function fw:filter(
    $path as xs:string,
    $type as xs:string,
    $param as item()*
) as xs:boolean {
    not(ends-with($path,".bin"))
};
(:~
 : Entry-data callback that recreates the zip's directory tree as database
 : collections below the base collection and stores each file in its place.
 :
 : Changes compared with the naive version:
 :  - directory entries only create their collection; they are no longer
 :    passed to xmldb:store() with an empty file name and no data
 :    (assumes directory entries are reported with $type = "folder", TODO confirm);
 :  - files at the top level of the archive are stored directly in the base
 :    collection instead of in "<base>/" with an empty sub-collection name;
 :  - empty path steps (trailing or doubled "/") are ignored.
 :
 : @param $path  path of the entry inside the archive, "/"-separated
 : @param $type  entry type as reported by the unzip function
 : @param $data  extracted content to store (not used for folders)
 : @param $param element whose @collection attribute names the base collection
 : @return the result of xmldb:store() for files, the empty sequence for folders
 :)
declare function fw:process($path as xs:string,$type as xs:string, $data as item()? , $param as item()*) {
    let $steps := tokenize($path,"/")[. ne ""]
    let $isFolder := $type eq "folder"
    (: for a file the last step is the file name; for a folder every step is a collection :)
    let $dirSteps :=
        if ($isFolder) then $steps
        else subsequence($steps, 1, count($steps) - 1)
    let $collection := string-join($dirSteps,"/")
    let $baseCollection := string($param/@collection)
    let $fullCollection :=
        if ($collection eq "") then $baseCollection
        else concat($baseCollection,"/",$collection)
    (: the base collection itself is expected to exist already :)
    let $mkdir :=
        if ($collection eq "" or xmldb:collection-available($fullCollection)) then ()
        else xmldb:create-collection($baseCollection, $collection)
    return
        if ($isFolder) then ()
        else xmldb:store($fullCollection, xmldb:encode($steps[last()]), $data)
};
(: Main query: download the zip named by the "path" request parameter, make
   sure the collection <root><dir> exists, unzip the archive below it and
   report each value returned by the store callback in a <file> element. :)
let $root := "/db/apps/zip/data/"
let $source := request:get-parameter("path","http://www.iso.org/iso/iso_3166-1_list_en.zip")
let $subdir := request:get-parameter("dir","temp")
let $response := httpclient:get(xs:anyURI($source), true(), ())
let $archive := $response/httpclient:body/text()
let $fw-ns := "http://www.cems.uwe.ac.uk/xmlwiki/fw"
let $accept := util:function(QName($fw-ns,"fw:filter"),3)
let $handle := util:function(QName($fw-ns,"fw:process"),4)
(: NOTE(review): hard-coded admin credentials; replace before real use :)
let $session := xmldb:login("/db","admin","password")
let $target := concat($root, $subdir)
(: create the target collection only when it is not there yet :)
let $ensure-target :=
    if (not(xmldb:collection-available($target)))
    then xmldb:create-collection($root, $subdir)
    else ()
let $stored := compression:unzip($archive,$accept,(),$handle,<param collection="{$target}"/>)
return
<result>
{for $entry in $stored
return
<file>{$entry}</file>
}
</result>
在解壓縮 zip 檔案之前,將它們作為二進位制資源儲存在資料庫中可能很有用。預設情況下,以 .zip 為字尾的檔案被儲存為二進位制資料。要在 eXist 中儲存 .docx 和 .xlsx 檔案,您需要將這些字尾新增到 $EXIST_HOME/mime-types.xml 配置檔案中的條目中。
更改
<mime-type name="application/zip" type="binary">
<description>ZIP archive</description>
<extensions>.zip</extensions>
</mime-type>
為
<mime-type name="application/zip" type="binary">
<description>ZIP archive and Office Open XML</description>
<extensions>.zip,.docx,.xlsx,.pptx</extensions>
</mime-type>
您將需要重新啟動伺服器以使此更改生效。
基本指令碼保持不變,只是進行了細微的修改
let $path := request:get-parameter("path","http://www.iso.org/iso/iso_3166-1_list_en.zip")
let $unzipCollection := request:get-parameter("dir","temp")
let $zip :=
if (starts-with($path,"http"))
then httpclient:get(xs:anyURI($path), true(), ())/httpclient:body/text()
else util:binary-doc($path)