Benford.xsl 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  3. xmlns:xs="http://www.w3.org/2001/XMLSchema"
  4. xmlns:Benford="https://pl.wikipedia.org/wiki/Rozkład_Benforda"
  5. xmlns:system_cache__dita="http://biuro.biall-net.pl/xmlschema_procesy5/default_db_xml_cache/dita.xsd"
  6. exclude-result-prefixes="xs"
  7. version="2.0">
  <!-- Templates for Benford's-law analysis (see https://pl.wikipedia.org/wiki/Rozkład_Benforda).
  Use the following example to construct the input table:
  10. <Benford:table>
  11. <xsl:attribute name="Benford:year" select="."/>
  12. <xsl:attribute name="Benford:subject" select="$Osoba"/>
  13. <xsl:for-each select="$BI_audit_ENERGA_RUM_UMOWY_fix//default_db:BI_audit_ENERGA_RUM_UMOWY[ lower-case(default_db:Osoba_merytoryczna___nazwisko_i_imie) = lower-case($Osoba) and Data_zawarcia.rok = $rok_cur and not(Wartosc = '0' )]">
  14. <Benford:value><xsl:value-of select="substring( Wartosc,1,1)"/></Benford:value>
  15. </xsl:for-each>
  16. </Benford:table>
  then, to analyze it:
  18. <xsl:if test="$Benford:table//Benford:value">
  19. <xsl:apply-templates mode="Benford:table_analys" select="$Benford:table"/>
  20. </xsl:if>
  21. expect:
  22. <Benford:table_analys_exceed Benford:year="2012"
  23. Benford:subject="Jan Npwak"
  24. Benford:digit="8"
  25. Benford:true.current.dig="5"
  26. Benford:current.dig.ratio="16"
  27. Benford:current.dig.test.fault="11"
  28. Benford:table_analys.dig.all.sum="37"/>
  29. or:
  30. <Benford:table Benford:year="2011"
  31. Benford:subject="Jan Nowak"
  32. Benford:use_wider_period="#41 $Benford:table_analys not complete all numnbers used - use bigger period">
  33. <Benford:value>8</Benford:value>
  34. <Benford:value>5</Benford:value>
  35. <Benford:value>0</Benford:value>
  36. <Benford:value>0</Benford:value>
  37. <Benford:value>6</Benford:value>
  38. <Benford:value>0</Benford:value>
  39. <Benford:value>1</Benford:value>
  40. <Benford:value>1</Benford:value>
  41. <Benford:value>6</Benford:value>
  42. <Benford:value>8</Benford:value>
  43. <Benford:value>8</Benford:value>
  44. <Benford:value>8</Benford:value>
  45. <Benford:value>9</Benford:value>
  46. <Benford:value>0</Benford:value>
  47. <Benford:value>0</Benford:value>
  48. <Benford:value>6</Benford:value>
  49. <Benford:value>0</Benford:value>
  50. <Benford:value>0</Benford:value>
  51. <Benford:value>0</Benford:value>
  52. <Benford:value>0</Benford:value>
  53. <Benford:value>0</Benford:value>
  54. </Benford:table>
  which means the data should be re-analyzed over a wider period - TODO: add an example.
  56. -->
  <!-- Largest tolerated absolute difference, in percentage points, between the observed
       share of a leading digit and its reference share in $Benford:true. A digit whose
       difference is greater than this value is reported by mode Benford:table_analys. -->
  <xsl:param name="Benford:true.tolerance" select="10"/>

  <!-- Reference share (in percent) of each leading digit 1..9.
       The values are written as xs:double literals inside attribute value templates,
       so each attribute holds the string form of that double. -->
  <xsl:variable name="Benford:true">
    <Benford:true>
      <Benford:table_analys.dig Benford:digit="1" Benford:digit.occurs="{30.1e0}"/>
      <Benford:table_analys.dig Benford:digit="2" Benford:digit.occurs="{17.6e0}"/>
      <Benford:table_analys.dig Benford:digit="3" Benford:digit.occurs="{12.5e0}"/>
      <Benford:table_analys.dig Benford:digit="4" Benford:digit.occurs="{9.7e0}"/>
      <Benford:table_analys.dig Benford:digit="5" Benford:digit.occurs="{7.9e0}"/>
      <Benford:table_analys.dig Benford:digit="6" Benford:digit.occurs="{6.7e0}"/>
      <Benford:table_analys.dig Benford:digit="7" Benford:digit.occurs="{5.8e0}"/>
      <Benford:table_analys.dig Benford:digit="8" Benford:digit.occurs="{5.1e0}"/>
      <Benford:table_analys.dig Benford:digit="9" Benford:digit.occurs="{4.6e0}"/>
    </Benford:true>
  </xsl:variable>
  <!--
    Mode Benford:table_analys: first-digit analysis of one Benford:table.

    Input:  a Benford:table element; every descendant Benford:value holds one leading digit.
    Output: one of
      * an XML comment followed by a copy of the table carrying an extra
        @Benford:use_wider_period attribute, when at least one of the digits 1..9
        does not occur at all in the table;
      * otherwise, one Benford:table_analys_exceed element for each digit whose observed
        share (in percent) differs from the reference share in $Benford:true by more than
        $Benford:true.tolerance. Nothing is emitted for digits within tolerance.
  -->
  <xsl:template match="Benford:table" mode="Benford:table_analys">
    <xsl:variable name="source.table" select="."/>

    <!-- Histogram: number of values per leading digit 1..9; the table's own attributes are carried along. -->
    <xsl:variable name="histogram">
      <Benford:table_analys>
        <xsl:copy-of select="@*"/>
        <xsl:for-each select="1 to 9">
          <xsl:variable name="digit" select="."/>
          <Benford:table_analys.dig Benford:digit="{$digit}"
                                    Benford:digit.occurs="{count($source.table//Benford:value[number(text()) = $digit])}"/>
        </xsl:for-each>
      </Benford:table_analys>
    </xsl:variable>
    <xsl:variable name="digit.rows" select="$histogram/Benford:table_analys/Benford:table_analys.dig"/>

    <xsl:choose>
      <!-- A digit with zero occurrences: hand the table back, flagged for a wider period. -->
      <xsl:when test="$digit.rows[@Benford:digit.occurs = 0]">
        <xsl:comment>#41 $Benford:table_analys not complete all numnbers used - use bigger period</xsl:comment>
        <Benford:table>
          <xsl:copy-of select="$source.table/@*"/>
          <xsl:attribute name="Benford:use_wider_period">#41 $Benford:table_analys not complete all numnbers used - use bigger period</xsl:attribute>
          <xsl:copy-of select="*"/>
        </Benford:table>
      </xsl:when>
      <xsl:otherwise>
        <!-- Total number of counted values (digits 1..9 only). -->
        <xsl:variable name="total" select="sum($digit.rows/@Benford:digit.occurs)"/>
        <xsl:for-each select="$digit.rows">
          <!-- Reference share for the current digit. -->
          <xsl:variable name="expected"
                        select="$Benford:true//Benford:table_analys.dig[number(@Benford:digit) = current()/number(@Benford:digit)]/@Benford:digit.occurs"/>
          <!-- Observed share of the current digit, in percent. -->
          <xsl:variable name="observed" select="(number(@Benford:digit.occurs) div number($total)) * 100"/>
          <!-- Signed deviation (observed minus expected) and its magnitude. -->
          <xsl:variable name="deviation.signed" select="number($observed) - number($expected)"/>
          <xsl:variable name="deviation" select="abs($deviation.signed)"/>
          <xsl:if test="$deviation &gt; number($Benford:true.tolerance)">
            <Benford:table_analys_exceed>
              <xsl:copy-of select="$source.table/@*"/>
              <xsl:attribute name="Benford:digit" select="@Benford:digit"/>
              <xsl:attribute name="Benford:true.current.dig" select="round($expected)"/>
              <xsl:attribute name="Benford:current.dig.ratio" select="round($observed)"/>
              <xsl:attribute name="Benford:current.dig.test.fault" select="round($deviation)"/>
              <xsl:attribute name="Benford:table_analys.dig.all.sum" select="$total"/>
              <!-- The signed deviation is stored unrounded. -->
              <xsl:attribute name="Benford:current.dig.test.fault.rel" select="$deviation.signed"/>
            </Benford:table_analys_exceed>
          </xsl:if>
        </xsl:for-each>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
  <!-- Mode system_cache__dita:topic, default rule: elements, text nodes and comments
       that have no more specific template in this mode produce no output. -->
  <xsl:template mode="system_cache__dita:topic" match="comment() | text() | *"/>
  <!--
    Mode system_cache__dita:topic: renders one Benford:table_analys_exceed finding as a DITA topic.

    Example input:
      <Benford:table_analys_exceed Benford:year="2012"
                                   Benford:subject="Jan Nowak"
                                   Benford:digit="8"
                                   Benford:true.current.dig="5"
                                   Benford:current.dig.ratio="16"
                                   Benford:current.dig.test.fault="11"
                                   Benford:table_analys.dig.all.sum="37"
                                   Benford:current.dig.test.fault.rel="-10"/>

    Parameters:
      Benford:subject.xml          records to report on, e.g.
                                   $BI_audit_ENERGA_RUM_UMOWY_fix//default_db:BI_audit_ENERGA_RUM_UMOWY
      Benford:subject.value        name of the child holding the amount, e.g. 'Wartosc'
      Benford:subject              name of the child holding the subject, e.g.
                                   'default_db:Osoba_merytoryczna___nazwisko_i_imie'
                                   (not read by the active code; only a disabled, stricter filter used it)
      Benford:year                 name of the child holding the year, e.g. 'Data_zawarcia.rok'
      Benford:subject.description  space-separated names of descendants to print per record, e.g.
                                   'default_db:Przedmiot_umowy default_db:Kontrahenci'
      topic.id, simpletable.id     when both are given (and the record parameters are not),
                                   a short topic linking to $topic.id is produced instead
      id                           id of the generated topic (defaults to a generated value)

    Output: a topic element. With record parameters it contains a title, summary paragraphs and a
    simpletable of all records whose amount starts with the reported digit. With topic.id and
    simpletable.id it contains a title and a cross reference.
    NOTE(review): when neither parameter pair is supplied the topic is emitted without title or
    body; confirm whether callers rely on that.

    Changes against the previous version:
      * the repeated record/amount selections are evaluated once and reused;
      * both percentage figures are guarded against a zero denominator (integer division by
        zero is a dynamic error in XPath 2.0);
      * the count percentage is rounded like the value percentage.
  -->
  <xsl:template match="Benford:table_analys_exceed" mode="system_cache__dita:topic">
    <xsl:param name="Benford:subject.xml"/>
    <xsl:param name="Benford:subject.value"/>
    <xsl:param name="Benford:subject"/>
    <xsl:param name="Benford:year"/>
    <xsl:param name="Benford:subject.description"/>
    <xsl:param name="topic.id"/>
    <xsl:param name="simpletable.id"/>
    <xsl:param name="id" select="concat('topic__Benford_table_analys_exceed',generate-id())"/>
    <xsl:variable name="Benford:table_analys_exceed" select="."/>
    <topic id="{$id}" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           Benford:digit="{@Benford:digit}"
           Benford:current.dig.test.fault="{@Benford:current.dig.test.fault}"
           Benford:current.dig.test.fault.rel="{@Benford:current.dig.test.fault.rel}"
           xsi:noNamespaceSchemaLocation="urn:oasis:names:tc:dita:xsd:topic.xsd">
      <xsl:message>#156 Wykryte anomalia Benford dla <xsl:value-of select="@Benford:subject"/> w <xsl:value-of select="@Benford:year"/> dla liczby <xsl:value-of select="@Benford:digit"/> przekroczone o <xsl:value-of select="@Benford:current.dig.test.fault"/>, liczone dla <xsl:value-of select="@Benford:table_analys.dig.all.sum"/> pozycji.</xsl:message>
      <xsl:choose>
        <xsl:when test="$Benford:subject.xml and $Benford:subject.value">
          <!-- The selections are declared inside this branch so that they are only
               evaluated when $Benford:subject.xml actually holds nodes. -->
          <!-- All amount nodes of the supplied records. -->
          <xsl:variable name="Benford:all.values"
                        select="$Benford:subject.xml/node()[name() = $Benford:subject.value]"/>
          <!-- Records whose amount starts with the reported digit. A stricter selection that also
               matched the subject and the year was disabled earlier; presumably the caller passes
               records that are already restricted to one subject and period (TODO confirm). -->
          <xsl:variable name="Benford:anomaly.rows"
                        select="$Benford:subject.xml[substring(node()[name() = $Benford:subject.value], 1, 1) = $Benford:table_analys_exceed/@Benford:digit]"/>
          <xsl:variable name="Benford:anomaly.values"
                        select="$Benford:anomaly.rows/node()[name() = $Benford:subject.value]"/>
          <xsl:variable name="Benford:all.sum" select="sum($Benford:all.values)"/>
          <xsl:variable name="Benford:anomaly.sum" select="sum($Benford:anomaly.values)"/>
          <xsl:variable name="Benford:all.count" select="count($Benford:all.values)"/>
          <xsl:variable name="Benford:anomaly.count" select="count($Benford:anomaly.values)"/>
          <title>Wykryte anomalia Benford dla <xsl:value-of select="@Benford:subject"/> w <xsl:value-of select="@Benford:year"/> dla liczby <xsl:value-of select="@Benford:digit"/> liczone dla <xsl:value-of select="@Benford:table_analys.dig.all.sum"/> pozycji.
            Wartość: <xsl:value-of select="$Benford:anomaly.sum div 1000000"/> [mln]
            <xsl:if test="number(@Benford:current.dig.test.fault.rel) &lt; 0"> - Za mało obiektów o <xsl:value-of select="@Benford:current.dig.test.fault"/> %</xsl:if>
            <xsl:if test="number(@Benford:current.dig.test.fault.rel) &gt; 0"> - Przekroczenie ilości obiektów o <xsl:value-of select="@Benford:current.dig.test.fault"/> %</xsl:if>
          </title>
          <body>
            <p>
              <xsl:value-of select="@Benford:true.current.dig"/> - stała rozkładu dla liczby <xsl:value-of select="@Benford:digit"/>.
            </p>
            <p>
              <xsl:value-of select="@Benford:current.dig.test.fault.rel"/> % - o ile za dużo / za mało jest danych
            </p>
            <p>
              <xsl:value-of select="round($Benford:all.sum div 1000000)"/> [mln] - ogólna wartość transakcji
            </p>
            <p>
              <xsl:value-of select="$Benford:all.count"/> - ogólna ilość transakcji
            </p>
            <p>
              <!-- Share of the anomalous amounts in the total amount; 0 when the total is 0. -->
              <xsl:value-of select="if ($Benford:all.sum = 0) then 0 else round($Benford:anomaly.sum div $Benford:all.sum * 100)"/> - % wartości trasnakcji z anomalią do ogółu (kwota)
            </p>
            <p>
              <!-- Share of the anomalous records in all records; 0 when there are no records. -->
              <xsl:value-of select="if ($Benford:all.count = 0) then 0 else round($Benford:anomaly.count div $Benford:all.count * 100)"/> - % wartości trasnakcji z anomalią do ogółu (ilość)
            </p>
            <simpletable frame="all" relcolwidth="8* 1.0* 2*" id="topic__Benford_table_analys_exceed_simpletable_{ generate-id()}">
              <sthead>
                <stentry>Dane obiektu </stentry>
                <stentry>Data</stentry>
                <stentry>Kwota</stentry>
              </sthead>
              <xsl:for-each select="$Benford:anomaly.rows">
                <strow>
                  <!-- Keep the record: the inner loop iterates over strings and changes the context item. -->
                  <xsl:variable name="current" select="current()"/>
                  <stentry>
                    <!-- One paragraph per requested description field: "name: value". -->
                    <xsl:for-each select="tokenize($Benford:subject.description,' ')">
                      <xsl:variable name="nname" select="."/>
                      <p><xsl:value-of select="$nname"/>: <xsl:value-of select="$current//node()[name()=$nname]"/></p>
                    </xsl:for-each>
                  </stentry>
                  <stentry><xsl:value-of select="$current/node()[name()=$Benford:year]"/></stentry>
                  <stentry><xsl:value-of select="$current/node()[name()=$Benford:subject.value]"/></stentry>
                </strow>
              </xsl:for-each>
            </simpletable>
          </body>
        </xsl:when>
        <xsl:when test="$topic.id and $simpletable.id">
          <title>Wykryte anomalia Benford dla <xsl:value-of select="@Benford:subject"/> w <xsl:value-of select="@Benford:year"/> dla liczby <xsl:value-of select="@Benford:digit"/> przekroczone o <xsl:value-of select="@Benford:current.dig.test.fault"/>, liczone dla <xsl:value-of select="@Benford:table_analys.dig.all.sum"/> pozycji.</title>
          <body>
            <!-- xref_2_topc is declared outside this file. -->
            <p><xsl:copy-of select="system_cache__dita:xref_2_topc($topic.id)"/></p>
          </body>
        </xsl:when>
      </xsl:choose>
    </topic>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map: a topichead emits nothing itself;
       processing continues with its child nodes in the same mode. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map" match="topichead">
    <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map"/>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map: logs every topic that is found and processes
       its child nodes in mode Benford:ditamap_root.embedded_map.topic, passing the topic's
       @id down as $topic.id. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map" match="topic">
    <xsl:message>
      <xsl:text>#197 found topic </xsl:text>
      <xsl:value-of select="@id"/>
    </xsl:message>
    <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map.topic">
      <xsl:with-param name="topic.id" select="@id"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map.topic: for body and p, switch to mode
       Benford:ditamap_root.embedded_map.topic.body for the child nodes and forward the
       required $topic.id unchanged. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.topic" match="p | body">
    <xsl:param name="topic.id" required="yes"/>
    <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map.topic.body">
      <xsl:with-param name="topic.id" select="$topic.id"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map.topic.body: for a simpletable, process its child
       nodes in mode Benford:ditamap_root.embedded_map.topic.body.simpletable, forwarding
       $topic.id and adding the table's own @id as $simpletable.id. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.topic.body" match="simpletable">
    <xsl:param name="topic.id" required="yes"/>
    <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map.topic.body.simpletable">
      <xsl:with-param name="simpletable.id" select="@id"/>
      <xsl:with-param name="topic.id" select="$topic.id"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map.topic.body.simpletable: rows and entries emit
       nothing themselves; their child nodes are processed in the same mode with both
       required ids forwarded. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.topic.body.simpletable" match="stentry | strow">
    <xsl:param name="topic.id" required="yes"/>
    <xsl:param name="simpletable.id" required="yes"/>
    <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map.topic.body.simpletable">
      <xsl:with-param name="simpletable.id" select="$simpletable.id"/>
      <xsl:with-param name="topic.id" select="$topic.id"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map.topic.body.simpletable: a finding located inside
       a simpletable is re-dispatched to mode system_cache__dita:topic together with the ids
       of the enclosing topic and simpletable. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.topic.body.simpletable" match="Benford:table_analys_exceed">
    <xsl:param name="topic.id" required="yes"/>
    <xsl:param name="simpletable.id" required="yes"/>
    <xsl:apply-templates select="self::node()" mode="system_cache__dita:topic">
      <xsl:with-param name="simpletable.id" select="$simpletable.id"/>
      <xsl:with-param name="topic.id" select="$topic.id"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map, fallback for any other element:
       emit nothing and continue with the child nodes in the same mode. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map" match="*">
    <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map"/>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map: text nodes and comments produce no output. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map" match="comment() | text()"/>
  <!-- Mode Benford:ditamap_root.embedded_map.body.topic.cleanup, generic rule: re-create the
       element under the same lexical name, copy its attributes and process the child nodes in
       the same mode.
       NOTE(review): xsl:element is used instead of xsl:copy, presumably to avoid carrying over
       the source element's namespace nodes; confirm before changing. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.body.topic.cleanup" match="*">
    <xsl:element name="{name(.)}">
      <xsl:copy-of select="attribute::*"/>
      <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map.body.topic.cleanup"/>
    </xsl:element>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map.body.topic.cleanup: a simpletable is re-created
       with its attributes; its child nodes are handled by the table-specific mode
       Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.body.topic.cleanup" match="simpletable">
    <xsl:element name="{name(.)}">
      <xsl:copy-of select="attribute::*"/>
      <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable"/>
    </xsl:element>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable: table structure
       elements (sthead, strow, stentry) and xref are re-created with their attributes and
       their child nodes are processed in the same mode. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable" match="sthead | strow | stentry | xref">
    <xsl:element name="{name(.)}">
      <xsl:copy-of select="attribute::*"/>
      <xsl:apply-templates select="node()" mode="Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable"/>
    </xsl:element>
  </xsl:template>
  <!-- Mode Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable: a topic nested in
       a table is replaced by the result of system_cache__dita:xref_2_topc (declared outside
       this file), called with the topic's @id and a label of the form
       "!(B<digit>/<rounded signed deviation>)". -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable" match="topic">
    <xsl:variable name="link.label"
                  select="concat('!(B',@Benford:digit,'/',round(@Benford:current.dig.test.fault.rel),')')"/>
    <xsl:copy-of select="system_cache__dita:xref_2_topc(@id, $link.label)"/>
  </xsl:template>
  268. <!--<xsl:template match="topic" mode="Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable"/>-->
  <!-- Mode Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable: the analysis
       elements themselves (Benford:table, Benford:table_analys_exceed, Benford:value)
       produce no output. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.body.topic.cleanup.simpletable"
                match="Benford:value | Benford:table_analys_exceed | Benford:table"/>
  <!-- Mode Benford:ditamap_root.embedded_map.body.topicref.topic: for any element, hand each
       descendant topic, one at a time, to mode system_cache__dita:xref.topic.result-document
       (its element templates are not part of this file). The inner apply-templates selects only
       the current topic, so the called template always sees a single-item context. -->
  <xsl:template mode="Benford:ditamap_root.embedded_map.body.topicref.topic" match="*">
    <xsl:for-each select="descendant::topic">
      <xsl:apply-templates mode="system_cache__dita:xref.topic.result-document" select="self::node()"/>
    </xsl:for-each>
  </xsl:template>
  <!-- Attribute suppression rules for modes whose other templates are not part of this file. -->

  <!-- NOTE(review): this mode name carries no Benford: prefix, unlike the other
       ditamap_root.* modes in this file; confirm that this is the intended mode. -->
  <xsl:template match="@Benford:value" mode="ditamap_root.embedded_map.body"/>

  <!-- The Benford bookkeeping attributes written on generated topics produce no output in
       mode system_cache__dita:xref.topic.result-document. -->
  <xsl:template match="@Benford:digit" mode="system_cache__dita:xref.topic.result-document"/>
  <xsl:template match="@Benford:current.dig.test.fault.rel | @Benford:current.dig.test.fault"
                mode="system_cache__dita:xref.topic.result-document"/>
  278. <!--<xsl:template mode="table_cut" match="@Benford:value" />-->
  279. </xsl:stylesheet>