{"id":128,"date":"2016-10-13T15:53:41","date_gmt":"2016-10-13T15:53:41","guid":{"rendered":"https:\/\/blog.diggernaut.ru\/?p=128"},"modified":"2019-01-11T06:36:56","modified_gmt":"2019-01-11T06:36:56","slug":"parser-s-zapisyu-v-subd","status":"publish","type":"post","link":"https:\/\/www.diggernaut.ru\/blog\/parser-s-zapisyu-v-subd\/","title":{"rendered":"\u041f\u0430\u0440\u0441\u0435\u0440 \u0441 \u0437\u0430\u043f\u0438\u0441\u044c\u044e \u0432 \u0421\u0423\u0411\u0414"},"content":{"rendered":"<p>\u0421\u0434\u0435\u043b\u0430\u0442\u044c \u043f\u0430\u0440\u0441\u0435\u0440 \u0441 \u0437\u0430\u043f\u0438\u0441\u044c\u044e \u0432 \u0421\u0423\u0411\u0414? \u042d\u0442\u043e \u043d\u0435 \u0442\u0430\u043a \u0443\u0436 \u0438 \u0441\u043b\u043e\u0436\u043d\u043e.<\/p>\n<p>\u0421\u0435\u0433\u043e\u0434\u043d\u044f \u044f \u043f\u043e\u043a\u0430\u0436\u0443 \u043a\u0430\u043a \u044d\u0442\u043e \u043b\u0435\u0433\u043a\u043e \u0438 \u043f\u0440\u043e\u0441\u0442\u043e \u0440\u0435\u0430\u043b\u0438\u0437\u0443\u0435\u0442\u0441\u044f \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u043d\u0430\u0448\u0435\u0433\u043e \u0441\u0435\u0440\u0432\u0438\u0441\u0430 \u043a\u043e\u043c\u043f\u0438\u043b\u044f\u0446\u0438\u0438.<\/p>\n<h3>\u042d\u0442\u0430\u043f \u043f\u0435\u0440\u0432\u044b\u0439: \u043f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043a\u0430.<\/h3>\n<p>\u041a\u043e\u043d\u0444\u0438\u0433 \u043f\u0430\u0440\u0441\u0435\u0440\u0430 \u0434\u043b\u044f \u0442\u0435\u0441\u0442\u043e\u0432. 
\u0412\u044b \u043c\u043e\u0436\u0435\u0442\u0435 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0435\u0433\u043e \u043a\u0430\u043a \u043f\u0440\u0438\u043c\u0435\u0440 \u0434\u043b\u044f \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f, \u043e\u043d \u043f\u043e\u043b\u043d\u043e\u0441\u0442\u044c\u044e \u0440\u0430\u0431\u043e\u0442\u043e\u0441\u043f\u043e\u0441\u043e\u0431\u0435\u043d \u0438 \u043d\u0435 \u043d\u0430\u0440\u0443\u0448\u0430\u0435\u0442 \u043d\u0438\u043a\u0430\u043a\u0438\u0445 ToS.<\/p>\n<pre class=\"language-yaml line-numbers\"><code class=\"language-yaml\">do:\n  - walk:\n      to: &#039;https:\/\/www.diggernaut.com\/sandbox\/&#039;\n      do:\n        - find:\n            path: &#039;div:nth-child(6) .result-content&#039;\n            do:\n              - find:\n                  path: h3\n                  do:\n                    - parse\n                    - variable_clear: name\n                    - variable_set: name\n              - find:\n                  path: p\n                  do:\n                    - parse\n                    - variable_clear: description\n                    - variable_set: description\n              - find:\n                  path: &#039;tbody > tr&#039;\n                  do:\n                    - parse\n                    - object_new: item\n                    - variable_get: name\n                    - object_field_set:\n                        object: item\n                        field: name\n                    - variable_get: description\n                    - object_field_set:\n                        object: item\n                        field: descr\n                    - object_new: date\n                    - find:\n                        path: &#039;.col5 > .nowrap:nth-child(1)&#039;\n                        do:\n                          - parse\n                          - object_field_set:\n                              object: date\n                       
       field: date\n                    - find:\n                        path: &#039;.col5 > .nowrap:nth-child(2)&#039;\n                        do:\n                          - parse\n                          - object_field_set:\n                              object: date\n                              field: date\n                              joinby: &quot; - &quot;\n                    - find:\n                        path: .col6\n                        do:\n                          - parse\n                          - normalize:\n                              routine: replace_substring\n                              args:\n                                  \\s*(\\d+)A: &#039;s&#039;   \n                          - object_field_set:\n                              object: date\n                              field: time\n                    - object_save:\n                        name: date\n                        to: item\n                    - object_save:\n                                        name: item<\/code><\/pre>\n<p>\u041d\u0430 \u0432\u044b\u0445\u043e\u0434\u0435 \u0438\u043c\u0435\u0435\u043c \u0432\u043e\u0442 \u0442\u0430\u043a\u0443\u044e \u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u0443.<\/p>\n<pre><code class=\"language-js\">item: [\n        {\n            &quot;name&quot;:&quot;&quot;,\n            &quot;descr&quot;:&quot;&quot;,\n            &quot;date&quot;: [\n                        {\n                            &quot;date&quot;:&quot;&quot;,\n                            &quot;time&quot;:&quot;&quot;\n                        }\n                        ...\n                        ...\n                    ]\n            },\n            ...\n            ...\n        ]\n<\/code><\/pre>\n<p>\u041f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u0438\u043c \u043d\u0430\u0448\u0438 \u0421\u0423\u0411\u0414. 
\u0417\u0430\u043f\u0438\u0441\u044c \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0438\u0432\u0430\u0435\u0442\u0441\u044f \u0434\u043b\u044f MS SQL, MYSQL, PostgreSQL.<br>\n\u0421\u043e\u0437\u0434\u0430\u0434\u0438\u043c \u0431\u0430\u0437\u0443 \u0434\u0430\u043d\u043d\u044b\u0445 \u0438 \u0442\u0430\u0431\u043b\u0438\u0446\u044b \u0434\u043b\u044f \u043d\u0430\u0448\u0438\u0445 \u0434\u0430\u043d\u043d\u044b\u0445.<\/p>\n<h3>MYSQL<\/h3>\n<pre><code class=\"language-sql\">    CREATE DATABASE `digger`;\n\n    CREATE TABLE `items` (\n        `id` INT(11) NOT NULL AUTO_INCREMENT,\n        `name` VARCHAR(50) NULL DEFAULT NULL,\n        `descr` TEXT NULL,\n        PRIMARY KEY (`id`)\n    )\n    COLLATE=&#039;utf8_general_ci&#039;\n    ENGINE=InnoDB;\n\n    CREATE TABLE `dates` (\n        `id` INT(11) NOT NULL AUTO_INCREMENT,\n        `item_id` INT(11) NOT NULL,\n        `date` VARCHAR(50) NULL DEFAULT NULL,\n        `time` VARCHAR(50) NULL DEFAULT NULL,\n        INDEX `Index 1` (`id`),\n        INDEX `FK__items` (`item_id`),\n        CONSTRAINT `FK__items` FOREIGN KEY (`item_id`) REFERENCES `items` (`id`) ON UPDATE NO ACTION ON DELETE NO ACTION\n        )\n    COLLATE=&#039;utf8_general_ci&#039;\n    ENGINE=InnoDB;\n<\/code><\/pre>\n<h3>MS SQL<\/h3>\n<pre><code class=\"language-sql\">    CREATE DATABASE [digger];\n\n    CREATE TABLE [items] (\n        id INT NOT NULL IDENTITY(1,1) CONSTRAINT pk_items_pid PRIMARY KEY,\n        [name] VARCHAR(50),\n        [descr] TEXT \n    );\n\n    CREATE TABLE [dates] (\n        id INT NOT NULL IDENTITY(1,1) CONSTRAINT pk_dates_pid PRIMARY KEY,\n        item_id int CONSTRAINT fk_item_id FOREIGN KEY(item_id) REFERENCES items(id),\n        date TEXT,\n        time TEXT \n    );\n<\/code><\/pre>\n<h3>PostgreSQL<\/h3>\n<pre><code class=\"language-sql\">    CREATE DATABASE digger;\n\n    CREATE TABLE items (\n        id SERIAL PRIMARY KEY,\n        name varchar(50),\n        descr text\n    );\n\n    CREATE TABLE dates (\n        id 
SERIAL PRIMARY KEY,\n        date varchar(50),\n        time varchar(50),\n        item_id integer REFERENCES items (id)\n    );\n<\/code><\/pre>\n<p>\u0422\u0435\u043f\u0435\u0440\u044c \u0441\u043a\u043e\u043c\u043f\u0438\u043b\u0438\u0440\u0443\u0435\u043c \u043d\u0430\u0448 \u043f\u0430\u0440\u0441\u0435\u0440.<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" width=\"116\" height=\"107\" src=\"https:\/\/blog.diggernaut.ru\/wp-content\/uploads\/2016\/10\/compile.jpg\" alt=\"compile\" class=\"alignnone size-full wp-image-130\"><\/p>\n<p><img loading=\"lazy\" decoding=\"async\" width=\"821\" height=\"618\" src=\"https:\/\/blog.diggernaut.ru\/wp-content\/uploads\/2016\/10\/compilemysql.jpg\" alt=\"compilemysql\" class=\"alignnone size-full wp-image-131\" srcset=\"https:\/\/www.diggernaut.ru\/blog\/wp-content\/uploads\/2016\/10\/compilemysql.jpg 821w, https:\/\/www.diggernaut.ru\/blog\/wp-content\/uploads\/2016\/10\/compilemysql-768x578.jpg 768w\" sizes=\"auto, (max-width: 821px) 100vw, 821px\" \/><\/p>\n<p><img loading=\"lazy\" decoding=\"async\" width=\"814\" height=\"618\" src=\"https:\/\/blog.diggernaut.ru\/wp-content\/uploads\/2016\/10\/compilemssql.jpg\" alt=\"compilemssql\" class=\"alignnone size-full wp-image-139\" srcset=\"https:\/\/www.diggernaut.ru\/blog\/wp-content\/uploads\/2016\/10\/compilemssql.jpg 814w, https:\/\/www.diggernaut.ru\/blog\/wp-content\/uploads\/2016\/10\/compilemssql-290x220.jpg 290w, https:\/\/www.diggernaut.ru\/blog\/wp-content\/uploads\/2016\/10\/compilemssql-768x583.jpg 768w\" sizes=\"auto, (max-width: 814px) 100vw, 814px\" \/><\/p>\n<p><img loading=\"lazy\" decoding=\"async\" width=\"804\" height=\"620\" src=\"https:\/\/blog.diggernaut.ru\/wp-content\/uploads\/2016\/10\/compilePostgre.jpg\" alt=\"compilepostgre\" class=\"alignnone size-full wp-image-132\" srcset=\"https:\/\/www.diggernaut.ru\/blog\/wp-content\/uploads\/2016\/10\/compilePostgre.jpg 804w, 
https:\/\/www.diggernaut.ru\/blog\/wp-content\/uploads\/2016\/10\/compilePostgre-768x592.jpg 768w\" sizes=\"auto, (max-width: 804px) 100vw, 804px\" \/><\/p>\n<p>\u041a\u0430\u043a \u0432\u044b \u043c\u043e\u0433\u043b\u0438 \u0437\u0430\u043c\u0435\u0442\u0438\u0442\u044c, \u0432\u0441\u0435 \u0434\u043e\u0441\u0442\u0430\u0442\u043e\u0447\u043d\u043e \u043f\u0440\u043e\u0441\u0442\u043e \u0438 \u043d\u0435 \u0442\u0440\u0435\u0431\u0443\u0435\u0442 \u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0445 \u0442\u0435\u043b\u043e\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u0439.<\/p>","protected":false},"excerpt":{"rendered":"<p>\u0421\u0434\u0435\u043b\u0430\u0442\u044c \u043f\u0430\u0440\u0441\u0435\u0440 \u0441 \u0437\u0430\u043f\u0438\u0441\u044c\u044e \u0432 \u0421\u0423\u0411\u0414? \u042d\u0442\u043e \u043d\u0435 \u0442\u0430\u043a \u0443\u0436 \u0438 \u0441\u043b\u043e\u0436\u043d\u043e. \u0421\u0435\u0433\u043e\u0434\u043d\u044f \u044f \u043f\u043e\u043a\u0430\u0436\u0443 \u043a\u0430\u043a \u044d\u0442\u043e \u043b\u0435\u0433\u043a\u043e \u0438 \u043f\u0440\u043e\u0441\u0442\u043e \u0440\u0435\u0430\u043b\u0438\u0437\u0443\u0435\u0442\u0441\u044f \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u043d\u0430\u0448\u0435\u0433\u043e \u0441\u0435\u0440\u0432\u0438\u0441\u0430 \u043a\u043e\u043c\u043f\u0438\u043b\u044f\u0446\u0438\u0438. \u042d\u0442\u0430\u043f \u043f\u0435\u0440\u0432\u044b\u0439: \u043f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043a\u0430. \u041a\u043e\u043d\u0444\u0438\u0433 \u043f\u0430\u0440\u0441\u0435\u0440\u0430 \u0434\u043b\u044f \u0442\u0435\u0441\u0442\u043e\u0432. 
\u0412\u044b \u043c\u043e\u0436\u0435\u0442\u0435 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0435\u0433\u043e \u043a\u0430\u043a \u043f\u0440\u0438\u043c\u0435\u0440 \u0434\u043b\u044f \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f, \u043e\u043d \u043f\u043e\u043b\u043d\u043e\u0441\u0442\u044c\u044e \u0440\u0430\u0431\u043e\u0442\u043e\u0441\u043f\u043e\u0441\u043e\u0431\u0435\u043d \u0438 \u043d\u0435 \u043d\u0430\u0440\u0443\u0448\u0430\u0435\u0442 \u043d\u0438\u043a\u0430\u043a\u0438\u0445 ToS. do: &#8212; walk: to: &#039;<a href=\"https:\/\/www.diggernaut.com\/sandbox\/\">https:\/\/www.diggernaut.com\/sandbox\/<\/a>&#039; do: [&hellip;]<\/p>","protected":false},"author":4,"featured_media":305,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[19,27],"tags":[],"class_list":["post-128","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-web-scraping","category-uchim-meta-jazyk"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/posts\/128","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/users\/4"}],"replies":[{"embeddable":true,"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/comments?post=128"}],"version-history":[{"count":20,"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/posts\/128\/revisions"}],"predecessor-version":[{"id":871,"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/posts\/128\/revisions\/871"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/media\/305"}],"wp:attachment":[{"href":"https:\/\/www.diggernaut.ru\/blo
g\/wp-json\/wp\/v2\/media?parent=128"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/categories?post=128"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.diggernaut.ru\/blog\/wp-json\/wp\/v2\/tags?post=128"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}